This is a continuation of Part III. Instead of a single causal SNP with PVE = 0.05, each gene now has a varying number of causal SNPs: 50% of genes have one causal SNP, 30% have two, and 20% have three. The total PVE is set to 0.15.
`identity` is still better than `singleton`, but the result for `shared` does not make sense.
# Run the `sanity_check` target of finemap.dsc, writing results to `sanity_check2`
# (the directory the R query further down reads from).
# NOTE(review): `-c 39` is presumably the number of parallel workers — confirm against the DSC CLI help.
./finemap.dsc --target sanity_check -o sanity_check2 -c 39
%cd ~/GIT/github/mnm-twas/dsc
# Query the DSC output in `sanity_check2` for the SuSiE score columns, grouped
# over the three sharing patterns (singleton, identity, shared).
library("dscrutils")
out <- dscquery(
  "sanity_check2",
  "hundred_data.dataset sharing_pattern.n_signal susie_scores.total susie_scores.valid susie_scores.size susie_scores.purity susie_scores.top",
  groups = "sharing_pattern: singleton, identity, shared"
)
head(out)

# Columns 4-9 are the score columns. Convert them one column at a time instead
# of round-tripping all six through a single matrix: as.matrix() on a data
# frame with any non-numeric column yields a character matrix in which the
# numeric columns have been passed through format() (7 significant digits by
# default), which silently rounds values. Factors are converted through their
# labels so the level codes are never mistaken for data.
# NOTE(review): positions 3-9 assume the column order returned by dscquery for
# this query — confirm with the head(out) output above.
score_cols <- 4:9
out[score_cols] <- lapply(out[score_cols], function(column) {
  if (is.factor(column)) {
    as.numeric(as.character(column))
  } else {
    as.numeric(column)
  }
})

# Keep the grouping column (3) plus the six score columns and give them short
# names used by the summaries that follow.
res <- out[, 3:9]
colnames(res) <- c(
  "pattern", "total_true", "total", "valid", "size", "purity", "top_hit"
)
# Mean of `purity` within each sharing pattern.
aggregate(purity ~ pattern, data = res, FUN = mean)
# Median of `size` within each sharing pattern.
aggregate(size ~ pattern, data = res, FUN = median)
# Power per sharing pattern: summed `valid` divided by summed `total_true`.
valid <- aggregate(valid ~ pattern, data = res, FUN = sum)
total_true <- aggregate(total_true ~ pattern, data = res, FUN = sum)
# Join the two per-pattern totals on the pattern label, then take the ratio.
power <- merge(valid, total_true, by = "pattern")
power[["power"]] <- with(power, valid / total_true)
power
# False discovery rate per sharing pattern: the share of the summed `total`
# that is not accounted for by the summed `valid`.
valid <- aggregate(valid ~ pattern, data = res, FUN = sum)
total <- aggregate(total ~ pattern, data = res, FUN = sum)
# Join the two per-pattern totals on the pattern label, then compute the rate.
fdr <- merge(valid, total, by = "pattern")
fdr[["fdr"]] <- with(fdr, (total - valid) / total)
fdr
# Top-hit rate per sharing pattern: summed `top_hit` divided by summed
# `total_true`.
top_hit <- aggregate(top_hit ~ pattern, data = res, FUN = sum)
total_true <- aggregate(total_true ~ pattern, data = res, FUN = sum)
# Join the two per-pattern totals on the pattern label, then take the ratio.
top_rate <- merge(top_hit, total_true, by = "pattern")
top_rate[["top_rate"]] <- top_rate[["top_hit"]] / top_rate[["total_true"]]
top_rate