This is a continuation of Part I where I use only $R=2$ conditions, 1 causal SNP of PVE = 0.05, with simple singleton, identity and fully shared patterns. The goal is to ensure all computations are correct.
Results below seem to make sense, but still:

- The difference for `shared` compared to `identity` is minimal.
- `top_hit_rate` is lower for `shared` compared to `identity`, which is a bit puzzling.

./finemap.dsc --target sanity_check -o sanity_check
%cd ~/GIT/github/mnm-twas/dsc
library('dscrutils')

# Query the `sanity_check` DSC output for the listed targets; the `groups`
# argument gathers the three sharing-pattern modules under `sharing_pattern`.
out <- dscquery(
  'sanity_check',
  "hundred_data.dataset sharing_pattern.n_signal susie_scores.total susie_scores.valid susie_scores.size susie_scores.purity susie_scores.top",
  groups = "sharing_pattern: singleton, identity, shared"
)
head(out)

# Columns 4-9 are the score columns that get renamed below (selected by
# position, so this assumes the query returns them in the order requested).
# Each column is coerced to numeric on its own: routing the whole slice through
# as.matrix() would, whenever any one column is non-numeric, pass the numeric
# columns through format() and silently round them to 7 significant digits.
score_cols <- 4:9
out[score_cols] <- lapply(out[score_cols], function(x) {
  if (is.numeric(x) || is.logical(x)) {
    as.numeric(x)
  } else {
    # Factors and character columns: convert via their text representation.
    as.numeric(as.character(x))
  }
})

# Keep the grouping column (3) plus the six score columns under short names.
res <- out[, c(3, score_cols)]
colnames(res) <- c('pattern', 'total_true', 'total', 'valid', 'size', 'purity', 'top_hit')

# Per-pattern summaries of purity (mean) and size (median).
aggregate(purity ~ pattern, res, mean)
aggregate(size ~ pattern, res, median)

# Per-pattern sums; each is computed once and reused by the tables below.
valid <- aggregate(valid ~ pattern, res, sum)
total_true <- aggregate(total_true ~ pattern, res, sum)
total <- aggregate(total ~ pattern, res, sum)
top_hit <- aggregate(top_hit ~ pattern, res, sum)

# Power: summed `valid` over summed `total_true`, per pattern.
power <- merge(valid, total_true, by = "pattern")
power$power <- power$valid / power$total_true
power

# FDR: share of the summed `total` that is not counted in `valid`, per pattern.
fdr <- merge(valid, total, by = "pattern")
fdr$fdr <- (fdr$total - fdr$valid) / fdr$total
fdr

# Top-hit rate: summed `top_hit` over summed `total_true`, per pattern.
top_rate <- merge(top_hit, total_true, by = "pattern")
top_rate$top_rate <- top_rate$top_hit / top_rate$total_true
top_rate