This is a continuation of Part V, where total PVE is set to 0.1 and I assume 1 or 2 causal variables per region. I added an evaluation of lfsr per condition.
The most important difference from previous simulations is that here I mix and match: data simulated under different prior assumptions are analyzed with different priors. I expect to observe that:
The benchmark was executed on UChicago Midway:
./finemap.dsc --host mnm_R5.yml --R 5 -c 12
This executes the default pipeline in the finemap.dsc file, as of today (2019.02.04).
%cd ~/GIT/github/mnm-twas/dsc
# Query the DSC benchmark results, time the query, cache the raw table and
# build the working data frame `res` used by the summaries below.
start_time <- Sys.time()
library("dscrutils")

# Targets to extract, listed one per line and collapsed into the single
# space-separated string that dscquery() receives.
query_targets <- paste(
  c(
    "sharing_pattern",
    "mnm.eff_mode",
    "susie_scores.total",
    "susie_scores.valid",
    "susie_scores.size",
    "susie_scores.purity",
    "susie_scores.top",
    "susie_scores.n_causal",
    "susie_scores.included_causal",
    "susie_scores.overlap",
    "susie_scores.false_pos_cond_discoveries",
    "susie_scores.false_neg_cond_discoveries",
    "susie_scores.true_cond_discoveries"
  ),
  collapse = " "
)

# Spell out TRUE/FALSE: `T` and `F` are ordinary variables that can be
# reassigned, whereas TRUE and FALSE are reserved words.
out <- dscquery(
  "finemap_output",
  query_targets,
  omit.file.columns = TRUE,
  verbose = FALSE
)
end_time <- Sys.time()
end_time - start_time
head(out)
dim(out)

# Cache the raw query result so it can be reloaded without re-querying.
saveRDS(out, "../data/finemap_output.query_result.rds")

# Keep column 2 and columns 4 through 15 and give them short names.
# NOTE(review): selection is positional and presumably follows the order of
# the query targets above -- confirm against head(out) if the query changes.
res <- out[, c(2, 4:15)]
colnames(res) <- c(
  "pattern",
  "method",
  "total",
  "valid",
  "size",
  "purity",
  "top_hit",
  "total_true",
  "total_true_included",
  "overlap",
  "false_positive_cross_cond",
  "false_negative_cross_cond",
  "true_positive_cross_cond"
)
# Mean purity for every pattern/method combination, followed by the
# average of those means within each method.
purity <- aggregate(purity ~ pattern + method, data = res, FUN = mean)
purity
aggregate(purity ~ method, data = purity, FUN = mean)

# Median size for every pattern/method combination, followed by the
# average of those medians within each method.
size <- aggregate(size ~ pattern + method, data = res, FUN = median)
size
aggregate(size ~ method, data = size, FUN = mean)
# Power per pattern/method: summed `total_true_included` divided by summed
# `total_true`. The mean `overlap` is carried along in the same table.
total_true_included <- aggregate(
  total_true_included ~ pattern + method,
  data = res,
  FUN = sum
)
total_true <- aggregate(total_true ~ pattern + method, data = res, FUN = sum)
overlap <- aggregate(overlap ~ pattern + method, data = res, FUN = mean)

# Join the three summaries left to right on the pattern/method keys.
power <- Reduce(
  function(left, right) merge(left, right, by = c("pattern", "method")),
  list(total_true_included, total_true, overlap)
)
power[["power"]] <- power[["total_true_included"]] / power[["total_true"]]

# Sort rows by method for display, then average power within each method.
power <- power[order(power[["method"]]), ]
power
aggregate(power ~ method, data = power, FUN = mean)
# FDR per pattern/method: the share of `total` that is not `valid`,
# computed from counts summed within each pattern/method combination.
valid <- aggregate(valid ~ pattern + method, data = res, FUN = sum)
total <- aggregate(total ~ pattern + method, data = res, FUN = sum)

fdr <- merge(valid, total, by = c("pattern", "method"))
fdr[["fdr"]] <- (fdr[["total"]] - fdr[["valid"]]) / fdr[["total"]]

# Sort rows by method for display, then average FDR within each method.
fdr <- fdr[order(fdr[["method"]]), ]
fdr
aggregate(fdr ~ method, data = fdr, FUN = mean)
We compute lfsr on a per-signal, per-condition basis. We call a signal in a condition if its lfsr is smaller than 0.05.
# Per-condition power for each pattern/method: TP / (TP + FN), using the
# cross-condition true-positive and false-negative counts summed per group.
tp <- aggregate(
  true_positive_cross_cond ~ pattern + method,
  data = res,
  FUN = sum
)
fn <- aggregate(
  false_negative_cross_cond ~ pattern + method,
  data = res,
  FUN = sum
)

power <- merge(tp, fn, by = c("pattern", "method"))
n_signals <- power[["true_positive_cross_cond"]] +
  power[["false_negative_cross_cond"]]
power[["power"]] <- power[["true_positive_cross_cond"]] / n_signals

# Sort rows by method for display, then average power within each method.
power <- power[order(power[["method"]]), ]
power
aggregate(power ~ method, data = power, FUN = mean)
# Per-condition FDR for each pattern/method: FP / (TP + FP), using the
# cross-condition true-positive and false-positive counts summed per group.
tp <- aggregate(
  true_positive_cross_cond ~ pattern + method,
  data = res,
  FUN = sum
)
fp <- aggregate(
  false_positive_cross_cond ~ pattern + method,
  data = res,
  FUN = sum
)

fdr <- merge(tp, fp, by = c("pattern", "method"))
n_discoveries <- fdr[["true_positive_cross_cond"]] +
  fdr[["false_positive_cross_cond"]]
fdr[["fdr"]] <- fdr[["false_positive_cross_cond"]] / n_discoveries

# Sort rows by method for display.
fdr <- fdr[order(fdr[["method"]]), ]
fdr
Total number of true discoveries over total number of signals to detect??