This benchmark is an improvement over the previous one in the following respects:
L=2
and a lower power setting compared to the previous simulation. The corresponding DSC code is from c5d75a5
and can be reproduced as follows:
./finemap.dsc --host dsc_mnm.yml -o mnm_20191116
%cd ~/GIT/mvarbvs/dsc_mnm
# Query the DSC results in `mnm_20191116`: the simulation / analysis settings
# and the `susie_scores` summary values, plus the `susie_scores` output file.
out <- dscrutils::dscquery(
  "mnm_20191116",
  targets = c(
    "simulate.n_traits", "mnm.resid_method", "mnm.missing_Y", "mnm.alpha",
    "mnm.L", "susie_scores", "susie_scores.total", "susie_scores.valid",
    "susie_scores.size", "susie_scores.purity", "susie_scores.top",
    "susie_scores.n_causal", "susie_scores.included_causal",
    "susie_scores.overlap_var", "susie_scores.overlap_cs",
    "susie_scores.false_pos_cond_discoveries",
    "susie_scores.false_neg_cond_discoveries",
    "susie_scores.true_cond_discoveries", "susie_scores.converged"
  ),
  module.output.files = "susie_scores",
  # Spell out FALSE: `F` is an ordinary variable that can be reassigned.
  verbose = FALSE
)
head(out)
dim(out)
# Cache the raw query result so later analyses need not re-run the query.
saveRDS(out, "../data/finemap_output.20191116.rds")
# Drop the first column of the query result (presumably the DSC replicate
# index -- confirm) and give the remaining columns short names.
# NOTE(review): the renaming is positional, so it silently depends on the
# column order returned by dscquery -- verify with head(out) above.
res <- out[, -1]
colnames(res) <- c(
  "n_traits", "resid_method", "missing", "EZ_model", "L", "total", "valid",
  "size", "purity", "top_hit", "total_true", "total_true_included",
  "overlap_var", "overlap_cs", "false_positive_cross_cond",
  "false_negative_cross_cond", "true_positive_cross_cond", "elbo_converged",
  "filename"
)
Purity is higher for $R=45$ simply due to higher power, since there is no FDR issue in this simulation.
# Mean purity within each combination of simulation / analysis settings.
purity <- aggregate(
  purity ~ n_traits + resid_method + missing + EZ_model + L,
  data = res,
  FUN = mean
)
# Keep the rows with missing == FALSE; column 3 is `missing`, which is
# constant after this filter and therefore dropped.
purity <- purity[which(purity[["missing"]] == FALSE), -3]
# Present the table by increasing number of traits.
purity <- purity[order(purity[["n_traits"]]), ]
purity
We focus on $L = 2$ to evaluate the overlap between CS. In this case there are still overlaps between CS, but not as many as with $L=10$. The overlap gets worse as $R$ increases.
# Sum the per-row counts within each combination of settings.
total_true_included <- aggregate(
  total_true_included ~ n_traits + resid_method + missing + EZ_model + L,
  data = res,
  FUN = sum
)
total_true <- aggregate(
  total_true ~ n_traits + resid_method + missing + EZ_model + L,
  data = res,
  FUN = sum
)
cs_overlap <- aggregate(
  overlap_cs ~ n_traits + resid_method + missing + EZ_model + L,
  data = res,
  FUN = sum
)
snp_overlap <- aggregate(
  overlap_var ~ n_traits + resid_method + missing + EZ_model + L,
  data = res,
  FUN = sum
)
# Join the four summaries, left to right, on the shared setting columns.
power <- Reduce(
  function(lhs, rhs) {
    merge(
      lhs, rhs,
      by = c("n_traits", "resid_method", "missing", "EZ_model", "L")
    )
  },
  list(total_true_included, total_true, cs_overlap, snp_overlap)
)
# Power = true signals included / true signals simulated.
power[["power"]] <- round(
  power[["total_true_included"]] / power[["total_true"]], 3
)
power[["overlap_cs"]] <- round(power[["overlap_cs"]], 3)
power[["overlap_var"]] <- round(power[["overlap_var"]], 3)
# Keep the rows with missing == FALSE and drop the `missing` column (3rd).
power <- power[which(power[["missing"]] == FALSE), -3]
# order() keeps ties in their existing order, so one multi-key call gives the
# same result as sorting by n_traits, then by L, then by EZ_model in turn.
power <- power[
  order(power[["EZ_model"]], power[["L"]], power[["n_traits"]]),
]
# Left disabled: the `missing` column has already been dropped above.
#power = power[order(power$missing),]
power
# Sum the `valid` and `total` counts within each combination of settings.
valid <- aggregate(
  valid ~ n_traits + resid_method + missing + EZ_model + L,
  data = res,
  FUN = sum
)
total <- aggregate(
  total ~ n_traits + resid_method + missing + EZ_model + L,
  data = res,
  FUN = sum
)
fdr <- merge(
  valid, total,
  by = c("n_traits", "resid_method", "missing", "EZ_model", "L")
)
# FDR = share of `total` that is not `valid`.
fdr[["fdr"]] <- round(
  (fdr[["total"]] - fdr[["valid"]]) / fdr[["total"]], 3
)
# Keep the rows with missing == FALSE and drop the `missing` column (3rd).
fdr <- fdr[which(fdr[["missing"]] == FALSE), -3]
fdr <- fdr[order(fdr[["n_traits"]]), ]
fdr
# Sum the `valid` and `total` counts within each combination of settings.
valid <- aggregate(
  valid ~ n_traits + resid_method + missing + EZ_model + L,
  data = res,
  FUN = sum
)
total <- aggregate(
  total ~ n_traits + resid_method + missing + EZ_model + L,
  data = res,
  FUN = sum
)
fdr <- merge(
  valid, total,
  by = c("n_traits", "resid_method", "missing", "EZ_model", "L")
)
# FDR = share of `total` that is not `valid`.
fdr[["fdr"]] <- round(
  (fdr[["total"]] - fdr[["valid"]]) / fdr[["total"]], 3
)
# This time keep the rows with missing == TRUE; drop the `missing` column (3rd).
fdr <- fdr[which(fdr[["missing"]] == TRUE), -3]
fdr <- fdr[order(fdr[["n_traits"]]), ]
fdr
Convergence is assessed based on the ELBO. In principle all runs should converge by ELBO. If a run has not converged, it means the ELBO is not non-decreasing.
It is only relevant to focus on $L>1$. Without missing data, the runs do converge with respect to the ELBO.
# Mean of `elbo_converged` within each combination of settings (presumably a
# logical / 0-1 flag, making this the fraction of converged runs -- confirm).
elbo_converged <- aggregate(
  elbo_converged ~ n_traits + resid_method + missing + EZ_model + L,
  data = res,
  FUN = mean
)
#elbo_converged = elbo_converged[which(elbo_converged$missing==FALSE),-3]
# Exclude the L == 1 settings, then present by increasing number of traits.
elbo_converged <- elbo_converged[which(elbo_converged[["L"]] != 1), ]
elbo_converged <- elbo_converged[order(elbo_converged[["n_traits"]]), ]
elbo_converged
The convergence issue for the EE model: these runs still have an increasing ELBO, but the model did not converge after 100 iterations (the ELBO was still increasing!).