This is benchmark using a non-trivial simulation scheme, and analyzing it using our current implementation of EM updates to prior scalar. See this notebook for details how it is executed.
Should we add in susieR
as a comparison and if so how to compare PIP fairly?
%cd ~/GIT/mvarbvs/dsc/mnm_prototype
dat = dscrutils::dscquery('mnm_20200430', targets = c('simulate', "mnm", "mnm.resid_method",
'susie_scores', 'susie_scores.total', 'susie_scores.valid', 'susie_scores.size',
'susie_scores.purity', 'susie_scores.top', 'susie_scores.n_causal', 'susie_scores.included_causal',
'susie_scores.overlap_var', 'susie_scores.overlap_cs','susie_scores.false_pos_cond_discoveries',
'susie_scores.false_neg_cond_discoveries', 'susie_scores.true_cond_discoveries', 'susie_scores.converged'),
module.output.files = "susie_scores", verbose = F)
dat$method = NA
dat$method[which(!is.na(dat$mnm.resid_method))] = paste(dat$mnm[which(!is.na(dat$mnm.resid_method))], paste(dat$mnm.resid_method[which(!is.na(dat$mnm.resid_method))], "residual", sep = '_'), sep = '+')
dat$method[which(is.na(dat$mnm.resid_method))] = dat$mnm[which(is.na(dat$mnm.resid_method))]
dim(dat)
saveRDS(dat, '../data/finemap_output.20200430.rds')
head(dat)
res = dat[,-c(1,3,4)]
colnames(res) = c('scenario', 'total', 'valid', 'size', 'purity', 'top_hit', 'total_true', 'total_true_included', 'overlap_var', 'overlap_cs', 'false_positive_cross_cond', 'false_negative_cross_cond', 'true_positive_cross_cond', 'converged', 'filename', 'method')
purity = aggregate(purity~scenario + method, res, mean)
purity = purity[order(purity$scenario),]
purity
total_true_included = aggregate(total_true_included ~ scenario + method, res, sum)
total_true = aggregate(total_true ~ scenario + method, res, sum)
cs_overlap = aggregate(overlap_cs ~ scenario + method, res, sum)
snp_overlap = aggregate(overlap_var ~ scenario + method, res, sum)
power = merge(total_true_included, total_true, by = c( 'scenario' , 'method'))
power = merge(power, cs_overlap, by = c( 'scenario' , 'method'))
power = merge(power, snp_overlap, by = c( 'scenario' , 'method'))
power$power = round(power$total_true_included/power$total_true,3)
power$overlap_cs = round(power$overlap_cs, 3)
power$overlap_var = round(power$overlap_var, 3)
power = power[order(power$scenario),]
power
valid = aggregate(valid ~ scenario + method, res, sum)
total = aggregate(total ~ scenario + method, res, sum)
fdr = merge(valid, total, by = c( 'scenario' , 'method'))
fdr$fdr = round((fdr$total - fdr$valid)/fdr$total,3)
fdr = fdr[order(fdr$scenario),]
fdr
Based on ELBO. In principle all runs should converge by ELBO. If it is not converged, then it means either ELBO is not non-increasing or it exceeds max iteration.
elbo_converged = aggregate(converged~scenario + method, res, mean)
elbo_converged = elbo_converged[order(elbo_converged$scenario),]
elbo_converged