This benchmark is an improvments over the previous one with mostly the same setup but hopefully previously observed issues are fixed.
The only major difference in setting is that I removed EZ model from mvsusieR
package thus the DSC for it.
The corresponding DSC code are from ee50493
and to be reproduced as follows:
./finemap.dsc --host dsc_mnm.yml -o mnm_20191209
%cd ~/GIT/mvarbvs/dsc_mnm
out = dscrutils::dscquery('mnm_20191209', targets = c('simulate.n_traits', 'mnm', 'mnm.resid_method', 'mnm.L',
'susie_scores', 'susie_scores.total', 'susie_scores.valid', 'susie_scores.size',
'susie_scores.purity', 'susie_scores.top', 'susie_scores.n_causal', 'susie_scores.included_causal',
'susie_scores.overlap_var', 'susie_scores.overlap_cs','susie_scores.false_pos_cond_discoveries',
'susie_scores.false_neg_cond_discoveries', 'susie_scores.true_cond_discoveries', 'susie_scores.converged'),
module.output.files = "susie_scores", verbose = F)
dim(out)
saveRDS(out, '../data/finemap_output.20191209.rds')
out$missing = out$mnm
out$missing[which(out$missing=='mnm_high_het')] = FALSE
out$missing[which(out$missing=='mnm_high_het_missing')] = TRUE
out$mnm = NULL
head(out)
res = out[,-1]
colnames(res) = c('n_traits', 'resid_method', 'L', 'total', 'valid', 'size', 'purity', 'top_hit', 'total_true', 'total_true_included', 'overlap_var', 'overlap_cs', 'false_positive_cross_cond', 'false_negative_cross_cond', 'true_positive_cross_cond', 'converged', 'filename', 'missing')
purity = aggregate(purity~n_traits + resid_method + missing + L, res, mean)
purity = purity[order(purity$n_traits),]
purity
total_true_included = aggregate(total_true_included ~ n_traits + resid_method + missing + L, res, sum)
total_true = aggregate(total_true ~ n_traits + resid_method + missing + L, res, sum)
cs_overlap = aggregate(overlap_cs ~ n_traits + resid_method + missing + L, res, sum)
snp_overlap = aggregate(overlap_var ~ n_traits + resid_method + missing + L, res, sum)
power = merge(total_true_included, total_true, by = c( 'n_traits' , 'resid_method' , 'missing' , 'L'))
power = merge(power, cs_overlap, by = c( 'n_traits' , 'resid_method' , 'missing' , 'L'))
power = merge(power, snp_overlap, by = c( 'n_traits' , 'resid_method' , 'missing' , 'L'))
power$power = round(power$total_true_included/power$total_true,3)
power$overlap_cs = round(power$overlap_cs, 3)
power$overlap_var = round(power$overlap_var, 3)
power = power[order(power$n_traits),]
power = power[order(power$L),]
power = power[order(power$missing),]
power
valid = aggregate(valid ~ n_traits + resid_method + missing + L, res, sum)
total = aggregate(total ~ n_traits + resid_method + missing + L, res, sum)
fdr = merge(valid, total, by = c( 'n_traits' , 'resid_method' , 'missing' , 'L'))
fdr$fdr = round((fdr$total - fdr$valid)/fdr$total,3)
fdr = fdr[which(fdr$missing==FALSE),-3]
fdr = fdr[order(fdr$n_traits),]
fdr
valid = aggregate(valid ~ n_traits + resid_method + missing + L, res, sum)
total = aggregate(total ~ n_traits + resid_method + missing + L, res, sum)
fdr = merge(valid, total, by = c( 'n_traits' , 'resid_method' , 'missing' , 'L'))
fdr$fdr = round((fdr$total - fdr$valid)/fdr$total,3)
fdr = fdr[which(fdr$missing==TRUE),-3]
fdr = fdr[order(fdr$n_traits),]
fdr
Based on ELBO. In principle all runs should converge by ELBO. If it is not converged, then it means either ELBO is not non-increasing or it exceeds max iteration.
It is only relevant to focus on $L>1$.
elbo_converged = aggregate(converged~n_traits + resid_method + missing + L, res, mean)
elbo_converged = elbo_converged[which(elbo_converged$missing==FALSE),-3]
elbo_converged = elbo_converged[which(elbo_converged$L!=1),]
elbo_converged = elbo_converged[order(elbo_converged$n_traits),]
elbo_converged
However if ELBO is not increasing then we should see a warning. The fact we dont see it (no stderr
files present in the DSC folder) means 100 iterations were not good enough for ELBO to converge at our default tolerance level 0.001.
Since for missing data we used convergence of PIP to stop the IBSS algorithm, let's check it:
pip_converged = aggregate(converged~n_traits + resid_method + missing + L, res, mean)
pip_converged = pip_converged[which(pip_converged$missing==TRUE),-3]
pip_converged = pip_converged[which(pip_converged$L!=1),]
pip_converged = pip_converged[order(pip_converged$n_traits),]
pip_converged
So it all converges by PIP, if we dont check by ELBO.