M&M benchmark XII¶

This benchmark is an improvement over the previous one, with mostly the same setup; hopefully the previously observed issues are now fixed.

The only major difference in the setting is that I removed the EZ model from the mvsusieR package, and thus also the DSC for it.

Conclusion¶

The corresponding DSC code is from ee50493 and can be reproduced as follows:

./finemap.dsc --host dsc_mnm.yml -o mnm_20191209

%cd ~/GIT/mvarbvs/dsc_mnm

/project2/mstephens/gaow/mvarbvs/dsc_mnm

# Query the DSC results stored in 'mnm_20191209'. The targets cover the
# simulation setting (number of traits), the `mnm` module and its parameters
# (residual method and L), and the `susie_scores` summary statistics.
# `module.output.files` additionally requests a column with the output file
# of each `susie_scores` run.
out = dscrutils::dscquery('mnm_20191209', targets = c('simulate.n_traits', 'mnm', 'mnm.resid_method', 'mnm.L', 
                                                      'susie_scores', 'susie_scores.total', 'susie_scores.valid', 'susie_scores.size', 
                                                      'susie_scores.purity', 'susie_scores.top', 'susie_scores.n_causal', 'susie_scores.included_causal', 
                                                      'susie_scores.overlap_var', 'susie_scores.overlap_cs','susie_scores.false_pos_cond_discoveries', 
                                                      'susie_scores.false_neg_cond_discoveries', 'susie_scores.true_cond_discoveries', 'susie_scores.converged'),
                          # Spell out FALSE: the shorthand `F` is an ordinary variable that can be reassigned.
                          module.output.files = "susie_scores", verbose = FALSE)

# Show the number of rows and columns of the query result.
dim(out)

# Write the query result to disk as an RDS file.
saveRDS(out, file = '../data/finemap_output.20191209.rds')

# Derive a logical `missing` flag from the name of the `mnm` module:
# TRUE for 'mnm_high_het_missing', FALSE for any other module name
# (here 'mnm_high_het'); NA module names stay NA.
# A single vectorised comparison keeps the column logical. Assigning
# TRUE/FALSE into a copy of the character column would instead coerce
# them to the strings "TRUE"/"FALSE".
out$missing <- out$mnm == 'mnm_high_het_missing'
# The module name is fully captured by `missing`, so drop it.
out$mnm <- NULL

head(out)

# Drop the first column of the query result and give the remaining
# columns short names. The names are assigned by position, so their order
# must match the column order of `out`.
res <- out[, -1]
names(res) <- c(
  'n_traits', 'resid_method', 'L',
  'total', 'valid', 'size', 'purity', 'top_hit',
  'total_true', 'total_true_included',
  'overlap_var', 'overlap_cs',
  'false_positive_cross_cond', 'false_negative_cross_cond', 'true_positive_cross_cond',
  'converged', 'filename', 'missing'
)

Purity of CS¶

# Mean purity for every combination of number of traits, residual method,
# missing-data flag and L, listed by increasing number of traits.
purity <- aggregate(purity ~ n_traits + resid_method + missing + L, data = res, FUN = mean)
purity <- purity[order(purity$n_traits), ]
purity

Power of CS¶

# Per-scenario sums of the quantities needed for the power table.
total_true_included <- aggregate(total_true_included ~ n_traits + resid_method + missing + L, data = res, FUN = sum)
total_true <- aggregate(total_true ~ n_traits + resid_method + missing + L, data = res, FUN = sum)
cs_overlap <- aggregate(overlap_cs ~ n_traits + resid_method + missing + L, data = res, FUN = sum)
snp_overlap <- aggregate(overlap_var ~ n_traits + resid_method + missing + L, data = res, FUN = sum)

# Join the four summaries on the scenario columns, one table at a time.
power <- Reduce(
  function(acc, tbl) merge(acc, tbl, by = c('n_traits', 'resid_method', 'missing', 'L')),
  list(total_true_included, total_true, cs_overlap, snp_overlap)
)

# Power is the fraction of true signals that were included.
power$power <- round(power$total_true_included / power$total_true, 3)
power$overlap_cs <- round(power$overlap_cs, 3)
power$overlap_var <- round(power$overlap_var, 3)

# Sort by missing flag, then L, then number of traits; ties keep the
# order produced by the merge.
power <- power[order(power$missing, power$L, power$n_traits), ]
power

FDR of CS no missing data¶

# Sum the `valid` and `total` counts within each scenario and join them.
valid <- aggregate(valid ~ n_traits + resid_method + missing + L, data = res, FUN = sum)
total <- aggregate(total ~ n_traits + resid_method + missing + L, data = res, FUN = sum)
fdr <- merge(valid, total, by = c('n_traits', 'resid_method', 'missing', 'L'))

# FDR is the fraction of the total that is not valid.
fdr$fdr <- with(fdr, round((total - valid) / total, 3))

# Keep only the scenarios without missing data, drop the now-constant
# `missing` column, and list by increasing number of traits.
fdr <- subset(fdr, missing == FALSE, select = -missing)
fdr <- fdr[order(fdr$n_traits), ]
fdr

FDR of CS with missing data¶

# Sum the `valid` and `total` counts within each scenario and join them.
valid <- aggregate(valid ~ n_traits + resid_method + missing + L, data = res, FUN = sum)
total <- aggregate(total ~ n_traits + resid_method + missing + L, data = res, FUN = sum)
fdr <- merge(valid, total, by = c('n_traits', 'resid_method', 'missing', 'L'))

# FDR is the fraction of the total that is not valid.
fdr$fdr <- with(fdr, round((total - valid) / total, 3))

# Keep only the scenarios with missing data, drop the now-constant
# `missing` column, and list by increasing number of traits.
fdr <- subset(fdr, missing == TRUE, select = -missing)
fdr <- fdr[order(fdr$n_traits), ]
fdr

Convergence¶

Based on ELBO. In principle all runs should converge by ELBO. If a run has not converged, it means either that the ELBO is not non-decreasing or that the run exceeded the maximum number of iterations.

It is only relevant to focus on $L>1$.

# Proportion of runs flagged as converged in each scenario.
elbo_converged <- aggregate(converged ~ n_traits + resid_method + missing + L, data = res, FUN = mean)

# Keep the scenarios without missing data and with L other than 1, and
# drop the `missing` column by name.
elbo_converged <- elbo_converged[
  which(elbo_converged$missing == FALSE & elbo_converged$L != 1),
  names(elbo_converged) != 'missing'
]
elbo_converged <- elbo_converged[order(elbo_converged$n_traits), ]
elbo_converged

However, if the ELBO were not increasing, we should see a warning. The fact that we don't see one (no stderr files are present in the DSC folder) means that 100 iterations were not enough for the ELBO to converge at our default tolerance level of 0.001.

Since for missing data we used convergence of PIP to stop the IBSS algorithm, let's check it:

# Proportion of runs flagged as converged in each scenario.
pip_converged <- aggregate(converged ~ n_traits + resid_method + missing + L, data = res, FUN = mean)

# Keep the scenarios with missing data and with L other than 1, and
# drop the `missing` column by name.
pip_converged <- pip_converged[
  which(pip_converged$missing == TRUE & pip_converged$L != 1),
  names(pip_converged) != 'missing'
]
pip_converged <- pip_converged[order(pip_converged$n_traits), ]
pip_converged

So all runs converge by PIP, if we don't check by ELBO.

	n_traits	resid_method	missing	L	purity
1	5	diag	FALSE	1	0.9950368
3	5	flash	FALSE	1	0.9936965
5	5	oracle	FALSE	1	0.9953622
7	5	diag	TRUE	1	0.9853424
9	5	flash	TRUE	1	0.9852914
11	5	diag	FALSE	2	0.9949696
13	5	flash	FALSE	2	0.9934708
15	5	oracle	FALSE	2	0.9950214
17	5	diag	TRUE	2	0.9853550
19	5	flash	TRUE	2	0.9853039
21	5	diag	FALSE	10	0.9949696
23	5	flash	FALSE	10	0.9934708
25	5	oracle	FALSE	10	0.9950214
27	5	diag	TRUE	10	0.9853550
29	5	flash	TRUE	10	0.9853039
2	45	diag	FALSE	1	0.9999438
4	45	flash	FALSE	1	0.9946040
6	45	oracle	FALSE	1	0.9999487
8	45	diag	TRUE	1	0.9989562
10	45	flash	TRUE	1	0.9959553
12	45	diag	FALSE	2	0.9999438
14	45	flash	FALSE	2	0.9946040
16	45	oracle	FALSE	2	0.9999487
18	45	diag	TRUE	2	0.9989562
20	45	flash	TRUE	2	0.9959553
22	45	diag	FALSE	10	0.9999438
24	45	flash	FALSE	10	0.9946040
26	45	oracle	FALSE	10	0.9999487
28	45	diag	TRUE	10	0.9989562
30	45	flash	TRUE	10	0.9959553

	n_traits	resid_method	missing	L	total_true_included	total_true	power
16	5	diag	FALSE	1	495	500	0.990
22	5	flash	FALSE	1	497	500	0.994
28	5	oracle	FALSE	1	495	500	0.990
1	45	diag	FALSE	1	500	500	1.000
7	45	flash	FALSE	1	498	500	0.996
13	45	oracle	FALSE	1	500	500	1.000
18	5	diag	FALSE	2	495	500	0.990
24	5	flash	FALSE	2	497	500	0.994
30	5	oracle	FALSE	2	495	500	0.990
3	45	diag	FALSE	2	500	500	1.000
9	45	flash	FALSE	2	498	500	0.996
15	45	oracle	FALSE	2	500	500	1.000
17	5	diag	FALSE	10	495	500	0.990
23	5	flash	FALSE	10	497	500	0.994
29	5	oracle	FALSE	10	495	500	0.990
2	45	diag	FALSE	10	500	500	1.000
8	45	flash	FALSE	10	498	500	0.996
14	45	oracle	FALSE	10	500	500	1.000
19	5	diag	TRUE	1	492	500	0.984
25	5	flash	TRUE	1	492	500	0.984
4	45	diag	TRUE	1	499	500	0.998
10	45	flash	TRUE	1	498	500	0.996
21	5	diag	TRUE	2	492	500	0.984
27	5	flash	TRUE	2	492	500	0.984
6	45	diag	TRUE	2	499	500	0.998
12	45	flash	TRUE	2	498	500	0.996
20	5	diag	TRUE	10	492	500	0.984
26	5	flash	TRUE	10	492	500	0.984
5	45	diag	TRUE	10	499	500	0.998
11	45	flash	TRUE	10	498	500	0.996

	n_traits	resid_method	L	valid	total	fdr
16	5	diag	1	495	500	0.010
17	5	diag	10	495	501	0.012
18	5	diag	2	495	501	0.012
22	5	flash	1	497	500	0.006
23	5	flash	10	497	501	0.008
24	5	flash	2	497	501	0.008
28	5	oracle	1	495	500	0.010
29	5	oracle	10	495	503	0.016
30	5	oracle	2	495	503	0.016
1	45	diag	1	500	500	0.000
2	45	diag	10	500	500	0.000
3	45	diag	2	500	500	0.000
7	45	flash	1	498	498	0.000
8	45	flash	10	498	498	0.000
9	45	flash	2	498	498	0.000
13	45	oracle	1	500	500	0.000
14	45	oracle	10	500	500	0.000
15	45	oracle	2	500	500	0.000

	n_traits	resid_method	L	valid	total	fdr
19	5	diag	1	492	500	0.016
20	5	diag	10	492	502	0.020
21	5	diag	2	492	502	0.020
25	5	flash	1	492	500	0.016
26	5	flash	10	492	502	0.020
27	5	flash	2	492	502	0.020
4	45	diag	1	499	500	0.002
5	45	diag	10	499	500	0.002
6	45	diag	2	499	500	0.002
10	45	flash	1	498	499	0.002
11	45	flash	10	498	499	0.002
12	45	flash	2	498	499	0.002

	n_traits	resid_method	L	converged
11	5	diag	2	0.870
13	5	flash	2	0.876
15	5	oracle	2	0.860
21	5	diag	10	0.870
23	5	flash	10	0.876
25	5	oracle	10	0.860
12	45	diag	2	0.904
14	45	flash	2	0.934
16	45	oracle	2	0.882
22	45	diag	10	0.904
24	45	flash	10	0.934
26	45	oracle	10	0.882

DSC	simulate.n_traits	mnm.resid_method	mnm.L	susie_scores.total	susie_scores.valid	susie_scores.size	susie_scores.purity	susie_scores.top	susie_scores.n_causal	susie_scores.included_causal	susie_scores.true_cond_discoveries	susie_scores.converged	susie_scores.output.file	missing
1	5	oracle	1	1	1	1	1.0000000	1	1	1	5	TRUE	susie_scores/full_data_1_high_het_1_oracle_generator_1_mnm_high_het_1_susie_scores_1	FALSE
1	5	oracle	1	1	1	1	1.0000000	1	1	1	5	TRUE	susie_scores/full_data_2_high_het_1_oracle_generator_1_mnm_high_het_1_susie_scores_1	FALSE
1	5	oracle	1	1	1	1	1.0000000	1	1	1	5	TRUE	susie_scores/full_data_3_high_het_1_oracle_generator_1_mnm_high_het_1_susie_scores_1	FALSE
1	5	oracle	1	1	1	12	0.9883042	0	1	1	5	TRUE	susie_scores/full_data_4_high_het_1_oracle_generator_1_mnm_high_het_1_susie_scores_1	FALSE
1	5	oracle	1	1	1	22	1.0000000	0	1	1	5	TRUE	susie_scores/full_data_5_high_het_1_oracle_generator_1_mnm_high_het_1_susie_scores_1	FALSE
1	5	oracle	1	1	1	1	1.0000000	1	1	1	5	FALSE	susie_scores/full_data_6_high_het_1_oracle_generator_1_mnm_high_het_1_susie_scores_1	FALSE