M&M ASH benchmark Part II

This is a continuation of Part I. Here I use only $R=2$ conditions and 1 causal SNP with PVE = 0.05, under simple singleton, identity, and fully shared patterns. The goal is to ensure that all computations are correct.

Conclusion

The results below seem to make sense, but still:

The power gain of shared over identity is minimal.
The top-hit rate (`top_rate`) is lower for shared than for identity, which is a bit puzzling.

./finemap.dsc --target sanity_check -o sanity_check

%cd ~/GIT/github/mnm-twas/dsc

/home/gaow/GIT/github/mnm-twas/dsc

# Query the `sanity_check` DSC output for the per-dataset susie scores,
# grouping the three sharing-pattern modules under `sharing_pattern`.
library("dscrutils")
query_targets <- "hundred_data.dataset sharing_pattern.n_signal susie_scores.total susie_scores.valid susie_scores.size susie_scores.purity susie_scores.top"
pattern_groups <- "sharing_pattern: singleton, identity, shared"
out <- dscquery("sanity_check", query_targets, groups = pattern_groups)

Loading dsc-query output from CSV file.
Reading DSC outputs:
 - sharing_pattern.n_signal: extracted atomic values
 - susie_scores.total: extracted atomic values
 - susie_scores.valid: extracted atomic values
 - susie_scores.size: extracted atomic values
 - susie_scores.purity: extracted atomic values
 - susie_scores.top: extracted atomic values

# Preview the raw query result before any type conversion.
head(out)

# Columns 4-9 are the queried score columns (n_signal, total, valid, size,
# purity, top). Coerce them to numeric one column at a time: sending the whole
# slice through as.matrix() would, if any of these columns were non-numeric,
# produce a character matrix in which the numeric columns have been passed
# through format() and so truncated to 7 significant digits.
score_cols <- 4:9
out[score_cols] <- lapply(out[score_cols], function(col) {
  # Factors must go through their labels, not their integer codes.
  if (is.factor(col)) as.numeric(as.character(col)) else as.numeric(col)
})

# Keep the sharing pattern (column 3) plus the six score columns, renamed to
# the short names used by the summaries below.
res <- out[, c(3, score_cols)]
colnames(res) <- c(
  "pattern", "total_true", "total", "valid", "size", "purity", "top_hit"
)

Purity of CS

# Mean CS purity within each sharing pattern.
aggregate(purity ~ pattern, data = res, FUN = mean)

Size of CS

# Median CS size within each sharing pattern.
aggregate(size ~ pattern, data = res, FUN = median)

Power

# Power per sharing pattern: summed `valid` divided by summed `total_true`.
valid <- aggregate(valid ~ pattern, data = res, FUN = sum)
total_true <- aggregate(total_true ~ pattern, data = res, FUN = sum)
power <- merge(valid, total_true, by = "pattern")
power[["power"]] <- with(power, valid / total_true)
power

FDR

# FDR per sharing pattern: the share of summed `total` that is not counted
# in summed `valid`.
valid <- aggregate(valid ~ pattern, data = res, FUN = sum)
total <- aggregate(total ~ pattern, data = res, FUN = sum)
fdr <- merge(valid, total, by = "pattern")
fdr[["fdr"]] <- with(fdr, (total - valid) / total)
fdr

Top-hit rate (how often the strongest SNP is causal)

# Top-hit rate per sharing pattern: summed `top_hit` divided by summed
# `total_true`.
top_hit <- aggregate(top_hit ~ pattern, data = res, FUN = sum)
total_true <- aggregate(total_true ~ pattern, data = res, FUN = sum)
top_rate <- merge(top_hit, total_true, by = "pattern")
top_rate[["top_rate"]] <- with(top_rate, top_hit / total_true)
top_rate

DSC	hundred_data.dataset	sharing_pattern	sharing_pattern.n_signal	susie_scores.total	susie_scores.valid	susie_scores.size	susie_scores.purity	susie_scores.top
1	~/Documents/GTExV8/Thyroid.Lung.FMO2.filled.rds	singleton	1	1	1	15	0.931293607944096	0
1	~/Documents/GTExV8/Toys/Thyroid.ENSG00000031823.RDS	singleton	1	1	1	10	0.916386609486197	1
1	~/Documents/GTExV8/Toys/Thyroid.ENSG00000062194.RDS	singleton	1	1	1	8	0.89922481268286	1
1	~/Documents/GTExV8/Toys/Thyroid.ENSG00000073150.RDS	singleton	1	1	1	17	0.965612490834539	1
1	~/Documents/GTExV8/Toys/Thyroid.ENSG00000078319.RDS	singleton	1	1	1	110	0.797004256892404	0
1	~/Documents/GTExV8/Toys/Thyroid.ENSG00000081277.RDS	singleton	1	1	1	13	0.810629508314749	0

pattern	valid	total_true	power
identity	97	100	0.97
shared	98	100	0.98
singleton	77	100	0.77

pattern	valid	total	fdr
identity	97	98	0.01020408
shared	98	101	0.02970297
singleton	77	81	0.04938272

pattern	top_hit	total_true	top_rate
identity	63	100	0.63
shared	55	100	0.55
singleton	30	100	0.30

pattern	purity
identity	0.9401603
shared	0.9442816
singleton	0.7290132