M&M ASH benchmark Part IV

This is a continuation of Part III. Instead of a single causal SNP with PVE = 0.05, each gene now has a varying number of causal SNPs: 50% of genes have one causal SNP, 30% have two, and 20% have three. The total PVE is set to 0.15.

Conclusion

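`identity` still performs better than `singleton` (higher power, with smaller and purer credible sets), but the result for `shared` does not make sense: it has the lowest power and the highest FDR of the three patterns.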

./finemap.dsc --target sanity_check -o sanity_check2 -c 39

%cd ~/GIT/github/mnm-twas/dsc

/home/gaow/GIT/github/mnm-twas/dsc

# Load the DSC query helpers, then pull the dataset path, the number of
# simulated signals and the susie_scores columns from the `sanity_check2`
# output, grouping the three sharing-pattern modules into a single
# `sharing_pattern` column.
library(dscrutils)
query_targets <- paste(
  "hundred_data.dataset",
  "sharing_pattern.n_signal",
  "susie_scores.total",
  "susie_scores.valid",
  "susie_scores.size",
  "susie_scores.purity",
  "susie_scores.top"
)
out <- dscquery(
  "sanity_check2",
  query_targets,
  groups = "sharing_pattern: singleton, identity, shared"
)

Loading dsc-query output from CSV file.
Reading DSC outputs:
 - sharing_pattern.n_signal: extracted atomic values
 - susie_scores.total: extracted atomic values
 - susie_scores.valid: extracted atomic values
 - susie_scores.size: extracted atomic values
 - susie_scores.purity: extracted atomic values
 - susie_scores.top: extracted atomic values

# Preview the first rows of the query result.
head(out)

# Score columns to coerce to numeric, selected by name so the result does not
# depend on the column order returned by dscquery.
score_cols <- c(
  "sharing_pattern.n_signal",
  "susie_scores.total",
  "susie_scores.valid",
  "susie_scores.size",
  "susie_scores.purity",
  "susie_scores.top"
)

# Convert one column at a time. Passing several columns through as.matrix()
# at once turns numeric columns into formatted strings whenever any of the
# columns is non-numeric, which can silently drop precision.
to_numeric <- function(x) {
  if (is.numeric(x)) as.numeric(x) else as.numeric(as.character(x))
}
out[score_cols] <- lapply(out[score_cols], to_numeric)

# Keep the sharing pattern plus the six scores under short names used by the
# summaries below.
res <- out[, c("sharing_pattern", score_cols)]
colnames(res) <- c(
  "pattern", "total_true", "total", "valid", "size", "purity", "top_hit"
)

Purity of CS

# Mean of the `purity` column within each sharing pattern.
aggregate(purity ~ pattern, data = res, FUN = mean)

Size of CS

# Median of the `size` column within each sharing pattern.
aggregate(size ~ pattern, data = res, FUN = median)

Power

# Power per pattern: summed `valid` divided by summed `total_true`.
valid <- aggregate(valid ~ pattern, data = res, FUN = sum)
total_true <- aggregate(total_true ~ pattern, data = res, FUN = sum)
power <- merge(x = valid, y = total_true, by = "pattern")
power[["power"]] <- power[["valid"]] / power[["total_true"]]
power

FDR

# FDR per pattern: share of the summed `total` that is not counted in `valid`.
valid <- aggregate(valid ~ pattern, data = res, FUN = sum)
total <- aggregate(total ~ pattern, data = res, FUN = sum)
fdr <- merge(x = valid, y = total, by = "pattern")
fdr[["fdr"]] <- (fdr[["total"]] - fdr[["valid"]]) / fdr[["total"]]
fdr

Top-hit rate (number of top hits relative to the total number of true signals)

# Top-hit rate per pattern: summed `top_hit` divided by summed `total_true`.
top_hit <- aggregate(top_hit ~ pattern, data = res, FUN = sum)
total_true <- aggregate(total_true ~ pattern, data = res, FUN = sum)
top_rate <- merge(x = top_hit, y = total_true, by = "pattern")
top_rate[["top_rate"]] <- top_rate[["top_hit"]] / top_rate[["total_true"]]
top_rate

pattern	valid	total_true	power
identity	146	173	0.8439306
shared	115	163	0.7055215
singleton	132	161	0.8198758

pattern	valid	total	fdr
identity	146	152	0.03947368
shared	115	123	0.06504065
singleton	132	135	0.02222222

pattern	top_hit	total_true	top_rate
identity	84	173	0.4855491
shared	55	163	0.3374233
singleton	68	161	0.4223602

DSC	hundred_data.dataset	sharing_pattern	sharing_pattern.n_signal	susie_scores.total	susie_scores.valid	susie_scores.size	susie_scores.purity	susie_scores.top
1	~/Documents/GTExV8/Thyroid.Lung.FMO2.filled.rds	singleton	2	2	2	15.5	0.977735077359261	1
1	~/Documents/GTExV8/Toys/Thyroid.ENSG00000031823.RDS	singleton	3	1	1	1	1	1
1	~/Documents/GTExV8/Toys/Thyroid.ENSG00000062194.RDS	singleton	1	1	1	26	0.95151813430003	0
1	~/Documents/GTExV8/Toys/Thyroid.ENSG00000073150.RDS	singleton	1	1	1	12	0.957423948447025	0
1	~/Documents/GTExV8/Toys/Thyroid.ENSG00000078319.RDS	singleton	2	2	2	2	1	2
1	~/Documents/GTExV8/Toys/Thyroid.ENSG00000081277.RDS	singleton	2	2	2	3.5	0.933894486138865	1

pattern	purity
identity	0.9767568
shared	0.9812358
singleton	0.9686577

pattern	size
identity	5.00
shared	5.25
singleton	7.00