Here I pick up a particular data-set and make a specific simulation case of linkage vs pleiotropy.
See here for motivation of the analysis.
%cd /home/gaow/tmp/13-May-2019
genotype = readRDS('Multi_Tissues.ENSG00000089486.RDS')$X
ld = cor(genotype)
From MASH paper the variants in question are:
"4","ENSG00000089486.12","16_4525265_C_T_b37","16_4594671_C_T_b37"
Let's check them out. First liftOver here
chr16:4525264-4525265
chr16:4594670-4594671
to
chr16:4475263-4475264
chr16:4544669-4544670
which is chr16_4475264_C_T_b38
and chr16_4544670_C_T_b38
ld["chr16_4475264_C_T_b38", "chr16_4544670_C_T_b38"]
snp1 = which(rownames(ld) == "chr16_4475264_C_T_b38")
snp2 = which(rownames(ld) == "chr16_4544670_C_T_b38")
apply(genotype,2,sum)[snp1] / nrow(genotype) / 2
apply(genotype,2,sum)[snp2] / nrow(genotype) / 2
P = ncol(genotype)
R = 6
eff_factor = 1.2
set.seed(1)
b = matrix(0, P, R)
sharing = matrix(0.75, 3, 3)
diag(sharing) = 1
b[snp1, 1:3] = abs(MASS::mvrnorm(1, rep(0,3), sharing)) / eff_factor
b[snp2, 4:6] = abs(MASS::mvrnorm(1, rep(0,3), sharing)) / eff_factor
print(b[snp1, 1:3])
print(b[snp2, 4:6])
g = genotype %*% b
apply(g,2,sd) * eff_factor
e = MASS::mvrnorm(nrow(g), rep(0,R), diag(R) * 1)
Y = g + e
U1 = matrix(0,R,R)
U1[1:3,1:3] = sharing
U2 = matrix(0,R,R)
U2[4:6,4:6] = sharing
Ulist = list(U1=U1, U2=U2)
scaling = c(0.5,1) / eff_factor
mash_init = mvsusieR:::MashInitializer$new(Ulist, scaling, alpha = 1)
res = mvsusieR::mvsusie(genotype,Y,
L=10,prior_variance=mash_init,
compute_objective=FALSE)
snps = sub("_[A-Z]*_[A-Z]*_b38", "", colnames(genotype))
# conditions = c('Artery_Aorta', 'Artery_Coronary', 'Artery_Tibial', 'Caudate_basal_ganglia', 'Nucleus_accumbens_basal_ganglia', 'Putamen_basal_ganglia')
rownames(res$coef) = c('intercept', snps)
# colnames(res$coef) = conditions
Notice that with causal SNP being correlated, the sum of PIP in a CS is > 1.0.
res$alpha[1,res$sets$cs$L1]
res$pip[res$sets$cs$L2]
pdf('susie_plot_toy.pdf', width=8, height=4)
susieR::susie_plot(res,y='PIP', main = 'Cross-condition Posterior Inclusion Probability', xlab = 'SNP positions', add_legend = F)
dev.off()
p = mvsusieR::mvsusie_plot(res)
pdf('mvsusie_plot_toy.pdf', width = 14, height = 4)
print(p$plot)
dev.off()
%preview susie_plot_toy.pdf -s png --dpi 150
%preview mvsusie_plot_toy.pdf -s png --dpi 150
For original effect size,
univariate_res = lapply(1:ncol(Y), function(i) susieR:::univariate_regression(genotype,Y[,i]))
res$bhat = do.call(cbind, lapply(1:ncol(Y), function(i) univariate_res[[i]]$betahat))
res$shat = do.call(cbind, lapply(1:ncol(Y), function(i) univariate_res[[i]]$sebetahat))
rownames(res$bhat) = snps
# colnames(res$bhat) = conditions
p = mvsusieR::mvsusie_plot(res, original_sumstat = TRUE)
pdf('mvsusie_plot_toy_original.pdf', width = 14, height = 4.5)
print(p$plot)
dev.off()
%preview mvsusie_plot_toy_original.pdf -s png --dpi 150