In comparison with CAVIAR follow-ups; workflow implemented in this notebook.
%revisions -s -n 10
estimate_prior_variance=TRUE
in susieR::susie
we remove the differences.%preview /home/gaow/Documents/GIT/LargeFiles/JointLCL/AS_output/fastqtl_qqnorm_ASintron_RNAseqGeuvadis_YangVCF_100Kb/PVE_Prior/CAVIAR_follow_up/chr11_63987798_63988488_clu_2010.CAVIAR.png
We trace back to its input data-set and see what is going on.
ls /home/gaow/Documents/GIT/LargeFiles/JointLCL/AS_output/fastqtl_qqnorm_ASintron_RNAseqGeuvadis_YangVCF_100Kb/PVE_Prior/SuSiE_CS_*/chr11_63987798_63988488_clu_2010*
fn = "/home/gaow/Documents/GIT/LargeFiles/JointLCL/AS_output/fastqtl_qqnorm_ASintron_RNAseqGeuvadis_YangVCF_100Kb/PVE_Prior/SuSiE_CS_2/chr11_63987798_63988488_clu_2010.rds"
susie = readRDS(fn)
dat = readRDS(susie$input)[[susie$idx]]
saveRDS(dat, "~/chr11_63987798_63988488_clu_2010_data.rds")
top_idx = which.max(abs(dat$z_score))
b = rep(0, length(dat$z_score))
b[top_idx] = 1
dat$pve
library(susieR)
set.seed(1)
fitted = susie(dat$X, dat$y, L=5,
estimate_residual_variance=TRUE,
prior_variance=dat$pve/(1-dat$pve),
tol=1e-3, track_fit=TRUE)
sets = susie_get_CS(fitted,
coverage = 0.95,
X = dat$X,
min_abs_corr = 0.4)
pip = susie_get_PIP(fitted, sets$cs_index)
susie_pplot(pip, res = fitted, CS = sets, max_cs = 0.4, dtype='PIP',b=b)
To plot the fitting process:
susie_iterplot(fitted, 3, 'example_1')
%preview example_1.gif
fitted$niter
fitted$elbo
However, if I set L=2
as I did for CAVIAR (-c 2
), the result is more consistent with CAVIAR.
set.seed(1)
fitted = susie(dat$X, dat$y, L=2,
estimate_residual_variance=TRUE,
prior_variance=dat$pve/(1-dat$pve),
tol=1e-3, track_fit=TRUE)
sets = susie_get_CS(fitted,
coverage = 0.95,
X = dat$X,
min_abs_corr = 0.4)
pip = susie_get_PIP(fitted, sets$cs_index)
susie_pplot(pip, res = fitted, CS = sets, max_cs = 0.4, dtype='PIP',b=b)
fitted$niter
fitted$elbo
Takes longer to converge but the ELBO is bigger for L=2
.
susie_iterplot(fitted, 3, 'example_1_l2')
%preview example_1_l2.gif
-c 3
¶%cd ~/tmp/14-Jul-2018
library(abind)
mm_regression = function(X, Y, Z=NULL) {
if (!is.null(Z)) {
Z = as.matrix(Z)
}
reg = lapply(seq_len(ncol(Y)), function (i) simplify2array(susieR:::univariate_regression(X, Y[,i], Z)))
reg = do.call(abind, c(reg, list(along=0)))
# return array: out[1,,] is betahat, out[2,,] is shat
return(aperm(reg, c(3,2,1)))
}
sumstats = mm_regression(as.matrix(dat$X), as.matrix(dat$y))
dat$Y = matrix(dat$y, length(dat$y), 1)
saveRDS(list(data=dat, sumstats=sumstats), 'N2.with_sumstats.rds')
export PATH=~/GIT/github/mvarbvs/dsc/modules/linux:$PATH
Rscript ~/GIT/software/susieR/inst/code/caviar.R input=\"N2.with_sumstats.rds\" output=\"N2finemapping.CAVIAR\" args=\"-c\ 3\"
caviar = readRDS("N2finemapping.CAVIAR.rds")
snp = caviar[[1]]$snp
pip = snp[order(as.numeric(snp$snp)),]$snp_prob
susie_pplot(pip, dtype='PIP', b=b, main = 'CAVIAR')
The result is less "clean", but still CAVIAR is able to capture the top signal.
export PATH=~/GIT/github/mvarbvs/dsc/modules/linux:$PATH
python ~/GIT/software/susieR/inst/code/dap-g.py N2.with_sumstats.rds N2finemapping.DAP -ld_control 0.20 --all
dap = readRDS("N2finemapping.DAP.rds")
snp = dap[[1]]$snp
pip = snp[order(as.numeric(snp$snp)),]$snp_prob
susie_pplot(pip, dtype='PIP', b=b, main = 'DAP')
DAP seems to work fine in this case.
L 10
¶set.seed(1)
fitted = susie(dat$X, dat$y, L=10,
estimate_residual_variance=TRUE,
prior_variance=dat$pve/(1-dat$pve),
tol=1e-3, track_fit=TRUE)
sets = susie_get_CS(fitted,
coverage = 0.95,
X = dat$X,
min_abs_corr = 0.4)
pip = susie_get_PIP(fitted, sets$cs_index)
susie_pplot(pip, res = fitted, CS = sets, max_cs = 0.4, dtype='PIP',b=b)
Interestingly, 1 CS is picked up, that does not include the signal.
set.seed(1)
fitted = susie(dat$X, dat$y, L=5,
estimate_residual_variance=TRUE,
estimate_prior_variance=TRUE,
tol=1e-3, track_fit=TRUE)
sets = susie_get_CS(fitted,
coverage = 0.95,
X = dat$X,
min_abs_corr = 0.4)
pip = susie_get_PIP(fitted, sets$cs_index)
susie_pplot(pip, res = fitted, CS = sets, max_cs = 0.4, dtype='PIP',b=b)
We now get the same output as CAVIAR.
%preview /home/gaow/Documents/GIT/LargeFiles/JointLCL/AS_output/fastqtl_qqnorm_ASintron_RNAseqGeuvadis_YangVCF_100Kb/PVE_Prior/CAVIAR_follow_up/chr10_114186117_114186976_clu_3277.CAVIAR.png
ls /home/gaow/Documents/GIT/LargeFiles/JointLCL/AS_output/fastqtl_qqnorm_ASintron_RNAseqGeuvadis_YangVCF_100Kb/PVE_Prior/SuSiE_CS_*/chr10_114186117_114186976_clu_3277*
fn = "/home/gaow/Documents/GIT/LargeFiles/JointLCL/AS_output/fastqtl_qqnorm_ASintron_RNAseqGeuvadis_YangVCF_100Kb/PVE_Prior/SuSiE_CS_2/chr10_114186117_114186976_clu_3277.rds"
susie = readRDS(fn)
dat = readRDS(susie$input)[[susie$idx]]
saveRDS(dat, "~/chr10_114186117_114186976_clu_3277_data.rds")
top_idx = which.max(abs(dat$z_score))
b = rep(0, length(dat$z_score))
b[top_idx] = 1
dat$pve
library(susieR)
set.seed(1)
fitted = susie(dat$X, dat$y, L=5,
estimate_residual_variance=TRUE,
prior_variance=dat$pve/(1-dat$pve),
tol=1e-3, track_fit=TRUE)
sets = susie_get_CS(fitted,
coverage = 0.95,
X = dat$X,
min_abs_corr = 0.4)
pip = susie_get_PIP(fitted, sets$cs_index)
susie_pplot(pip, res = fitted, CS = sets, max_cs = 0.4, dtype='PIP',b=b)
To track iterations:
susie_iterplot(fitted, 3, 'example_2')
%preview example_2.gif
fitted$niter
fitted$elbo
If I set L=1
:
library(susieR)
set.seed(1)
fitted = susie(dat$X, dat$y, L=1,
estimate_residual_variance=TRUE,
prior_variance=dat$pve/(1-dat$pve),
tol=1e-3, track_fit=TRUE)
sets = susie_get_CS(fitted,
coverage = 0.95,
X = dat$X,
min_abs_corr = 0.4)
pip = susie_get_PIP(fitted, sets$cs_index)
susie_pplot(pip, fitted = fitted, CS = sets, max_cs = 0.4, dtype='PIP',b=b)
fitted$elbo
The ELBO here is in fact better.
susie_iterplot(fitted, 3, 'example_2_l1')
%preview example_2_l1.gif
CAVIAR is more robust to parameter choices here.
sumstats = mm_regression(as.matrix(dat$X), as.matrix(dat$y))
dat$Y = matrix(dat$y, length(dat$y), 1)
saveRDS(list(data=dat, sumstats=sumstats), 'N2.with_sumstats.rds')
export PATH=~/GIT/github/mvarbvs/dsc/modules/linux:$PATH
Rscript ~/GIT/software/susieR/inst/code/caviar.R input=\"N2.with_sumstats.rds\" output=\"N2finemapping.CAVIAR\" args=\"-c\ 3\"
caviar = readRDS("N2finemapping.CAVIAR.rds")
snp = caviar[[1]]$snp
pip = snp[order(as.numeric(snp$snp)),]$snp_prob
susie_pplot(pip, dtype='PIP', b=b, main = 'CAVIAR')
export PATH=~/GIT/github/mvarbvs/dsc/modules/linux:$PATH
python ~/GIT/software/susieR/inst/code/dap-g.py N2.with_sumstats.rds N2finemapping.DAP -ld_control 0.20 --all
dap = readRDS("N2finemapping.DAP.rds")
snp = dap[[1]]$snp
pip = snp[order(as.numeric(snp$snp)),]$snp_prob
susie_pplot(pip, dtype='PIP', b=b, main = 'DAP')
Here DAP also does not think the top z-score is strong enough to be boosted.
set.seed(1)
fitted = susie(dat$X, dat$y, L=5,
estimate_residual_variance=TRUE,
estimate_prior_variance=TRUE,
tol=1e-3, track_fit=TRUE)
sets = susie_get_CS(fitted,
coverage = 0.95,
X = dat$X,
min_abs_corr = 0.4)
pip = susie_get_PIP(fitted, sets$cs_index)
susie_pplot(pip, res = fitted, CS = sets, max_cs = 0.4, dtype='PIP',b=b)
Now SuSiE gives exactly same result as CAVIAR.
fitted$sa2