%revisions -s -n 10
Previously I've ran this specific DSC using:
dsc susie.dsc --target run_comparison -o susie_comparison
Here I query from the result.
[global]
parameter: cwd = path('~/GIT/github/mvarbvs/dsc')
parameter: date = '1008'
# set to 0 to use estimated prior, or another number to use specified prior
# as configured in the DSC
parameter: susie_prior = [0.1, 0.0]
# null weight options to evaluate
parameter: null_weight = [0.0, 0.5, 0.9, 0.95]
dirname = path(f'{cwd:a}/susie_comparison/')
def fmtP(x):
return str(x).replace(".", "p")
[pip_1, power_1, cali_pip_1, coverage_1, roc_1]
output: f'{dirname}/PIP_comparison_{date}.rds'
R: expand = '${ }', workdir = cwd
dap_out = dscrutils::dscquery(${dirname:br},
target = "liter_data.dataset liter_data lm_less lm_less.pve lm_less.n_signal fit_dap")
susie_out = dscrutils::dscquery(${dirname:br},
target = "liter_data.dataset liter_data lm_less lm_less.pve lm_less.n_signal fit_susie.prior_var fit_susie.null_weight fit_susie")
caviar_out = dscrutils::dscquery(${dirname:br},
target = "liter_data.dataset liter_data lm_less lm_less.pve lm_less.n_signal fit_caviar.args fit_caviar")
finemap_out = dscrutils::dscquery(${dirname:br},
target = "liter_data.dataset liter_data lm_less lm_less.pve lm_less.n_signal fit_finemap.args fit_finemap")
saveRDS(list(dap=dap_out, susie=susie_out, caviar=caviar_out, finemap=finemap_out), ${_output:r})
[pip_2]
input: for_each = ['null_weight', 'susie_prior'], concurrent = True
output: f'{_input:n}_prior_{fmtP(_susie_prior)}_null_{fmtP(_null_weight)}.rds'
R: stdout = f'{_output:n}.log', expand = '${ }', workdir = cwd
dat = readRDS(${_input:r})
dap_out = dat$dap
caviar_out = dat$caviar
susie_out = dat$susie
finemap_out = dat$finemap
# favorit susie flavor
susie_out = susie_out[which(susie_out$fit_susie.prior_var == ${_susie_prior} & susie_out$fit_susie.null_weight == ${_null_weight}), ]
susie_out = subset(susie_out, select =-c(lm_less.pve, fit_susie.prior_var, fit_susie.null_weight))
data_sets = unique(susie_out$liter_data.dataset)
n_signals = unique(susie_out$lm_less.n_signal)
result = list()
for (s in n_signals) {
result[[as.character(s)]] = NULL
if (s > 3) {
has_caviar = FALSE
} else {
has_caviar = TRUE
}
print(paste('==============', s, '=============='))
for (d in data_sets) {
in_files = susie_out[which(susie_out$lm_less.n_signal == s & susie_out$liter_data.dataset == d),c("lm_less.output.file"),drop=FALSE]
truth = dscrutils::read_dsc(paste0(${dirname:r}, '/', in_files[1,1]))$data$true_coef
for (r in 1:1) {
signals = which(truth[,r]!=0)
out_files = susie_out[which(susie_out$lm_less.n_signal == s & susie_out$liter_data.dataset == d),c("fit_susie.output.file"),drop=FALSE]
susie = dscrutils::read_dsc(paste0(${dirname:r}, '/', out_files[1,1]))$posterior
susie = susie[[r]]$vars
# option 1, only compare the mappable set
# in_CI = susie$variable[which(susie$cs>0)]
# pip = susie$variable_prob[which(susie$cs>0)]
# or option 2 compare everything
in_CI = susie$variable
pip = susie$variable_prob
#
out_files = dap_out[which(dap_out$lm_less.n_signal == s & dap_out$liter_data.dataset == d),c("fit_dap.output.file"),drop=FALSE]
dap = dscrutils::read_dsc(paste0(${dirname:r}, '/', out_files[1,1]))$posterior
dap = dap[[as.character(r-1)]]$snp
#print(head(dap, length(pip)))
dap = dap[which(dap$snp %in% as.character(in_CI)),]
dap = dap[match(in_CI, dap$snp),]
#print(dap)
#print(pip)
#print(in_CI)
if (has_caviar) {
out_files = caviar_out[which(caviar_out$lm_less.n_signal == s & caviar_out$liter_data.dataset == d & caviar_out$fit_caviar.args == paste('-g 0.001 -c', s)),c("fit_caviar.output.file"),drop=FALSE]
caviar = dscrutils::read_dsc(paste0(${dirname:r}, '/', out_files[1,1]))$posterior
caviar = caviar[[r]]$snp
caviar = caviar[which(caviar$snp %in% as.character(in_CI)),]
caviar = caviar[match(in_CI, caviar$snp),]
out_files = finemap_out[which(finemap_out$lm_less.n_signal == s & finemap_out$liter_data.dataset == d & finemap_out$fit_finemap.args == paste('--n-causal-max', s)),c("fit_finemap.output.file"),drop=FALSE]
finemap = dscrutils::read_dsc(paste0(${dirname:r}, '/', out_files[1,1]))$posterior
finemap = finemap[[r]]$snp
finemap = finemap[which(finemap$snp %in% as.character(in_CI)),]
finemap = finemap[match(in_CI, finemap$snp),]
pip = cbind(pip, as.vector(dap$snp_prob), as.vector(caviar$snp_prob), as.vector(finemap$snp_prob), in_CI %in% signals)
} else {
pip = cbind(pip, as.vector(dap$snp_prob), in_CI %in% signals)
}
# remove all zero PIP / table
pip = pip[rowSums(pip) > 0, ]
## BEGIN debug
outlier = pip[which(pip[,1] < 0.1 & pip[,2]>0.6), ,drop=F]
if (nrow(outlier)>0 && s <= 2) {
print("DAP outlier")
print(c(r,d,in_files))
}
if (has_caviar && s == 1) {
conflict = pip[which(pip[,1] < 0.95 & pip[,3] > 0.95), ,drop=F]
if (nrow(conflict) > 0) {
print("CAVIAR-susie conflict")
print(d)
print(pip[conflict,])
print("CAVIAR")
print(caviar[which(caviar$snp_prob>0.95),])
print("susie")
print(susie[which(susie$variable_prob>0.95),])
}
}
## END debug
if (is.null(result[[as.character(s)]])) {
result[[as.character(s)]] = pip
} else {
result[[as.character(s)]] = rbind(result[[as.character(s)]], pip)
}
}
}
result[[as.character(s)]] = data.frame(result[[as.character(s)]])
if (has_caviar) {
colnames(result[[as.character(s)]]) = c('susie', 'dap', 'caviar', 'finemap', 'is_signal')
} else {
colnames(result[[as.character(s)]]) = c('susie', 'dap', 'is_signal')
}
}
saveRDS(result, ${_output:r})
[pip_3]
comparisons = ['susie_vs_dap', 'susie_vs_caviar', 'susie_vs_finemap', 'dap_vs_caviar', 'dap_vs_finemap', 'caviar_vs_finemap']
output: paths([f'{_input:n}.{x}.png' for x in comparisons])
R: expand = '${ }'
result = readRDS(${_input:r})
merge_img = function(prefix, n) {
files = paste0(prefix, '_', seq(1:n), '.png')
cmd = paste('convert +append', paste(files, collapse=" "), paste0(prefix, '.png'))
system(cmd)
system(paste('rm -f', paste(files, collapse=" ")))
files = paste0(prefix, '_', seq(1:n), '.md')
cmd = paste('cat', paste(files, collapse=" "), '>', paste0(prefix, '.md'))
system(cmd)
system(paste('rm -f', paste(files, collapse=" ")))
}
merge_lists = function(lists) {
lists = do.call(rbind,lapply(cbind(lists), unlist))
names = colnames(lists)
lists = lapply(1:ncol(lists), function(i) lists[,i])
names(lists) = names
return(lists)
}
plot_pip = function(x, n_causal, s, output_prefix, xname, yname, xlab, ylab, pip_cutoff = -1, pip_diff_categories = c(0.1, 0.15, 0.2)) {
x = x[x[[xname]] > pip_cutoff & x[[yname]] > pip_cutoff,]
if (pip_cutoff >=0) {
xlab = paste0(xlab, ">", pip_cutoff)
ylab = paste0(ylab, ">", pip_cutoff)
}
colors = sapply(1:length(x$is_signal), function(i) ifelse(x$is_signal[i],'#800000','#d6d6ce'))
# compute difference in pip and proportions
pip_diff = abs(x[[xname]] - x[[yname]])
pip_diff_cnts = sapply(pip_diff_categories, function(x) length(which(pip_diff >= x)))
pip_diff_text = vector()
for (i in 1:length(pip_diff_categories)) {
tmp = paste0('- ', pip_diff_cnts[i], '/', length(pip_diff), ' (', formatC(pip_diff_cnts[i] / length(pip_diff) * 100, format = "e", digits = 2), '%) differ by ', pip_diff_categories[i])
pip_diff_text = c(pip_diff_text, tmp)
}
pip_diff_text = paste(pip_diff_text, collapse = '\n')
corr_text = paste('- correlation', round(cor(x)[1,2],2))
header = paste('#', xname, 'vs', yname, s, n_causal, 'causal\n')
pdf(paste0(output_prefix, '_', n_causal, '.pdf'), width=5, height=5, pointsize=14)
plot(x[[xname]][which(x$is_signal==0)], x[[yname]][which(x$is_signal==0)], xlab = xlab, ylab = ylab, xlim = c(0,1), ylim = c(0,1),
main = paste0(n_causal, s , ifelse(n_causal>1, ' effect variables', ' effect variable')),
col = '#d6d6ce', pch = 20, cex = 1.4, bty='l')
points(x[[xname]][which(x$is_signal==1)], x[[yname]][which(x$is_signal==1)], col='#800000', pch=20, cex=1.4)
abline(0,1,col=2)
#abline(h=0.95, col='gray')
#abline(v=0.95, col='gray')
dev.off()
system(paste0("convert -flatten -density 120 ", paste0(output_prefix, '_', n_causal, '.pdf'), " ", paste0(output_prefix, '_', n_causal, '.png')))
write(paste(header, corr_text, pip_diff_text, '\n', sep = '\n'), paste0(output_prefix, '_', n_causal, '.md'))
}
result_1 = result[[1]]
result_2 = result[[2]]
result_3 = result[[3]]
result_5 = do.call(rbind, lapply(3:5, function(i) result[[i]][,c('susie','dap', 'is_signal')]))
# susie vs dap
plot_pip(result_1, 1, '', ${_output[0]:nr}, 'susie', 'dap', 'PIP SuSiE', 'PIP DAP-G')
plot_pip(result_2, 2, '', ${_output[0]:nr}, 'susie', 'dap', 'PIP SuSiE', 'PIP DAP-G')
plot_pip(result_5, 3, ' ~ 5', ${_output[0]:nr}, 'susie', 'dap', 'PIP SuSiE', 'PIP DAP-G')
merge_img(${_output[0]:nr}, 3)
# susie vs caviar
plot_pip(result_1, 1, '', ${_output[1]:nr}, 'susie', 'caviar', 'PIP SuSiE', 'PIP CAVIAR')
plot_pip(result_2, 2, '', ${_output[1]:nr}, 'susie', 'caviar', 'PIP SuSiE', 'PIP CAVIAR')
plot_pip(result_3, 3, '', ${_output[1]:nr}, 'susie', 'caviar', 'PIP SuSiE', 'PIP CAVIAR')
merge_img(${_output[1]:nr}, 3)
# susie vs finemap
plot_pip(result_1, 1, '', ${_output[2]:nr}, 'susie', 'finemap', 'PIP SuSiE', 'PIP FINEMAP')
plot_pip(result_2, 2, '', ${_output[2]:nr}, 'susie', 'finemap', 'PIP SuSiE', 'PIP FINEMAP')
plot_pip(result_3, 3, '', ${_output[2]:nr}, 'susie', 'finemap', 'PIP SuSiE', 'PIP FINEMAP')
merge_img(${_output[2]:nr}, 3)
# dap vs caviar
plot_pip(result_1, 1, '', ${_output[3]:nr}, 'dap', 'caviar', 'PIP DAP-G', 'PIP CAVIAR')
plot_pip(result_2, 2, '', ${_output[3]:nr}, 'dap', 'caviar', 'PIP DAP-G', 'PIP CAVIAR')
plot_pip(result_3, 3, '', ${_output[3]:nr}, 'dap', 'caviar', 'PIP DAP-G', 'PIP CAVIAR')
merge_img(${_output[3]:nr}, 3)
# dap vs finemap
plot_pip(result_1, 1, '', ${_output[4]:nr}, 'dap', 'finemap', 'PIP DAP-G', 'PIP FINEMAP')
plot_pip(result_2, 2, '', ${_output[4]:nr}, 'dap', 'finemap', 'PIP DAP-G', 'PIP FINEMAP')
plot_pip(result_3, 3, '', ${_output[4]:nr}, 'dap', 'finemap', 'PIP DAP-G', 'PIP FINEMAP')
merge_img(${_output[4]:nr}, 3)
# caviar vs finemap
plot_pip(result_1, 1, '', ${_output[5]:nr}, 'caviar', 'finemap', 'PIP CAVIAR', 'PIP FINEMAP')
plot_pip(result_2, 2, '', ${_output[5]:nr}, 'caviar', 'finemap', 'PIP CAVIAR', 'PIP FINEMAP')
plot_pip(result_3, 3, '', ${_output[5]:nr}, 'caviar', 'finemap', 'PIP CAVIAR', 'PIP FINEMAP')
merge_img(${_output[5]:nr}, 3)
[power_2]
# Power analysis
# to match with DAP -ld_control 0.25
# to match with susie 95% mappable CS, we set dap cutoff to 0.95 also
dap_cluster_cutoff = [('cluster_prob', 0.95), ('cluster_avg_r2', 0.25)]
input: for_each = ['dap_cluster_cutoff', 'null_weight', 'susie_prior'], group_by = 1, concurrent = True
output: f'{dirname}/Power_comparison_{date}_{_dap_cluster_cutoff[0]}_prior_{fmtP(_susie_prior)}_null_{fmtP(_null_weight)}.rds'
R: stdout = f'{_output:n}.log', expand = '${ }', workdir = cwd
dat = readRDS(${_input:r})
dap_out = dat$dap
susie_out = dat$susie
# favorite susie flavor
susie_out = susie_out[which(susie_out$fit_susie.prior_var == ${_susie_prior} & susie_out$fit_susie.null_weight == ${_null_weight}), ]
susie_out = subset(susie_out, select =-c(lm_less.pve, fit_susie.prior_var, fit_susie.null_weight))
data_sets = unique(susie_out$liter_data.dataset)
n_signals = unique(susie_out$lm_less.n_signal)
n_r = 2
n_experiments = n_r * length(data_sets)
result = NULL
overlap_cs = list()
for (s in n_signals) {
susie_signals = 0
dap_signals = 0
susie_size = 0
dap_size = 0
# I cannot find a good median tracker so do it stupid way: save all and take median later
susie_sizes = vector()
dap_sizes = vector()
# do the same for mean ...
susie_avg_ld = vector()
dap_avg_ld = vector()
susie_tdc = 0
dap_tdc = 0
susie_dc = 0
dap_dc = 0
susie_tc = 0
dap_tc = 0
overlap_cs[[as.character(s)]] = NULL
for (d in data_sets) {
out_files = susie_out[which(susie_out$lm_less.n_signal == s & susie_out$liter_data.dataset == d), c("lm_less.output.file"),drop=FALSE]
truth = dscrutils::read_dsc(paste0(${dirname:r}, '/', out_files[1,1]))$data$true_coef
dap_files = dap_out[which(dap_out$lm_less.n_signal == s & dap_out$liter_data.dataset == d),c("fit_dap.output.file"),drop=FALSE]
dap = dscrutils::read_dsc(paste0(${dirname:r}, '/', dap_files[1,1]))$posterior
susie_files = susie_out[which(susie_out$lm_less.n_signal == s & susie_out$liter_data.dataset == d),c("fit_susie.output.file"),drop=FALSE]
susie = dscrutils::read_dsc(paste0(${dirname:r}, '/', susie_files[1,1]))$posterior
for (r in 1:n_r) {
signals = which(truth[,r]!=0)
# SuSiE in CS
susie_cs = list()
if (!is.null(susie[[r]]$cs)) {
for (i in 1:nrow(susie[[r]]$cs)) {
susie_cs[[i]] = as.integer(unlist(strsplit(susie[[r]]$cs$variable[i], ",")))
if (any(signals %in% susie_cs[[i]])) {
susie_size = susie_size + length(susie_cs[[i]])
susie_sizes = c(susie_sizes, length(susie_cs[[i]]))
susie_avg_ld = c(susie_avg_ld, sqrt(susie[[r]]$cs$cs_avg_r2[i]))
susie_tdc = susie_tdc + 1
susie_dc = susie_dc + 1
}
}
susie_signals = susie_signals + sum(signals %in% unique(unlist(susie_cs)))
}
# check susie CS overlapping status
if (length(susie_cs) > 0) {
for (i in 1:length(susie_cs)) {
for (j in 1:i) {
if (i == j) next
status = intersect(susie_cs[[i]], susie_cs[[j]])
if (length(status)>0) {
if (is.null(overlap_cs[[as.character(s)]])) overlap_cs[[as.character(s)]] = c(d, r, length(susie_cs[[i]]), length(susie_cs[[j]]), length(status))
else overlap_cs[[as.character(s)]] = rbind(overlap_cs[[as.character(s)]], c(d, r, length(susie_cs[[i]]), length(susie_cs[[j]]), length(status)))
}
}
}
}
print(paste('==============', s, '=============='))
print(susie_cs)
susie_tc = susie_tc + length(susie_cs)
# DAP in cluster
dap_cluster_raw = dap[[as.character(r-1)]]$set[which(dap[[as.character(r-1)]]$set$${_dap_cluster_cutoff[0]} > ${_dap_cluster_cutoff[1]}), ]
dap_cluster_ld = dap_cluster_raw$cluster_avg_r2
dap_cluster_raw = dap_cluster_raw$snp
dap_cluster = list()
if (length(dap_cluster_raw) > 0) {
for (i in 1:length(dap_cluster_raw)) {
dap_cluster[[i]] = as.integer(unlist(strsplit(dap_cluster_raw[i], ",")))
if (any(signals %in% dap_cluster[[i]])) {
dap_size = dap_size + length(dap_cluster[[i]])
dap_sizes = c(dap_sizes, length(dap_cluster[[i]]))
dap_avg_ld = c(dap_avg_ld, sqrt(dap_cluster_ld[i]))
dap_tdc = dap_tdc + 1
dap_dc = dap_dc + 1
}
}
dap_signals = dap_signals + sum(signals %in% unique(unlist(dap_cluster)))
}
print(dap_cluster)
dap_tc = dap_tc + length(dap_cluster)
## BEGIN debug
## susie made more true discovery than DAP
if (susie_dc > dap_dc) {
print('DAP miss')
print(dap[[as.character(r-1)]]$set)
print(d)
print(r)
print(dap_files)
}
## DAP made false discovery, susie did not
## under n = 1
if (length(dap_cluster) > dap_dc && s == 1) {
print('DAP false discovery')
print(dap[[as.character(r-1)]]$set)
print(d)
print(r)
print(dap_files)
}
## SuSiE made false discovery
## under n = 3
if (length(susie_cs) > susie_dc && s == 3) {
print('SuSiE false discovery in data ...')
print(d)
print(r)
print(length(susie_cs))
print(susie_dc)
print(susie_files)
}
## END debug
susie_dc = 0
dap_dc = 0
}
}
rates = c(s, s*n_experiments, susie_tc, dap_tc, susie_signals/s/n_experiments, dap_signals/s/n_experiments, susie_tdc/susie_tc, dap_tdc/dap_tc, susie_size / susie_tdc, dap_size / dap_tdc, median(susie_sizes), median(dap_sizes), mean(susie_avg_ld,na.rm=T), mean(dap_avg_ld))
if (is.null(result)) {
result = rates
} else {
result = rbind(result, rates)
}
}
colnames(result) = c('n_signal', 'expected_discoveries', 'susie_discoveries', 'dap_discoveries', 'susie_power', 'dap_power', 'susie_coverage', 'dap_coverage', 'susie_avg_size', 'dap_avg_size', 'susie_median_size', 'dap_median_size', 'susie_avg_ld', 'dap_avg_ld')
rownames(result) = as.character(result[,1])
saveRDS(data.frame(result), ${_output:r})
saveRDS(overlap_cs, "${_output:n}.overlap_status.rds")
[cali_pip_2]
bin_size = 20
input: for_each = ['null_weight', 'susie_prior'], concurrent = True
output: f'{_input:n}_calibrated_prior_{fmtP(_susie_prior)}_null_{fmtP(_null_weight)}.rds'
R: stdout = f'{_output:n}.log', expand = '${ }', workdir = cwd
pip_cutoff = 0
dat = readRDS(${_input:r})
dap_out = dat$dap
caviar_out = dat$caviar
finemap_out = dat$finemap
susie_out = dat$susie
# favorite susie flavor
susie_out = susie_out[which(susie_out$fit_susie.prior_var == ${_susie_prior} & susie_out$fit_susie.null_weight == ${_null_weight}), ]
susie_out = subset(susie_out, select =-c(lm_less.pve, fit_susie.prior_var, fit_susie.null_weight))
data_sets = unique(susie_out$liter_data.dataset)
n_signals = unique(susie_out$lm_less.n_signal)
result = list()
pip_cali = list()
for (s in n_signals) {
result[[as.character(s)]] = NULL
pip_cali[[as.character(s)]] = list(susie = c(0,0,0), dap = c(0,0,0), caviar = c(0,0,0))
if (s > 3) {
has_caviar = FALSE
} else {
has_caviar = TRUE
}
for (d in data_sets) {
out_files = susie_out[which(susie_out$lm_less.n_signal == s & susie_out$liter_data.dataset == d),c("lm_less.output.file"),drop=FALSE]
truth = dscrutils::read_dsc(paste0(${dirname:r}, '/', out_files[1,1]))$data$true_coef
for (r in c(1,2)) {
signals = truth[,r]
signals[which(signals!=0)] = 1
out_files = susie_out[which(susie_out$lm_less.n_signal == s & susie_out$liter_data.dataset == d),c("fit_susie.output.file"),drop=FALSE]
susie = dscrutils::read_dsc(paste0(${dirname:r}, '/', out_files[1,1]))$posterior
snp = susie[[r]]$vars
# do not restrict PIP
in_CI = 1:nrow(snp)
snp = snp[which(snp$variable %in% in_CI),]
snp = snp[match(in_CI, snp$variable),]
#print(head(snp))
susie = as.vector(snp$variable_prob)
dap = as.vector(snp$snp_prob)
out_files = dap_out[which(dap_out$lm_less.n_signal == s & dap_out$liter_data.dataset == d),c("fit_dap.output.file"),drop=FALSE]
dap = dscrutils::read_dsc(paste0(${dirname:r}, '/', out_files[1,1]))$posterior
snp = dap[[as.character(r-1)]]$snp
snp = snp[which(snp$snp %in% as.character(in_CI)),]
snp = snp[match(in_CI, snp$snp),]
#print(head(snp))
dap = as.vector(snp$snp_prob)
if (has_caviar) {
# caviar
out_files = caviar_out[which(caviar_out$lm_less.n_signal == s & caviar_out$liter_data.dataset == d & caviar_out$fit_caviar.args == paste('-g 0.001 -c', s)),c("fit_caviar.output.file"),drop=FALSE]
caviar = dscrutils::read_dsc(paste0(${dirname:r}, '/', out_files[1,1]))$posterior
snp = caviar[[r]]$snp
snp = snp[which(snp$snp %in% as.character(in_CI)),]
snp = snp[match(in_CI, snp$snp),]
#print(head(snp))
caviar = as.vector(snp$snp_prob)
# finemap
out_files = finemap_out[which(finemap_out$lm_less.n_signal == s & finemap_out$liter_data.dataset == d & finemap_out$fit_finemap.args == paste('--n-causal-max', s)),c("fit_finemap.output.file"),drop=FALSE]
finemap = dscrutils::read_dsc(paste0(${dirname:r}, '/', out_files[1,1]))$posterior
snp = finemap[[r]]$snp
snp = snp[which(snp$snp %in% as.character(in_CI)),]
snp = snp[match(in_CI, snp$snp),]
#print(head(snp))
finemap = as.vector(snp$snp_prob)
pip = cbind(susie, dap, caviar, finemap, signals[in_CI])
} else {
pip = cbind(susie, dap, signals[in_CI])
}
if (is.null(result[[as.character(s)]])) {
result[[as.character(s)]] = pip
} else {
result[[as.character(s)]] = rbind(result[[as.character(s)]], pip)
}
}
}
# make data frame
res = data.frame(result[[as.character(s)]])
if (has_caviar) {
colnames(res) = c('susie', 'dap', 'caviar', 'finemap', 'is_signal')
names = c('susie', 'dap', 'caviar', 'finemap')
} else {
colnames(res) = c('susie', 'dap', 'is_signal')
names = c('susie', 'dap')
}
# make bins
bins = cbind(seq(1:${bin_size})/${bin_size}-1/${bin_size}, seq(1:${bin_size})/${bin_size})
for (name in names) {
for (i in 1:nrow(bins)) {
tmp = res[which(res[[name]] > bins[i,1] & res[[name]] < bins[i,2]),]
pip_cali[[as.character(s)]][[name]] = rbind(pip_cali[[as.character(s)]][[name]], c(sum(tmp[[name]]), sum(tmp$is_signal), length(tmp$is_signal)))
}
pip_cali[[as.character(s)]][[name]][which(is.na(pip_cali[[as.character(s)]][[name]]))] = 0
}
}
susie = pip_cali[[as.character(1)]]$susie
for (i in 2:5) susie = susie + pip_cali[[as.character(i)]]$susie
dap = pip_cali[[as.character(1)]]$dap
for (i in 2:5) dap = dap + pip_cali[[as.character(i)]]$dap
cav = pip_cali[[as.character(1)]]$cav
for (i in 2:3) cav = cav + pip_cali[[as.character(i)]]$cav
finemap = pip_cali[[as.character(1)]]$finemap
for (i in 2:3) finemap = finemap + pip_cali[[as.character(i)]]$finemap
susie[,c(1,2)] = susie[,c(1,2)] / susie[,3]
dap[,c(1,2)] = dap[,c(1,2)] / dap[,3]
cav[,c(1,2)] = cav[,c(1,2)] / cav[,3]
finemap[,c(1,2)] = finemap[,c(1,2)] / finemap[,3]
saveRDS(list("SuSiE"=susie[-1,], "DAP-G"=dap[-1,], "CAVIAR"=cav[-1,], "FINEMAP"=finemap[-1,]), ${_output:r})
[cali_pip_3]
depends: executable('convert')
input: group_by = 1, concurrent = True
output: f'{_input:n}.png'
R: expand = '${ }'
library(ggplot2)
library(cowplot)
dot_plot = function(dataframe) {
ggplot(dataframe, aes(x=mean_pip, y=observed_freq)) +
geom_errorbar(aes(ymin=observed_freq-se, ymax=observed_freq+se), colour="gray", size = 0.2, width=.01) +
geom_point(size=1.5, shape=21, fill="#002b36") + # 21 is filled circle
xlab("Mean PIP") +
ylab("Observed frequency") +
coord_cartesian(ylim=c(0,1), xlim=c(0,1)) +
geom_abline(slope=1,intercept=0,colour='red', size=0.2) +
ggtitle(name) +
expand_limits(y=0) + # Expand y range
theme_cowplot()
}
dat = readRDS(${_input:r})
idx = 0
for (name in names(dat)) {
idx = idx + 1
dat[[name]][,3] = sqrt(dat[[name]][,2] * (1 - dat[[name]][,2]) / dat[[name]][,3]) * 2
dat[[name]] = as.data.frame(dat[[name]])
colnames(dat[[name]]) = c("mean_pip", "observed_freq", "se")
pdf(paste0(${_output:nr}, '_' , idx, '.pdf'), width=3, height=3, pointsize=16)
print(dot_plot(dat[[name]]))
dev.off()
system(paste0("convert -density 120 ", ${_output:nr}, '_' , idx, '.pdf', " ", ${_output:nr}, '_' , idx, '.png'))
}
files = paste0(${_output:nr}, '_', seq(1:idx), '.png')
cmd = paste('convert +append', paste(files, collapse=" "), ${_output:r})
system(cmd)
system(paste('rm -f', paste(files, collapse=" ")))
[coverage_2]
ld_cutoff = 0.5
parameter: n_signals = 5
input: for_each = ['susie_prior'], concurrent = True
output: f'{dirname}/Coverage_{date}_prior_{fmtP(_susie_prior)}_null_{fmtP(null_weight[0])}_signals_{n_signals}.rds'
R: stdout = f'{_output:n}.log', expand = '${ }', workdir = cwd
ld_cutoff = ${ld_cutoff}
dat = readRDS(${_input:r})
susie_out = dat$susie
# favorit susie flavor
susie_out = susie_out[which(susie_out$fit_susie.prior_var == ${_susie_prior} & susie_out$fit_susie.null_weight == ${null_weight[0]}), ]
susie_out = subset(susie_out, select =-c(lm_less.pve, fit_susie.prior_var, fit_susie.null_weight))
data_sets = unique(susie_out$liter_data.dataset)
if (is.null(${n_signals})) {
n_signals = unique(susie_out$lm_less.n_signal)
} else {
n_signals = 1:${n_signals}
}
positives = list()
for (s in n_signals) {
positives[[as.character(s)]] = list()
print(paste('========', s, '========'))
for (d in data_sets) {
out_files = susie_out[which(susie_out$lm_less.n_signal == s & susie_out$liter_data.dataset == d),c("fit_susie.output.file", "liter_data.output.file", "lm_less.output.file")]
fit = dscrutils::read_dsc(paste0(${dirname:r}, '/', out_files[1,1]))$fit
X = dscrutils::read_dsc(paste0(${dirname:r}, '/', out_files[1,2]))$data$X
truth = dscrutils::read_dsc(paste0(${dirname:r}, '/', out_files[1,3]))$data$true_coef
for (r in c(1,2)) {
signals = which(truth[,r]!=0)
susie_cs_all = list()
for (level in c(0.01, 0.05, 0.1, 0.15, 0.2, 0.25)) {
susie_cs_all[[as.character(level*100)]] = susieR::susie_get_CS(fit[[r]], coverage=1-level, X = X, min_abs_corr = ld_cutoff)$cs
if (! as.character(level*100) %in% names(positives[[as.character(s)]])) {
positives[[as.character(s)]][[as.character(level*100)]] = c(0,0)
}
if (length(susie_cs_all[[as.character(level*100)]]) > 0) {
for (i in 1:length(susie_cs_all[[as.character(level*100)]])) {
susie_cs = susie_cs_all[[as.character(level*100)]][[i]]
if (length(susie_cs) == 0) {
next
}
if (any(signals %in% susie_cs)) {
positives[[as.character(s)]][[as.character(level*100)]][1] = positives[[as.character(s)]][[as.character(level*100)]][1] + 1
} else {
print(paste(d, r, level, i))
print(susie_cs)
print(signals)
positives[[as.character(s)]][[as.character(level*100)]][2] = positives[[as.character(s)]][[as.character(level*100)]][2] + 1
}
}
}
}
}
}
print(positives[[as.character(s)]])
}
saveRDS(positives, ${_output:r})
[coverage_3]
parameter: n_signals = 5
output: f'{dirname}/Coverage_{date}_null_weight_{fmtP(null_weight[0])}_signals_{n_signals}.png'
R: expand = '${ }'
get_summary = function(dat, names) {
res = c(0,0)
for (level in c(0.01, 0.05, 0.1, 0.15, 0.2, 0.25)) {
s = as.character(level*100)
d = do.call(rbind, lapply(1:length(dat), function(i) dat[[as.character(i)]][[s]]))
d = colSums(d)
res = rbind(res, c(level, d[2]/(d[1]+d[2])))
}
res = data.frame(res[-1,])
colnames(res) = names
res
}
dat1 = get_summary(readRDS(${_input[0]:r}), c('expected', 'fdp_fixed_prior'))
dat2 = get_summary(readRDS(${_input[-1]:r}), c('expected', 'fdp_estimated_prior'))
dat = merge(dat1,dat2,by='expected')
lower = min(1 - dat$fdp_estimated_prior, 1 - dat$fdp_fixed_prior, 1 - dat$expected)
upper = max(1 - dat$fdp_estimated_prior, 1 - dat$fdp_fixed_prior, 1 - dat$expected)
pdf("${_output:n}.pdf", width=5, height=5, pointsize=15)
plot(1 - dat$expected, 1 - dat$fdp_fixed_prior, t="l", xlim = c(lower,upper), ylim = c(lower,upper),
col='#A60628', ylab = "observed coverage", xlab ="expected coverage", main = paste("1 ~", ${n_signals}, "effect variables"), bty='l')
lines(1 - dat$expected, 1 - dat$fdp_estimated_prior, xlim = c(lower,upper), ylim = c(lower,upper), col='#348ABD')
legend("bottomright", legend=c("SuSiE (fixed prior)", "SuSiE (estimated prior)"),
col=c('#A60628', '#348ABD'), lty=c(1,1), cex=0.8)
abline(0,1,col='gray')
dev.off()
system(paste0("convert -density 120 ", ${_output:nr}, '.pdf', " ", ${_output:r}))
[roc_2]
pip_cutoff = 0.3
dap_cluster_cutoff = ('cluster_prob', 0.95)
input: for_each = 'null_weight', concurrent = True
output: f'{dirname}/ROC_{date}_prior_{fmtP(susie_prior[0])}_null_{fmtP(_null_weight)}_two.rds', f'{dirname}/ROC_{date}_prior_{fmtP(susie_prior[0])}_null_{fmtP(_null_weight)}_all.rds'
R: stdout = f'{_output[0]:n}.log', expand = '${ }', workdir = cwd
dat = readRDS(${_input:r})
dap_out = dat$dap
susie_out = dat$susie
caviar_out = dat$caviar
finemap_out = dat$finemap
# favorit susie flavor
susie_out = susie_out[which(susie_out$fit_susie.prior_var == ${susie_prior[0]} & susie_out$fit_susie.null_weight == ${_null_weight}), ]
susie_out = subset(susie_out, select =-c(lm_less.pve, fit_susie.prior_var, fit_susie.null_weight))
data_sets = unique(susie_out$liter_data.dataset)
n_signals = unique(susie_out$lm_less.n_signal)
result = list()
result_all = list()
for (s in n_signals) {
result[[as.character(s)]] = list(susie=NULL, dap=NULL)
result_all[[as.character(s)]] = list(susie=NULL, dap=NULL, caviar=NULL, finemap=NULL)
for (d in data_sets) {
out_files = susie_out[which(susie_out$lm_less.n_signal == s & susie_out$liter_data.dataset == d),c("lm_less.output.file"),drop=FALSE]
truth = dscrutils::read_dsc(paste0(${dirname:r}, '/', out_files[1,1]))$data$true_coef
for (r in 1:2) {
signals = truth[,r]
signals[which(signals!=0)] = 1
# susie
out_files = susie_out[which(susie_out$lm_less.n_signal == s & susie_out$liter_data.dataset == d),c("fit_susie.output.file"),drop=FALSE]
susie = dscrutils::read_dsc(paste0(${dirname:r}, '/', out_files[1,1]))$posterior
pip = susie[[r]]$vars
pip = pip[order(as.numeric(pip$variable)),]$variable_prob
if (is.null(result[[as.character(s)]]$susie)) {
result[[as.character(s)]]$susie = cbind(pip, signals)
if (s <= 3) result_all[[as.character(s)]]$susie = cbind(pip, signals)
} else {
result[[as.character(s)]]$susie = rbind(result[[as.character(s)]]$susie, cbind(pip, signals))
if (s <= 3) result_all[[as.character(s)]]$susie = rbind(result_all[[as.character(s)]]$susie, cbind(pip, signals))
}
# dap
out_files = dap_out[which(dap_out$lm_less.n_signal == s & dap_out$liter_data.dataset == d),c("fit_dap.output.file"),drop=FALSE]
dap = dscrutils::read_dsc(paste0(${dirname:r}, '/', out_files[1,1]))$posterior
pip = dap[[as.character(r-1)]]$snp
pip = pip[order(as.numeric(pip$snp)),]$snp_prob
if (is.null(result[[as.character(s)]]$dap)) {
result[[as.character(s)]]$dap = cbind(pip, signals)
if (s <= 3) result_all[[as.character(s)]]$dap = cbind(pip, signals)
} else {
result[[as.character(s)]]$dap = rbind(result[[as.character(s)]]$dap, cbind(pip, signals))
if (s <= 3) result_all[[as.character(s)]]$dap = rbind(result_all[[as.character(s)]]$dap, cbind(pip, signals))
}
if (s <= 3) {
# CAVIAR
out_files = caviar_out[which(caviar_out$lm_less.n_signal == s & caviar_out$liter_data.dataset == d & caviar_out$fit_caviar.args == paste('-g 0.001 -c', s)),c("fit_caviar.output.file"),drop=FALSE]
caviar = dscrutils::read_dsc(paste0(${dirname:r}, '/', out_files[1,1]))$posterior
pip = caviar[[r]]$snp
pip = pip[order(as.numeric(pip$snp)),]$snp_prob
if (is.null(result_all[[as.character(s)]]$caviar)) {
result_all[[as.character(s)]]$caviar = cbind(pip, signals)
} else {
result_all[[as.character(s)]]$caviar = rbind(result_all[[as.character(s)]]$caviar, cbind(pip, signals))
}
# FINEMAP
out_files = finemap_out[which(finemap_out$lm_less.n_signal == s & finemap_out$liter_data.dataset == d & finemap_out$fit_finemap.args == paste('--n-causal-max', s)),c("fit_finemap.output.file"),drop=FALSE]
finemap = dscrutils::read_dsc(paste0(${dirname:r}, '/', out_files[1,1]))$posterior
pip = finemap[[r]]$snp
pip = pip[order(as.numeric(pip$snp)),]$snp_prob
if (is.null(result_all[[as.character(s)]]$finemap)) {
result_all[[as.character(s)]]$finemap = cbind(pip, signals)
} else {
result_all[[as.character(s)]]$finemap = rbind(result_all[[as.character(s)]]$finemap, cbind(pip, signals))
}
}
}
}
}
roc_data = function(d1, cutoff = c(${pip_cutoff}, 1), connect_org = TRUE) {
grid = 500
ttv = seq(1:grid)/grid
ttv = ttv[which(ttv>=cutoff[1] & ttv<=cutoff[2])]
# see SuSiE-Manuscript issue 2
d1 = d1[order(d1[,1]), ]
end = tail(d1[which(d1[,2] == 0),][,1],1)
ttv = c(ttv[-length(ttv)], min(ttv[length(ttv)], end))
# end of issue 2
rst1 = t(sapply(ttv, function(x) c(sum(d1[,2][d1[,1]>=x]), length(d1[,2][d1[,1]>=x]))))
rst1 = cbind(rst1, sum(d1[,2]))
if (connect_org) {
# connect to origin
last_row = tail(rst1, 1)
rst1 = rbind(rst1, c(last_row[1], last_row[2]-1, last_row[3]), c(0.001,0.001,last_row[3]))
}
rst1 = as.data.frame(rst1)
colnames(rst1) = c('true_positive', 'total_positive', 'total_signal')
if (connect_org) {
rst2 = as.data.frame(cbind(rst1$true_positive / rst1$total_positive, rst1$true_positive / rst1$total_signal, c(ttv, ttv[length(ttv)], 1)))
} else {
rst2 = as.data.frame(cbind(rst1$true_positive / rst1$total_positive, rst1$true_positive / rst1$total_signal, ttv))
}
colnames(rst2) = c('Precision', 'Recall', 'Threshold')
return(list(counts = rst1, rates = rst2))
}
saveRDS(result, "${_output[0]:n}.data.rds")
saveRDS(result_all, "${_output[1]:n}.data.rds")
print("Computing ROC data ...")
susie = roc_data(do.call(rbind, lapply(1:length(result), function(i) result[[i]]$susie)))
dap = roc_data(do.call(rbind, lapply(1:length(result), function(i) result[[i]]$dap)), cutoff = c(${pip_cutoff}, 0.9999), connect_org = FALSE) # DAP output format has numerical artifect
saveRDS(list(data = result, susie = susie, dap = dap), ${_output[0]:r})
#
susie = roc_data(do.call(rbind, lapply(1:length(result_all), function(i) result_all[[i]]$susie)))
dap = roc_data(do.call(rbind, lapply(1:length(result_all), function(i) result_all[[i]]$dap)), cutoff = c(${pip_cutoff}, 0.9999), connect_org = FALSE)
caviar = roc_data(do.call(rbind, lapply(1:length(result_all), function(i) result_all[[i]]$caviar)))
finemap = roc_data(do.call(rbind, lapply(1:length(result_all), function(i) result_all[[i]]$finemap)))
saveRDS(list(data = result_all, susie = susie, dap = dap, finemap=finemap, caviar=caviar), ${_output[1]:r})
[roc_3]
parameter: chunks = 0
parameter: smooth = 'FALSE'
parameter: opt1 = "lwd = 2, xlim = c(0,0.2), ylim = c(0,0.2)"
parameter: opt2 = "lwd = 2, xlim = c(0,0.3), ylim = c(0,0.3)"
output: f'{dirname}/ROC_{date}_prior_{fmtP(susie_prior[0])}.png'
R: expand = '${ }'
colors = c('#A60628', '#7A68A6', '#348ABD', '#467821', '#FF0000', '#188487', '#E2A233',
'#A9A9A9', '#000000', '#FF00FF', '#FFD700', '#ADFF2F', '#00FFFF')
d2 = readRDS(${_input[2*len(null_weight)-2]:r})
d1 = readRDS(${_input[0]:r})
d4 = readRDS(${_input[2*len(null_weight)-1]:r})
d3 = readRDS(${_input[1]:r})
library(scam)
create_chunks = function(item, n) {
splitted = suppressWarnings(split(item, 1:n))
return(c(splitted[[1]], splitted[[length(splitted)]][length(splitted[[length(splitted)]])]))
}
make_smooth = function(x,y,subset=${chunks}, smooth = ${smooth}) {
if (smooth) {
if (subset < length(x) && subset > 0) {
x = create_chunks(x, subset)
y = create_chunks(y, subset)
}
dat = data.frame(cbind(x,y))
colnames(dat) = c('x','y')
y=predict(scam(y ~ s(x, bs = "mpi"), data = dat))
}
return(list(x=x,y=y))
}
add_text = function(thresholds, x, y, threshold, color, delta = 0.015) {
idx = which(thresholds == threshold)
text(x[idx] - delta, y[idx], labels = threshold, col = color)
points(x[idx],y[idx])
}
pdf("${_output:n}.part1.pdf", width=5, height=5, pointsize=15)
yy = make_smooth(1 - d1$susie$rates$Precision, d1$susie$rates$Recall)
plot(yy$x, yy$y, t="l", col=colors[1], ylab = "power", xlab ="FDR", main = "1 ~ 5 effect variables", bty='l', ${opt1})
add_text(d1$susie$rates$Threshold, yy$x, yy$y, 0.9, colors[1])
add_text(d1$susie$rates$Threshold, yy$x, yy$y, 0.95, colors[1])
# lines(1 - d2$susie$rates$Precision, d2$susie$rates$Recall, col=colors[2])
yy = make_smooth(1 - d1$dap$rates$Precision, d1$dap$rates$Recall)
lines(yy$x, yy$y, col=colors[3], ${opt1})
add_text(d1$dap$rates$Threshold, yy$x, yy$y, 0.9, colors[3], -0.015)
add_text(d1$dap$rates$Threshold, yy$x, yy$y, 0.95, colors[3], -0.015)
# legend("bottomright", legend=c("SuSiE", "SuSiE (penalized)", "DAP-G"),
legend("bottomright", legend=c("SuSiE", "DAP-G"),
col=colors[c(1,3)], lty=c(1,1,1), cex=0.8)
dev.off()
system(paste0("convert -density 120 ", ${_output:nr}, '.part1.pdf', " ", ${_output:nr}, '.part1.png'))
pdf("${_output:n}.part2.pdf", width=5, height=5, pointsize=15)
yy = make_smooth(1 - d3$susie$rates$Precision, d3$susie$rates$Recall)
plot(yy$x, yy$y, t="l",
col=colors[1], ylab = "power", xlab ="FDR",
main = "1 ~ 3 effect variables", bty='l', ${opt2})
yy = make_smooth(1 - d3$dap$rates$Precision, d3$dap$rates$Recall)
lines(yy$x, yy$y,col=colors[3], ${opt2})
yy = make_smooth(1 - d3$caviar$rates$Precision, d3$caviar$rates$Recall)
lines(yy$x, yy$y,col=colors[4], ${opt2})
yy = make_smooth(1 - d3$finemap$rates$Precision, d3$finemap$rates$Recall)
lines(yy$x, yy$y,col=colors[7], ${opt2})
legend("bottomright", legend=c("SuSiE", "DAP-G", "CAVIAR", "FINEMAP"),
col=c(colors[1], colors[3], colors[4], colors[7]), lty=c(1,1,1), cex=0.8)
dev.off()
system(paste0("convert -density 120 ", ${_output:nr}, '.part2.pdf', " ", ${_output:nr}, '.part2.png'))
bash: expand = True
convert +append {_output:n}.part1.png {_output:n}.part2.png {_output:n}.png