%revisions -s
Previously I've ran this specific DSC using:
dsc susie.dsc --target hard_case -o hard_case
So here I query from that result for DAP power. Again the settings:
susie parameters:
DAP parameters are default.
[global]
parameter: cwd = path('~/GIT/github/mvarbvs/dsc')
parameter: date = '1008'
# set to 0 to use estimated prior, or another number to use specified prior
# as configured in the DSC
parameter: susie_prior = [0.1, 0]
# null weight options to evaluate
parameter: null_weight = [0.0, 0.5, 0.9, 0.95]
dirname = path(f'{cwd:a}/hard_case/')
def fmtP(x):
return str(x).replace(".", "p")
[power_1]
output: f'{dirname}/Power_comparison_{date}.rds'
R: expand = '${ }', workdir = cwd
dap_out = dscrutils::dscquery(${dirname:br},
target = "full_data.dataset full_data lm_less03 lm_less03.pve lm_less03.n_signal fit_dap")
susie_out = dscrutils::dscquery(${dirname:br},
target = "full_data.dataset full_data lm_less03 lm_less03.pve lm_less03.n_signal fit_susie10.prior_var fit_susie10.null_weight fit_susie10")
saveRDS(list(dap=dap_out, susie=susie_out), ${_output:r})
[power_2]
# Power analysis
# to match with DAP -ld_control 0.25
# to match with susie 95% mappable CS, we set dap cutoff to 0.95 also
dap_cluster_cutoff = [('cluster_prob', 0.95), ('cluster_avg_r2', 0.25)]
input: for_each = ['dap_cluster_cutoff', 'null_weight', 'susie_prior'], group_by = 1, concurrent = True
output: f'{dirname}/Power_comparison_{date}_{_dap_cluster_cutoff[0]}_prior_{fmtP(_susie_prior)}_null_{fmtP(_null_weight)}.rds'
R: stdout = f'{_output:n}.log', expand = '${ }', workdir = cwd
dat = readRDS(${_input:r})
dap_out = dat$dap
susie_out = dat$susie
# favorite susie flavor
susie_out = susie_out[which(susie_out$fit_susie10.prior_var == ${_susie_prior} & susie_out$fit_susie10.null_weight == ${_null_weight}), ]
susie_out = subset(susie_out, select =-c(lm_less03.pve, fit_susie10.prior_var, fit_susie10.null_weight))
data_sets = unique(susie_out$full_data.dataset)
n_signals = unique(susie_out$lm_less03.n_signal)
n_r = 2
n_experiments = n_r * length(data_sets)
result = NULL
overlap_cs = list()
for (s in n_signals) {
susie_signals = 0
dap_signals = 0
susie_size = 0
dap_size = 0
# I cannot find a good median tracker so do it stupid way: save all and take median later
susie_sizes = vector()
dap_sizes = vector()
# do the same for mean ...
susie_avg_ld = vector()
dap_avg_ld = vector()
susie_tdc = 0
dap_tdc = 0
susie_dc = 0
dap_dc = 0
susie_tc = 0
dap_tc = 0
overlap_cs[[as.character(s)]] = NULL
for (d in data_sets) {
out_files = susie_out[which(susie_out$lm_less03.n_signal == s & susie_out$full_data.dataset == d), c("lm_less03.output.file"),drop=FALSE]
truth = dscrutils::read_dsc(paste0(${dirname:r}, '/', out_files[1,1]))$data$true_coef
dap_files = dap_out[which(dap_out$lm_less03.n_signal == s & dap_out$full_data.dataset == d),c("fit_dap.output.file"),drop=FALSE]
dap = dscrutils::read_dsc(paste0(${dirname:r}, '/', dap_files[1,1]))$posterior
susie_files = susie_out[which(susie_out$lm_less03.n_signal == s & susie_out$full_data.dataset == d),c("fit_susie10.output.file"),drop=FALSE]
susie = dscrutils::read_dsc(paste0(${dirname:r}, '/', susie_files[1,1]))$posterior
for (r in 1:n_r) {
signals = which(truth[,r]!=0)
# SuSiE in CS
susie_cs = list()
if (!is.null(susie[[r]]$cs)) {
for (i in 1:nrow(susie[[r]]$cs)) {
susie_cs[[i]] = as.integer(unlist(strsplit(susie[[r]]$cs$variable[i], ",")))
if (any(signals %in% susie_cs[[i]])) {
susie_size = susie_size + length(susie_cs[[i]])
susie_sizes = c(susie_sizes, length(susie_cs[[i]]))
susie_avg_ld = c(susie_avg_ld, sqrt(susie[[r]]$cs$cs_avg_r2[i]))
susie_tdc = susie_tdc + 1
susie_dc = susie_dc + 1
}
}
susie_signals = susie_signals + sum(signals %in% unique(unlist(susie_cs)))
}
# check susie CS overlapping status
if (length(susie_cs) > 0) {
for (i in 1:length(susie_cs)) {
for (j in 1:i) {
if (i == j) next
status = intersect(susie_cs[[i]], susie_cs[[j]])
if (length(status)>0) {
if (is.null(overlap_cs[[as.character(s)]])) overlap_cs[[as.character(s)]] = c(d, r, length(susie_cs[[i]]), length(susie_cs[[j]]), length(status))
else overlap_cs[[as.character(s)]] = rbind(overlap_cs[[as.character(s)]], c(d, r, length(susie_cs[[i]]), length(susie_cs[[j]]), length(status)))
}
}
}
}
print(paste('==============', s, '=============='))
print(susie_cs)
susie_tc = susie_tc + length(susie_cs)
# DAP in cluster
dap_cluster_raw = dap[[as.character(r-1)]]$set[which(dap[[as.character(r-1)]]$set$${_dap_cluster_cutoff[0]} > ${_dap_cluster_cutoff[1]}), ]
dap_cluster_ld = dap_cluster_raw$cluster_avg_r2
dap_cluster_raw = dap_cluster_raw$snp
dap_cluster = list()
if (length(dap_cluster_raw) > 0) {
for (i in 1:length(dap_cluster_raw)) {
dap_cluster[[i]] = as.integer(unlist(strsplit(dap_cluster_raw[i], ",")))
if (any(signals %in% dap_cluster[[i]])) {
dap_size = dap_size + length(dap_cluster[[i]])
dap_sizes = c(dap_sizes, length(dap_cluster[[i]]))
dap_avg_ld = c(dap_avg_ld, sqrt(dap_cluster_ld[i]))
dap_tdc = dap_tdc + 1
dap_dc = dap_dc + 1
}
}
dap_signals = dap_signals + sum(signals %in% unique(unlist(dap_cluster)))
}
print(dap_cluster)
dap_tc = dap_tc + length(dap_cluster)
## BEGIN debug
## susie made more true discovery than DAP
if (susie_dc > dap_dc) {
print('DAP miss')
print(dap[[as.character(r-1)]]$set)
print(d)
print(r)
print(dap_files)
}
## DAP made false discovery, susie did not
## under n = 1
if (length(dap_cluster) > dap_dc && s == 1) {
print('DAP false discovery')
print(dap[[as.character(r-1)]]$set)
print(d)
print(r)
print(dap_files)
}
## SuSiE made false discovery
## under n = 3
if (length(susie_cs) > susie_dc && s == 3) {
print('SuSiE false discovery in data ...')
print(d)
print(r)
print(length(susie_cs))
print(susie_dc)
print(susie_files)
}
## END debug
susie_dc = 0
dap_dc = 0
}
}
rates = c(s, s*n_experiments, susie_tc, dap_tc, susie_signals/s/n_experiments, dap_signals/s/n_experiments, susie_tdc/susie_tc, dap_tdc/dap_tc, susie_size / susie_tdc, dap_size / dap_tdc, median(susie_sizes), median(dap_sizes), mean(susie_avg_ld,na.rm=T), mean(dap_avg_ld))
if (is.null(result)) {
result = rates
} else {
result = rbind(result, rates)
}
}
headers = c('n_signal', 'expected_discoveries', 'susie_discoveries', 'dap_discoveries', 'susie_power', 'dap_power', 'susie_coverage', 'dap_coverage', 'susie_avg_size', 'dap_avg_size', 'susie_median_size', 'dap_median_size', 'susie_avg_ld', 'dap_avg_ld')
result = matrix(result, length(n_signals), length(headers))
colnames(result) = headers
rownames(result) = as.character(result[,1])
saveRDS(data.frame(result), ${_output:r})
saveRDS(overlap_cs, "${_output:n}.overlap_status.rds")
%cd ~/GIT/github/mvarbvs/dsc
readRDS('hard_case/DAP_comparison_0801_cluster_prob_estvar_true.rds')
readRDS('hard_case/DAP_comparison_0801_cluster_prob_estvar_false.rds')