Figure to summarize numerical comparison results. See this notebook for its input data.
I plan to make 3 types of comparisons: oracle, mismatched and default. In particular I'll put together results from all scenarios (averaged), singleton scenario and shared (with heterogenous effect size), in 3 panels, for 6 quantities:
%cd ~/GIT/github/mnm-twas/dsc
res = readRDS('../data/finemap_output.query_result.rds')
res = res[,c(2,4,5,6,7,8,9,10,11,12,13,14,15)]
colnames(res) = c('pattern', 'method', 'total', 'valid', 'size', 'purity', 'top_hit', 'total_true', 'total_true_included', 'overlap', 'false_positive_cross_cond', 'false_negative_cross_cond', 'true_positive_cross_cond')
oracle = "oracle"
mismatch = "mismatch"
default = "default"
purity = aggregate(purity~pattern + method, res, mean)
purity$scenario = rep(NA, nrow(purity))
purity$scenario[which(purity$method == purity$pattern & purity$method != 'mixture_1')] = oracle
purity$scenario[which(purity$method != purity$pattern & purity$method != 'mixture_1')] = mismatch
purity$scenario[which(purity$method == "mixture_1")] = default
purity = purity[which(!is.na(purity$scenario)),]
purity_median = aggregate(purity~scenario, purity, median)
purity_median
purity_singleton = purity[which(purity$pattern == 'singleton'),]
purity_median_singleton = aggregate(purity~scenario, purity_singleton, median)
purity_median_singleton
purity_het = purity[which(purity$pattern == 'low_het'),]
purity_median_het = aggregate(purity~scenario, purity_het, median)
purity_median_het
size = aggregate(size~pattern + method, res, mean)
size$scenario = rep(NA, nrow(size))
size$scenario[which(size$method == size$pattern & size$method != 'mixture_1')] = oracle
size$scenario[which(size$method != size$pattern & size$method != 'mixture_1')] = mismatch
size$scenario[which(size$method == "mixture_1")] = default
size = size[which(!is.na(size$scenario)),]
size_median = aggregate(size~scenario, size, median)
size_singleton = size[which(size$pattern == 'singleton'),]
size_median_singleton = aggregate(size~scenario, size_singleton, median)
size_het = size[which(size$pattern == 'low_het'),]
size_median_het = aggregate(size~scenario, size_het, median)
size_median
size_median_singleton
size_median_het
valid = aggregate(valid ~ pattern + method, res, sum)
total = aggregate(total ~ pattern + method, res, sum)
fdr = merge(valid, total, by = c("pattern", "method"))
fdr$fdr = (fdr$total - fdr$valid)/fdr$total
fdr$scenario = rep(NA, nrow(fdr))
fdr$scenario[which(fdr$method == fdr$pattern & fdr$method != 'mixture_1')] = oracle
fdr$scenario[which(fdr$method != fdr$pattern & fdr$method != 'mixture_1')] = mismatch
fdr$scenario[which(fdr$method == "mixture_1")] = default
fdr = fdr[which(!is.na(fdr$scenario)),]
fdr_mean = aggregate(fdr~scenario, fdr, mean)
fdr_singleton = fdr[which(fdr$pattern == 'singleton'),]
fdr_mean_singleton = aggregate(fdr~scenario, fdr_singleton, mean)
fdr_het = fdr[which(fdr$pattern == 'low_het'),]
fdr_mean_het = aggregate(fdr~scenario, fdr_het, mean)
fdr_mean
fdr_mean_singleton
fdr_mean_het
total_true_included = aggregate(total_true_included ~ pattern + method, res, sum)
total_true = aggregate(total_true ~ pattern + method, res, sum)
overlap = aggregate(overlap ~ pattern + method, res, mean)
power = merge(total_true_included, total_true, by = c("pattern", "method"))
power = merge(power, overlap, by = c("pattern", "method"))
power$power = power$total_true_included/power$total_true
power$scenario = rep(NA, nrow(power))
power$scenario[which(power$method == power$pattern & power$method != 'mixture_1')] = oracle
power$scenario[which(power$method != power$pattern & power$method != 'mixture_1')] = mismatch
power$scenario[which(power$method == "mixture_1")] = default
power = power[which(!is.na(power$scenario)),]
power_mean = aggregate(power~scenario, power, mean)
power_singleton = power[which(power$pattern == 'singleton'),]
power_mean_singleton = aggregate(power~scenario, power_singleton, mean)
power_het = power[which(power$pattern == 'low_het'),]
power_mean_het = aggregate(power~scenario, power_het, mean)
power_mean
power_mean_singleton
power_mean_het
tp = aggregate(true_positive_cross_cond ~ pattern + method, res, sum)
fp = aggregate(false_positive_cross_cond ~ pattern + method, res, sum)
fdr_cond = merge(tp, fp, by = c("pattern", "method"))
fdr_cond$fdr_cond = fdr_cond$false_positive_cross_cond/(fdr_cond$true_positive_cross_cond + fdr_cond$false_positive_cross_cond)
fdr_cond = fdr_cond[order(fdr_cond$method),]
fdr_cond$scenario = rep(NA, nrow(fdr_cond))
fdr_cond$scenario[which(fdr_cond$method == fdr_cond$pattern & fdr_cond$method != 'mixture_1')] = oracle
fdr_cond$scenario[which(fdr_cond$method != fdr_cond$pattern & fdr_cond$method != 'mixture_1')] = mismatch
fdr_cond$scenario[which(fdr_cond$method == "mixture_1")] = default
fdr_cond = fdr_cond[which(!is.na(fdr_cond$scenario)),]
fdr_cond_mean = aggregate(fdr_cond~scenario, fdr_cond, mean)
fdr_cond_singleton = fdr_cond[which(fdr_cond$pattern == 'singleton'),]
fdr_cond_mean_singleton = aggregate(fdr_cond~scenario, fdr_cond_singleton, mean)
fdr_cond_het = fdr_cond[which(fdr_cond$pattern == 'low_het'),]
fdr_cond_mean_het = aggregate(fdr_cond~scenario, fdr_cond_het, mean)
fdr_cond_mean
fdr_cond_mean_singleton
fdr_cond_mean_het
tp = aggregate(true_positive_cross_cond ~ pattern + method, res, sum)
fn = aggregate(false_negative_cross_cond ~ pattern + method, res, sum)
power_cond = merge(tp, fn, by = c("pattern", "method"))
power_cond$power_cond = power_cond$true_positive_cross_cond/(power_cond$true_positive_cross_cond + power_cond$false_negative_cross_cond)
power_cond$scenario = rep(NA, nrow(power_cond))
power_cond$scenario[which(power_cond$method == power_cond$pattern & power_cond$method != 'mixture_1')] = oracle
power_cond$scenario[which(power_cond$method != power_cond$pattern & power_cond$method != 'mixture_1')] = mismatch
power_cond$scenario[which(power_cond$method == "mixture_1")] = default
power_cond = power_cond[which(!is.na(power_cond$scenario)),]
power_cond_mean = aggregate(power_cond~scenario, power_cond, mean)
power_cond_singleton = power_cond[which(power_cond$pattern == 'singleton'),]
power_cond_mean_singleton = aggregate(power_cond~scenario, power_cond_singleton, mean)
power_cond_het = power_cond[which(power_cond$pattern == 'low_het'),]
power_cond_mean_het = aggregate(power_cond~scenario, power_cond_het, mean)
power_cond_mean
power_cond_mean_singleton
power_cond_mean_het
Notice the per condition power looks a lot higher than the other analysis, because most powerful tests are for shared effects, which will get counted $R$ times for each signal in per condition analysis, but only are counted one time in overall assessment. Therefore singleton scenarios are relatively more abundant in overall assessment, leading to less powerful tests.
ls()
output = list(fdr_cond=fdr_cond_mean, fdr_cond_het=fdr_cond_mean_het, fdr_cond_singleton=fdr_cond_mean_singleton,
fdr=fdr_mean, fdr_het=fdr_mean_het, fdr_singleton=fdr_mean_singleton,
power_cond=power_cond_mean, power_cond_het=power_cond_mean_het, power_cond_singleton=power_cond_mean_singleton,
power=power_mean, power_het=power_mean_het, power_singleton=power_mean_singleton,
purity=purity_median, purity_het=purity_median_het, purity_singleton=purity_median_singleton,
size=size_median, size_het=size_median_het, size_singleton=size_median_singleton)
saveRDS(output, '../data/finemap_output.summarized_result.rds')
output = readRDS('../data/finemap_output.summarized_result.rds')
get_table = function(output, key) {
dat = Reduce(function(x, y) merge(x, y, by = 'scenario'), list(output[[paste0(key,'_het')]], output[[paste0(key,'_singleton')]], output[[key]]))
dat = reshape2::melt(dat, id.vars = c("scenario"))
dat$variable = NULL
dat = cbind(dat, c(rep('shared',3), rep('singleton',3), rep('all',3)))
dat = cbind(dat, c(1,1,1,2,2,2,3,3,3))
dat = cbind(dat, c(2,3,1,2,3,1,2,3,1))
names(dat) = c('method', key, 'scenario', 'o1', 'o2')
return(dat)
}
colors = RColorBrewer::brewer.pal(n = 8, name = "Set1")
colors
library(lattice)
default_panel = function(x, y, ...) {
panel.points(x, y, col=THE_COLOR, pch=16, cex=1.1)
}
W = 3.2
H2 = 4
H1 = 2.9
size = get_table(output, 'size')
pdf('1.pdf', width=W, height=H1)
THE_COLOR=colors[3]
dotplot(size ~ reorder(method, o2) | reorder(scenario, o1), data=size,
auto.key=list(columns=2), scale=list(alternating=1, x = list(draw=F)),
layout = c(3,1), angle = 45, panel=default_panel,
xlab="", ylab = "Median number of variables")
dev.off()
size
output$size
output$size_singleton
output$size_het
%preview 1.pdf -s png --dpi 100
purity = get_table(output, 'purity')
pdf('2.pdf', width=W, height=H1)
THE_COLOR=colors[2]
dotplot(purity ~ reorder(method, o2) | reorder(scenario, o1), data=purity,
auto.key=list(columns=2), scale=list(alternating=1, x = list(draw = F)),
layout = c(3,1), angle = 45, panel=default_panel, ylim = c(0.8,1),
xlab="", ylab = "Minimum absolute correlation")
dev.off()
%preview 2.pdf -s png --dpi 100
fdr = get_table(output, 'fdr')
pdf('3.pdf', width=W, height=H1)
THE_COLOR = colors[1]
my_panel = function(x, y, subscripts, ...) {
panel.points(x, y, col=THE_COLOR, pch=16, cex=1.1)
panel.abline(h=0.95, col = "#800000", lty = 2)
}
fdr$coverage = 1 - fdr$fdr
dotplot(coverage ~ reorder(method, o2) | reorder(scenario, o1), data=fdr,
auto.key=list(columns=2), scales=list(alternating=1, x = list(draw = F)),
layout = c(3,1), angle = 45, panel = my_panel,
xlab="", ylab = "Cross-condition coverage", ylim = c(0.4,1))
dev.off()
%preview 3.pdf -s png --dpi 100
power = get_table(output, 'power')
pdf('4.pdf', width=W, height=H2)
THE_COLOR = colors[4]
dotplot(power ~ reorder(method, o2) | reorder(scenario, o1), data=power,
auto.key=list(columns=2), scales=list(alternating=1, x = list(rot = 45)),
layout = c(3,1), angle = 45, panel = default_panel,
xlab="Method", ylab = "Cross-condition power", ylim = c(0.4,1.05))
dev.off()
%preview 4.pdf -s png --dpi 100
fdr_cond = get_table(output, 'fdr_cond')
pdf('5.pdf', width=W, height=H2)
THE_COLOR = colors[5]
my_panel = function(x, y, subscripts, ...) {
panel.points(x, y, col=THE_COLOR, pch=16, cex=1.1)
panel.abline(h=0.05, col = "#800000", lty = 2)
}
dotplot(fdr_cond ~ reorder(method, o2) | reorder(scenario, o1), data=fdr_cond,
auto.key=list(columns=2), scales=list(alternating=1, x = list(rot = 45)),
layout = c(3,1), angle = 45, panel = my_panel, ylim = c(0,0.6),
xlab="Method", ylab = "Condition specific FDR (for lfsr<0.05)")
dev.off()
%preview 5.pdf -s png --dpi 100
power_cond = get_table(output, 'power_cond')
pdf('6.pdf', width=W, height=H2)
THE_COLOR = colors[8]
dotplot(power_cond ~ reorder(method, o2) | reorder(scenario, o1), data=power_cond,
auto.key=list(columns=2), scales=list(alternating=1, x = list(rot = 45)),
layout = c(3,1), angle = 45, panel = default_panel, ylim = c(0.75,1.05),
xlab="Method", ylab = "Condition specific power (for lfsr<0.05)")
dev.off()
%preview 6.pdf -s png --dpi 100
mkdir -p finemap_output
convert -density 500 -quality 100 \
\( 1.pdf 2.pdf 3.pdf -density 500 -quality 100 +append \) \
\( 4.pdf 5.pdf 6.pdf +append \) \
-append finemap_output/finemap_output_comparisons.png # add parameter -flatten to make it non-transparent
%preview finemap_output/finemap_output_comparisons.png --width 50%