Multivariate Bayesian variable selection regression

Pre-computing various second-moment related quantities

This saves computation for M&M by precomputing and re-using quantitaties shared between iterations. It mostly saves $O(R^3)$ computations. This vignette shows results agree with the original version. Cannot use unit test due to numerical discrepency between chol of amardillo and R -- this has been shown problematic for some computations. I'll have to improve mashr for it.

In [1]:
muffled_chol = function(x, ...)
withCallingHandlers(chol(x, ...),
                    warning = function(w) {
                      if (grepl("the matrix is either rank-deficient or indefinite", w$message))
                        invokeRestart("muffleWarning")
                    } )
In [2]:
set.seed(1)
library(mashr)
simdata = simple_sims(500,5,1)
data = mash_set_data(simdata$Bhat, simdata$Shat, alpha = 0)
U.c = cov_canonical(data)
grid = mashr:::autoselect_grid(data,sqrt(2))
Ulist = mashr:::normalize_Ulist(U.c)
xUlist = expand_cov(Ulist,grid,TRUE)
llik_mat0 = mashr:::calc_lik_rcpp(t(data$Bhat),t(data$Shat),data$V,
                             matrix(0,0,0), simplify2array(xUlist),T,T)$data
svs = data$Shat[1,] * t(data$V * data$Shat[1,])
sigma_rooti = list()
for (i in 1:length(xUlist)) sigma_rooti[[i]] = t(backsolve(muffled_chol(svs + xUlist[[i]], pivot=T), diag(nrow(svs))))
llik_mat = mashr:::calc_lik_common_rcpp(t(data$Bhat),
                                 simplify2array(sigma_rooti),
                                 T)$data
Loading required package: ashr
In [3]:
head(llik_mat0)
A matrix: 6 × 151 of type dbl
-6.614617-6.618552-6.617560-6.613450-6.616111-6.611883-6.618015-6.633439-6.622304-6.626036-16.84729-8.927616-8.419585-8.748621-8.225967-8.983972 -9.837773-16.62644-16.03729-14.85275
-6.831829-6.834020-6.835287-6.832223-6.834385-6.827677-6.831764-6.829151-6.832735-6.831496-16.84892-9.208583-8.829744-9.097031-8.267823-8.773054 -9.503088-16.62518-16.03369-14.84397
-6.695994-6.699277-6.698464-6.689499-6.695493-6.699789-6.700010-6.715781-6.703436-6.707573-16.84790-8.950472-7.842302-8.583297-9.114239-9.141571 -9.943916-16.62740-16.03880-14.85582
-5.355281-5.369328-5.357995-5.359199-5.355141-5.359134-5.358983-5.373266-5.370355-5.371353-16.83787-7.640063-7.788803-7.287280-7.780747-7.762101 -8.556955-16.61377-16.01831-14.81529
-5.076525-5.092811-5.080164-5.077245-5.080430-5.080531-5.080543-5.096056-5.093674-5.094503-16.83578-7.475559-7.114715-7.508368-7.520835-7.522408 -8.317861-16.61124-16.01466-14.80822
-7.535281-7.531825-7.537303-7.532084-7.535045-7.529715-7.538802-7.546298-7.535427-7.539041-16.85418-9.734424-9.089307-9.455301-8.796534-9.919744-10.558070-16.63436-16.04849-14.87421
In [4]:
head(llik_mat)
A matrix: 6 × 151 of type dbl
-6.614617-6.618552-6.617560-6.617560-6.617560-6.617560-6.617560-6.633439-6.622304-6.626036-16.84729-8.927616-8.927616-8.927616-8.927616-8.927616 -9.837773-16.62644-16.03729-14.85275
-6.831829-6.834020-6.835287-6.835287-6.835287-6.835287-6.835287-6.829151-6.832735-6.831496-16.84892-9.208583-9.208583-9.208583-9.208583-9.208583 -9.503088-16.62518-16.03369-14.84397
-6.695994-6.699277-6.698464-6.698464-6.698464-6.698464-6.698464-6.715781-6.703436-6.707573-16.84790-8.950472-8.950472-8.950472-8.950472-8.950472 -9.943916-16.62740-16.03880-14.85582
-5.355281-5.369328-5.357995-5.357995-5.357995-5.357995-5.357995-5.373266-5.370355-5.371353-16.83787-7.640063-7.640063-7.640063-7.640063-7.640063 -8.556955-16.61377-16.01831-14.81529
-5.076525-5.092811-5.080164-5.080164-5.080164-5.080164-5.080164-5.096056-5.093674-5.094503-16.83578-7.475559-7.475559-7.475559-7.475559-7.475559 -8.317861-16.61124-16.01466-14.80822
-7.535281-7.531825-7.537303-7.537303-7.537303-7.537303-7.537303-7.546298-7.535427-7.539041-16.85418-9.734424-9.734424-9.734424-9.734424-9.734424-10.558070-16.63436-16.04849-14.87421
In [9]:
rows <- which(apply(llik_mat,2,function (x) any(is.infinite(x))))
if (length(rows) > 0)
    warning(paste("Some mixture components result in non-finite likelihoods,",
                          "either\n","due to numerical underflow/overflow,",
                          "or due to invalid covariance matrices",
                          paste(rows,collapse=", "), "\n"))
loglik_null = llik_mat[,1]
lfactors = apply(llik_mat,1,max)
llik_mat = llik_mat - lfactors
mixture_posterior_weights = mashr:::compute_posterior_weights(1/ncol(llik_mat), exp(llik_mat))
post0 = mashr:::calc_post_rcpp(t(data$Bhat), t(data$Shat), matrix(0,0,0), matrix(0,0,0), 
                              data$V,
                              matrix(0,0,0), matrix(0,0,0), 
                              simplify2array(xUlist),
                              t(mixture_posterior_weights),
                              T, 4)
In [10]:
Vinv = solve(svs)
U0 = list()
for (i in 1:length(xUlist)) U0[[i]] = xUlist[[i]] %*% solve(Vinv %*% xUlist[[i]] + diag(nrow(xUlist[[i]])))
In [11]:
post = mashr:::calc_post_precision_rcpp(t(data$Bhat), t(data$Shat), matrix(0,0,0), matrix(0,0,0), 
                              data$V,
                              matrix(0,0,0), matrix(0,0,0), 
                              Vinv,
                              simplify2array(U0),
                              t(mixture_posterior_weights),
                              4)
In [12]:
head(post$post_mean)
A matrix: 6 × 5 of type dbl
0.03023996-0.09104803-0.065958732 0.0870959372-0.0368528016
-0.01197680-0.11237334 0.005281632-0.1483115322-0.1167708502
-0.04478144 0.12249719-0.077508133 0.0198387907-0.0029075669
-0.02611321 0.02045102 0.073993548-0.0033532066 0.0279024790
0.01468888-0.05841497 0.006610546 0.0007731817-0.0006098738
-0.03702274 0.14111328-0.065148199 0.1588718245 0.0554540584
In [13]:
head(post0$post_mean)
A matrix: 6 × 5 of type dbl
0.03023996-0.09104803-0.065958732 0.0870959372-0.0368528016
-0.01197680-0.11237334 0.005281632-0.1483115322-0.1167708502
-0.04478144 0.12249719-0.077508133 0.0198387907-0.0029075669
-0.02611321 0.02045102 0.073993548-0.0033532066 0.0279024790
0.01468888-0.05841497 0.006610546 0.0007731817-0.0006098738
-0.03702274 0.14111328-0.065148199 0.1588718245 0.0554540584
In [14]:
head(post$post_cov)
  1. 0.0970547786416048
  2. 0.0106815929364079
  3. 0.0121522694564318
  4. 0.0211239793871823
  5. 0.0138583916329948
  6. 0.0106815929364079
In [15]:
head(post0$post_cov)
  1. 0.0970547786416048
  2. 0.010681592936408
  3. 0.0121522694564318
  4. 0.0211239793871823
  5. 0.0138583916329948
  6. 0.0106815929364079

Now test the relevant mvsusieR interface:

In [1]:
simulate_multivariate = function(n=100,p=100,r=2) {
  set.seed(1)
  res = mvsusieR::mvsusie_sim1(n,p,r,4,center_scale=TRUE)
  res$L = 10
  return(res)
}
attach(simulate_multivariate(r=2))
In [2]:
prior_var = V[1,1]
residual_var = as.numeric(var(y))
data = mvsusieR:::DenseData$new(X,y)
A = mvsusieR:::BayesianSimpleRegression$new(ncol(X), residual_var, prior_var)
A$fit(data, save_summary_stats = T)
null_weight = 0
mash_init = mvsusieR:::MashInitializer$new(list(V), 1, 1 - null_weight, null_weight)
In [11]:
residual_covar = cov(y)
mash_init$precompute_cov_matrices(data, residual_covar)
In [12]:
B = mvsusieR:::MashRegression$new(ncol(X), residual_covar, mash_init)
In [13]:
B$fit(data, save_summary_stats = T)

Copyright © 2016-2020 Gao Wang et al at Stephens Lab, University of Chicago