Multivariate Bayesian variable selection regression

Prototype of core update in M&M ASH model

This is the core update for the VEM step of the M&M ASH model, version 2: the single-SNP calculation under the MASH model.

This notebook prototypes the code in the libgaow repo.

$\newcommand{\bs}[1]{\boldsymbol{#1}}$ $\DeclareMathOperator*{\diag}{diag}$ $\DeclareMathOperator*{\cov}{cov}$ $\DeclareMathOperator*{\rank}{rank}$ $\DeclareMathOperator*{\var}{var}$ $\DeclareMathOperator*{\tr}{tr}$ $\DeclareMathOperator*{\veco}{vec}$ $\DeclareMathOperator*{\uniform}{\mathcal{U}niform}$ $\DeclareMathOperator*{\argmin}{arg\ min}$ $\DeclareMathOperator*{\argmax}{arg\ max}$ $\DeclareMathOperator*{\N}{N}$ $\DeclareMathOperator*{\gm}{Gamma}$ $\DeclareMathOperator*{\dif}{d}$
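
For orientation, here is a brief recap of the single-SNP MASH model the code below implements (added as a summary; notation follows mashr). For effect $j$ with observed effects $\hat{\bs{b}}_j$ and standard errors $\bs{s}_j$ across $R$ conditions,

$$\hat{\bs{b}}_j \mid \bs{b}_j \sim \N(\bs{b}_j, \bs{V}_j), \quad \bs{V}_j = \diag(\bs{s}_j) \, \bs{V} \diag(\bs{s}_j), \quad \bs{b}_j \sim \pi_0 \delta_{\bs{0}} + \sum_{p \ge 1} \pi_p \N(\bs{0}, \bs{U}_p),$$

where $\bs{V}$ is the error correlation across conditions, $\delta_{\bs{0}}$ is the null point mass, and each $\bs{U}_p$ is a prior covariance matrix expanded over a scaling grid. The classes below compute the mixture likelihood, the posterior mixture weights, and the mixture posterior for $\bs{b}_j$.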

In [1]:
%cd /home/gaow/GIT/software/libgaow/py/src/
/home/gaow/GIT/software/libgaow/py/src
In [2]:
dat = readRDS('/home/gaow/Documents/GTExV8/Thyroid.Lung.FMO2.filled.rds')
str(dat)
attach(dat)
List of 2
 $ Y:'data.frame':	698 obs. of  2 variables:
  ..$ Thyroid: num [1:698] 0.163 0.436 -0.212 0.327 -0.698 ...
  ..$ Lung   : num [1:698] 0.77011 0.77799 -0.65361 0.00672 -0.36792 ...
 $ X: num [1:698, 1:7492] 1 0 0 0 0 1 1 0 1 1 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:698] "GTEX-111CU" "GTEX-111FC" "GTEX-111VG" "GTEX-111YS" ...
  .. ..$ : chr [1:7492] "chr1_170185243_G_A_b38" "chr1_170185272_T_C_b38" "chr1_170185405_C_A_b38" "chr1_170185417_G_A_b38" ...
In [3]:
%get X Y --from R
import numpy as np
Loading required package: feather

Data preview

In [4]:
X
Out[4]:
array([[ 1.,  0.,  0., ...,  0.,  1.,  0.],
       [ 0.,  0.,  0., ...,  0.,  1.,  0.],
       [ 0.,  1.,  0., ...,  0.,  1.,  0.],
       ..., 
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  1.,  0.],
       [ 0.,  0.,  0., ...,  0.,  1.,  0.]])
In [5]:
Y = Y.as_matrix()
In [6]:
Y
Out[6]:
array([[ 0.16348104,  0.77010917],
       [ 0.43588995,  0.77798736],
       [-0.21237311, -0.65361193],
       ..., 
       [ 0.62036618, -0.0035004 ],
       [ 0.00279156, -0.05439095],
       [-0.14650835,  0.29935286]])

Utility: mash model

In [155]:
%save /home/gaow/GIT/software/libgaow/py/src/model_mash.py -f
#!/usr/bin/env python3
__author__ = "Gao Wang"
__copyright__ = "Copyright 2016, Stephens lab"
__email__ = "gaow@uchicago.edu"
__license__ = "MIT"
__version__ = "0.1.0"

import numpy as np, scipy as sp
from scipy.stats import norm, multivariate_normal as mvnorm
from collections import OrderedDict

def inv_sympd(m):
    '''
    Inverse of symmetric positive definite
    https://stackoverflow.com/questions/40703042/more-efficient-way-to-invert-a-matrix-knowing-it-is-symmetric-and-positive-semi
    '''
    zz , _ = sp.linalg.lapack.dpotrf(m, False, False)
    inv_m , info = sp.linalg.lapack.dpotri(zz)
    # lapack only returns the upper or lower triangular part
    return np.triu(inv_m) + np.triu(inv_m, k=1).T

def get_svs(s, V):
    '''
    diag(s) @ V @ diag(s)
    '''
    return (s * V.T).T * s

def safe_mvnorm_logpdf(val, cov):
    '''
    Multivariate normal log density, with a fallback for singular covariance
    (e.g. the null point-mass component): treat it as a point mass at zero,
    i.e. log density +inf at zero and -inf elsewhere
    '''
    try:
        return mvnorm.logpdf(val, cov=cov)
    except np.linalg.LinAlgError:
        if len(val.shape) == 1:
            return np.inf if np.sum(val) < 1E-6 else -np.inf
        else:
            return np.array([np.inf if np.sum(x) < 1E-6 else -np.inf for x in val.T])

class LikelihoodMASH:
    def __init__(self, data):
        self.J = data.B.shape[0]
        self.R = data.B.shape[1]
        self.P = len(data.U)
        self.data = data
        self.data.lik = {'relative_likelihood' : None,
                         'lfactor': None,
                         'marginal_loglik': None,
                         'loglik': None,
                         'null_loglik': None,
                         'alt_loglik': None}
        self.debug = None

    def compute_log10bf(self):
        self.data.l10bf = (self.data.lik['alt_loglik'] -  self.data.lik['null_loglik']) / np.log(10)

    def compute_relative_likelihood_matrix(self):
        matrix_llik = self._calc_likelihood_matrix_comcov() if self.data.is_common_cov() \
                      else self._calc_likelihood_matrix()
        # subtract the per-effect max log-likelihood (lfactor) before
        # exponentiating, so the mixture computation does not underflow
        lfactors = np.amax(matrix_llik, axis = 1)
        self.data.lik['relative_likelihood'] = np.exp(matrix_llik - np.vstack(lfactors))
        self.data.lik['lfactor'] = lfactors

    def _calc_likelihood_matrix(self):
        loglik = np.zeros((self.J, self.P))
        for j in range(self.J):
            sigma_mat = get_svs(self.data.S[j,:], self.data.V)
            loglik[j,:] = np.array([safe_mvnorm_logpdf(self.data.B[j,:], sigma_mat + self.data.U[p]) for p in self.data.U])
        return loglik

    def _calc_likelihood_matrix_comcov(self):
        sigma_mat = get_svs(self.data.S[0,:], self.data.V)
        return np.array([safe_mvnorm_logpdf(self.data.B, sigma_mat + self.data.U[p]) for p in self.data.U]).T

    def compute_loglik_from_matrix(self, options = ['marginal', 'alt', 'null']):
        '''
        data.lik.relative_likelihood first column is null, the rest are alt
        '''
        if 'marginal' in options:
            self.data.lik['marginal_loglik'] = np.log(self.data.lik['relative_likelihood'] @ self.data.pi) + self.data.lik['lfactor']
            self.data.lik['loglik'] = np.sum(self.data.lik['marginal_loglik'])
        if 'alt' in options:
            self.data.lik['alt_loglik'] = np.log(self.data.lik['relative_likelihood'][:,1:] @ (self.data.pi[1:] / (1 - self.data.pi[0]))) + self.data.lik['lfactor']
        if 'null' in options:
            self.data.lik['null_loglik'] = np.log(self.data.lik['relative_likelihood'][:,0]) + self.data.lik['lfactor']

class PosteriorMASH:
    def __init__(self, data):
        '''
        @param B J by R matrix of effect size estimates
        @param S J by R matrix of standard errors
        @param V R by R error correlation matrix
        @param U collection of P prior covariance matrices, each R by R
        '''
        self.J = data.B.shape[0]
        self.R = data.B.shape[1]
        self.P = len(data.U)
        self.data = data
        self.data.post_mean_mat = np.zeros((self.R, self.J))
        self.data.post_mean2_mat = np.zeros((self.R, self.J))
        self.data.neg_prob_mat = np.zeros((self.R, self.J))
        self.data.zero_prob_mat = np.zeros((self.R, self.J))

    def compute_posterior_weights(self):
        d = (self.data.pi * self.data.lik['relative_likelihood'])
        self.data.posterior_weights = (d.T / np.sum(d, axis = 1))

    def compute_posterior(self):
        for j in range(self.J):
            Vinv_mat = inv_sympd(get_svs(self.data.S[j,:], self.data.V))
            mu1_mat = np.zeros((self.R, self.P))
            mu2_mat = np.zeros((self.R, self.P))
            zero_mat = np.zeros((self.R, self.P))
            neg_mat = np.zeros((self.R, self.P))
            for p, name in enumerate(self.data.U.keys()):
                U1_mat = self.get_posterior_cov(Vinv_mat, self.data.U[name])
                mu1_mat[:,p] = self.get_posterior_mean_vec(self.data.B[j,:], Vinv_mat, U1_mat)
                sigma_vec = np.sqrt(np.diag(U1_mat))
                null_cond = (sigma_vec == 0)
                mu2_mat[:,p] = np.square(mu1_mat[:,p]) + np.diag(U1_mat)
                if not null_cond.all():
                    neg_mat[np.invert(null_cond),p] = norm.sf(mu1_mat[np.invert(null_cond),p], scale=sigma_vec[np.invert(null_cond)])
                zero_mat[null_cond,p] = 1.0
            self.data.post_mean_mat[:,j] = mu1_mat @ self.data.posterior_weights[:,j]
            self.data.post_mean2_mat[:,j] = mu2_mat @ self.data.posterior_weights[:,j]
            self.data.neg_prob_mat[:,j] = neg_mat @ self.data.posterior_weights[:,j]
            self.data.zero_prob_mat[:,j] = zero_mat @ self.data.posterior_weights[:,j]

    def compute_posterior_comcov(self):
        Vinv_mat = inv_sympd(get_svs(self.data.S[0,:], self.data.V))
        for p, name in enumerate(self.data.U.keys()):
            zero_mat = np.zeros((self.R, self.J))
            U1_mat = self.get_posterior_cov(Vinv_mat, self.data.U[name])
            mu1_mat = self.get_posterior_mean_mat(self.data.B, Vinv_mat, U1_mat)
            sigma_vec = np.sqrt(np.diag(U1_mat))
            null_cond = (sigma_vec == 0)
            sigma_mat = np.tile(sigma_vec, (self.J, 1)).T
            neg_mat = np.zeros((self.R, self.J))
            if not null_cond.all():
                neg_mat[np.invert(null_cond),:] = norm.sf(mu1_mat[np.invert(null_cond),:], scale = sigma_mat[np.invert(null_cond),:])
            mu2_mat = np.square(mu1_mat) + np.vstack(np.diag(U1_mat))
            zero_mat[null_cond,:] = 1.0
            self.data.post_mean_mat += self.data.posterior_weights[p,:] * mu1_mat
            self.data.post_mean2_mat += self.data.posterior_weights[p,:] * mu2_mat
            self.data.neg_prob_mat += self.data.posterior_weights[p,:] * neg_mat
            self.data.zero_prob_mat += self.data.posterior_weights[p,:] * zero_mat

    @staticmethod
    def get_posterior_mean_vec(B, V_inv, U):
        # posterior mean for one effect: U1 V^{-1} b, where U is the
        # posterior covariance U1 returned by get_posterior_cov()
        return U @ (V_inv @ B)

    @staticmethod
    def get_posterior_mean_mat(B, V_inv, U):
        # same computation for all effects at once, returned as R by J
        return (B @ V_inv @ U).T

    @staticmethod
    def get_posterior_cov(V_inv, U):
        # posterior covariance U1 = U (V^{-1} U + I)^{-1}
        return U @ inv_sympd(V_inv @ U + np.identity(U.shape[0]))

    @classmethod
    def apply(cls, data):
        obj = cls(data)
        obj.compute_posterior_weights()
        if data.is_common_cov():
            obj.compute_posterior_comcov()
        else:
            obj.compute_posterior()

class PriorMASH:
    def __init__(self, data):
        self.data = data
        self.R = data.B.shape[1]

    def expand_cov(self, use_pointmass = True):
        def product(x,y):
            for item in y:
                yield x*item
        res = OrderedDict()
        if use_pointmass:
            res['null'] = np.zeros((self.R, self.R))
        res.update(OrderedDict(sum([[(f"{p}.{i+1}", g) for i, g in enumerate(product(self.data.U[p], np.square(self.data.grid)))] for p in self.data.U], [])))
        self.data.U = res
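
To make the linear algebra in PosteriorMASH easier to follow, this is the standard normal-normal conjugate update it applies for each effect $j$ and mixture component $p$ (a summary added for orientation; it matches get_posterior_cov and get_posterior_mean_vec above):

$$\bs{U}_{1,p} = \bs{U}_p (\bs{V}_j^{-1} \bs{U}_p + \bs{I})^{-1}, \qquad \bs{\mu}_{1,p} = \bs{U}_{1,p} \bs{V}_j^{-1} \hat{\bs{b}}_j,$$

with $\bs{V}_j = \diag(\bs{s}_j) \, \bs{V} \diag(\bs{s}_j)$ as before. The reported posterior means, second moments, and negative/zero sign probabilities are these component-wise quantities averaged under the posterior weights from compute_posterior_weights.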

Utility: regression data

In [156]:
%save /home/gaow/GIT/software/libgaow/py/src/regression_data.py -f
#!/usr/bin/env python3
__author__ = "Gao Wang"
__copyright__ = "Copyright 2016, Stephens lab"
__email__ = "gaow@uchicago.edu"
__license__ = "MIT"
__version__ = "0.1.0"

#from .model_mash import PriorMASH, LikelihoodMASH, PosteriorMASH
import numpy as np
from scipy.stats import linregress
from sklearn.linear_model import LinearRegression

class RegressionData:
    def __init__(self, X = None, Y = None, Z = None, B = None, S = None):
        self.X = X
        self.Y = Y
        self.Z = Z
        self.B = B
        self.S = S
        self.lik = None
        if (self.X is not None and self.Y is not None) and (self.B is None and self.S is None):
            self.get_summary_stats()

    def get_summary_stats(self):
        '''
        perform univariate regression
        FIXME: it is slower than lapply + .lm.fit in R
        FIXME: this faster implementation is on my watch list:
        https://github.com/ajferraro/fastreg
        (a vectorized sketch of this idea appears after this code block)
        '''
        self.B = np.zeros((self.X.shape[1], self.Y.shape[1]))
        self.S = np.zeros((self.X.shape[1], self.Y.shape[1]))
        for r, y in enumerate(self.Y.T):
            self.B[:,r], self.S[:,r] = self.univariate_simple_regression(self.X, y)[:,[0,2]].T
        
    @staticmethod
    def univariate_simple_regression(X, y, Z=None):
        if Z is not None:
            model = LinearRegression()
            model.fit(Z, y)
            y = y - model.predict(Z)
        return np.vstack([linregress(x, y) for x in X.T])[:,[0,1,4]]

    def __str__(self):
        from pprint import pformat
        return pformat(self.__dict__, indent = 4)

class MASHData(RegressionData):
    def __init__(self, X = None, Y = None, Z = None, B = None, S = None):
        RegressionData.__init__(self, X, Y, Z, B, S)
        self.post_mean_mat = None
        self.post_mean2_mat = None
        self.neg_prob_mat = None
        self.zero_prob_mat = None
        self._is_common_cov = None
        self.V = None
        self.U = None
        self.pi = None
        self.posterior_weights = None
        self.grid = None
        self.l10bf = None

    def is_common_cov(self):
        if self._is_common_cov is None and self.S is not None:
            # common covariance means every effect shares the same standard
            # errors, i.e. all rows of S equal the first row
            self._is_common_cov = (self.S == self.S[0,:]).all()
        return self._is_common_cov

    def compute_posterior(self):
        PosteriorMASH.apply(self)

    def set_prior(self):
        prior = PriorMASH(self)
        prior.expand_cov()
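
The FIXME in get_summary_stats above asks for something faster than looping scipy's linregress over columns. Below is a minimal vectorized sketch of that idea (added here; it is not part of libgaow, nor the fastreg package linked above), computing simple-regression slopes and standard errors for all columns of X at once, assuming complete data:

import numpy as np

def univariate_regression_vectorized(X, y):
    '''Regress y on each column of X separately (with intercept);
    return per-column slopes and their standard errors.'''
    n = X.shape[0]
    Xc = X - X.mean(axis=0)        # centering absorbs the intercept
    yc = y - y.mean()
    sxx = np.sum(Xc ** 2, axis=0)  # per-column sum of squares
    bhat = (Xc.T @ yc) / sxx       # OLS slope for each column
    # residual sum of squares per column; n - 2 degrees of freedom
    # (intercept + slope), matching scipy.stats.linregress
    rss = np.sum(yc ** 2) - bhat ** 2 * sxx
    sebhat = np.sqrt(rss / (n - 2) / sxx)
    return bhat, sebhat

On the data above, univariate_regression_vectorized(X, Y[:,0]) should agree with the linregress loop up to floating point error.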

Test univariate regression routine

In [9]:
res = .lm.fit(cbind(1, X[,1:2]), Y[,1])
head(res$residuals)
  1. 0.178541744725261
  2. 0.414394499518509
  3. -0.15988496576394
  4. 0.305153574791564
  5. -0.71932663295116
  6. 0.855183342987
In [10]:
head(Y[,1] - cbind(1, X[,1:2]) %*% coef(res))
GTEX-111CU  0.1785417
GTEX-111FC  0.4143945
GTEX-111VG -0.1598850
GTEX-111YS  0.3051536
GTEX-1122O -0.7193266
GTEX-1128S  0.8551833
In [11]:
res = lm(Y[,1] ~ X[,1:2])
head(res$residuals)
  1. 0.178541744725261
  2. 0.414394499518509
  3. -0.15988496576394
  4. 0.305153574791564
  5. -0.71932663295116
  6. 0.855183342987
In [12]:
head(Y[,1] - cbind(1, X[,1:2]) %*% coef(res))
GTEX-111CU  0.1785417
GTEX-111FC  0.4143945
GTEX-111VG -0.1598850
GTEX-111YS  0.3051536
GTEX-1122O -0.7193266
GTEX-1128S  0.8551833
In [13]:
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(X[:,0:2], Y[:,0])
res = Y[:,0] - model.predict(X[:,0:2])
res[:10]
Out[13]:
array([ 0.17854174,  0.4143945 , -0.15988497,  0.30515357, -0.71932663,
        0.85518334,  1.30196229,  0.09393039,  0.77884804, -0.31470184])

Test MASH posterior

Python version

In [84]:
def mash_compute_this_and_that(X, Y):
    data = MASHData(X = X, Y = Y)
    data.U = {'identity': np.identity(2)}
    data.V = np.identity(2)
    data.pi = np.array([0.9, 0.05, 0.05])
    data.grid = [0.5, 1]
    data.set_prior()
    lik = LikelihoodMASH(data)
    lik.compute_relative_likelihood_matrix()
    lik.compute_loglik_from_matrix()
    lik.compute_log10bf()
    data.compute_posterior()
    return data
In [85]:
res = mash_compute_this_and_that(X, Y)
In [16]:
print(res)
{   'B': array([[-0.03408121, -0.02613348],
       [-0.07003705, -0.05107982],
       [-0.04920761, -0.06484084],
       ..., 
       [ 0.03982337,  0.00081487],
       [-0.01525277,  0.03298256],
       [ 0.04406519,  0.00465186]]),
    'S': array([[ 0.03928001,  0.02503258],
       [ 0.07055237,  0.04496304],
       [ 0.05550492,  0.03531559],
       ..., 
       [ 0.0570164 ,  0.03635724],
       [ 0.03057429,  0.01945261],
       [ 0.05676716,  0.03620087]]),
    'U': OrderedDict([   ('null', array([[ 0.,  0.],
       [ 0.,  0.]])),
                         (   'identity.1',
                             array([[ 0.25,  0.  ],
       [ 0.  ,  0.25]])),
                         (   'identity.2',
                             array([[ 1.,  0.],
       [ 0.,  1.]]))]),
    'V': array([[ 1.,  0.],
       [ 0.,  1.]]),
    'X': array([[ 1.,  0.,  0., ...,  0.,  1.,  0.],
       [ 0.,  0.,  0., ...,  0.,  1.,  0.],
       [ 0.,  1.,  0., ...,  0.,  1.,  0.],
       ..., 
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  1.,  0.],
       [ 0.,  0.,  0., ...,  0.,  1.,  0.]]),
    'Y': array([[ 0.16348104,  0.77010917],
       [ 0.43588995,  0.77798736],
       [-0.21237311, -0.65361193],
       ..., 
       [ 0.62036618, -0.0035004 ],
       [ 0.00279156, -0.05439095],
       [-0.14650835,  0.29935286]]),
    'Z': None,
    '_is_common_cov': False,
    'grid': [0.5, 1],
    'l10bf': array([[-2.21219688, -1.61703523, -1.41510661, ..., -2.1839402 ,
        -2.15136117, -2.15957352],
       [-2.21219688, -1.61703523, -1.41510661, ..., -2.1839402 ,
        -2.15136117, -2.15957352],
       [-2.21219688, -1.61703523, -1.41510661, ..., -2.1839402 ,
        -2.15136117, -2.15957352],
       ..., 
       [-2.21219688, -1.61703523, -1.41510661, ..., -2.1839402 ,
        -2.15136117, -2.15957352],
       [-2.21219688, -1.61703523, -1.41510661, ..., -2.1839402 ,
        -2.15136117, -2.15957352],
       [-2.21219688, -1.61703523, -1.41510661, ..., -2.1839402 ,
        -2.15136117, -2.15957352]]),
    'lik': {   'alt_loglik': array([[ 5.99623248,  6.1953409 ,  7.14169864, ...,  5.31545763,
         6.63906512,  5.38025522],
       [ 4.60826588,  4.8073743 ,  5.75373205, ...,  3.92749104,
         5.25109853,  3.99228863],
       [ 4.14917942,  4.34828784,  5.29464559, ...,  3.46840458,
         4.79201207,  3.53320217],
       ..., 
       [ 5.92757547,  6.12668388,  7.07304163, ...,  5.24680062,
         6.57040811,  5.31159821],
       [ 5.85848209,  6.05759051,  7.00394826, ...,  5.17770724,
         6.50131474,  5.24250483],
       [ 5.87090335,  6.07001177,  7.01636951, ...,  5.1901285 ,
         6.51373599,  5.25492609]]),
               'lfactor': array([[ 4.16538757],
       [ 2.77742097],
       [ 2.31833452],
       ..., 
       [ 4.09673056],
       [ 4.02763718],
       [ 4.04005844]]),
               'loglik': None,
               'marginal_loglik': None,
               'null_loglik': array([[ 11.09000404,   9.9187021 ,  10.40010203, ...,  10.34416578,
         11.59275729,  10.35285703],
       [  9.70203744,   8.53073551,   9.01213543, ...,   8.95619919,
         10.20479069,   8.96489043],
       [  9.24295098,   8.07164905,   8.55304898, ...,   8.49711273,
          9.74570423,   8.50580397],
       ..., 
       [ 11.02134702,   9.85004509,  10.33144502, ...,  10.27550877,
         11.52410027,  10.28420001],
       [ 10.95225365,   9.78095172,  10.26235164, ...,  10.2064154 ,
         11.4550069 ,  10.21510664],
       [ 10.96467491,   9.79337297,  10.2747729 , ...,  10.21883665,
         11.46742816,  10.22752789]]),
               'relative_likelihood': array([[ 1.        ,  0.00980395,  0.00246572],
       [ 1.        ,  0.03847743,  0.00982787],
       [ 1.        ,  0.06131811,  0.01558137],
       ..., 
       [ 1.        ,  0.01045639,  0.00263814],
       [ 1.        ,  0.01128277,  0.00283183],
       [ 1.        ,  0.01105872,  0.0027915 ]])},
    'neg_prob_mat': array([[  5.49433658e-04,   2.24165414e-03,   3.45042889e-03, ...,
          1.77114432e-04,   5.41253512e-04,   1.69186104e-04],
       [  5.80025487e-04,   2.33176806e-03,   4.11166625e-03, ...,
          3.56987564e-04,   3.53297220e-05,   3.45211672e-04]]),
    'pi': array([ 0.9 ,  0.05,  0.05]),
    'post_mean2_mat': array([[  1.82852978e-06,   2.57995469e-05,   2.30594775e-05, ...,
          3.46531369e-06,   9.11249902e-07,   3.91175385e-06],
       [  8.89191509e-07,   1.22627365e-05,   2.30182948e-05, ...,
          9.57098254e-07,   1.14628632e-06,   1.01962945e-06]]),
    'post_mean_mat': array([[ -2.30946163e-05,  -1.84346572e-04,  -2.07167939e-04, ...,
          2.86337525e-05,  -1.19131803e-05,   3.35139738e-05],
       [ -1.77639138e-05,  -1.35782667e-04,  -2.74672751e-04, ...,
          5.89720227e-07,   2.58096594e-05,   3.56081381e-06]]),
    'posterior_weights': array([[  9.99318816e-01,   9.97323555e-01,   9.95745981e-01, ...,
          9.99273055e-01,   9.99216470e-01,   9.99231135e-01],
       [  5.44293024e-04,   2.13191356e-03,   3.39206994e-03, ...,
          5.80488132e-04,   6.26329619e-04,   6.13900820e-04],
       [  1.36891348e-04,   5.44531546e-04,   8.61949224e-04, ...,
          1.46456721e-04,   1.57200874e-04,   1.54963885e-04]]),
    'zero_prob_mat': array([[ 0.99931882,  0.99732355,  0.99574598, ...,  0.99927306,
         0.99921647,  0.99923114],
       [ 0.99931882,  0.99732355,  0.99574598, ...,  0.99927306,
         0.99921647,  0.99923114]])}
In [17]:
betahat = res.B
sebetahat = res.S
In [18]:
res.B.shape
Out[18]:
(7492, 2)

R version

In [19]:
%get betahat sebetahat
In [20]:
# SoS bug: the matrices arrive flattened, so restore their shape
betahat = matrix(betahat, ncol = 2, byrow = T)
sebetahat = matrix(sebetahat, ncol = 2, byrow = T)
In [21]:
data = mashr::set_mash_data(betahat, sebetahat)
g = list(Ulist = list(identity = diag(2)), grid = c(0.5,1), pi = c(0.9,0.05,0.05), usepointmass = T)
In [50]:
output = mashr::mash(data, g = g, fixg = TRUE, outputlevel = 2)
 - Computing 7492 x 3 likelihood matrix.
 - Likelihood calculations took 0.03 seconds.
 - Computing posterior matrices.
 - Computation allocated took 0.02 seconds.

Posterior agreement

In [51]:
res = output$result
str(res)
List of 5
 $ PosteriorMean: num [1:7492, 1:2] -2.31e-05 -1.84e-04 -2.07e-04 -1.84e-04 -1.84e-04 ...
 $ PosteriorSD  : num [1:7492, 1:2] 0.00135 0.00508 0.0048 0.00508 0.00508 ...
 $ lfdr         : num [1:7492, 1:2] 0.999 0.997 0.996 0.997 0.997 ...
 $ NegativeProb : num [1:7492, 1:2] 0.000549 0.002242 0.00345 0.002242 0.002242 ...
 $ lfsr         : num [1:7492, 1:2] 0.999 0.998 0.997 0.998 0.998 ...
In [52]:
head(res$PosteriorMean, n = 10)
-2.309462e-05 -1.776391e-05
-1.843466e-04 -1.357827e-04
-2.071679e-04 -2.746728e-04
-1.843466e-04 -1.357827e-04
-1.843466e-04 -1.357827e-04
-2.373037e-07 -4.073059e-07
-6.903817e-05 -1.042731e-05
 3.273801e-04  5.317394e-06
-8.864880e-05 -1.199251e-04
-1.959964e-04 -2.629151e-04
In [53]:
print(res.post_mean_mat.T[:10,:])
[[ -2.30946163e-05  -1.77639138e-05]
 [ -1.84346572e-04  -1.35782667e-04]
 [ -2.07167939e-04  -2.74672751e-04]
 [ -1.84346572e-04  -1.35782667e-04]
 [ -1.84346572e-04  -1.35782667e-04]
 [ -2.37303667e-07  -4.07305942e-07]
 [ -6.90381717e-05  -1.04273146e-05]
 [  3.27380081e-04   5.31739393e-06]
 [ -8.86487991e-05  -1.19925076e-04]
 [ -1.95996399e-04  -2.62915146e-04]]
In [54]:
head(res$PosteriorSD, n = 10)
0.0013520342 0.0009428022
0.0050759790 0.0034991856
0.0047975576 0.0047898695
0.0050759790 0.0034991856
0.0050759790 0.0034991856
0.0003934879 0.0002527649
0.0068022947 0.0043295445
0.0054846100 0.0013089059
0.0034916941 0.0031746901
0.0046616635 0.0046710262
In [55]:
print(np.sqrt(res.post_mean2_mat - np.square(res.post_mean_mat)).T[:10,:])
[[ 0.00135203  0.0009428 ]
 [ 0.00507598  0.00349919]
 [ 0.00479756  0.00478987]
 [ 0.00507598  0.00349919]
 [ 0.00507598  0.00349919]
 [ 0.00039349  0.00025276]
 [ 0.00680229  0.00432954]
 [ 0.00548461  0.00130891]
 [ 0.00349169  0.00317469]
 [ 0.00466166  0.00467103]]
In [56]:
head(res$NegativeProb, n = 10)
5.494337e-04 5.800255e-04
2.241654e-03 2.331768e-03
3.450429e-03 4.111666e-03
2.241654e-03 2.331768e-03
2.241654e-03 2.331768e-03
8.620779e-05 9.143502e-05
1.647212e-03 1.479577e-03
2.530507e-05 1.954250e-03
1.620550e-03 1.999660e-03
3.315622e-03 3.967268e-03
In [57]:
print(res.neg_prob_mat.T[:10,:])
[[  5.49433658e-04   5.80025487e-04]
 [  2.24165414e-03   2.33176806e-03]
 [  3.45042889e-03   4.11166625e-03]
 [  2.24165414e-03   2.33176806e-03]
 [  2.24165414e-03   2.33176806e-03]
 [  8.62077869e-05   9.14350185e-05]
 [  1.64721241e-03   1.47957660e-03]
 [  2.53050679e-05   1.95425042e-03]
 [  1.62054984e-03   1.99965984e-03]
 [  3.31562154e-03   3.96726780e-03]]

Likelihood and Bayes factors

In [86]:
print(res.lik['loglik'])
21092.1811206
In [71]:
output$loglik
21092.1811206117
In [62]:
head(output$null_loglik, 10)
  1. 4.16538757076441
  2. 2.77742097387796
  3. 2.31833451527425
  4. 2.77742097387796
  5. 2.77742097387796
  6. 5.58105688238237
  7. 2.6971464676332
  8. 2.35655692730795
  9. 2.98667569323665
  10. 2.35396411918973
In [65]:
head(output$alt_loglik, 10)
print(dim(output$null_loglik))
-0.9283840
-0.9459402
-0.9400689
-0.9459402
-0.9459402
-0.9238257
-0.9609677
-0.9347843
-0.9389967
-0.9397233
NULL
In [63]:
head(output$vloglik)
4.060708
2.674740
2.217237
2.674740
2.674740
5.475863
In [87]:
res.lik['null_loglik'][:10]
Out[87]:
array([ 4.16538757,  2.77742097,  2.31833452,  2.77742097,  2.77742097,
        5.58105688,  2.69714647,  2.35655693,  2.98667569,  2.35396412])
In [88]:
res.lik['alt_loglik'][:10]
Out[88]:
array([-0.92838399, -0.94594023, -0.94006887, -0.94594023, -0.94594023,
       -0.92382573, -0.96096769, -0.93478429, -0.9389967 , -0.93972334])
In [91]:
res.lik['marginal_loglik'][:10]
Out[91]:
array([ 4.06070847,  2.67474049,  2.21723709,  2.67474049,  2.67474049,
        5.47586259,  2.59464642,  2.25532166,  2.88350487,  2.2527192 ])
In [93]:
head(mashr::get_log10bf(output), 10)
-2.212197
-1.617035
-1.415107
-1.617035
-1.617035
-2.825035
-1.588699
-1.429411
-1.704898
-1.430430
In [95]:
res.l10bf[:10]
Out[95]:
array([-2.21219688, -1.61703523, -1.41510661, -1.61703523, -1.61703523,
       -2.82503462, -1.58869879, -1.42941133, -1.70489786, -1.43043029])
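
As a quick consistency check on how these quantities relate (an added snippet, assuming res and np from the Python cells above): the marginal log-likelihood is the $\pi$-weighted mixture of the null and alternative likelihoods, and the $\log_{10}$ Bayes factor is their difference rescaled by $\log 10$.

null, alt = res.lik['null_loglik'], res.lik['alt_loglik']
pi0 = res.pi[0]
# marginal = log(pi0 * L_null + (1 - pi0) * L_alt), per compute_loglik_from_matrix
marginal = np.log(pi0 * np.exp(null) + (1 - pi0) * np.exp(alt))
print(np.allclose(marginal, res.lik['marginal_loglik']))  # expect True
print(np.allclose(res.l10bf, (alt - null) / np.log(10)))  # expect True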

Check calculation for common cov routine

In [157]:
res.S = res.S ** 0  # set all standard errors to 1 so that is_common_cov() holds
def mash_compute_this_and_that(B, S):
    data = MASHData(B = B, S = S)
    data.U = {'identity': np.identity(2)}
    data.V = np.identity(2)
    data.pi = np.array([0.9, 0.05, 0.05])
    data.grid = [0.5, 1]
    data.set_prior()
    lik = LikelihoodMASH(data)
    lik.compute_relative_likelihood_matrix()
    lik.compute_loglik_from_matrix()
    lik.compute_log10bf()
    data.compute_posterior()
    return data
In [158]:
res0 = mash_compute_this_and_that(res.B, res.S)
In [167]:
print(res0.lik['marginal_loglik'][:10])
print(res0.lik['loglik'])
[-1.87440689 -1.87718154 -1.87674669 -1.87718154 -1.87718154 -1.87350819
 -1.87382948 -1.87662117 -1.87581722 -1.87666407]
-14071.5357133
In [169]:
res0.l10bf[:10]
Out[169]:
array([-0.18696032, -0.18657196, -0.18663283, -0.18657196, -0.18657196,
       -0.18708609, -0.18704113, -0.1866504 , -0.18676293, -0.1866444 ])
In [162]:
data0 = mashr::set_mash_data(betahat, sebetahat ^ 0)
output0 = mashr::mash(data0, g = g, fixg = TRUE, outputlevel = 2)
 - Computing 7492 x 3 likelihood matrix.
 - Likelihood calculations took 0.02 seconds.
 - Computing posterior matrices.
 - Computation allocated took 0.01 seconds.
In [168]:
print(head(output0$vloglik))
print(output0$loglik)
          [,1]
[1,] -1.874407
[2,] -1.877182
[3,] -1.876747
[4,] -1.877182
[5,] -1.877182
[6,] -1.873508
[1] -14071.54
In [174]:
print(np.sqrt(res0.post_mean2_mat - np.square(res0.post_mean_mat)).T[:10,:])
[[ 0.14580651  0.1457939 ]
 [ 0.14597999  0.14591946]
 [ 0.14590279  0.14594979]
 [ 0.14597999  0.14591946]
 [ 0.14597999  0.14591946]
 [ 0.14575174  0.14575185]
 [ 0.14577749  0.14576071]
 [ 0.14600341  0.14583559]
 [ 0.14585838  0.14589401]
 [ 0.14589801  0.14594566]]
In [175]:
print(res0.neg_prob_mat.T[:10,:])
print(res0.zero_prob_mat.T[:10,:])
[[ 0.03418912  0.03407225]
 [ 0.03474676  0.03446786]
 [ 0.0344358   0.03466578]
 [ 0.03474676  0.03446786]
 [ 0.03474676  0.03446786]
 [ 0.03369986  0.03371488]
 [ 0.03405713  0.03373694]
 [ 0.03253624  0.03369128]
 [ 0.03430655  0.03451324]
 [ 0.03442033  0.03465671]]
[[ 0.93262421  0.93262421]
 [ 0.932568    0.932568  ]
 [ 0.93257681  0.93257681]
 [ 0.932568    0.932568  ]
 [ 0.932568    0.932568  ]
 [ 0.93264241  0.93264241]
 [ 0.9326359   0.9326359 ]
 [ 0.93257936  0.93257936]
 [ 0.93259565  0.93259565]
 [ 0.93257849  0.93257849]]
In [170]:
head(mashr::get_log10bf(output0), 10)
-0.1869603
-0.1865720
-0.1866328
-0.1865720
-0.1865720
-0.1870861
-0.1870411
-0.1866504
-0.1867629
-0.1866444
In [173]:
head(output0$result$PosteriorSD, n = 10)
0.1458065 0.1457939
0.1459800 0.1459195
0.1459028 0.1459498
0.1459800 0.1459195
0.1459800 0.1459195
0.1457517 0.1457518
0.1457775 0.1457607
0.1460034 0.1458356
0.1458584 0.1458940
0.1458980 0.1459457
In [176]:
head(output0$result$NegativeProb, n = 10)
0.03418912 0.03407225
0.03474676 0.03446786
0.03443580 0.03466578
0.03474676 0.03446786
0.03474676 0.03446786
0.03369986 0.03371488
0.03405713 0.03373694
0.03253624 0.03369128
0.03430655 0.03451324
0.03442033 0.03465671
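
One more internal check worth noting (an added sketch, assuming the objects above are still in scope): the common-cov routine is only a computational shortcut, so forcing the general per-effect routine on the same data should reproduce its posterior summaries.

# rebuild the same model but bypass the common-cov shortcut
res1 = MASHData(B = res.B, S = res.S)
res1.U = {'identity': np.identity(2)}
res1.V = np.identity(2)
res1.pi = np.array([0.9, 0.05, 0.05])
res1.grid = [0.5, 1]
res1.set_prior()
lik = LikelihoodMASH(res1)
lik.compute_relative_likelihood_matrix()
lik.compute_loglik_from_matrix()
res1._is_common_cov = False  # force is_common_cov() to report False
res1.compute_posterior()
print(np.allclose(res1.post_mean_mat, res0.post_mean_mat))  # expect True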

Copyright © 2016-2020 Gao Wang et al at Stephens Lab, University of Chicago