Multivariate Bayesian variable selection regression

Extract per-gene per tissue data

For fine mapping demos.

In [3]:
setwd("~/Documents/GTExV8")
load_data = function(genotype_file, expr_file, cov_file, geno_table, expr_table, cov_table) {
        library(rhdf5)
        geno = h5read(genotype_file, geno_table)
        gdata = data.frame(geno$block0_values)
        colnames(gdata) = geno$axis1
        rownames(gdata) = geno$axis0
        #
        expr = h5read(expr_file, expr_table)
        edata = data.frame(expr$block0_values)
        colnames(edata) = tools::file_path_sans_ext(expr$axis1)
        edata = data.frame(edata[, basename(geno_table)])
        rownames(edata) = expr$axis0
        colnames(edata) = basename(geno_table)
        #
        covariate <- h5read(cov_file, cov_table)
        cdata = data.frame(covariate$block0_values)
        colnames(cdata) = covariate$axis1
        rownames(cdata) = covariate$axis0
        cdata = t(cdata)
        # reorder
        edata = edata[match(rownames(cdata), rownames(edata)),]
        gdata = gdata[rownames(cdata),]
        return(list(X = as.matrix(gdata), y = as.vector(edata), Z = as.matrix(cdata), chrom = strsplit(colnames(gdata)[1], '_')[[1]][1], 
                    pos = as.integer(do.call(rbind, strsplit(colnames(gdata), '_'))[,2])))
    }

genotype_file = "GTExV8.genotype.cis.h5"
expr_file = "GTExV8.expression.h5"
cov_file = "GTExV8.covariates.h5"
## 
geno_table = "/chr1/ENSG00000094963"
expr_table = "/Thyroid"
cov_table = "/Thyroid"
dat = load_data(genotype_file, expr_file, cov_file, geno_table, expr_table, cov_table)
saveRDS(dat, 'Thyroid.FMO2.pm1Mb.RDS')

## 
geno_table = "/chr11/ENSG00000248746"
expr_table = "/Muscle_Skeletal"
cov_table = "/Muscle_Skeletal"
dat = load_data(genotype_file, expr_file, cov_file, geno_table, expr_table, cov_table)
saveRDS(dat, 'Muscle_Skeletal.ACTN3.pm1Mb.RDS')

Copyright © 2016-2020 Gao Wang et al at Stephens Lab, University of Chicago