Save GTEx V8 expression and covariate data to HDF5 format, for use in association analysis.
[global]
# Base directory of the analysis: the steps below build their input and output
# paths from it and also use it as the task working directory.
cwd = "~/Documents/GTExV8"
For the covariate data we also compute an orthonormal basis of each covariate matrix,
to be used in the mr-ash analysis.
%sosrun format_covar
[format_covar]
# Consolidate covariates to HDF5 format.
# Every input covariate file becomes one table in the first output file; the
# second output file holds, under the same table names, an orthonormal basis
# of each covariate matrix.
input: glob.glob("${cwd!a}/GTEx_Analysis_v8_eQTL_covariates/*.v8.covariates.txt")
output: "${cwd!a}/GTExV8.covariates.h5", "${cwd!a}/GTExV8.covariates.orth.h5"
task: workdir = cwd
python:
    import os
    import pandas as pd
    import scipy.linalg

    covar_h5 = ${output[0]!ar}
    orth_h5 = ${output[1]!ar}

    # Tables are appended one by one below (mode = 'a'), so remove any output
    # left over from a previous run to avoid mixing old and new tables.
    for stale in (covar_h5, orth_h5):
        if os.path.isfile(stale):
            os.remove(stale)

    # Pair each input path with its table name; the name is presumably the
    # file's base name with the trailing ".v8.covariates.txt" stripped.
    for fn, table in zip([${input!ar,}], [${input!bnnnr,}]):
        # Transpose so that the columns of the file become the rows
        # (presumably one row per sample -- confirm against the input files).
        samples = pd.read_csv(fn, header = 0, sep = '\t', index_col = 0).transpose()
        # The key is passed by keyword: recent pandas deprecates passing it positionally.
        samples.to_hdf(covar_h5, key = '/{}'.format(table), mode = 'a', complevel = 9, complib = 'zlib')
        # scipy.linalg.orth gives an orthonormal basis of the range (column space)
        # of the matrix; it keeps the row count, so the original row index still applies.
        samples_orth = pd.DataFrame(scipy.linalg.orth(samples), index = samples.index)
        samples_orth.to_hdf(orth_h5, key = '/{}'.format(table), mode = 'a', complevel = 9, complib = 'zlib')
%sosrun format_expr
[format_expr]
# Consolidate expression data to HDF5 format.
# Every input expression file becomes one table in the output file.
input: glob.glob("${cwd!a}/GTEx_Analysis_v8_eQTL_expression_matrices/*.v8.normalized_expression.bed.gz")
output: "${cwd!a}/GTExV8.expression.h5"
task: workdir = cwd
python:
    import os
    import pandas as pd

    expr_h5 = ${output!ar}

    # Tables are appended one by one below (mode = 'a'), so remove any output
    # left over from a previous run to avoid mixing old and new tables.
    if os.path.isfile(expr_h5):
        os.remove(expr_h5)

    # Pair each input path with its table name; the name is presumably the file's
    # base name with the trailing ".v8.normalized_expression.bed.gz" stripped.
    for fn, table in zip([${input!ar,}], [${input!bnnnnr,}]):
        # The fourth column becomes the row index.
        dat = pd.read_csv(fn, sep = "\t", header = 0, index_col = 3)
        # Drop the three leading columns (presumably the BED chromosome / start /
        # end fields -- confirm), keeping only the remaining data columns.
        dat = dat.drop(dat.columns[:3], axis = 1)
        # The key is passed by keyword: recent pandas deprecates passing it positionally.
        dat.to_hdf(expr_h5, key = '/{}'.format(table), mode = 'a', complevel = 9, complib = 'zlib')