Multivariate Bayesian variable selection regression

UKB Bloodcells Multivariate fine-mapping

Analysis

Extract Data

# Directory whose *.rds files define the analysis units.
m=/project2/mstephens/yuxin/ukb-bloodcells/zscores
# Write one unit name per line; only the trailing .rds extension is removed,
# so a ".rds" occurring elsewhere in a file name is left intact.
cd "$m" && ls *.rds | sed 's/\.rds$//' > analysis_units.txt && cd -
# Run the extract_effects workflow; stdout and stderr both go to extract_effects.log.
sos run /project2/mstephens/yuxin/mvarbvs/analysis/multivariate/20201221_ukb_ED_prior.ipynb extract_effects \
        --cwd /project2/mstephens/yuxin/ukb-bloodcells  --analysis-units "$m/analysis_units.txt" \
        --datadir "$m" &> extract_effects.log
# Run the Ycov workflow with the same working directory.
sos run /project2/mstephens/yuxin/mvarbvs/analysis/multivariate/20201221_ukb_ED_prior.ipynb Ycov \
        --cwd /project2/mstephens/yuxin/ukb-bloodcells

Compute priors

# Run the two prior-fitting steps in order: factor_analysis first, then mixture_model.
# NOTE(review): presumably these are the [factor_analysis] and [mixture_model] sections
# defined further down in this notebook - confirm the notebook path.
sos run analysis/20210503_ukb_pipeline.ipynb factor_analysis
sos run analysis/20210503_ukb_pipeline.ipynb mixture_model
In [ ]:
 
In [ ]:
[global]
# Directory searched for the per-unit data files
parameter: data_dir = path('/project2/mstephens/yuxin/ukb-bloodcells/zscores')
# Working directory forwarded as --cwd to the downstream workflows; job logs are written here
parameter: wd = path('/project2/mstephens/yuxin/ukb-bloodcells/')
# Text file listing one analysis unit per line
parameter: analysis_units = path('/project2/mstephens/yuxin/ukb-bloodcells/zscores/analysis_units.txt')
# Suffix appended to each unit name (after a dot) to form its data file name
parameter: suffix = 'ukbbloodcells.rds'
# Run name forwarded as --name to the downstream workflows and embedded in log file names
parameter: name = "ukbbloodcells_prepare"
# Read the unit names inside a context manager so the file handle is closed
# as soon as the lines have been collected.
with open(analysis_units) as units_file:
    regions = [entry.strip() for entry in units_file]
# Skip blank lines and lines starting with '#'.
regions = [x for x in regions if x and not x.startswith('#')]
# Keep only the units whose data file exists under data_dir.
genes = [f"{data_dir:a}/{x}.{suffix}" for x in regions if path(f"{data_dir:a}/{x}.{suffix}").exists()]
In [ ]:
[factor_analysis]
# Run the flash, pca and canonical workflows of mixture_prior.ipynb one after
# another inside a single submitted job; all three share one log file under wd.
# NOTE(review): the job header uses #SBATCH directives while the interpreter is
# qsub - confirm that qsub on this cluster honours them.
script: interpreter= 'qsub', expand = True
    #!/bin/bash

    #SBATCH --time=36:00:00
    #SBATCH --partition=mstephens
    #SBATCH --nodes=1
    #SBATCH --ntasks-per-node=1
    #SBATCH --cpus-per-task=1
    #SBATCH --mem-per-cpu=2000
    #SBATCH --job-name={step_name}
    #SBATCH --mail-type=BEGIN,END,FAIL

    module load R
    # The first run creates the log with &>, the later runs append to it with &>>.
    sos run ~/GitHub/bioworkflows/multivariate-fine-mapping/mixture_prior.ipynb flash --name {name} --cwd {wd} -s build &> {wd}/{name}_factor_analysis.log
    sos run ~/GitHub/bioworkflows/multivariate-fine-mapping/mixture_prior.ipynb pca --name {name} --cwd {wd} -s build &>> {wd}/{name}_factor_analysis.log
    # Same notebook as the two runs above; the original line had a stray "v" in place of the path.
    sos run ~/GitHub/bioworkflows/multivariate-fine-mapping/mixture_prior.ipynb canonical --name {name} --cwd {wd} -s build &>> {wd}/{name}_factor_analysis.log
In [ ]:
[mixture_model]
# Submit one batch job per combination of fitting method (log tags ed, teem, bovy)
# and residual correlation setting (Y, znull) - six jobs in total.
# NOTE(review): the job header uses #SBATCH directives while the interpreter is
# qsub - confirm that qsub on this cluster honours them.
def get_cmd(m):
    """Build the six sos command lines for run name m.

    m is forwarded as --name and embedded in every log file name.
    The list is ordered ed/Y, ed/znull, teem/Y, teem/znull, bovy/Y, bovy/znull.
    Each command writes to its own log, named after its method tag and residcor value.
    """
    notebook = '/project2/mstephens/yuxin/mvarbvs/analysis/multivariate/20201221_ukb_ED_prior.ipynb'
    cluster_opts = '-c ~/GitHub/mvarbvs/midway2.yml -q stephenslab -s build'
    # (log file tag, workflow name plus method options)
    methods = [
        ('ed', 'ud --ud-method ed'),
        ('teem', 'ud --ud-method teem'),
        ('bovy', 'ed'),
    ]
    commands = []
    for tag, workflow in methods:
        for residcor in ('Y', 'znull'):
            # Derive the log name from tag and residcor so no two jobs share a log file.
            log_file = f'{wd}/{tag}_{residcor}_{m}.log'
            commands.append(
                f'sos run {notebook} {workflow} --residcor {residcor} --name {m} --cwd {wd} '
                f'{cluster_opts} &> {log_file}'
            )
    return commands
cmds = get_cmd(name)
input: for_each = 'cmds'
script: interpreter= 'qsub', expand = True
#!/bin/bash
  
#SBATCH --time=36:00:00
#SBATCH --partition=mstephens
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=1
#SBATCH --mem-per-cpu=2000
#SBATCH --job-name={step_name}
#SBATCH --mail-type=BEGIN,END,FAIL

module load R
{_cmds}

Copyright © 2016-2020 Gao Wang et al at Stephens Lab, University of Chicago