# Driver commands for the UKB blood-cell pipeline.
# Directory holding the per-unit .rds input files.
m=/project2/mstephens/yuxin/ukb-bloodcells/zscores
# Write one analysis unit per line: every *.rds file name in the directory with only its
# trailing ".rds" extension removed (the pattern is anchored so a ".rds" elsewhere in a
# name is left alone).
cd "$m" && ls *.rds | sed 's/\.rds$//' > analysis_units.txt && cd -
# Run the extract_effects workflow over the listed units; stdout and stderr go to
# extract_effects.log in the current directory.
sos run /project2/mstephens/yuxin/mvarbvs/analysis/multivariate/20201221_ukb_ED_prior.ipynb extract_effects \
--cwd /project2/mstephens/yuxin/ukb-bloodcells --analysis-units "$m"/analysis_units.txt \
--datadir "$m" &> extract_effects.log
# Run the Ycov workflow of the same notebook in the same working directory.
sos run /project2/mstephens/yuxin/mvarbvs/analysis/multivariate/20201221_ukb_ED_prior.ipynb Ycov \
--cwd /project2/mstephens/yuxin/ukb-bloodcells
# Submit the factor-analysis and mixture-model steps of this pipeline. The notebook path
# is relative, so these two commands are presumably run from the repository root -- confirm.
sos run analysis/20210503_ukb_pipeline.ipynb factor_analysis
sos run analysis/20210503_ukb_pipeline.ipynb mixture_model
[global]
# Settings shared by the steps of this workflow. Each value declared with `parameter:`
# has the default shown here.
# Directory that is searched for the per-region data files
parameter: data_dir = path('/project2/mstephens/yuxin/ukb-bloodcells/zscores')
# Working directory: passed as --cwd to the downstream workflows and used for their logs
parameter: wd = path('/project2/mstephens/yuxin/ukb-bloodcells/')
# Text file listing one region name per line
parameter: analysis_units = path('/project2/mstephens/yuxin/ukb-bloodcells/zscores/analysis_units.txt')
# Suffix appended to each region name (after a dot) to form its data file name
parameter: suffix = 'ukbbloodcells.rds'
# Value passed as --name to the downstream workflows; also prefixes their log files
parameter: name = "ukbbloodcells_prepare"
# Region names read from analysis_units; blank lines and lines starting with '#' are skipped
regions = [x.strip() for x in open(analysis_units).readlines() if x.strip() and not x.strip().startswith('#')]
# Files named <data_dir>/<region>.<suffix>, keeping only those that exist on disk.
# NOTE(review): `:a` is assumed to be the SoS path format for an absolute path -- confirm.
# NOTE(review): `genes` is not referenced by the steps visible here; it may be used elsewhere.
genes = [f"{data_dir:a}/{x}.{suffix}" for x in regions if path(f"{data_dir:a}/{x}.{suffix}").exists()]
[factor_analysis]
# Submit one batch job that runs the flash, pca and canonical workflows of
# mixture_prior.ipynb in sequence. The first run creates the log file
# <wd>/<name>_factor_analysis.log and the following two append to it.
# The third command previously named a stray token "v" instead of the notebook path;
# it now points at the same notebook as the two commands before it.
# NOTE(review): the job header uses Slurm #SBATCH directives while the interpreter is
# 'qsub' -- confirm that qsub on the target cluster honours them.
script: interpreter= 'qsub', expand = True
#!/bin/bash
#SBATCH --time=36:00:00
#SBATCH --partition=mstephens
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=1
#SBATCH --mem-per-cpu=2000
#SBATCH --job-name={step_name}
#SBATCH --mail-type=BEGIN,END,FAIL
module load R
sos run ~/GitHub/bioworkflows/multivariate-fine-mapping/mixture_prior.ipynb flash --name {name} --cwd {wd} -s build &> {wd}/{name}_factor_analysis.log
sos run ~/GitHub/bioworkflows/multivariate-fine-mapping/mixture_prior.ipynb pca --name {name} --cwd {wd} -s build &>> {wd}/{name}_factor_analysis.log
sos run ~/GitHub/bioworkflows/multivariate-fine-mapping/mixture_prior.ipynb canonical --name {name} --cwd {wd} -s build &>> {wd}/{name}_factor_analysis.log
[mixture_model]
# Submit six batch jobs, one per combination of fitting workflow and --residcor value.
# Each job runs a single `sos run` command against 20201221_ukb_ED_prior.ipynb and writes
# its own log file under wd.
# NOTE(review): the job header uses Slurm #SBATCH directives while the interpreter is
# 'qsub' -- confirm that qsub on the target cluster honours them.
def get_cmd(m):
    # Build the six command strings for analysis name `m`; `wd` comes from [global].
    # All commands share one template, so each log name is derived from the same
    # --residcor value that is passed to the workflow. The hand-written version sent
    # both the "ed ... Y" and the "ed ... znull" runs to bovy_Y_<m>.log.
    notebook = '/project2/mstephens/yuxin/mvarbvs/analysis/multivariate/20201221_ukb_ED_prior.ipynb'
    # (workflow name plus its options, --residcor value, log-file prefix)
    runs = [
        ('ud --ud-method ed', 'Y', 'ed'),
        ('ud --ud-method ed', 'znull', 'ed'),
        ('ud --ud-method teem', 'Y', 'teem'),
        ('ud --ud-method teem', 'znull', 'teem'),
        ('ed', 'Y', 'bovy'),
        ('ed', 'znull', 'bovy'),
    ]
    # The backslash-newline inside the template is a Python line continuation, so every
    # command is a single shell line surrounded by one leading and one trailing newline.
    return [f'''
sos run {notebook} {workflow} --residcor {residcor} --name {m} --cwd {wd} \
-c ~/GitHub/mvarbvs/midway2.yml -q stephenslab -s build &> {wd}/{prefix}_{residcor}_{m}.log
''' for workflow, residcor, prefix in runs]
cmds = get_cmd(name)
# One substep, and therefore one submitted job, per command; the current one is _cmds.
input: for_each = 'cmds'
script: interpreter= 'qsub', expand = True
#!/bin/bash
#SBATCH --time=36:00:00
#SBATCH --partition=mstephens
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=1
#SBATCH --mem-per-cpu=2000
#SBATCH --job-name={step_name}
#SBATCH --mail-type=BEGIN,END,FAIL
module load R
{_cmds}