QuaC
🦆🦆 Don't duck that QC thingy 🦆🦆
############### NOTE ###############
This repo has been archived and is now hosted at https://github.com/uab-cgds-worthey/quac. Please use that site for the most recent changes to the repo.
###################################
What is QuaC?
QuaC is a snakemake-based pipeline that runs several QC tools for WGS/WES samples and then summarizes their results using pre-defined, configurable QC thresholds.
In summary, QuaC performs the following:
- Runs several QC tools using BAM and VCF files as input. At our center, CGDS, these files are produced as part of the small variant caller pipeline.
- Using the QuaC-Watch tool, performs QC checkup based on the expected thresholds for certain QC metrics and summarizes the results for easier human consumption.
- Aggregates QC output produced here, as well as that produced by the small variant caller pipeline, using MultiQC, both at the sample level and the project level.
- Optionally, the above QC checkup and QC aggregation steps can accept pre-run results from a few QC tools (FastQC, FastQ Screen, Picard's MarkDuplicates). At CGDS, these files are produced as part of the small variant caller pipeline.
QC tools utilized
QuaC quacks using the tools listed below:
Tool | Use |
---|---|
BAM QC | |
Qualimap | QCs alignment data in SAM/BAM files |
Picard-CollectMultipleMetrics | Summarizes alignment metrics from a SAM/BAM file using several modules |
Picard-CollectWgsMetrics | Collects metrics about coverage and performance of whole-genome sequencing (WGS) experiments |
mosdepth | Fast BAM/CRAM depth calculation |
indexcov | Estimates coverage from a whole-genome BAM or CRAM index (not run in exome mode) |
covviz | Identifies large, coverage-based anomalies (not run in exome mode) |
VCF QC | |
bcftools stats | Computes statistics for variants in VCF files |
Within-species contamination | |
verifybamid | Estimates within-species (i.e., cross-sample) contamination |
Sex, ancestry and relatedness estimation | |
somalier | Estimates sex, ancestry and relatedness |
Optional tools' results consumption
In addition to the above tools, QuaC can optionally utilize QC results produced by the tools below when run with the flag --include_prior_qc. At CGDS, these files are produced as part of the small variant caller pipeline.
Tool | Use |
---|---|
fastqc | Performs QC checks on raw sequence data (fastq) |
FastQ Screen | Screens fastq for other-species contamination |
Picard's MarkDuplicates | Determines level of read duplication on bam files |
QuaC-Watch
QuaC includes a tool called QuaC-Watch. After running all the QC tools for the samples, QuaC-Watch summarizes whether samples have passed the configurable QC thresholds defined in config files (available at configs/quac_watch/), both at the sample level and at the project level. This summary makes it easy to quickly review whether samples are of sufficient quality, and it highlights samples that need further review.
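For reference, the bundled threshold configs referenced elsewhere in this README can be inspected directly (a quick look, not a required step; the filenames are taken from the defaults and examples used later in this document):
# view the bundled QuaC-Watch threshold configs
less configs/quac_watch/wgs_quac_watch_config.yaml    # default, used for WGS runs
less configs/quac_watch/exome_quac_watch_config.yaml  # passed via --quac_watch_config for exome runs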
Installation
Installation requires
- fetching the source code
- creating the conda environment
Requirements
- Git v2.0+
- CGDS GitLab access
- SSH key for access to the Cheaha cluster
- Anaconda/miniconda
  - Tested with Anaconda3/2020.02
  - Available as a module on Cheaha at UAB
Retrieve pipeline source code
Go to the directory of your choice and run the command below.
git clone -b master \
git@gitlab.rc.uab.edu:center-for-computational-genomics-and-data-science/sciops/pipelines/quac.git
Create conda environment
The conda environment installs all dependencies necessary to run the QuaC workflow, including snakemake.
cd /path/to/quac/repo
# For use only at Cheaha in UAB. Load conda into environment.
module reset
module load Anaconda3/2020.02
# create conda environment. Needed only the first time.
conda env create --file configs/env/quac.yaml
# activate conda environment
conda activate quac
# if you need to update the existing environment
conda env update --file configs/env/quac.yaml
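As an optional sanity check (assuming the environment was created cleanly), confirm that the key dependencies are on PATH after activation:
# confirm key dependencies installed by the conda environment
snakemake --version   # tested with v6.0.5 (see Requirements below)
python --version      # tested with v3.6.13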
How to run QuaC
In order to run the QuaC pipeline, the user needs to:
- Install the pipeline and set up the conda environment (see above)
- Set up config files specifying paths required by QC tools used in the pipeline
- Run the QuaC pipeline just to create the singularity+conda environments using the testing dataset (optional)
Requirements
Direct
- Snakemake-minimal
  - Tested with v6.0.5
  - Gets installed as part of the conda environment
- Python
  - Tested with v3.6.13
  - Gets installed as part of the conda environment
- slurmpy
  - Tested with v0.0.8
  - Gets installed as part of the conda environment
Indirect
- Anaconda/miniconda
  - Tested with Anaconda3/2020.02
  - Available as a module on Cheaha at UAB
- Singularity
  - Tested with v3.5.2
  - Available as a module on Cheaha at UAB
The tools below are used in the QuaC pipeline, and snakemake automatically installs them in conda environments inside the singularity container; therefore, they do not need to be manually installed. For the tool versions used, refer to the snakemake rules.
- qualimap
- picard
- mosdepth
- indexcov
- covviz
- bcftools
- verifybamid
- somalier
- multiqc
Set up workflow config file
QuaC requires a workflow config file in YAML format (default: configs/workflow.yaml), which provides:
- Filepaths to necessary dataset dependencies required by certain QC tools
- Hardware resource configs
A custom workflow config file can be provided to QuaC via --workflow_config.
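For example, a run that points QuaC at a customized workflow config might look like the sketch below; my_workflow.yaml is a hypothetical copy of configs/workflow.yaml with edited paths, and the project and pedigree values are placeholders:
# copy the default workflow config and edit dataset paths and resource settings as needed
cp configs/workflow.yaml my_workflow.yaml
# point QuaC at the customized copy
python src/run_quac.py \
--project_name PROJECT_DUCK \
--pedigree "path/to/lake/with/pedigree_file.ped" \
--workflow_config my_workflow.yaml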
Prepare verifybamid datasets for exome analysis
This step is necessary only if QuaC will be run in exome mode (--exome).
verifybamid provides auxiliary resource files, which are necessary for analysis. However, in its exome resource files the chromosome contigs do not include the chr prefix, which is expected for our analyses at CGDS. Follow these steps to set up resource files with the chr prefix in their contig names.
# cd into exome resources dir
cd <path-to>/VerifyBamID-2.0.1/resource/exome/
# prepend "chr" to the contig name at the start of each line in the .bed and .mu files
sed -e 's/^/chr/' 1000g.phase3.10k.b38.exome.vcf.gz.dat.bed > 1000g.phase3.10k.b38_chr.exome.vcf.gz.dat.bed
sed -e 's/^/chr/' 1000g.phase3.10k.b38.exome.vcf.gz.dat.mu > 1000g.phase3.10k.b38_chr.exome.vcf.gz.dat.mu
# the .UD and .V files are copied unchanged under the new name
cp 1000g.phase3.10k.b38.exome.vcf.gz.dat.UD 1000g.phase3.10k.b38_chr.exome.vcf.gz.dat.UD
cp 1000g.phase3.10k.b38.exome.vcf.gz.dat.V 1000g.phase3.10k.b38_chr.exome.vcf.gz.dat.V
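If the commands above succeed, the four chr-prefixed copies should sit next to the originals; a quick, optional check:
# list the newly created chr-prefixed resource files
ls -1 1000g.phase3.10k.b38_chr.exome.vcf.gz.dat.*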
Run pipeline
After activating the conda environment, the QuaC pipeline can be run using the wrapper script src/run_quac.py. Here are all the available options:
$ python src/run_quac.py -h
usage: run_quac.py [-h] [--project_name] [--projects_path] [--pedigree]
[--quac_watch_config] [--workflow_config] [--outdir]
[--exome] [--include_prior_qc] [--allow_sample_renaming]
[--cluster_config] [--log_dir] [-e] [-n] [-l]
[--rerun_failed] [--slurm_partition]
Wrapper tool for QuaC pipeline.
optional arguments:
-h, --help show this help message and exit
QuaC workflow options:
--project_name Project name (default: None)
--projects_path Path where all projects are hosted. Do not include
project name here. (default:
/data/project/worthey_lab/projects/)
--pedigree Pedigree filepath. Must correspond to the project
supplied via --project_name (default: None)
--quac_watch_config YAML config path specifying QC thresholds for QuaC-
Watch (default:
configs/quac_watch/wgs_quac_watch_config.yaml)
--workflow_config YAML config path specifying filepath to dependencies
of tools used in QuaC (default: configs/workflow.yaml)
--outdir Out directory path (default:
$USER_SCRATCH/tmp/quac/results/test_project/analysis)
--exome Flag to run the workflow in exome mode. WARNING:
Please provide appropriate configs via
--quac_watch_config. (default: False)
--include_prior_qc Flag to additionally use prior QC data as input. See
documentation for more info. (default: False)
--allow_sample_renaming
Flag to allow sample renaming in MultiQC report. See
documentation for more info. (default: False)
QuaC wrapper options:
--cluster_config Cluster config json file. Needed for snakemake to run
jobs in cluster. (default: configs/cluster_config.json)
--log_dir Directory path where logs (both workflow's and
wrapper's) will be stored (default:
$USER_SCRATCH/tmp/quac/logs)
-e , --extra_args Pass additional custom args to snakemake. Equal symbol
is needed for assignment as in this example: -e='--
forceall' (default: None)
-n, --dryrun Flag to dry-run snakemake. Does not execute anything,
and just display what would be done. Equivalent to '--
extra_args "-n"' (default: False)
-l, --run_locally Flag to run the snakemake locally and not as a Slurm
job. Useful for testing purposes. (default: False)
--rerun_failed Number of times snakemake restarts failed jobs. This
may be set to >0 to avoid pipeline failing due to job
fails due to random SLURM issues (default: 1)
--slurm_partition Request a specific partition for the slurm resource
allocation for QuaC workflow. Available partitions in
Cheaha are: express(max 2 hrs), short(max 12 hrs),
medium(max 50 hrs), long(max 150 hrs) (default: short)
Minimal example to run the wrapper script, which in turn will execute the QuaC pipeline:
module reset
module load Anaconda3/2020.02
module load Singularity/3.5.2-GCC-5.4.0-2.26
conda activate quac
python src/run_quac.py \
--project_name PROJECT_DUCK \
--pedigree "path/to/lake/with/pedigree_file.ped"
NOTE:
Besides the basic features, the wrapper script src/run_quac.py offers the following:
- Pass custom snakemake args using the option --extra_args.
- Dry-run snakemake using the flag --dryrun. Note that this is the same as --extra_args='-n'.
- Override the cluster config file passed to snakemake using --cluster_config.
- Run snakemake locally, instead of submitting it to Slurm, using --run_locally. Note that jobs from snakemake will still be submitted to Slurm.
- Rerun failed jobs; by default they are retried once. This may be modified using --rerun_failed.
- Override the Slurm partition used via --slurm_partition.
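For instance, the dry-run and extra-args options can be combined with a normal invocation to preview or tweak what snakemake would do. A sketch using the documented options, with placeholder project and pedigree values:
# preview the jobs snakemake would run, without executing anything
python src/run_quac.py \
--project_name PROJECT_DUCK \
--pedigree "path/to/lake/with/pedigree_file.ped" \
--dryrun

# pass extra args through to snakemake, e.g. to force re-execution of all rules
python src/run_quac.py \
--project_name PROJECT_DUCK \
--pedigree "path/to/lake/with/pedigree_file.ped" \
-e='--forceall'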
Create singularity+conda environments for tools used in QuaC pipeline
All jobs initiated by QuaC run inside conda environments, which are themselves created inside a singularity container. It is a good idea to create these environments before running the pipeline on actual samples. While this step is optional, it ensures that there will not be any conflicts when running multiple instances of the pipeline.
Running the commands below will submit a slurm job that only creates these singularity+conda environments. Note that this slurm job will exit right after creating the environments, and it will not run any QC analyses on the provided input samples.
module reset
module load Anaconda3/2020.02
module load Singularity/3.5.2-GCC-5.4.0-2.26
conda activate quac
# WGS mode
python src/run_quac.py \
--project_name test_project \
--projects_path ".test/ngs-data/" \
--pedigree ".test/configs/no_priorQC/project_2samples.ped" \
--outdir "$USER_SCRATCH/tmp/quac/results/test_project_wgs/analysis" \
-e="--conda-create-envs-only"
Input requirements
- Pedigree file, supplied via --pedigree. Only the samples supplied in the pedigree file will be processed by QuaC, and all of these samples must belong to the same project. A minimal example is sketched after this list.
  - For CGDS use only: This repo includes a handy script src/create_dummy_ped.py that can create a dummy pedigree file, which will lack sex (unless a project tracking sheet is provided), relatedness and affected-status info. See the header of the script for usage instructions. Note that we plan to use Phenotips in the future to produce a fully capable pedigree file. One could also create pedigree files manually, but this can be error-prone.
- Output produced by the small variant caller pipeline. This includes BAM, VCF and QC output. Refer to the test sample dataset, which is representative of the required input.
- QuaC workflow config file. Refer to the section here for more info.
- When run in exome mode, QuaC requires a capture-regions BED file at path path_to_sample/configs/small_variant_caller/<capture_regions>.bed for each sample.
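For reference, here is a minimal sketch of a pedigree file in the standard 6-column PED format (family ID, sample ID, father ID, mother ID, sex, affected status). The family, sample and file names are placeholders, and columns must be tab-separated:
# columns: family_id  sample_id  father_id  mother_id  sex (1=male, 2=female, 0=unknown)  affected (1=unaffected, 2=affected, 0=unknown)
printf 'FAM_A\tA\t0\t0\t1\t1\n' >  PROJECT_DUCK.ped
printf 'FAM_B\tB\t0\t0\t2\t1\n' >> PROJECT_DUCK.ped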
Example usage
# to quack on a WGS project, which also has prior QC data
PROJECT="Quack_Quack"
python src/run_quac.py \
--project_name $PROJECT \
--pedigree "data/raw/ped/${PROJECT}.ped" \
--include_prior_qc \
--allow_sample_renaming
# to quack on a WGS project, run in a medium slurm partition and write results to a dir of choice
PROJECT="Quack_This"
python src/run_quac.py \
--slurm_partition medium \
--project_name $PROJECT \
--pedigree "data/raw/ped/${PROJECT}.ped" \
--outdir "$USER_SCRATCH/tmp/quac/results/test_${PROJECT}/analysis"
# to quack on an exome project
PROJECT="Quack_That"
python src/run_quac.py \
--project_name $PROJECT \
--pedigree "data/raw/ped/${PROJECT}.ped" \
--quac_watch_config "configs/quac_watch/exome_quac_watch_config.yaml" \
--exome
# to quack on an exome project by providing the path where the project is hosted
PROJECT="Quack_That"
python src/run_quac.py \
--project_name $PROJECT \
--projects_path "/path/to/projects/" \
--pedigree "data/raw/ped/${PROJECT}.ped" \
--quac_watch_config "configs/quac_watch/exome_quac_watch_config.yaml" \
--exome
Output
QuaC results are stored at the path specified via the option --outdir (default: $USER_SCRATCH/tmp/quac/results/test_project/analysis). Refer to the testing output to learn more about the output directory structure. Users may primarily be interested in the aggregated QC results produced by MultiQC, both at the sample level and at the project level. These MultiQC reports also include a summary of the QuaC-Watch results.
Note that QuaC's output directory structure has been designed based on the output structure of the small variant caller pipeline.
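Once a run finishes, the MultiQC reports can be located under the chosen output directory. Based on the expected output tree shown in the testing section below, something like this should find them (the path shown is the default --outdir):
# locate sample-level and project-level MultiQC reports
find "$USER_SCRATCH/tmp/quac/results/test_project/analysis" -name "*multiqc*.html"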
Testing pipeline
The system-level testing implemented for this pipeline tests whether the pipeline runs from start to finish without any error. This testing uses the test datasets present in .test/ngs-data/test_project, which reflect a test project containing four samples (two with the input needed when --include_prior_qc is used and two without prior QC data). See here for more info on how these test datasets were created.
NOTE: This testing does not verify that the pipeline's output is correct. Instead, its purpose is to ensure that the pipeline runs from beginning to end without any execution error for the given test dataset.
How to run
module reset
module load Anaconda3/2020.02
module load Singularity/3.5.2-GCC-5.4.0-2.26
conda activate quac
########## No prior QC data involved ##########
PROJECT_CONFIG="project_2samples"
PRIOR_QC_STATUS="no_priorQC"
# WGS mode
python src/run_quac.py \
--project_name test_project \
--projects_path ".test/ngs-data/" \
--pedigree ".test/configs/${PRIOR_QC_STATUS}/${PROJECT_CONFIG}.ped" \
--outdir "$USER_SCRATCH/tmp/quac/results/test_${PROJECT_CONFIG}_wgs-${PRIOR_QC_STATUS}/analysis"
# Exome mode
python src/run_quac.py \
--project_name test_project \
--projects_path ".test/ngs-data/" \
--pedigree ".test/configs/${PRIOR_QC_STATUS}/${PROJECT_CONFIG}.ped" \
--outdir "$USER_SCRATCH/tmp/quac/results/test_${PROJECT_CONFIG}_exome-${PRIOR_QC_STATUS}/analysis" \
--quac_watch_config "configs/quac_watch/exome_quac_watch_config.yaml" \
--exome
########## Includes prior QC data and allows sample renaming ##########
PROJECT_CONFIG="project_2samples"
PRIOR_QC_STATUS="include_priorQC"
# WGS mode
python src/run_quac.py \
--project_name test_project \
--projects_path ".test/ngs-data/" \
--pedigree ".test/configs/${PRIOR_QC_STATUS}/${PROJECT_CONFIG}.ped" \
--outdir "$USER_SCRATCH/tmp/quac/results/test_${PROJECT_CONFIG}_wgs-${PRIOR_QC_STATUS}/analysis" \
--include_prior_qc \
--allow_sample_renaming
# Exome mode
python src/run_quac.py \
--project_name test_project \
--projects_path ".test/ngs-data/" \
--pedigree ".test/configs/${PRIOR_QC_STATUS}/${PROJECT_CONFIG}.ped" \
--outdir "$USER_SCRATCH/tmp/quac/results/test_${PROJECT_CONFIG}_exome-${PRIOR_QC_STATUS}/analysis" \
--quac_watch_config "configs/quac_watch/exome_quac_watch_config.yaml" \
--include_prior_qc \
--allow_sample_renaming \
--exome
Note: Use PROJECT_CONFIG="project_1sample" to test a project with only one sample.
Expected output files
$ tree $USER_SCRATCH/tmp/quac/results/test_project_2_samples/ -d -L 4
/data/scratch/manag/tmp/quac/results/test_project_2_samples/
└── analysis
├── A
│ └── qc
│ ├── bcftools-index
│ │ └── ...
│ ├── bcftools-stats
│ │ └── ...
│ ├── mosdepth
│ │ └── ...
│ ├── multiqc_final_pass
│ │ ├── ...
│ │ └── A_multiqc.html
│ ├── multiqc_initial_pass
│ │ ├── ...
│ │ └── A_multiqc.html
│ ├── picard-stats
│ │ └── ...
│ ├── quac_watch
│ │ └── ...
│ ├── qualimap
│ │ └── ...
│ ├── samtools-stats
│ │ └── ...
│ └── verifyBamID
│ └── ...
├── B
│ └── qc
│ └── same directory structure as that of sample A
└── project_level_qc
├── covviz
│ └── ...
├── indexcov
│ └── ...
├── mosdepth
│ └── ...
├── multiqc
│ ├── ...
│ └── multiqc_report.html
└── somalier
├── ancestry
│ └── ...
├── extract
│ └── ...
└── relatedness
└── ...
Note: Certain tools (e.g., indexcov and covviz) are not executed when QuaC is run in exome mode (--exome).
Visualization of workflow
Visualizations of the pipeline based on the test datasets are available in the directory pipeline_visualized. Commands used to create these visualizations:
# open interactive node
srun --ntasks=1 --cpus-per-task=1 --mem-per-cpu=4096 --partition=express --pty /bin/bash
# setup environment
module reset
module load Anaconda3/2020.02
module load Singularity/3.5.2-GCC-5.4.0-2.26
conda activate quac
DAG_DIR="pipeline_visualized"
###### WGS mode ######
# DAG
python src/run_quac.py \
--project_name test_project \
--projects_path .test/ngs-data/ \
--pedigree .test/configs/project_2_samples.ped \
--run_locally --extra_args "--dag -F | dot -Tpng > ${DAG_DIR}/wgs_dag.png"
# Rulegraph - less informative than the DAG at the sample level, but its lower density makes it easier to skim
python src/run_quac.py \
--project_name test_project \
--projects_path .test/ngs-data/ \
--pedigree .test/configs/project_2_samples.ped \
--run_locally --extra_args "--rulegraph -F | dot -Tpng > ${DAG_DIR}/wgs_rulegraph.png"
###### Exome mode ######
# DAG
python src/run_quac.py \
--project_name test_project \
--projects_path .test/ngs-data/ \
--pedigree .test/configs/project_2_samples.ped \
--exome \
--quac_watch_config "configs/quac_watch/exome_quac_watch_config.yaml" \
--run_locally --extra_args "--dag -F | dot -Tpng > ${DAG_DIR}/exome_dag.png"
# Rulegraph - less informative than the DAG at the sample level, but its lower density makes it easier to skim
python src/run_quac.py \
--project_name test_project \
--projects_path .test/ngs-data/ \
--pedigree .test/configs/project_2_samples.ped \
--exome \
--quac_watch_config "configs/quac_watch/exome_quac_watch_config.yaml" \
--run_locally --extra_args "--rulegraph -F | dot -Tpng > ${DAG_DIR}/exome_rulegraph.png"
Contributing
If you would like to make changes to the source code, please see the contribution guidelines.
Changelog
See here.
Repo owner
- Manavalan Gajapathy