- pipeline flowchart:
- tool images: https://hub.docker.com/r/kfdrc/
- dockerfiles: https://github.com/d3b-center/bixtools
- tested with
nodejs npm docker
- https://console.cloud.google.com/storage/browser/broad-references/hg38/v0/
- kfdrc bucket: s3://kids-first-seq-data/broad-references/
- cavatica: https://cavatica.sbgenomics.com/u/yuankun/kf-reference/
{
"file_R1": {
"class": "File",
"location": "inputs/11180_S12_L008_R1_001.fastq"
},
"file_R2": {
"class": "File",
"location": "inputs/11180_S12_L008_R2_001.fastq"
},
"rg" : "@RG\\tID:11180\\tSM:11180\\tPL:ILLUMINA",
"indexed_reference_fasta": {
"class": "File",
"location": "inputs/Homo_sapiens_assembly38.fasta"
},
"ref": {
"class": "File",
"location": "inputs/Homo_sapiens_assembly38.fasta"
},
"contamination_sites_ud": {
"class": "File",
"location": "inputs/Homo_sapiens_assembly38.contam.UD"
},
"contamination_sites_mu": {
"class": "File",
"location": "inputs/Homo_sapiens_assembly38.contam.mu"
},
"contamination_sites_bed": {
"class": "File",
"location": "inputs/Homo_sapiens_assembly38.contam.bed"
},
"knownsites": [
{
"class": "File",
"location": "inputs/1000G_omni2.5.hg38.vcf.gz"
},
{
"class": "File",
"location": "inputs/1000G_phase1.snps.high_confidence.hg38.vcf.gz"
},
{
"class": "File",
"location": "inputs/Homo_sapiens_assembly38.known_indels.vcf.gz"
},
{
"class": "File",
"location": "inputs/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz"
}
],
"reference_dict": {
"class": "File",
"location": "inputs/Homo_sapiens_assembly38.dict"
},
"sequence_grouping_tsv": {
"class": "File",
"location": "inputs/sequence_grouping.txt"
},
"wgs_coverage_interval_list": {
"class": "File",
"location": "inputs/wgs_coverage_regions.hg38.interval_list"
},
"wgs_calling_interval_list": {
"class": "File",
"location": "inputs/wgs_calling_regions.hg38.interval_list"
},
"wgs_evaluation_interval_list": {
"class": "File",
"location": "inputs/wgs_evaluation_regions.hg38.interval_list"
},
"dbsnp_vcf": {
"class": "File",
"location": "inputs/Homo_sapiens_assembly38.dbsnp138.vcf"
},
"output_basename" : "11180_S12_L008",
"genome": "hg38",
"intervals_bed": {
"class": "File",
"location": "inputs/MedExome_hg38_capture_targets.bed"
},
"interval_list": {
"class": "File",
"location": "inputs/wgs_calling_regions.hg38.interval_list"
}
}
## Index files:
Each .vcf and .vcf.gz file needs its .tbi and .idx index files. The reference FASTA file needs its dictionary and both the 32-bit and 64-bit indexes: .dict .64.alt .64.amb .64.ann .64.bwt .64.pac .64.sa .amb .ann .bwt .fai .pac .sa
- sequence_grouping_tsv, generated by
bin/CreateSequenceGroupingTSV.py
- example-inputs.json
git clone https://github.com/cr-ste-justine/chujs-alignment-workflow.git
cd docker
docker login ...
docker pull images.sbgenomics.com/bogdang/sambamba:0.6.3
docker pull kfdrc/python:2.7.13
docker pull kfdrc/gatk:4.0.3.0
docker pull kfdrc/picard:2.18.2-dev
docker pull kfdrc/picard-r:latest-dev
docker pull kfdrc/verifybamid:1.0.2
docker pull kfdrc/gatk:4.beta.1-3.5
docker pull kfdrc/samtools:1.8-dev
docker pull images.sbgenomics.com/bogdang/bwa-kf-bundle:0.1.17
# if not already built
cd snpeff
docker build . --tag=chusj/snpeff
cd ../fastqc
docker build . --tag=chusj/fastqc
# if already built
docker pull chusj/snpeff
docker pull chusj/fastqc