diff --git a/Snakefile b/Snakefile index eeb801f8d26cfdbe2ad2c6a9df16cfea8601d625..2f5842ad042cb5bd14cb17afb3d72795efff88fc 100644 --- a/Snakefile +++ b/Snakefile @@ -210,7 +210,13 @@ rule fastqc: "{mate}")) params: - additional_params = parse_rule_config(rule_config, current_rule=current_rule, immutable=('--outdir',)) + additional_params = parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=( + '--outdir', + ) + ) threads: 2 @@ -282,7 +288,18 @@ rule create_index_star: "{index_size}", "STAR_index/STAR_"), sjdbOverhang = "{index_size}", - additional_params = parse_rule_config(rule_config, current_rule=current_rule, immutable=('--runMode','--sjdbOverhang', '--genomeDir', '--genomeFastaFiles', '--outFileNamePrefix', '--sjdbGTFfile')) + additional_params = parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=( + '--runMode', + '--sjdbOverhang', + '--genomeDir', + '--genomeFastaFiles', + '--outFileNamePrefix', + '--sjdbGTFfile', + ) + ) singularity: "docker://zavolab/star:2.7.3a-slim" @@ -336,7 +353,14 @@ rule extract_transcriptome: "transcriptome.fa")) params: - additional_params = parse_rule_config(rule_config, current_rule=current_rule, immutable=('-w', '-g')) + additional_params = parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=( + '-w', + '-g', + ) + ) log: stderr = os.path.join( @@ -426,7 +450,16 @@ rule create_index_salmon: params: kmerLen = "{kmer}", - additional_params = parse_rule_config(rule_config, current_rule=current_rule, immutable=('--transcripts','--decoys', '--index', '--kmerLen')) + additional_params = parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=( + '--transcripts', + '--decoys', + '--index', + '--kmerLen', + ) + ) singularity: "docker://zavolab/salmon:1.1.0-slim" @@ -474,7 +507,13 @@ rule create_index_kallisto: output_dir = os.path.join( config['kallisto_indexes'], "{organism}"), - additional_params = parse_rule_config(rule_config, 
current_rule=current_rule, immutable=('-i',)) + additional_params = parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=( + '-i', + ) + ) singularity: "docker://zavolab/kallisto:0.46.1-slim" @@ -512,7 +551,14 @@ rule extract_transcripts_as_bed12: "full_transcripts_protein_coding.bed")) params: - additional_params = parse_rule_config(rule_config, current_rule=current_rule, immutable=('--gtf', '--bed12')) + additional_params = parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=( + '--gtf', + '--bed12', + ) + ) singularity: "docker://zavolab/zgtf:0.1" @@ -556,7 +602,11 @@ rule index_genomic_alignment_samtools: "{sample}.{seqmode}.Aligned.sortedByCoord.out.bam.bai") params: - additional_params = parse_rule_config(rule_config, current_rule=current_rule, immutable=()) + additional_params = parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=() + ) singularity: "docker://zavolab/samtools:1.10-slim" @@ -628,7 +678,15 @@ rule calculate_TIN_scores: params: sample = "{sample}", - additional_params = parse_rule_config(rule_config, current_rule=current_rule, immutable=('-i', '-r', '--names')) + additional_params = parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=( + '-i', + '-r', + '--names', + ) + ) log: stderr = os.path.join( @@ -697,7 +755,18 @@ rule salmon_quantmerge_genes: "{sample}", sample=pd.unique(samples_table.index.values)), salmon_merge_on = "{salmon_merge_on}", - additional_params = parse_rule_config(rule_config, current_rule=current_rule, immutable=('--quants', '--genes', '--transcripts', '--names', '--column', '--output')) + additional_params = parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=( + '--quants', + '--genes', + '--transcripts', + '--names', + '--column', + '--output', + ) + ) log: stderr = os.path.join( @@ -765,12 +834,22 @@ rule salmon_quantmerge_transcripts: search_id='index', search_value=i) for i in 
pd.unique(samples_table.index.values)]), - sample_name_list = expand( "{sample}", sample=pd.unique(samples_table.index.values)), salmon_merge_on = "{salmon_merge_on}", - additional_params = parse_rule_config(rule_config, current_rule=current_rule, immutable=('--quants', '--genes', '--transcripts', '--names', '--column', '--output')) + additional_params = parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=( + '--quants', + '--genes', + '--transcripts', + '--names', + '--column', + '--output', + ) + ) log: stderr = os.path.join( @@ -842,7 +921,17 @@ rule kallisto_merge_genes: sample_name_list = ','.join(expand( "{sample}", sample=pd.unique(samples_table.index.values))), - additional_params = parse_rule_config(rule_config, current_rule=current_rule, immutable=('--input', '--names', '--txOut', '--anno', '--output')) + additional_params = parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=( + '--input', + '--names', + '--txOut', + '--anno', + '--output', + ) + ) log: stderr = os.path.join( @@ -914,7 +1003,16 @@ rule kallisto_merge_transcripts: sample_name_list = ','.join(expand( "{sample}", sample=pd.unique(samples_table.index.values))), - additional_params = parse_rule_config(rule_config, current_rule=current_rule, immutable=('--input', '--names', '--txOut', '--output')) + additional_params = parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=( + '--input', + '--names', + '--txOut', + '--output', + ) + ) log: stderr = os.path.join( @@ -954,7 +1052,14 @@ rule pca_salmon: "pca_salmon_{molecule}")) params: - additional_params = parse_rule_config(rule_config, current_rule=current_rule, immutable=('--tpm', '--out')) + additional_params = parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=( + '--tpm', + '--out', + ) + ) log: stderr = os.path.join( @@ -993,7 +1098,14 @@ rule pca_kallisto: "pca_kallisto_{molecule}")) params: - additional_params = parse_rule_config(rule_config, 
current_rule=current_rule, immutable=('--tpm', '--out')) + additional_params = parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=( + '--tpm', + '--out', + ) + ) log: stderr = os.path.join( @@ -1084,7 +1196,16 @@ rule star_rpm: os.path.join( os.path.dirname(output.str1), str(wildcards.sample) + "_"), - additional_params = parse_rule_config(rule_config, current_rule=current_rule, immutable=('--runMode', '--inputBAMfile', '--outWigType', '--outFileNamePrefix')) + additional_params = parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=( + '--runMode', + '--inputBAMfile', + '--outWigType', + '--outFileNamePrefix', + ) + ) singularity: "docker://zavolab/star:2.7.3a-slim" @@ -1194,7 +1315,6 @@ rule generate_alfa_index: 'gtf', search_id='organism', search_value=wildcards.organism)), - chr_len = os.path.join( config["star_indexes"], "{organism}", @@ -1220,7 +1340,16 @@ rule generate_alfa_index: genome_index = "sorted_genes", out_dir = lambda wildcards, output: os.path.dirname(output.index_stranded), - additional_params = parse_rule_config(rule_config, current_rule=current_rule, immutable=('-a', '-g', '--chr_len', '-o')) + additional_params = parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=( + '-a', + '-g', + '--chr_len', + '-o', + ) + ) threads: 4 @@ -1317,7 +1446,15 @@ rule alfa_qc: 'alfa_directionality', search_id='index', search_value=wildcards.sample), - additional_params = parse_rule_config(rule_config, current_rule=current_rule, immutable=('-g', '--bedgraph', '-s')) + additional_params = parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=( + '-g', + '--bedgraph', + '-s', + ) + ) singularity: "docker://zavolab/alfa:1.1.1-slim" @@ -1360,7 +1497,16 @@ rule prepare_multiqc_config: logo_path = config['report_logo'], multiqc_intro_text = config['report_description'], url = config['report_url'], - additional_params = parse_rule_config(rule_config, current_rule=current_rule, 
immutable=('--config', '--intro-text', '--custom-logo', '--url')) + additional_params = parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=( + '--config', + '--intro-text', + '--custom-logo', + '--url', + ) + ) log: stderr = os.path.join( @@ -1466,7 +1612,14 @@ rule multiqc_report: results_dir = os.path.join( config["output_dir"]), log_dir = config["log_dir"], - additional_params = parse_rule_config(rule_config, current_rule=current_rule, immutable=('--outdir', '--config')) + additional_params = parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=( + '--outdir', + '--config', + ) + ) log: stderr = os.path.join( @@ -1512,7 +1665,13 @@ rule sort_bed_4_big: "{sample}_{unique}_{strand}.sorted.bg")) params: - additional_params = parse_rule_config(rule_config, current_rule=current_rule, immutable=('-i', )) + additional_params = parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=( + '-i', + ) + ) singularity: "docker://cjh4zavolab/bedtools:2.27" @@ -1568,7 +1727,11 @@ rule prepare_bigWig: "{sample}_{unique}_{strand}.bw") params: - additional_params = parse_rule_config(rule_config, current_rule=current_rule, immutable=()) + additional_params = parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=() + ) singularity: "docker://zavolab/bedgraphtobigwig:4-slim" diff --git a/pipeline_documentation.md b/pipeline_documentation.md index 65c22a2d54ecda99dd71aeb102a40bc4420fecc8..5f96d48eba52621b0962c5e8ce9c88392894086b 100644 --- a/pipeline_documentation.md +++ b/pipeline_documentation.md @@ -104,7 +104,7 @@ sample | Descriptive sample name | `str` seqmode | Required for various steps of the workflow. One of `pe` (for paired-end libraries) or `se` (for single-end libraries). | `str` fq1 | Path of library file in `.fastq.gz` format (or mate 1 read file for paired-end libraries) | `str` index_size | Required for [STAR](#third-party-software-used). 
Ideally the maximum read length minus 1 (`max(ReadLength)-1`). Values lower than maximum read length may result in lower mapping accuracy, while higher values may result in longer processing times. | `int` -kmer | Required for [Salmon](#third-party-software-used). Default value of 31 usually works fine for reads of 75 bp or longer. Consider using lower values of poor mapping is observed. | `int` +kmer | Required for [Salmon](#third-party-software-used). Default value of 31 usually works fine for reads of 75 bp or longer. Consider using lower values if poor mapping is observed. | `int` fq2 | Path of mate 2 read file in `.fastq.gz` format. Value ignored for single-end libraries. | `str` fq1_3p | Required for [Cutadapt](#third-party-software-used). 3' adapter of mate 1. Use value such as `XXXXXXXXXXXXXXX` if no adapter present or if no trimming is desired. | `str` fq1_5p | Required for [Cutadapt](#third-party-software-used). 5' adapter of mate 1. Use value such as `XXXXXXXXXXXXXXX` if no adapter present or if no trimming is desired. | `str` @@ -157,7 +157,7 @@ Create index for [**STAR**](#third-party-software-used) short read aligner. 
- Gene annotation file (`.gtf`) - **Parameters** - **samples.tsv** - - `--sjdbOverhang`: maximum read length - 1; lower values may reduce accuracy,higher values may increase STAR runtime; specify in sample table column `index_size` + - `--sjdbOverhang`: maximum read length - 1; lower values may reduce accuracy, higher values may increase STAR runtime; specify in sample table column `index_size` - **Output** - STAR index; used in [**map_genome_star**](#map_genome_star) - Index includes files: diff --git a/workflow/rules/paired_end.snakefile.smk b/workflow/rules/paired_end.snakefile.smk index 153ce3ab923ec41537823abc5196c4d1aa81cb5e..3f7834cb49f85437ad83fade6cf66b91e2b682ec 100644 --- a/workflow/rules/paired_end.snakefile.smk +++ b/workflow/rules/paired_end.snakefile.smk @@ -39,7 +39,18 @@ rule pe_remove_adapters_cutadapt: get_sample('fq2_3p', search_id='index', search_value=wildcards.sample), adapter_5_mate2 = lambda wildcards: get_sample('fq2_5p', search_id='index', search_value=wildcards.sample), - additional_params = parse_rule_config(rule_config,current_rule=current_rule, immutable=('-a', '-A', '-g', '-G', '-o', '-p')) + additional_params = parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=( + '-a', + '-A', + '-g', + '-G', + '-o', + '-p', + ) + ) singularity: "docker://zavolab/cutadapt:1.16-slim" @@ -123,7 +134,18 @@ rule pe_remove_polya_cutadapt: 'fq2_polya_5p', search_id='index', search_value=wildcards.sample), - additional_params = parse_rule_config(rule_config,current_rule=current_rule, immutable=('-a', '-A', '-g', '-G', '-o', '-p')) + additional_params = parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=( + '-a', + '-A', + '-g', + '-G', + '-o', + '-p', + ) + ) singularity: "docker://zavolab/cutadapt:1.16-slim" @@ -174,8 +196,8 @@ rule pe_map_genome_star: 'index_size', search_id='index', search_value=wildcards.sample), - "STAR_index", - "chrNameLength.txt"), + "STAR_index", + "chrNameLength.txt"), reads1 = 
os.path.join( config["output_dir"], "samples", @@ -216,7 +238,7 @@ rule pe_map_genome_star: 'index_size', search_id='index', search_value=wildcards.sample), - "STAR_index")), + "STAR_index")), outFileNamePrefix = os.path.join( config["output_dir"], "samples", @@ -238,7 +260,23 @@ rule pe_map_genome_star: 'pass_mode', search_id='index', search_value=wildcards.sample), - additional_params = parse_rule_config(rule_config,current_rule=current_rule, immutable=('--twopassMode', '--genomeDir', '--readFilesIn', '--readFilesCommand', '--outFilterMultimapNmax', '--outFileNamePrefix', '--outSAMattributes', '--outStd', '--outSAMtype', '--outSAMattrRGline', '--alignEndsType')) + additional_params = parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=( + '--twopassMode', + '--genomeDir', + '--readFilesIn', + '--readFilesCommand', + '--outFilterMultimapNmax', + '--outFileNamePrefix', + '--outSAMattributes', + '--outStd', + '--outSAMtype', + '--outSAMattrRGline', + '--alignEndsType', + ) + ) singularity: "docker://zavolab/star:2.7.3a-slim" @@ -303,7 +341,7 @@ rule pe_quantification_salmon: 'kmer', search_id='index', search_value=wildcards.sample), - "salmon.idx") + "salmon.idx") output: gn_estimates = os.path.join( @@ -346,7 +384,20 @@ rule pe_quantification_salmon: 'libtype', search_id='index', search_value=wildcards.sample), - additional_params = parse_rule_config(rule_config,current_rule=current_rule, immutable=('--libType', '--fldMean', '--fldSD', '--index', '--geneMap', '-1', '-2', '-o')) + additional_params = parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=( + '--libType', + '--fldMean', + '--fldSD', + '--index', + '--geneMap', + '-1', + '-2', + '-o', + ) + ) log: stderr = os.path.join( @@ -401,7 +452,7 @@ rule pe_genome_quantification_kallisto: 'organism', search_id='index', search_value=wildcards.sample), - "kallisto.idx") + "kallisto.idx") output: pseudoalignment = os.path.join( @@ -430,7 +481,20 @@ rule 
pe_genome_quantification_kallisto: 'kallisto_directionality', search_id='index', search_value=wildcards.sample), - additional_params = parse_rule_config(rule_config,current_rule=current_rule, immutable=('--single', '-i', '-o', '-l', '-s', '--pseudobam', '--fr-stranded', '--rf-stranded')) + additional_params = parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=( + '--single', + '-i', + '-o', + '-l', + '-s', + '--pseudobam', + '--fr-stranded', + '--rf-stranded', + ) + ) singularity: diff --git a/workflow/rules/single_end.snakefile.smk b/workflow/rules/single_end.snakefile.smk index 37cc4192ae31a3757161163c2fd86556c30106fd..337ae752b58ddaccdfcd351b63e6cc18b2982934 100644 --- a/workflow/rules/single_end.snakefile.smk +++ b/workflow/rules/single_end.snakefile.smk @@ -29,7 +29,18 @@ rule remove_adapters_cutadapt: 'fq1_5p', search_id='index', search_value=wildcards.sample), - additional_params = parse_rule_config(rule_config,current_rule=current_rule, immutable=('-a', '-A', '-g', '-G', '-o', '-p')) + additional_params = parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=( + '-a', + '-A', + '-g', + '-G', + '-o', + '-p', + ) + ) singularity: "docker://zavolab/cutadapt:1.16-slim" @@ -88,7 +99,18 @@ rule remove_polya_cutadapt: 'fq1_polya_5p', search_id='index', search_value=wildcards.sample), - additional_params = parse_rule_config(rule_config,current_rule=current_rule, immutable=('-a', '-A', '-g', '-G', '-o', '-p')) + additional_params = parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=( + '-a', + '-A', + '-g', + '-G', + '-o', + '-p', + ) + ) singularity: "docker://zavolab/cutadapt:1.16-slim" @@ -182,7 +204,23 @@ rule map_genome_star: 'pass_mode', search_id='index', search_value=wildcards.sample), - additional_params = parse_rule_config(rule_config,current_rule=current_rule, immutable=('--twopassMode', '--genomeDir', '--readFilesIn', '--readFilesCommand', '--outFilterMultimapNmax', 
'--outFileNamePrefix', '--outSAMattributes', '--outStd', '--outSAMtype', '--outSAMattrRGline', '--alignEndsType')) + additional_params = parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=( + '--twopassMode', + '--genomeDir', + '--readFilesIn', + '--readFilesCommand', + '--outFilterMultimapNmax', + '--outFileNamePrefix', + '--outSAMattributes', + '--outStd', + '--outSAMtype', + '--outSAMattrRGline', + '--alignEndsType', + ) + ) singularity: "docker://zavolab/star:2.7.3a-slim" @@ -295,7 +333,19 @@ rule quantification_salmon: 'sd', search_id='index', search_value=wildcards.sample), - additional_params = parse_rule_config(rule_config,current_rule=current_rule, immutable=('--libType', '--fldMean', '--fldSD', '--index', '--geneMap', '--unmatedReads', '-o')) + additional_params = parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=( + '--libType', + '--fldMean', + '--fldSD', + '--index', + '--geneMap', + '--unmatedReads', + '-o', + ) + ) log: stderr = os.path.join( config["log_dir"], @@ -384,7 +434,20 @@ rule genome_quantification_kallisto: 'kallisto_directionality', search_id='index', search_value=wildcards.sample), - additional_params = parse_rule_config(rule_config,current_rule=current_rule, immutable=('--single', '-i', '-o', '-l', '-s', '--pseudobam', '--fr-stranded', '--rf-stranded')) + additional_params = parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=( + '--single', + '-i', + '-o', + '-l', + '-s', + '--pseudobam', + '--fr-stranded', + '--rf-stranded', + ) + ) threads: 8