From 3796e19bc39f59b7a88ff02d740bc55369bcc64e Mon Sep 17 00:00:00 2001 From: CJ Herrmann <christina.herrmann@unibas.ch> Date: Tue, 13 Jul 2021 22:42:19 +0200 Subject: [PATCH] added cutadapt -m=1 to snakefiles to avoid empty reads, updated doc --- pipeline_documentation.md | 4 ++-- tests/input_files/rule_config.yaml | 12 ++++++++---- workflow/rules/paired_end.snakefile.smk | 2 ++ workflow/rules/single_end.snakefile.smk | 2 ++ 4 files changed, 14 insertions(+), 6 deletions(-) diff --git a/pipeline_documentation.md b/pipeline_documentation.md index 5f96d48..e7a4be1 100644 --- a/pipeline_documentation.md +++ b/pipeline_documentation.md @@ -557,7 +557,7 @@ Remove adapter sequences from reads with - Adapters to be removed; specify in sample table columns `fq1_3p`, `fq1_5p`, `fq2_3p`, `fq2_5p` - **rule_config.yaml:** - - `-m 10`: Discard processed reads that are shorter than 10 (default 0, that might cause problems in downstream programs) + - `-m 10`: Discard processed reads that are shorter than 10 (default 0; because empty reads will cause problems in downstream programs, `-m 1` is hardcoded in the snakefile. That value will be overridden by the value specified in `rule_config.yaml`) - `-n 2`: search for all the given adapter sequences repeatedly, either until no adapter match was found or until 2 rounds have been performed. (default 1) @@ -579,7 +579,7 @@ Remove poly(A) tails from reads with - **samples.tsv** - Poly(A) stretches to be removed; specify in sample table columns `fq1_polya` and `fq2_polya` - **rule_config.yaml** - - `-m 10`: Discard processed reads that are shorter than 10 (default 0, that might cause problems in downstream programs) + - `-m 10`: Discard processed reads that are shorter than 10 (default 0; because empty reads will cause problems in downstream programs, `-m 1` is hardcoded in the snakefile. 
That value will be overridden by the value specified in `rule_config.yaml`) - `-O 1`: minimal overlap of 1 (default: 3) - **Output** - Reads file (`.fastq.gz`); used in diff --git a/tests/input_files/rule_config.yaml b/tests/input_files/rule_config.yaml index 2cdec07..c7f1242 100644 --- a/tests/input_files/rule_config.yaml +++ b/tests/input_files/rule_config.yaml @@ -96,23 +96,27 @@ prepare_bigWig: remove_adapters_cutadapt: # search for all the given adapter sequences repeatedly, either until no adapter match was found or until n rounds have been performed. (default 1, ZARP recommends 2) -n: '2' - # Discard processed reads that are shorter than 10 (default 0, ZARP strongly recommends m > 0, because empty reads might cause problems in downstream programs) + # Discard processed reads that are shorter than m (default 0, ZARP strongly recommends m > 0, because empty reads will cause problems in downstream programs; + # Hardcoded to -m 1 in both snakefiles; that value will be overridden by the -m value specified here!) -m: '10' pe_remove_adapters_cutadapt: # search for all the given adapter sequences repeatedly, either until no adapter match was found or until n rounds have been performed. (default 1, ZARP recommends 2) -n: '2' - # Discard processed reads that are shorter than 10 (default 0, ZARP strongly recommends m > 0, because empty reads might cause problems in downstream programs) + # Discard processed reads that are shorter than m (default 0, ZARP strongly recommends m > 0, because empty reads will cause problems in downstream programs; + # Hardcoded to -m 1 in both snakefiles; that value will be overridden by the -m value specified here!) 
-m: '10' remove_polya_cutadapt: - # Discard processed reads that are shorter than 10 (default 0, ZARP strongly recommends m > 0, because empty reads might cause problems in downstream programs) + # Discard processed reads that are shorter than m (default 0, ZARP strongly recommends m > 0, because empty reads will cause problems in downstream programs; + # Hardcoded to -m 1 in both snakefiles; that value will be overridden by the -m value specified here!) -m: '10' # Minimal overlap of adapter and read (default 3, ZARP recommends 1 in order to remove all 3' As) -O: '1' pe_remove_polya_cutadapt: - # Discard processed reads that are shorter than 10 (default 0, ZARP strongly recommends m > 0, because empty reads might cause problems in downstream programs) + # Discard processed reads that are shorter than m (default 0, ZARP strongly recommends m > 0, because empty reads will cause problems in downstream programs; + # Hardcoded to -m 1 in both snakefiles; that value will be overridden by the -m value specified here!) 
-m: '10' # Minimal overlap of adapter and read (default 3, ZARP recommends 1 in order to remove all 3' As) -O: '1' diff --git a/workflow/rules/paired_end.snakefile.smk b/workflow/rules/paired_end.snakefile.smk index 37c5546..76a88f1 100644 --- a/workflow/rules/paired_end.snakefile.smk +++ b/workflow/rules/paired_end.snakefile.smk @@ -79,6 +79,7 @@ rule pe_remove_adapters_cutadapt: -g {params.adapter_5_mate1} \ -A {params.adapter_3_mate2} \ -G {params.adapter_5_mate2} \ + -m 1 \ {params.additional_params} \ -o {output.reads1} \ -p {output.reads2} \ @@ -177,6 +178,7 @@ rule pe_remove_polya_cutadapt: -g {params.polya_5_mate1} \ -A {params.polya_3_mate2} \ -G {params.polya_5_mate2} \ + -m 1 \ {params.additional_params} \ -o {output.reads1} \ -p {output.reads2} \ diff --git a/workflow/rules/single_end.snakefile.smk b/workflow/rules/single_end.snakefile.smk index e270aba..cd7bb1d 100644 --- a/workflow/rules/single_end.snakefile.smk +++ b/workflow/rules/single_end.snakefile.smk @@ -66,6 +66,7 @@ rule remove_adapters_cutadapt: -j {threads} \ -a {params.adapters_3} \ -g {params.adapters_5} \ + -m 1 \ {params.additional_params} \ -o {output.reads} \ {input.reads}) \ @@ -140,6 +141,7 @@ rule remove_polya_cutadapt: -j {threads} \ -a {params.polya_3} \ -g {params.polya_5} \ + -m 1 \ {params.additional_params} \ -o {output.reads} \ {input.reads};) \ -- GitLab