diff --git a/pipeline_documentation.md b/pipeline_documentation.md index 5f96d48eba52621b0962c5e8ce9c88392894086b..e7a4be11ee99b989a46be8a66aceb3acc43088ad 100644 --- a/pipeline_documentation.md +++ b/pipeline_documentation.md @@ -557,7 +557,7 @@ Remove adapter sequences from reads with - Adapters to be removed; specify in sample table columns `fq1_3p`, `fq1_5p`, `fq2_3p`, `fq2_5p` - **rule_config.yaml:** - - `-m 10`: Discard processed reads that are shorter than 10 (default 0, that might cause problems in downstream programs) + - `-m 10`: Discard processed reads that are shorter than 10 (default 0; because empty reads will cause problems in downstream programs, `-m 1` is hardcoded in the snakefile. That value will be overridden by the value specified in `rule_config.yaml`) - `-n 2`: search for all the given adapter sequences repeatedly, either until no adapter match was found or until 2 rounds have been performed. (default 1) @@ -579,7 +579,7 @@ Remove poly(A) tails from reads with - **samples.tsv** - Poly(A) stretches to be removed; specify in sample table columns `fq1_polya` and `fq2_polya` - **rule_config.yaml** - - `-m 10`: Discard processed reads that are shorter than 10 (default 0, that might cause problems in downstream programs) + - `-m 10`: Discard processed reads that are shorter than 10 (default 0; because empty reads will cause problems in downstream programs, `-m 1` is hardcoded in the snakefile. 
That value will be overridden by the value specified in `rule_config.yaml`) - `-O 1`: minimal overlap of 1 (default: 3) - **Output** - Reads file (`.fastq.gz`); used in diff --git a/tests/input_files/rule_config.yaml b/tests/input_files/rule_config.yaml index 2cdec07f884f51f1267c421236ce45b39b45cf09..c7f1242b5851a29c825ba04623ae0804f31ab5cc 100644 --- a/tests/input_files/rule_config.yaml +++ b/tests/input_files/rule_config.yaml @@ -96,23 +96,27 @@ prepare_bigWig: remove_adapters_cutadapt: # search for all the given adapter sequences repeatedly, either until no adapter match was found or until n rounds have been performed. (default 1, ZARP recommends 2) -n: '2' - # Discard processed reads that are shorter than 10 (default 0, ZARP strongly recommends m > 0, because empty reads might cause problems in downstream programs) + # Discard processed reads that are shorter than m (default 0, ZARP strongly recommends m > 0, because empty reads will cause problems in downstream programs; + # hardcoded to -m 1 in both snakefiles; that value will be overridden by the -m value specified here!) -m: '10' pe_remove_adapters_cutadapt: # search for all the given adapter sequences repeatedly, either until no adapter match was found or until n rounds have been performed. (default 1, ZARP recommends 2) -n: '2' - # Discard processed reads that are shorter than 10 (default 0, ZARP strongly recommends m > 0, because empty reads might cause problems in downstream programs) + # Discard processed reads that are shorter than m (default 0, ZARP strongly recommends m > 0, because empty reads will cause problems in downstream programs; + # hardcoded to -m 1 in both snakefiles; that value will be overridden by the -m value specified here!) 
-m: '10' remove_polya_cutadapt: - # Discard processed reads that are shorter than 10 (default 0, ZARP strongly recommends m > 0, because empty reads might cause problems in downstream programs) + # Discard processed reads that are shorter than m (default 0, ZARP strongly recommends m > 0, because empty reads will cause problems in downstream programs; + # hardcoded to -m 1 in both snakefiles; that value will be overridden by the -m value specified here!) -m: '10' # Minimal overlap of adapter and read (default 3, ZARP recommends 1 in order to remove all 3' As) -O: '1' pe_remove_polya_cutadapt: - # Discard processed reads that are shorter than 10 (default 0, ZARP strongly recommends m > 0, because empty reads might cause problems in downstream programs) + # Discard processed reads that are shorter than m (default 0, ZARP strongly recommends m > 0, because empty reads will cause problems in downstream programs; + # hardcoded to -m 1 in both snakefiles; that value will be overridden by the -m value specified here!) 
-m: '10' # Minimal overlap of adapter and read (default 3, ZARP recommends 1 in order to remove all 3' As) -O: '1' diff --git a/workflow/rules/paired_end.snakefile.smk b/workflow/rules/paired_end.snakefile.smk index 37c55460935a5bc1c86716f3ad69efc96e7e5fde..76a88f1dbb6f691f438136a088b73f374920d4dd 100644 --- a/workflow/rules/paired_end.snakefile.smk +++ b/workflow/rules/paired_end.snakefile.smk @@ -79,6 +79,7 @@ rule pe_remove_adapters_cutadapt: -g {params.adapter_5_mate1} \ -A {params.adapter_3_mate2} \ -G {params.adapter_5_mate2} \ + -m 1 \ {params.additional_params} \ -o {output.reads1} \ -p {output.reads2} \ @@ -177,6 +178,7 @@ rule pe_remove_polya_cutadapt: -g {params.polya_5_mate1} \ -A {params.polya_3_mate2} \ -G {params.polya_5_mate2} \ + -m 1 \ {params.additional_params} \ -o {output.reads1} \ -p {output.reads2} \ diff --git a/workflow/rules/single_end.snakefile.smk b/workflow/rules/single_end.snakefile.smk index e270aba84b8a70d69780aa5f31891b991dc8f54f..cd7bb1da424cce1066ebc4e6531dd3ea48204c91 100644 --- a/workflow/rules/single_end.snakefile.smk +++ b/workflow/rules/single_end.snakefile.smk @@ -66,6 +66,7 @@ rule remove_adapters_cutadapt: -j {threads} \ -a {params.adapters_3} \ -g {params.adapters_5} \ + -m 1 \ {params.additional_params} \ -o {output.reads} \ {input.reads}) \ @@ -140,6 +141,7 @@ rule remove_polya_cutadapt: -j {threads} \ -a {params.polya_3} \ -g {params.polya_5} \ + -m 1 \ {params.additional_params} \ -o {output.reads} \ {input.reads};) \