diff --git a/pipeline_documentation.md b/pipeline_documentation.md index e7a4be11ee99b989a46be8a66aceb3acc43088ad..9d1ef4b4bca7f411890353828e7952c156ac2642 100644 --- a/pipeline_documentation.md +++ b/pipeline_documentation.md @@ -557,7 +557,7 @@ Remove adapter sequences from reads with - Adapters to be removed; specify in sample table columns `fq1_3p`, `fq1_5p`, `fq2_3p`, `fq2_5p` - **rule_config.yaml:** - - `-m 10`: Discard processed reads that are shorter than 10 (default 0; Because empty reads will cause problems in downstream programs, -m=1 is hardcoded in the snakefile. That value will be overwritten by the value specified in `rule_config.yaml`) + - `-m 10`: Discard processed reads that are shorter than 10 nt (cutadapt's default is 0 (see [cutadapt docs][docs-cutadapt-m]), which will keep reads even if they are empty; because empty reads will cause problems in downstream programs, -m=1 (keep reads only if at least 1 nt long) is hardcoded in the snakefile. That value will be overwritten by the value specified in `rule_config.yaml`) - `-n 2`: search for all the given adapter sequences repeatedly, either until no adapter match was found or until 2 rounds have been performed. (default 1) @@ -579,7 +579,7 @@ Remove poly(A) tails from reads with - **samples.tsv** - Poly(A) stretches to be removed; specify in sample table columns `fq1_polya` and `fq2_polya` - **rule_config.yaml** - - `-m 10`: Discard processed reads that are shorter than 10 (default 0; Because empty reads will cause problems in downstream programs, -m=1 is hardcoded in the snakefile. That value will be overwritten by the value specified in `rule_config.yaml`) + - `-m 10`: Discard processed reads that are shorter than 10 nt (cutadapt's default is 0 (see [cutadapt docs][docs-cutadapt-m]), which will keep reads even if they are empty; because empty reads will cause problems in downstream programs, -m=1 (keep reads only if at least 1 nt long) is hardcoded in the snakefile. 
That value will be overwritten by the value specified in `rule_config.yaml`) - `-O 1`: minimal overlap of 1 (default: 3) - **Output** - Reads file (`.fastq.gz`); used in @@ -690,6 +690,7 @@ Generate pseudoalignments of reads to transcripts with [docs-bedgraphtobigwig]: <http://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64/> [docs-bedtools]: <https://bedtools.readthedocs.io/en/latest/> [docs-cutadapt]: <https://cutadapt.readthedocs.io/en/stable/> +[docs-cutadapt-m]: <https://cutadapt.readthedocs.io/en/stable/guide.html#filtering-reads> [docs-gffread]: <http://ccb.jhu.edu/software/stringtie/gff.shtml#gffread> [docs-fastqc]: <http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/> [docs-imagemagick]: <https://imagemagick.org/> diff --git a/tests/input_files/rule_config.yaml b/tests/input_files/rule_config.yaml index c7f1242b5851a29c825ba04623ae0804f31ab5cc..25e933b0a5d19855672e0066b26051b496ab114e 100644 --- a/tests/input_files/rule_config.yaml +++ b/tests/input_files/rule_config.yaml @@ -96,27 +96,27 @@ prepare_bigWig: remove_adapters_cutadapt: # search for all the given adapter sequences repeatedly, either until no adapter match was found or until n rounds have been performed. (default 1, ZARP recommends 2) -n: '2' - # Discard processed reads that are shorter than m (default 0, ZARP strongly recommends m > 0, because empty reads will cause problems in downstream programs; - # Hardcoded to -m=1 in both snakefiles; that value will be overwritten by the -m value specified here!) + # Discard processed reads that are shorter than m (cutadapt's default is 0, keeping reads even if they are empty. ZARP strongly recommends m > 0, because empty reads will cause problems in downstream programs; + # Hardcoded to -m=1 (Keep reads only if at least 1nt is left) in both snakefiles; that value will be overwritten by the -m value specified here, e.g. keep reads only if they are at least 10nt long!) 
-m: '10' pe_remove_adapters_cutadapt: # search for all the given adapter sequences repeatedly, either until no adapter match was found or until n rounds have been performed. (default 1, ZARP recommends 2) -n: '2' - # Discard processed reads that are shorter than m (default 0, ZARP strongly recommends m > 0, because empty reads will cause problems in downstream programs; - # Hardcoded to -m=1 in both snakefiles; that value will be overwritten by the -m value specified here!) + # Discard processed reads that are shorter than m (cutadapt's default is 0, keeping reads even if they are empty. ZARP strongly recommends m > 0, because empty reads will cause problems in downstream programs; + # Hardcoded to -m=1 (Keep reads only if at least 1nt is left) in both snakefiles; that value will be overwritten by the -m value specified here, e.g. keep reads only if they are at least 10nt long!) -m: '10' remove_polya_cutadapt: - # Discard processed reads that are shorter than m (default 0, ZARP strongly recommends m > 0, because empty reads will cause problems in downstream programs; - # Hardcoded to -m=1 in both snakefiles; that value will be overwritten by the -m value specified here!) + # Discard processed reads that are shorter than m (cutadapt's default is 0, keeping reads even if they are empty. ZARP strongly recommends m > 0, because empty reads will cause problems in downstream programs; + # Hardcoded to -m=1 (Keep reads only if at least 1nt is left) in both snakefiles; that value will be overwritten by the -m value specified here, e.g. keep reads only if they are at least 10nt long!) 
-m: '10' # Minimal overlap of adapter and read (default 3, ZARP recommends 1 in order to remove all 3' As) -O: '1' pe_remove_polya_cutadapt: - # Discard processed reads that are shorter than m (default 0, ZARP strongly recommends m > 0, because empty reads will cause problems in downstream programs; - # Hardcoded to -m=1 in both snakefiles; that value will be overwritten by the -m value specified here!) + # Discard processed reads that are shorter than m (cutadapt's default is 0, keeping reads even if they are empty. ZARP strongly recommends m > 0, because empty reads will cause problems in downstream programs; + # Hardcoded to -m=1 (Keep reads only if at least 1nt is left) in both snakefiles; that value will be overwritten by the -m value specified here, e.g. keep reads only if they are at least 10nt long!) -m: '10' # Minimal overlap of adapter and read (default 3, ZARP recommends 1 in order to remove all 3' As) -O: '1'