Skip to content
Snippets Groups Projects

Cutadapt set -m=1 to avoid empty reads

Merged CJHerrmann requested to merge cutadapt_min_len into dev
All threads resolved!
Files
4
#############################################################################
#
#
# __________________________________________________________________
# | WARNING: ONLY CHANGE THIS FILE IF YOU KNOW WHAT YOU'RE DOING!!! |
# | ZARP DOES NOT GUARANTEE SENSIBLE RESULTS IF PARAMETERS |
# | ARE CHANGED HERE. |
# |__________________________________________________________________|
#
# RULE CONFIGURATION
#
# RULE CONFIGURATION
#
# For RUN SPECIFIC PARAMETERS (sample specific parameters have to be
# defined in the samples table!)
#
# Specify path to this file in main config.yaml under key 'rule_config'
#
# One top-level keyword per RULE (not per tool, as one tool might be used
# One top-level keyword per RULE (not per tool, as one tool might be used
# with different settings by more than one rule)
#
# Parameters have to be specified exactly like they have to appear on the
# Parameters have to be specified exactly like they have to appear on the
# command line call (e.g. -n or --name)
#
# All values need to be QUOTED STRINGS; to specify flags (i.e., parameters
@@ -33,7 +33,7 @@
# MAIN SNAKEFILE / SEQUENCING-MODE INDEPENDENT #
################################################
#start: No parameters to change here
# start: No parameters to change here
fastqc:
@@ -41,7 +41,7 @@ create_index_star:
extract_transcriptome:
#concatenate_transcriptome_and_genome: No parameters to change here
# concatenate_transcriptome_and_genome: No parameters to change here
create_index_salmon:
@@ -52,7 +52,8 @@ extract_transcripts_as_bed12:
index_genomic_alignment_samtools:
calculate_TIN_scores:
# Minimum number of reads mapped to a transcript (default 10, ZARP recommends 0)
# Minimum number of reads mapped to a transcript (default 10, ZARP
# recommends 0)
-c: '0'
salmon_quantmerge_genes:
@@ -94,53 +95,103 @@ prepare_bigWig:
##########################################
remove_adapters_cutadapt:
# search for all the given adapter sequences repeatedly, either until no adapter match was found or until n rounds have been performed. (default 1, ZARP recommends 2)
# Search for all the given adapter sequences repeatedly, either until no
# adapter match was found or until n rounds have been performed (default 1,
# ZARP recommends 2)
-n: '2'
# Discard processed reads that are shorter than 10 (default 0, ZARP strongly recommends m > 0, because empty reads might cause problems in downstream programs)
# Discard processed reads that are shorter than m; note that cutadapt uses
# a default value of m=0, causing reads without any nucleotides remaining
# after processing to be retained; as "empty reads" will cause errors in
# downstream applications in ZARP, we have changed the default to m=1,
# meaning that only read fragments of at least 1 nt will be retained after
# processing. The default will be overridden by the value specified here,
# but for the reason stated above, we strongly recommend NOT to set m=0;
# cf. https://cutadapt.readthedocs.io/en/stable/guide.html#filtering-reads
-m: '10'
pe_remove_adapters_cutadapt:
# search for all the given adapter sequences repeatedly, either until no adapter match was found or until n rounds have been performed. (default 1, ZARP recommends 2)
# Search for all the given adapter sequences repeatedly, either until no
# adapter match was found or until n rounds have been performed (default 1,
# ZARP recommends 2)
-n: '2'
# Discard processed reads that are shorter than 10 (default 0, ZARP strongly recommends m > 0, because empty reads might cause problems in downstream programs)
# Discard processed reads that are shorter than m; note that cutadapt uses
# a default value of m=0, causing reads without any nucleotides remaining
# after processing to be retained; as "empty reads" will cause errors in
# downstream applications in ZARP, we have changed the default to m=1,
# meaning that only read fragments of at least 1 nt will be retained after
# processing. The default will be overridden by the value specified here,
# but for the reason stated above, we strongly recommend NOT to set m=0;
# cf. https://cutadapt.readthedocs.io/en/stable/guide.html#filtering-reads
-m: '10'
remove_polya_cutadapt:
# Discard processed reads that are shorter than 10 (default 0, ZARP strongly recommends m > 0, because empty reads might cause problems in downstream programs)
# Discard processed reads that are shorter than m; note that cutadapt uses
# a default value of m=0, causing reads without any nucleotides remaining
# after processing to be retained; as "empty reads" will cause errors in
# downstream applications in ZARP, we have changed the default to m=1,
# meaning that only read fragments of at least 1 nt will be retained after
# processing. The default will be overridden by the value specified here,
# but for the reason stated above, we strongly recommend NOT to set m=0;
# cf. https://cutadapt.readthedocs.io/en/stable/guide.html#filtering-reads
-m: '10'
# Minimal overlap of adapter and read (default 3, ZARP recommends 1 in order to remove all 3' As)
-O: '1'
# Minimal overlap of adapter and read (default 3, ZARP recommends 1 in
# order to remove all 3' As)
-O: '1'
pe_remove_polya_cutadapt:
# Discard processed reads that are shorter than 10 (default 0, ZARP strongly recommends m > 0, because empty reads might cause problems in downstream programs)
# Discard processed reads that are shorter than m; note that cutadapt uses
# a default value of m=0, causing reads without any nucleotides remaining
# after processing to be retained; as "empty reads" will cause errors in
# downstream applications in ZARP, we have changed the default to m=1,
# meaning that only read fragments of at least 1 nt will be retained after
# processing. The default will be overridden by the value specified here,
# but for the reason stated above, we strongly recommend NOT to set m=0;
# cf. https://cutadapt.readthedocs.io/en/stable/guide.html#filtering-reads
-m: '10'
# Minimal overlap of adapter and read (default 3, ZARP recommends 1 in order to remove all 3' As)
-O: '1'
# Minimal overlap of adapter and read (default 3, ZARP recommends 1 in
# order to remove all 3' As)
-O: '1'
map_genome_star:
# the score range below the maximum score for multimapping alignments (default 1, ZARP recommends 0)
# The score range below the maximum score for multimapping alignments
# (default 1, ZARP recommends 0)
--outFilterMultimapScoreRange: '0'
# keep only those reads that contain junctions that passed filtering into SJ.out.tab. (default 'Normal', ZARP recommends 'BySJout', as this reduces the number of ”spurious” junctions )
# Keep only those reads that contain junctions that passed filtering into
# "SJ.out.tab" (default 'Normal', ZARP recommends 'BySJout', as this
# reduces the number of spurious junctions)
--outFilterType: 'BySJout'
pe_map_genome_star:
# the score range below the maximum score for multimapping alignments (default 1, ZARP recommends 0)
# The score range below the maximum score for multimapping alignments
# (default 1, ZARP recommends 0)
--outFilterMultimapScoreRange: '0'
# keep only those reads that contain junctions that passed filtering into SJ.out.tab. (default 'Normal', ZARP recommends 'BySJout', as this reduces the number of ”spurious” junctions )
# Keep only those reads that contain junctions that passed filtering into
# "SJ.out.tab" (default 'Normal', ZARP recommends 'BySJout', as this
# reduces the number of spurious junctions)
--outFilterType: 'BySJout'
quantification_salmon:
# correct for sequence specific biases](https://salmon.readthedocs.io/en/latest/salmon.html#seqbias
# Correct for sequence specific biases; cf.
# https://salmon.readthedocs.io/en/latest/salmon.html#seqbias
--seqBias: ''
# enables selective alignment of the sequencing reads when mapping them to the transcriptome; this can improve both the sensitivity and specificity of mapping and, as a result, can [improve quantification accuracy](https://salmon.readthedocs.io/en/latest/salmon.html#validatemappings)
# Enable selective alignment of the sequencing reads when mapping them to
# the transcriptome; this can improve both the sensitivity and specificity
# of mapping and, as a result, can improve quantification accuracy; cf.
# https://salmon.readthedocs.io/en/latest/salmon.html#validatemappings
--validateMappings: ''
pe_quantification_salmon:
# correct for sequence specific biases](https://salmon.readthedocs.io/en/latest/salmon.html#seqbias
# Correct for sequence specific biases; cf.
# https://salmon.readthedocs.io/en/latest/salmon.html#seqbias
--seqBias: ''
# enables selective alignment of the sequencing reads when mapping them to the transcriptome; this can improve both the sensitivity and specificity of mapping and, as a result, can [improve quantification accuracy](https://salmon.readthedocs.io/en/latest/salmon.html#validatemappings)
# Enable selective alignment of the sequencing reads when mapping them to
# the transcriptome; this can improve both the sensitivity and specificity
# of mapping and, as a result, can improve quantification accuracy; cf.
# https://salmon.readthedocs.io/en/latest/salmon.html#validatemappings
--validateMappings: ''
# write out the names of reads (or mates in paired-end reads) that do not map to the transcriptome. For paired-end this gives flags that indicate how a read failed to map
# Write out the names of reads (or mates in paired-end reads) that do not
# map to the transcriptome. For paired-end libraries this gives flags that
# indicate how a read failed to map
--writeUnmappedNames: ''
genome_quantification_kallisto:
Loading