From d35d5500dac8d03419508931b4537002e756037a Mon Sep 17 00:00:00 2001
From: BIOPZ-Herrmann Christina <christina.herrmann@unibas.ch>
Date: Thu, 15 Apr 2021 18:03:11 +0000
Subject: [PATCH] feat: enable user to configure CLI params per rule

---
 Snakefile                                   | 494 ++++++++++++++++----
 pipeline_documentation.md                   | 108 ++---
 tests/input_files/config.mutliple_lanes.yml |   3 +-
 tests/input_files/config.yaml               |   3 +-
 tests/input_files/rule_config.yaml          | 148 ++++++
 workflow/rules/paired_end.snakefile.smk     | 128 +++--
 workflow/rules/single_end.snakefile.smk     | 119 ++++-
 workflow/scripts/zarp_multiqc_config.py     |   6 +-
 8 files changed, 810 insertions(+), 199 deletions(-)
 create mode 100644 tests/input_files/rule_config.yaml

diff --git a/Snakefile b/Snakefile
index f0087cb..2f5842a 100644
--- a/Snakefile
+++ b/Snakefile
@@ -2,7 +2,11 @@
 import os
 import pandas as pd
 import shutil
+import yaml
+from shlex import quote
+from typing import Tuple
 
+## Preparations
 # Get sample table
 samples_table = pd.read_csv(
     config['samples'],
@@ -13,8 +17,33 @@ samples_table = pd.read_csv(
     sep="\t",
 )
 
+# Parse YAML rule config file
+if 'rule_config' in config and config['rule_config']:
+    try:
+        with open(config['rule_config']) as _file:
+            rule_config = yaml.safe_load(_file)
+        logger.info(f"Loaded rule_config from {config['rule_config']}.")
+    except FileNotFoundError:
+        logger.error(f"No rule config file found at {config['rule_config']}. Either provide file or remove rule_config parameter from config.yaml! ")
+        raise
+else:
+    rule_config = {}
+    logger.warning(f"No rule config specified: using default values for all tools.")
+
+# Create dir for cluster logs, if applicable
+if cluster_config:
+    os.makedirs(
+        os.path.join(
+            os.getcwd(),
+            os.path.dirname(cluster_config['__default__']['out']),
+        ),
+        exist_ok=True)
+
+
+## Function definitions
 
 def get_sample(column_id, search_id=None, search_value=None):
+    """ Get relevant per sample information from samples table"""
     if search_id:
         if search_id == 'index':
             return str(samples_table[column_id][samples_table.index == search_value][0])
@@ -22,22 +51,59 @@ def get_sample(column_id, search_id=None, search_value=None):
             return str(samples_table[column_id][samples_table[search_id] == search_value][0])
     else:
         return str(samples_table[column_id][0])
-# Global config
-localrules: start, finish, rename_star_rpm_for_alfa, prepare_multiqc_config
 
 
-if cluster_config:
-    os.makedirs(
-        os.path.join(
-            os.getcwd(),
-            os.path.dirname(cluster_config['__default__']['out']),
-        ),
-        exist_ok=True)
+def parse_rule_config(rule_config: dict, current_rule: str, immutable: Tuple[str, ...] = ()):
+    """Get rule specific parameters from rule_config file"""
+    
+    # If rule config file not present, emtpy string will be returned
+    if not rule_config:
+        logger.info(f"No rule config specified: using default values for all tools.")
+        return ''
+    # Same if current rule not specified in rule config
+    if current_rule not in rule_config or not rule_config[current_rule]:
+        logger.info(f"No additional parameters for rule {current_rule} specified: using default settings.")
+        return ''
+
+    # Subset only section for current rule
+    rule_config = rule_config[current_rule]
+    
+    # Build list of parameters and values
+    params_vals = []
+    for param, val in rule_config.items():
+        # Do not allow the user to change wiring-critical, fixed arguments, or arguments that are passed through samples table
+        if param in immutable:
+            raise ValueError(
+                f"The following parameter in rule {current_rule} is critical for the pipeline to "
+                f"function as expected and cannot be modified: {param}"
+            )
+        # Accept only strings; this prevents unintended results potentially
+        # arising from users entering reserved YAML keywords or nested
+        # structures (lists, dictionaries)
+        if isinstance(val, str):
+            params_vals.append(str(param))
+            # Do not include a value for flags (signified by empty strings)
+            if val:
+                params_vals.append(val)
+        else:
+            raise ValueError(
+                "Only string values allowed for tool parameters: Found type "
+                f"'{type(val).__name__}' for value of parameter '{param}'"
+            )
+    # Return quoted string
+    add_params = ' '.join(quote(item) for item in params_vals)
+    logger.info(f"User specified additional parameters for rule {current_rule}:\n {add_params}")
+    return add_params
+
+
+# Global config
+localrules: start, finish, rename_star_rpm_for_alfa, prepare_multiqc_config
 
 # Include subworkflows
 include: os.path.join("workflow", "rules", "paired_end.snakefile.smk")
 include: os.path.join("workflow", "rules", "single_end.snakefile.smk")
 
+
 rule finish:
     """
         Rule for collecting outputs
@@ -80,6 +146,8 @@ rule finish:
             "summary_kallisto",
             "genes_tpm.tsv")
 
+
+current_rule = 'start'
 rule start:
     '''
        Get samples
@@ -104,12 +172,12 @@ rule start:
             config["log_dir"],
             "samples",
             "{sample}",
-            "start_{sample}.{mate}.stderr.log"),
+            current_rule + "_{sample}.{mate}.stderr.log"),
         stdout = os.path.join(
             config["log_dir"],
             "samples",
             "{sample}",
-            "start_{sample}.{mate}.stdout.log")
+            current_rule + "_{sample}.{mate}.stdout.log")
 
     singularity:
         "docker://bash:5.0.16"
@@ -119,6 +187,7 @@ rule start:
         1> {log.stdout} 2> {log.stderr} "
 
 
+current_rule = 'fastqc'
 rule fastqc:
     '''
         A quality control tool for high throughput sequence data
@@ -139,6 +208,15 @@ rule fastqc:
                 "{sample}",
                 "fastqc",
                 "{mate}"))
+    
+    params:
+        additional_params = parse_rule_config(
+            rule_config,
+            current_rule=current_rule,
+            immutable=(
+                '--outdir',
+                )
+            )
 
     threads: 2
 
@@ -150,19 +228,23 @@ rule fastqc:
             config["log_dir"],
             "samples",
             "{sample}",
-            "fastqc_{mate}.stderr.log"),
+            current_rule + "_{mate}.stderr.log"),
         stdout = os.path.join(
             config["log_dir"],
             "samples",
             "{sample}",
-            "fastqc_{mate}.stdout.log")
+            current_rule + "_{mate}.stdout.log")
 
     shell:
         "(mkdir -p {output.outdir}; \
-        fastqc --outdir {output.outdir} --threads {threads} {input.reads}) \
+        fastqc --outdir {output.outdir} \
+        --threads {threads} \
+        {params.additional_params} \
+        {input.reads}) \
         1> {log.stdout} 2> {log.stderr}"
 
 
+current_rule = 'create_index_star'
 rule create_index_star:
     """
         Create index for STAR alignments
@@ -205,7 +287,19 @@ rule create_index_star:
             "{organism}",
             "{index_size}",
             "STAR_index/STAR_"),
-        sjdbOverhang = "{index_size}"
+        sjdbOverhang = "{index_size}",
+        additional_params = parse_rule_config(
+            rule_config,
+            current_rule=current_rule,
+            immutable=(
+                '--runMode',
+                '--sjdbOverhang',
+                '--genomeDir',
+                '--genomeFastaFiles',
+                '--outFileNamePrefix',
+                '--sjdbGTFfile',
+                )
+            )
 
     singularity:
         "docker://zavolab/star:2.7.3a-slim"
@@ -215,10 +309,10 @@ rule create_index_star:
     log:
         stderr = os.path.join(
             config['log_dir'],
-            "{organism}_{index_size}_create_index_star.stderr.log"),
+            current_rule + "_{organism}_{index_size}.stderr.log"),
         stdout = os.path.join(
             config['log_dir'],
-            "{organism}_{index_size}_create_index_star.stdout.log")
+            current_rule + "_{organism}_{index_size}.stdout.log")
 
     shell:
         "(mkdir -p {params.output_dir}; \
@@ -231,9 +325,11 @@ rule create_index_star:
         --runThreadN {threads} \
         --outFileNamePrefix {params.outFileNamePrefix} \
         --sjdbGTFfile {input.gtf}) \
+        {params.additional_params} \
         1> {log.stdout} 2> {log.stderr}"
 
 
+current_rule = 'extract_transcriptome'
 rule extract_transcriptome:
     """
         Create transcriptome from genome and gene annotations
@@ -256,13 +352,23 @@ rule extract_transcriptome:
             "{organism}",
             "transcriptome.fa"))
 
+    params:
+        additional_params = parse_rule_config(
+            rule_config,
+            current_rule=current_rule,
+            immutable=(
+                '-w',
+                '-g',
+                )
+            )
+
     log:
         stderr = os.path.join(
             config['log_dir'],
-            "{organism}_extract_transcriptome.log"),
+            current_rule + "_{organism}.log"),
         stdout = os.path.join(
             config['log_dir'],
-            "{organism}_extract_transcriptome.log")
+            current_rule + "_{organism}.log")
 
     singularity:
         "docker://zavolab/gffread:0.11.7-slim"
@@ -270,10 +376,13 @@ rule extract_transcriptome:
     shell:
         "(gffread \
         -w {output.transcriptome} \
-        -g {input.genome} {input.gtf}) \
+        -g {input.genome} \
+        {params.additional_params} \
+        {input.gtf}) \
         1> {log.stdout} 2> {log.stderr}"
 
 
+current_rule = 'concatenate_transcriptome_and_genome' 
 rule concatenate_transcriptome_and_genome:
     """
         Concatenate genome and transcriptome
@@ -304,7 +413,7 @@ rule concatenate_transcriptome_and_genome:
     log:
         stderr = os.path.join(
             config['log_dir'],
-            "{organism}_concatenate_transcriptome_and_genome.stderr.log")
+            current_rule + "_{organism}.stderr.log")
 
     shell:
         "(cat {input.transcriptome} {input.genome} \
@@ -312,6 +421,7 @@ rule concatenate_transcriptome_and_genome:
         2> {log.stderr}"
 
 
+current_rule = 'create_index_salmon'
 rule create_index_salmon:
     """
         Create index for Salmon quantification
@@ -339,7 +449,17 @@ rule create_index_salmon:
                 "salmon.idx"))
 
     params:
-        kmerLen = "{kmer}"
+        kmerLen = "{kmer}",
+        additional_params = parse_rule_config(
+            rule_config,
+            current_rule=current_rule,
+            immutable=(
+                '--transcripts',
+                '--decoys',
+                '--index',
+                '--kmerLen',
+                )
+            )
 
     singularity:
         "docker://zavolab/salmon:1.1.0-slim"
@@ -347,10 +467,10 @@ rule create_index_salmon:
     log:
         stderr = os.path.join(
             config['log_dir'],
-            "{organism}_{kmer}_create_index_salmon.stderr.log"),
+            current_rule + "_{organism}_{kmer}.stderr.log"),
         stdout = os.path.join(
             config['log_dir'],
-            "{organism}_{kmer}_create_index_salmon.stdout.log")
+            current_rule + "_{organism}_{kmer}.stdout.log")
 
     threads: 8
 
@@ -361,9 +481,11 @@ rule create_index_salmon:
         --index {output.index} \
         --kmerLen {params.kmerLen} \
         --threads {threads}) \
+        {params.additional_params} \
         1> {log.stdout} 2> {log.stderr}"
 
 
+current_rule = 'create_index_kallisto'
 rule create_index_kallisto:
     """
         Create index for Kallisto quantification
@@ -384,7 +506,14 @@ rule create_index_kallisto:
     params:
         output_dir = os.path.join(
             config['kallisto_indexes'],
-            "{organism}")
+            "{organism}"),
+        additional_params = parse_rule_config(
+            rule_config,
+            current_rule=current_rule,
+            immutable=(
+                '-i',
+                )
+            )
 
     singularity:
         "docker://zavolab/kallisto:0.46.1-slim"
@@ -392,18 +521,22 @@ rule create_index_kallisto:
     log:
         stderr = os.path.join(
             config['log_dir'],
-            "{organism}_create_index_kallisto.stderr.log"),
+            current_rule + "_{organism}.stderr.log"),
         stdout = os.path.join(
             config['log_dir'],
-            "{organism}_create_index_kallisto.stdout.log")
+            current_rule + "_{organism}.stdout.log")
 
     shell:
         "(mkdir -p {params.output_dir}; \
         chmod -R 777 {params.output_dir}; \
-        kallisto index -i {output.index} {input.transcriptome}) \
+        kallisto index \
+        {params.additional_params} \
+        -i {output.index} \
+        {input.transcriptome}) \
         1> {log.stdout}  2> {log.stderr}"
 
 
+current_rule = 'extract_transcripts_as_bed12'
 rule extract_transcripts_as_bed12:
     """
         Convert transcripts to BED12 format
@@ -417,6 +550,16 @@ rule extract_transcripts_as_bed12:
             config['output_dir'],
             "full_transcripts_protein_coding.bed"))
 
+    params:
+        additional_params = parse_rule_config(
+            rule_config,
+            current_rule=current_rule,
+            immutable=(
+                '--gtf',
+                '--bed12',
+                )
+            )
+
     singularity:
         "docker://zavolab/zgtf:0.1"
 
@@ -425,18 +568,20 @@ rule extract_transcripts_as_bed12:
     log:
         stdout = os.path.join(
             config['log_dir'],
-            "extract_transcripts_as_bed12.stdout.log"),
+            current_rule + ".stdout.log"),
         stderr = os.path.join(
             config['log_dir'],
-            "extract_transcripts_as_bed12.stderr.log")
+            current_rule + ".stderr.log")
 
     shell:
         "(gtf2bed12 \
         --gtf {input.gtf} \
         --bed12 {output.bed12}); \
+        {params.additional_params} \
         1> {log.stdout} 2> {log.stderr}"
 
 
+current_rule = 'index_genomic_alignment_samtools'
 rule index_genomic_alignment_samtools:
     '''
         Index genome bamfile using samtools
@@ -456,6 +601,13 @@ rule index_genomic_alignment_samtools:
             "map_genome",
             "{sample}.{seqmode}.Aligned.sortedByCoord.out.bam.bai")
 
+    params:
+        additional_params = parse_rule_config(
+            rule_config,
+            current_rule=current_rule,
+            immutable=()
+            )
+
     singularity:
         "docker://zavolab/samtools:1.10-slim"
 
@@ -466,18 +618,21 @@ rule index_genomic_alignment_samtools:
             config["log_dir"],
             "samples",
             "{sample}",
-            "index_genomic_alignment_samtools.{seqmode}.stderr.log"),
+            current_rule + ".{seqmode}.stderr.log"),
         stdout = os.path.join(
             config["log_dir"],
             "samples",
             "{sample}",
-            "index_genomic_alignment_samtools.{seqmode}.stdout.log")
+            current_rule + ".{seqmode}.stdout.log")
 
     shell:
-        "(samtools index {input.bam} {output.bai};) \
+        "(samtools index \
+        {params.additional_params} \
+        {input.bam} {output.bai};) \
         1> {log.stdout} 2> {log.stderr}"
 
 
+current_rule = 'calculate_TIN_scores'
 rule calculate_TIN_scores:
     """
         Calculate transcript integrity (TIN) score
@@ -522,14 +677,23 @@ rule calculate_TIN_scores:
             "TIN_score.tsv"))
 
     params:
-        sample = "{sample}"
+        sample = "{sample}",
+        additional_params = parse_rule_config(
+            rule_config,
+            current_rule=current_rule,
+            immutable=(
+                '-i',
+                '-r',
+                '--names',
+                )
+            )
 
     log:
         stderr = os.path.join(
             config['log_dir'],
             "samples",
             "{sample}",
-            "calculate_TIN_scores.log")
+            current_rule + ".log")
 
     threads: 8
 
@@ -540,11 +704,12 @@ rule calculate_TIN_scores:
         "(tin_score_calculation.py \
         -i {input.bam} \
         -r {input.transcripts_bed12} \
-        -c 0 \
         --names {params.sample} \
+        {params.additional_params} \
         > {output.TIN_score};) 2> {log.stderr}"
 
 
+current_rule = 'salmon_quantmerge_genes'
 rule salmon_quantmerge_genes:
     '''
         Merge gene quantifications into a single file
@@ -589,15 +754,27 @@ rule salmon_quantmerge_genes:
         sample_name_list = expand(
             "{sample}",
             sample=pd.unique(samples_table.index.values)),
-        salmon_merge_on = "{salmon_merge_on}"
+        salmon_merge_on = "{salmon_merge_on}",
+        additional_params = parse_rule_config(
+            rule_config,
+            current_rule=current_rule,
+            immutable=(
+                '--quants',
+                '--genes',
+                '--transcripts',
+                '--names',
+                '--column',
+                '--output',
+                )
+            )
 
     log:
         stderr = os.path.join(
             config["log_dir"],
-            "salmon_quantmerge_genes_{salmon_merge_on}.stderr.log"),
+            current_rule + "_{salmon_merge_on}.stderr.log"),
         stdout = os.path.join(
             config["log_dir"],
-            "salmon_quantmerge_genes_{salmon_merge_on}.stdout.log")
+            current_rule + "_{salmon_merge_on}.stdout.log")
 
     threads: 1
 
@@ -611,9 +788,11 @@ rule salmon_quantmerge_genes:
         --names {params.sample_name_list} \
         --column {params.salmon_merge_on} \
         --output {output.salmon_out};) \
+        {params.additional_params} \
         1> {log.stdout} 2> {log.stderr}"
 
 
+current_rule = 'salmon_quantmerge_transcripts'
 rule salmon_quantmerge_transcripts:
     '''
         Merge transcript quantifications into a single file
@@ -655,19 +834,30 @@ rule salmon_quantmerge_transcripts:
                 search_id='index',
                 search_value=i)
                 for i in pd.unique(samples_table.index.values)]),
-
         sample_name_list = expand(
             "{sample}",
             sample=pd.unique(samples_table.index.values)),
-        salmon_merge_on = "{salmon_merge_on}"
+        salmon_merge_on = "{salmon_merge_on}",
+        additional_params = parse_rule_config(
+            rule_config,
+            current_rule=current_rule,
+            immutable=(
+                '--quants',
+                '--genes',
+                '--transcripts',
+                '--names',
+                '--column',
+                '--output',
+                )
+            )
 
     log:
         stderr = os.path.join(
             config["log_dir"],
-            "salmon_quantmerge_transcripts_{salmon_merge_on}.stderr.log"),
+            current_rule + "_{salmon_merge_on}.stderr.log"),
         stdout = os.path.join(
             config["log_dir"],
-            "salmon_quantmerge_transcripts_{salmon_merge_on}.stdout.log")
+            current_rule + "_{salmon_merge_on}.stdout.log")
 
     threads: 1
 
@@ -680,9 +870,11 @@ rule salmon_quantmerge_transcripts:
         --names {params.sample_name_list} \
         --column {params.salmon_merge_on} \
         --output {output.salmon_out}) \
+        {params.additional_params} \
         1> {log.stdout} 2> {log.stderr}"
 
 
+current_rule= 'kallisto_merge_genes'
 rule kallisto_merge_genes:
     '''
         Merge gene quantifications into single file
@@ -729,14 +921,25 @@ rule kallisto_merge_genes:
         sample_name_list = ','.join(expand(
             "{sample}",
             sample=pd.unique(samples_table.index.values))),
+        additional_params = parse_rule_config(
+            rule_config,
+            current_rule=current_rule,
+            immutable=(
+                '--input',
+                '--names',
+                '--txOut',
+                '--anno',
+                '--output',
+                )
+            )
 
     log:
         stderr = os.path.join(
             config["log_dir"],
-            "kallisto_merge_genes.stderr.log"),
+            current_rule + ".stderr.log"),
         stdout = os.path.join(
             config["log_dir"],
-            "kallisto_merge_genes.stdout.log")
+            current_rule + ".stdout.log")
 
     threads: 1
 
@@ -750,10 +953,11 @@ rule kallisto_merge_genes:
         --txOut FALSE \
         --anno {input.gtf} \
         --output {params.dir_out} \
-        --verbose) \
+        {params.additional_params} ) \
         1> {log.stdout} 2> {log.stderr}"
 
 
+current_rule = 'kallisto_merge_transcripts'
 rule kallisto_merge_transcripts:
     '''
         Merge transcript quantifications into a single files
@@ -799,14 +1003,24 @@ rule kallisto_merge_transcripts:
         sample_name_list = ','.join(expand(
             "{sample}",
             sample=pd.unique(samples_table.index.values))),
+        additional_params = parse_rule_config(
+            rule_config,
+            current_rule=current_rule,
+            immutable=(
+                '--input',
+                '--names',
+                '--txOut',
+                '--output',
+                )
+            )
 
     log:
         stderr = os.path.join(
             config["log_dir"],
-            "kallisto_merge_transcripts.stderr.log"),
+            current_rule + ".stderr.log"),
         stdout = os.path.join(
             config["log_dir"],
-            "kallisto_merge_transcripts.stdout.log")
+            current_rule + ".stdout.log")
 
     threads: 1
 
@@ -818,10 +1032,11 @@ rule kallisto_merge_transcripts:
         --input {params.tables} \
         --names {params.sample_name_list} \
         --output {params.dir_out} \
-        --verbose) \
+        {params.additional_params}) \
         1> {log.stdout} 2> {log.stderr}"
 
 
+current_rule = 'pca_salmon'
 rule pca_salmon:
     input:
         tpm = os.path.join(
@@ -836,13 +1051,23 @@ rule pca_salmon:
             "zpca",
             "pca_salmon_{molecule}"))
 
+    params:
+        additional_params = parse_rule_config(
+            rule_config,
+            current_rule=current_rule,
+            immutable=(
+                '--tpm',
+                '--out',
+                )
+            )
+
     log:
         stderr = os.path.join(
             config["log_dir"],
-            "pca_salmon_{molecule}.stderr.log"),
+            current_rule + "_{molecule}.stderr.log"),
         stdout = os.path.join(
             config["log_dir"],
-            "pca_salmon_{molecule}.stdout.log")
+            current_rule + "_{molecule}.stdout.log")
 
     threads: 1
 
@@ -853,10 +1078,11 @@ rule pca_salmon:
         "(zpca-tpm  \
         --tpm {input.tpm} \
         --out {output.out} \
-        --verbose) \
+        {params.additional_params}) \
         1> {log.stdout} 2> {log.stderr}"
 
 
+current_rule = 'pca_kallisto'
 rule pca_kallisto:
     input:
         tpm = os.path.join(
@@ -871,13 +1097,23 @@ rule pca_kallisto:
             "zpca",
             "pca_kallisto_{molecule}"))
 
+    params:
+        additional_params = parse_rule_config(
+            rule_config,
+            current_rule=current_rule,
+            immutable=(
+                '--tpm',
+                '--out',
+                )
+            )
+
     log:
         stderr = os.path.join(
             config["log_dir"],
-            "pca_kallisto_{molecule}.stderr.log"),
+            current_rule + "_{molecule}.stderr.log"),
         stdout = os.path.join(
             config["log_dir"],
-            "pca_kallisto_{molecule}.stdout.log")
+            current_rule + "_{molecule}.stdout.log")
 
     threads: 1
 
@@ -888,10 +1124,11 @@ rule pca_kallisto:
         "(zpca-tpm  \
         --tpm {input.tpm} \
         --out {output.out} \
-        --verbose) \
+        {params.additional_params}) \
         1> {log.stdout} 2> {log.stderr}"
 
 
+current_rule = 'star_rpm'
 rule star_rpm:
     '''
         Create stranded bedgraph coverage with STARs RPM normalisation
@@ -958,7 +1195,17 @@ rule star_rpm:
         prefix = lambda wildcards, output:
             os.path.join(
                 os.path.dirname(output.str1),
-                str(wildcards.sample) + "_")
+                str(wildcards.sample) + "_"),
+        additional_params = parse_rule_config(
+            rule_config,
+            current_rule=current_rule,
+            immutable=(
+                '--runMode',
+                '--inputBAMfile',
+                '--outWigType',
+                '--outFileNamePrefix',
+                )
+            )
 
     singularity:
         "docker://zavolab/star:2.7.3a-slim"
@@ -968,12 +1215,12 @@ rule star_rpm:
             config["log_dir"],
             "samples",
             "{sample}",
-            "star_rpm.stderr.log"),
+            current_rule + ".stderr.log"),
         stdout = os.path.join(
             config["log_dir"],
             "samples",
             "{sample}",
-            "star_rpm.stdout.log")
+            current_rule + ".stdout.log")
 
     threads: 4
 
@@ -986,9 +1233,11 @@ rule star_rpm:
         --inputBAMfile {input.bam} \
         --outWigType bedGraph \
         --outFileNamePrefix {params.prefix}) \
+        {params.additional_params} \
         1> {log.stdout} 2> {log.stderr}"
 
 
+current_rule = 'rename_star_rpm_for_alfa'
 rule rename_star_rpm_for_alfa:
     input:
         plus = lambda wildcards:
@@ -1041,12 +1290,12 @@ rule rename_star_rpm_for_alfa:
             config["log_dir"],
             "samples",
             "{sample}",
-            "rename_star_rpm_for_alfa__{unique}.stderr.log"),
+            current_rule + "_{unique}.stderr.log"),
         stdout = os.path.join(
             config["log_dir"],
             "samples",
             "{sample}",
-            "rename_star_rpm_for_alfa__{unique}.stdout.log")
+            current_rule + "_{unique}.stdout.log")
 
     singularity:
         "docker://bash:5.0.16"
@@ -1057,6 +1306,7 @@ rule rename_star_rpm_for_alfa:
          1>{log.stdout} 2>{log.stderr}"
 
 
+current_rule = 'generate_alfa_index'
 rule generate_alfa_index:
     ''' Generate ALFA index files from sorted GTF file '''
     input:
@@ -1065,7 +1315,6 @@ rule generate_alfa_index:
                 'gtf',
                 search_id='organism',
                 search_value=wildcards.organism)),
-
         chr_len = os.path.join(
             config["star_indexes"],
             "{organism}",
@@ -1090,7 +1339,17 @@ rule generate_alfa_index:
     params:
         genome_index = "sorted_genes",
         out_dir = lambda wildcards, output:
-            os.path.dirname(output.index_stranded)
+            os.path.dirname(output.index_stranded),
+        additional_params = parse_rule_config(
+            rule_config,
+            current_rule=current_rule,
+            immutable=(
+                '-a',
+                '-g',
+                '--chr_len',
+                '-o',
+                )
+            )
 
     threads: 4
 
@@ -1100,16 +1359,19 @@ rule generate_alfa_index:
     log:
         os.path.join(
             config["log_dir"],
-            "{organism}_{index_size}_generate_alfa_index.log")
+            current_rule + "_{organism}_{index_size}.log")
 
     shell:
         "(alfa -a {input.gtf} \
         -g {params.genome_index} \
         --chr_len {input.chr_len} \
         -p {threads} \
-        -o {params.out_dir}) &> {log}"
+        -o {params.out_dir} \
+        {params.additional_params}) \
+        &> {log}"
 
 
+current_rule = 'alfa_qc'
 rule alfa_qc:
     '''
         Run ALFA from stranded bedgraph files
@@ -1169,21 +1431,30 @@ rule alfa_qc:
     params:
         out_dir = lambda wildcards, output:
             os.path.dirname(output.biotypes),
+        genome_index = lambda wildcards, input:
+            os.path.abspath(
+                os.path.join(
+                    os.path.dirname(input.gtf),
+                    "sorted_genes")),
         plus = lambda wildcards, input:
             os.path.basename(input.plus),
         minus = lambda wildcards, input:
             os.path.basename(input.minus),
+        name = "{sample}",
         alfa_orientation = lambda wildcards:
             get_sample(
                 'alfa_directionality',
                 search_id='index',
                 search_value=wildcards.sample),
-        genome_index = lambda wildcards, input:
-            os.path.abspath(
-                os.path.join(
-                    os.path.dirname(input.gtf),
-                    "sorted_genes")),
-        name = "{sample}"
+        additional_params = parse_rule_config(
+            rule_config,
+            current_rule=current_rule,
+            immutable=(
+                '-g',
+                '--bedgraph',
+                '-s',
+                )
+            )
 
     singularity:
         "docker://zavolab/alfa:1.1.1-slim"
@@ -1193,16 +1464,19 @@ rule alfa_qc:
             config["log_dir"],
             "samples",
             "{sample}",
-            "alfa_qc.{unique}.log")
+            current_rule + ".{unique}.log")
 
     shell:
         "(cd {params.out_dir}; \
         alfa \
         -g {params.genome_index} \
         --bedgraph {params.plus} {params.minus} {params.name} \
-        -s {params.alfa_orientation}) &> {log}"
+        -s {params.alfa_orientation} \
+        {params.additional_params}) \
+        &> {log}"
 
 
+current_rule = 'prepare_multiqc_config'
 rule prepare_multiqc_config:
     '''
         Prepare config for the MultiQC
@@ -1222,25 +1496,36 @@ rule prepare_multiqc_config:
     params:
         logo_path = config['report_logo'],
         multiqc_intro_text = config['report_description'],
-        url = config['report_url']
+        url = config['report_url'],
+        additional_params = parse_rule_config(
+            rule_config,
+            current_rule=current_rule,
+            immutable=(
+                '--config',
+                '--intro-text',
+                '--custom-logo',
+                '--url',
+                )
+            )
 
     log:
         stderr = os.path.join(
             config["log_dir"],
-            "prepare_multiqc_config.stderr.log"),
+            current_rule + ".stderr.log"),
         stdout = os.path.join(
             config["log_dir"],
-            "prepare_multiqc_config.stdout.log")
+            current_rule + ".stdout.log")
 
     shell:
         "(python {input.script} \
         --config {output.multiqc_config} \
         --intro-text '{params.multiqc_intro_text}' \
         --custom-logo {params.logo_path} \
-        --url '{params.url}') \
+        --url '{params.url}' \
+        {params.additional_params}) \
         1> {log.stdout} 2> {log.stderr}"
 
-
+current_rule = 'multiqc_report'
 rule multiqc_report:
     '''
         Create report with MultiQC
@@ -1326,15 +1611,23 @@ rule multiqc_report:
     params:
         results_dir = os.path.join(
             config["output_dir"]),
-        log_dir = config["log_dir"]
+        log_dir = config["log_dir"],
+        additional_params = parse_rule_config(
+            rule_config,
+            current_rule=current_rule,
+            immutable=(
+                '--outdir',
+                '--config',
+                )
+            )
 
     log:
         stderr = os.path.join(
             config["log_dir"],
-            "multiqc_report.stderr.log"),
+            current_rule + ".stderr.log"),
         stdout = os.path.join(
             config["log_dir"],
-            "multiqc_report.stdout.log")
+            current_rule + ".stdout.log")
 
     singularity:
         "docker://zavolab/multiqc-plugins:1.0.0"
@@ -1343,11 +1636,12 @@ rule multiqc_report:
         "(multiqc \
         --outdir {output.multiqc_report} \
         --config {input.multiqc_config} \
+        {params.additional_params} \
         {params.results_dir} \
         {params.log_dir};) \
         1> {log.stdout} 2> {log.stderr}"
 
-
+current_rule = 'sort_bed_4_big'
 rule sort_bed_4_big:
     '''
         sort bedGraphs in order to work with bedGraphtobigWig
@@ -1370,6 +1664,15 @@ rule sort_bed_4_big:
             "{unique}",
             "{sample}_{unique}_{strand}.sorted.bg"))
 
+    params:
+        additional_params = parse_rule_config(
+            rule_config,
+            current_rule=current_rule,
+            immutable=(
+                '-i',
+                )
+            )
+
     singularity:
         "docker://cjh4zavolab/bedtools:2.27"
 
@@ -1378,13 +1681,16 @@ rule sort_bed_4_big:
             config["log_dir"],
             "samples",
             "{sample}",
-            "sort_bg_{unique}_{strand}.stderr.log")
+            current_rule + "_{unique}_{strand}.stderr.log")
 
     shell:
         "(sortBed \
-         -i {input.bg} \
-         > {output.sorted_bg};) 2> {log.stderr}"
+        -i {input.bg} \
+        {params.additional_params} \
+        > {output.sorted_bg};) 2> {log.stderr}"
+
 
+current_rule = 'prepare_bigWig'
 rule prepare_bigWig:
     '''
         bedGraphtobigWig, for viewing in genome browsers
@@ -1420,6 +1726,13 @@ rule prepare_bigWig:
             "{unique}",
             "{sample}_{unique}_{strand}.bw")
 
+    params:
+        additional_params = parse_rule_config(
+            rule_config,
+            current_rule=current_rule,
+            immutable=()
+            )
+
     singularity:
         "docker://zavolab/bedgraphtobigwig:4-slim"
 
@@ -1428,17 +1741,18 @@ rule prepare_bigWig:
             config["log_dir"],
             "samples",
             "{sample}",
-            "bigwig_{unique}_{strand}.stderr.log"),
+            current_rule + "_{unique}_{strand}.stderr.log"),
 
         stdout = os.path.join(
             config["log_dir"],
             "samples",
             "{sample}",
-            "bigwig_{unique}_{strand}.stdout.log")
+            current_rule + "_{unique}_{strand}.stdout.log")
 
     shell:
         "(bedGraphToBigWig \
-         {input.sorted_bg} \
-         {input.chr_sizes} \
-         {output.bigWig};) \
-         1> {log.stdout} 2> {log.stderr}"
+        {params.additional_params} \
+        {input.sorted_bg} \
+        {input.chr_sizes} \
+        {output.bigWig};) \
+        1> {log.stdout} 2> {log.stderr}"
diff --git a/pipeline_documentation.md b/pipeline_documentation.md
index 04a62fc..5f96d48 100644
--- a/pipeline_documentation.md
+++ b/pipeline_documentation.md
@@ -104,7 +104,7 @@ sample | Descriptive sample name | `str`
 seqmode | Required for various steps of the workflow. One of `pe` (for paired-end libraries) or `se` (for single-end libraries). | `str`
 fq1 | Path of library file in `.fastq.gz` format (or mate 1 read file for paired-end libraries) | `str`
 index_size | Required for [STAR](#third-party-software-used). Ideally the maximum read length minus 1 (`max(ReadLength)-1`). Values lower than maximum read length may result in lower mapping accuracy, while higher values may result in longer processing times. | `int`
-kmer | Required for [Salmon](#third-party-software-used). Default value of 31 usually works fine for reads of 75 bp or longer. Consider using lower values of poor mapping is observed. | `int`
+kmer | Required for [Salmon](#third-party-software-used). Default value of 31 usually works fine for reads of 75 bp or longer. Consider using lower values if poor mapping is observed. | `int`
 fq2 | Path of mate 2 read file in `.fastq.gz` format. Value ignored for for single-end libraries. | `str`
 fq1_3p | Required for [Cutadapt](#third-party-software-used). 3' adapter of mate 1. Use value such as `XXXXXXXXXXXXXXX` if no adapter present or if no trimming is desired. | `str`
 fq1_5p | Required for [Cutadapt](#third-party-software-used). 5' adapter of mate 1. Use value such as `XXXXXXXXXXXXXXX` if no adapter present or if no trimming is desired. | `str`
@@ -156,9 +156,8 @@ Create index for [**STAR**](#third-party-software-used) short read aligner.
   - Genome sequence file (`.fasta`)
   - Gene annotation file (`.gtf`)
 - **Parameters**
-  - `--sjdbOverhang`: maximum read length - 1; lower values may reduce accuracy,
-    higher values may increase STAR runtime; specify in sample table column
-    `index_size`
+  - **samples.tsv**
+    - `--sjdbOverhang`: maximum read length - 1; lower values may reduce accuracy, higher values may increase STAR runtime; specify in sample table column `index_size`
 - **Output**
   - STAR index; used in [**map_genome_star**](#map_genome_star)
   - Index includes files:
@@ -215,7 +214,8 @@ Create index for [**Salmon**](#third-party-software-used) quantification.
   - Chromosome name list `chrName.txt`; from
     [**create_index_star**](#create_index_star)
 - **Parameters**
-  - `--kmerLen`: k-mer length; specify in sample table column `kmer`
+  - **samples.tsv**
+    - `--kmerLen`: k-mer length; specify in sample table column `kmer`
 - **Output**
   - Salmon index; used in [**quantification_salmon**](#quantification_salmon)
 
@@ -354,11 +354,13 @@ Calculates the Transcript Integrity Number (TIN) for each transcript with
     [**index_genomic_alignment_samtools**](#index_genomic_alignment_samtools)
   - Transcript annotations file (12-column `.bed`); from
     [**extract_transcripts_as_bed12**](#extract_transcripts_as_bed12)
+- **Parameters**
+  - **rule_config.yaml**
+    - `-c 0`: minimum number of read mapped to a transcript (default 10)
 - **Output**
   - TIN score table (custom `tsv`); used in
     [**merge_TIN_scores**](#merge_tin_scores)
-- **Non-configurable & non-default**
-  - `-c 0`: minimum number of read mapped to a transcript
+  
 
 #### `salmon_quantmerge_genes`
 
@@ -470,8 +472,8 @@ Annotate alignments with [**ALFA**](#third-party-software-used).
     [**rename_star_rpm_for_alfa**](#rename_star_rpm_for_alfa)
   - ALFA index, stranded; from [**generate_alfa_index**](#generate_alfa_index)
 - **Parameters**
-  - `-s`: library orientation; specified by user in sample table column
-    `kallisto_directionality`
+  - **samples.tsv**
+    - `-s`: library orientation; specified by user in sample table column `kallisto_directionality`
 - **Output**
   - Figures for biotypes and feature categories (`.pdf`)
   - Feature counts table (custom `.tsv`); used in
@@ -486,6 +488,11 @@ Prepare config file for [**MultiQC**](#third-party-software-used).
 - **Input**
   - Directories created during
     [**prepare_files_for_report**](#prepare_files_for_report)
+- **Parameters**
+  All parameters for this rule have to be specified in main `config.yaml`
+  - `--intro-text`
+  - `--custom-logo`
+  - `--url`
 - **Output**
   - Config file (`.yaml`); used in [**multiqc_report**](#multiqc_report)
 
@@ -529,6 +536,8 @@ Target rule as required by [Snakemake][docs-snakemake-target-rule].
   - Coverage files, one per strand and sample (`.bw`); used in
     [**prepare_bigWig**](#prepare_bigwig)
 
+
+
 ### Sequencing mode-specific
 
 > Steps described here have two variants, one with the specified names for
@@ -544,16 +553,19 @@ Remove adapter sequences from reads with
 - **Input**
   - Reads file (`.fastq.gz`); from [**start**](#start)
 - **Parameters**
-  - Adapters to be removed; specify in sample table columns `fq1_3p`, `fq1_5p`,
+  - **samples.tsv**
+    - Adapters to be removed; specify in sample table columns `fq1_3p`, `fq1_5p`,
     `fq2_3p`, `fq2_5p`
+  - **rule_config.yaml:**
+    - `-m 10`: Discard processed reads that are shorter than 10 (default 0, that might cause problems in downstream programs)
+    - `-n 2`: search for all the given adapter sequences repeatedly, either until
+    no adapter match was found or until 2 rounds have been performed. (default 1)
+
 - **Output**
   - Reads file (`.fastq.gz`); used in
     [**remove_polya_cutadapt**](#remove_polya_cutadapt)
-- **Non-configurable & non-default**
-  - `-j 8`: use 8 threads
-  - `-m 10`: Discard processed reads that are shorter than 10 (default 0, that might cause problems in downstream programs)
-  - `-n 2`: search for all the given adapter sequences repeatedly, either until
-    no adapter match was found or until 2 rounds have been performed. (default 1)
+
+
 
 #### `remove_polya_cutadapt`
 
@@ -564,17 +576,18 @@ Remove poly(A) tails from reads with
   - Reads file (`.fastq.gz`); from
     [**remove_adapters_cutadapt**](#remove_adapters_cutadapt)
 - **Parameters**
-  - Poly(A) stretches to be removed; specify in sample table columns `fq1_polya`
-    and `fq2_polya`
+  - **samples.tsv**
+    - Poly(A) stretches to be removed; specify in sample table columns `fq1_polya` and `fq2_polya`
+  - **rule_config.yaml**
+      - `-m 10`: Discard processed reads that are shorter than 10 (default 0, that might cause problems in downstream programs)
+      - `-O 1`: minimal overlap of 1 (default: 3)
 - **Output**
   - Reads file (`.fastq.gz`); used in
     [**genome_quantification_kallisto**](#genome_quantification_kallisto),
     [**map_genome_star**](#map_genome_star) and
     [**quantification_salmon**](#quantification_salmon)
-- **Non-configurable & non-default**
-  - `-j 8`: use 8 threads
-  - `-m 10`: Discard processed reads that are shorter than 10
-  - `-O 1`: minimal overlap of 1 (default: 3)
+
+
 
 #### `map_genome_star`
 
@@ -586,15 +599,13 @@ Align short reads to reference genome and/or transcriptome with
     [**remove_polya_cutadapt**](#remove_polya_cutadapt)
   - Index; from [**create_index_star**](#create_index_star)
 - **Parameters**
-  - `--outFilterMultimapNmax`: maximum number of multiple alignments allowed;
-    if exceeded, read is considered unmapped; specify in sample table column
-    `multimappers`
-  - `--alignEndsType`: one of `Local` (standard local alignment with
-    soft-clipping allowed) or `EndToEnd` (force end-to-end read alignment, do
-    not soft-clip); specify in sample table column `soft_clip`
-  - `--twopassMode`: one of `None` (1-pass mapping) or `Basic` (basic 2-pass
-    mapping, with all 1st-pass junctions inserted into the genome indices on
-    the fly); specify in sample table column `pass_mode`
+  - **samples.tsv**
+    - `--outFilterMultimapNmax`: maximum number of multiple alignments allowed; if exceeded, read is considered unmapped; specify in sample table column `multimappers`
+    - `--alignEndsType`: one of `Local` (standard local alignment with soft-clipping allowed) or `EndToEnd` (force end-to-end read alignment, do not soft-clip); specify in sample table column `soft_clip`
+    - `--twopassMode`: one of `None` (1-pass mapping) or `Basic` (basic 2-pass mapping, with all 1st-pass junctions inserted into the genome indices on the fly); specify in sample table column `pass_mode`
+  - **rule_config.yaml**
+    - `--outFilterMultimapScoreRange=0`: the score range below the maximum score for multimapping alignments (default 1)
+    - `--outFilterType=BySJout`: reduces the number of ”spurious” junctions
 - **Output**
   - Aligned reads file (`.bam`); used in
     [**calculate_TIN_scores**](#calculate_TIN_scores),
@@ -602,12 +613,9 @@ Align short reads to reference genome and/or transcriptome with
     and [**star_rpm**](#star_rpm)
   - STAR log file
 - **Non-configurable & non-default**
-  - `--outFilterMultimapScoreRange=0`: the score range below the maximum score
-    for multimapping alignments (default 1)
   - `--outSAMattributes=All`: NH HI AS nM NM MD jM jI MC ch
   - `--outStd=BAM_SortedByCoordinate`: which output will be directed to `STDOUT` (default 'Log')
   - `--outSAMtype=BAM SortedByCoordinate`: type of SAM/BAM output (default SAM)
-  - `--outFilterType=BySJout`: reduces the number of ”spurious” junctions
   - `--outSAMattrRGline`: ID:rnaseq_pipeline SM: *sampleID*
 
 #### `quantification_salmon`
@@ -621,12 +629,15 @@ Estimate transcript- and gene-level expression with
   - Filtered annotation file (`.gtf`)
   - Index; from [**create_index_salmon**](#create_index_salmon)
 - **Parameters**
-  - `libType`: see [Salmon manual][docs-salmon] for allowed values; specify in
-    sample table column `libtype`
-  - `--fldMean`: mean of distribution of fragment lengths; specify in sample
-    table column `mean` **(single-end only)**
-  - `--fldSD`: standard deviation of distribution of fragment lengths; specify
-    in sample table column `sd` **(single-end only)**
+  - **samples.tsv**
+    - `libType`: see [Salmon manual][docs-salmon] for allowed values; specify in sample table column `libtype`
+    - `--fldMean`: mean of distribution of fragment lengths; specify in sample table column `mean` **(single-end only)**
+    - `--fldSD`: standard deviation of distribution of fragment lengths; specify in sample table column `sd` **(single-end only)**
+  - **rule_config.yaml**
+    - `--seqBias`: [correct for sequence specific
+    biases](https://salmon.readthedocs.io/en/latest/salmon.html#seqbias)
+    - `--validateMappings`: enables selective alignment of the sequencing reads when mapping them to the transcriptome; this can improve both the sensitivity and specificity of mapping and, as a result, can [improve quantification accuracy](https://salmon.readthedocs.io/en/latest/salmon.html#validatemappings).
+    - `--writeUnmappedNames`: write out the names of reads (or mates in paired-end reads) that do not map to the transcriptome. For paired-end this gives flags that indicate how a read failed to map **(paired-end only)**
 - **Output**
   - Gene expression table (`quant.sf`); used in
     [**salmon_quantmerge_genes**](#salmon_quantmerge_genes)
@@ -634,14 +645,7 @@ Estimate transcript- and gene-level expression with
     [**salmon_quantmerge_transcripts**](#salmon_quantmerge_transcripts)
   - `meta_info.json`
   - `flenDist.txt`
-- **Non-configurable & non-default**
-  - `--seqBias`: [correct for sequence specific
-    biases](https://salmon.readthedocs.io/en/latest/salmon.html#seqbias)
-  - `--validateMappings`: enables selective alignment of the sequencing reads
-    when mapping them to the transcriptome; this can improve both the
-    sensitivity and specificity of mapping and, as a result, can [improve
-    quantification
-    accuracy](https://salmon.readthedocs.io/en/latest/salmon.html#validatemappings).
+  
 
 #### `genome_quantification_kallisto`
 
@@ -653,16 +657,16 @@ Generate pseudoalignments of reads to transcripts with
     [**remove_polya_cutadapt**](#remove_polya_cutadapt)
   - Index; from [**create_index_kallisto**](#create_index_kallisto)
 - **Parameters**
-  - `directionality`; specify in sample table column `kallisto_directionality`
-  - `-l`: mean of distribution of fragment lengths; specify in sample table
-    column `mean` **(single-end only)**
-  - `-s`: standard deviation of distribution of fragment lengths; specify in
-    sample table column `sd` **(single-end only)**
+  - **samples.tsv**
+    - `directionality`; specify in sample table column `kallisto_directionality`
+    - `-l`: mean of distribution of fragment lengths; specify in sample table column `mean` **(single-end only)**
+    - `-s`: standard deviation of distribution of fragment lengths; specify in sample table column `sd` **(single-end only)**
 - **Output**
   - Pseudoalignments file (`.sam`) and
   - abundance (`.h5`) 
   used in [**kallisto_merge_genes**](#kallisto_merge_genes)
 - **Non-configurable & non-default**
+  - `--single`: Quantify single-end reads **(single-end only)**
   - `--pseudobam`: Save pseudoalignments to transcriptome to BAM file
 
 [code-alfa]: <https://github.com/biocompibens/ALFA>
diff --git a/tests/input_files/config.mutliple_lanes.yml b/tests/input_files/config.mutliple_lanes.yml
index 8cfbb38..3d04a01 100644
--- a/tests/input_files/config.mutliple_lanes.yml
+++ b/tests/input_files/config.mutliple_lanes.yml
@@ -1,5 +1,6 @@
 ---
   samples: "../input_files/samples.multiple_lanes.tsv"
+  rule_config: "../input_files/rule_config.yaml"
   output_dir: "results"
   log_dir: "logs"
   kallisto_indexes: "results/kallisto_indexes"
@@ -9,4 +10,4 @@
   report_description: "No description provided by user"
   report_logo: "../../images/logo.128px.png"
   report_url: "https://zavolan.biozentrum.unibas.ch/"
-...
\ No newline at end of file
+...
diff --git a/tests/input_files/config.yaml b/tests/input_files/config.yaml
index a7e59f6..853a8a7 100644
--- a/tests/input_files/config.yaml
+++ b/tests/input_files/config.yaml
@@ -1,5 +1,6 @@
 ---
   samples: "../input_files/samples.tsv"
+  rule_config: "../input_files/rule_config.yaml"
   output_dir: "results"
   log_dir: "logs"
   kallisto_indexes: "results/kallisto_indexes"
@@ -9,4 +10,4 @@
   report_description: "No description provided by user"
   report_logo: "../../images/logo.128px.png"
   report_url: "https://zavolan.biozentrum.unibas.ch/"
-...
\ No newline at end of file
+...
diff --git a/tests/input_files/rule_config.yaml b/tests/input_files/rule_config.yaml
new file mode 100644
index 0000000..2cdec07
--- /dev/null
+++ b/tests/input_files/rule_config.yaml
@@ -0,0 +1,148 @@
+#############################################################################
+# 
+#   __________________________________________________________________
+#  | WARNING: ONLY CHANGE THIS FILE IF YOU KNOW WHAT YOU'RE DOING!!!  |
+#  |          ZARP DOES NOT GUARANTEE SENSIBLE RESULTS IF PARAMETERS  |
+#  |          ARE CHANGED HERE.                                       |
+#  |__________________________________________________________________|
+# 
+# RULE CONFIGURATION 
+#
+# For RUN SPECIFIC PARAMETERS (sample specific parameters have to be
+# defined in the samples table!)
+#
+# Specify path to this file in main config.yaml under key 'rule_config'
+#
+# One top-level keyword per RULE (not per tool, as one tool might be used 
+# with different settings by more than one rule)
+#
+# Parameters have to be specified exactly like they have to appear on the 
+# command line call (e.g. -n or --name)
+#
+# All values need to be QUOTED STRINGS; to specify flags (i.e., parameters
+# without values), specify an empty string as value.
+#
+# Note: number of threads has to be set in the respective Snakefile
+#
+#############################################################################
+
+
+# Specify parameters for individual rules:
+
+################################################
+# MAIN SNAKEFILE / SEQUENCING-MODE INDEPENDENT #
+################################################
+
+#start: No parameters to change here
+
+fastqc:
+
+create_index_star:
+
+extract_transcriptome:
+
+#concatenate_transcriptome_and_genome: No parameters to change here
+
+create_index_salmon:
+
+create_index_kallisto:
+
+extract_transcripts_as_bed12:
+
+index_genomic_alignment_samtools:
+
+calculate_TIN_scores:
+    # Minimum number of reads mapped to a transcript (default 10, ZARP recommends 0)
+    -c: '0'
+
+salmon_quantmerge_genes:
+
+salmon_quantmerge_transcripts:
+
+kallisto_merge_genes:
+    --verbose: ''
+
+kallisto_merge_transcripts:
+    --verbose: ''
+
+pca_salmon:
+    --verbose: ''
+
+pca_kallisto:
+    --verbose: ''
+
+star_rpm:
+
+# rename_star_rpm_for_alfa: No parameters to change here
+
+generate_alfa_index:
+
+alfa_qc:
+
+prepare_multiqc_config:
+
+multiqc_report:
+
+sort_bed_4_big:
+
+prepare_bigWig:
+
+##########################################
+# SEQUENCING-MODE SPECIFIC               #
+# single-end: rule name without prefix,  #
+# paired-end: rule name with prefix 'pe' #
+##########################################
+
+remove_adapters_cutadapt:
+    # search for all the given adapter sequences repeatedly, either until no adapter match was found or until n rounds have been performed. (default 1, ZARP recommends 2)
+    -n: '2'
+    # Discard processed reads that are shorter than 10 (default 0, ZARP strongly recommends m > 0, because empty reads might cause problems in downstream programs)
+    -m: '10'
+
+pe_remove_adapters_cutadapt:
+    # search for all the given adapter sequences repeatedly, either until no adapter match was found or until n rounds have been performed. (default 1, ZARP recommends 2)
+    -n: '2'
+    # Discard processed reads that are shorter than 10 (default 0, ZARP strongly recommends m > 0, because empty reads might cause problems in downstream programs)
+    -m: '10'
+
+remove_polya_cutadapt:
+    # Discard processed reads that are shorter than 10 (default 0, ZARP strongly recommends m > 0, because empty reads might cause problems in downstream programs)
+    -m: '10'
+    # Minimal overlap of adapter and read (default 3, ZARP recommends 1 in order to remove all 3' As)
+    -O: '1' 
+
+pe_remove_polya_cutadapt:
+    # Discard processed reads that are shorter than 10 (default 0, ZARP strongly recommends m > 0, because empty reads might cause problems in downstream programs)
+    -m: '10'
+    # Minimal overlap of adapter and read (default 3, ZARP recommends 1 in order to remove all 3' As)
+    -O: '1' 
+
+map_genome_star:
+    # the score range below the maximum score for multimapping alignments (default 1, ZARP recommends 0)
+    --outFilterMultimapScoreRange: '0'
+    # keep only those reads that contain junctions that passed filtering into SJ.out.tab. (default 'Normal', ZARP recommends 'BySJout', as this reduces the number of ”spurious” junctions )
+    --outFilterType: 'BySJout'
+
+pe_map_genome_star:
+    # the score range below the maximum score for multimapping alignments (default 1, ZARP recommends 0)
+    --outFilterMultimapScoreRange: '0'
+    # keep only those reads that contain junctions that passed filtering into SJ.out.tab. (default 'Normal', ZARP recommends 'BySJout', as this reduces the number of ”spurious” junctions )
+    --outFilterType: 'BySJout'
+
+quantification_salmon:
+    # correct for sequence specific biases](https://salmon.readthedocs.io/en/latest/salmon.html#seqbias
+    --seqBias: ''
+    # enables selective alignment of the sequencing reads when mapping them to the transcriptome; this can improve both the sensitivity and specificity of mapping and, as a result, can [improve quantification accuracy](https://salmon.readthedocs.io/en/latest/salmon.html#validatemappings)
+    --validateMappings: ''
+
+pe_quantification_salmon:
+    # correct for sequence specific biases](https://salmon.readthedocs.io/en/latest/salmon.html#seqbias
+    --seqBias: ''
+    # enables selective alignment of the sequencing reads when mapping them to the transcriptome; this can improve both the sensitivity and specificity of mapping and, as a result, can [improve quantification accuracy](https://salmon.readthedocs.io/en/latest/salmon.html#validatemappings)
+    --validateMappings: ''
+    # write out the names of reads (or mates in paired-end reads) that do not map to the transcriptome. For paired-end this gives flags that indicate how a read failed to map
+    --writeUnmappedNames: ''
+
+genome_quantification_kallisto:
+
+pe_genome_quantification_kallisto:
diff --git a/workflow/rules/paired_end.snakefile.smk b/workflow/rules/paired_end.snakefile.smk
index 41c87eb..3f7834c 100644
--- a/workflow/rules/paired_end.snakefile.smk
+++ b/workflow/rules/paired_end.snakefile.smk
@@ -1,3 +1,4 @@
+current_rule = 'pe_remove_adapters_cutadapt'
 rule pe_remove_adapters_cutadapt:
     '''
         Remove adapters
@@ -37,7 +38,19 @@ rule pe_remove_adapters_cutadapt:
         adapter_3_mate2 = lambda wildcards:
             get_sample('fq2_3p', search_id='index', search_value=wildcards.sample),
         adapter_5_mate2 = lambda wildcards:
-            get_sample('fq2_5p', search_id='index', search_value=wildcards.sample)
+            get_sample('fq2_5p', search_id='index', search_value=wildcards.sample),
+        additional_params = parse_rule_config(
+            rule_config,
+            current_rule=current_rule,
+            immutable=(
+                '-a',
+                '-A',
+                '-g',
+                '-G',
+                '-o',
+                '-p',
+                )
+            )
 
     singularity:
         "docker://zavolab/cutadapt:1.16-slim"
@@ -49,22 +62,21 @@ rule pe_remove_adapters_cutadapt:
             config["log_dir"],
             "samples",
             "{sample}",
-            "remove_adapters_cutadapt.pe.stderr.log"),
+            current_rule + ".stderr.log"),
         stdout = os.path.join(
             config["log_dir"],
             "samples",
             "{sample}",
-            "remove_adapters_cutadapt.pe.stdout.log")
+            current_rule + ".stdout.log")
 
     shell:
         "(cutadapt \
         -j {threads} \
-        -m 10 \
-        -n 2 \
         -a {params.adapter_3_mate1} \
         -g {params.adapter_5_mate1} \
         -A {params.adapter_3_mate2} \
         -G {params.adapter_5_mate2} \
+        {params.additional_params} \
         -o {output.reads1} \
         -p {output.reads2} \
         {input.reads1} \
@@ -72,6 +84,7 @@ rule pe_remove_adapters_cutadapt:
         1> {log.stdout} 2>{log.stderr}"
 
 
+current_rule = 'pe_remove_polya_cutadapt'
 rule pe_remove_polya_cutadapt:
     '''
         Remove polyA tails
@@ -120,7 +133,19 @@ rule pe_remove_polya_cutadapt:
             get_sample(
                 'fq2_polya_5p',
                 search_id='index',
-                search_value=wildcards.sample)
+                search_value=wildcards.sample),
+        additional_params = parse_rule_config(
+            rule_config,
+            current_rule=current_rule,
+            immutable=(
+                '-a',
+                '-A',
+                '-g',
+                '-G',
+                '-o',
+                '-p',
+                )
+            )
 
     singularity:
         "docker://zavolab/cutadapt:1.16-slim"
@@ -132,22 +157,21 @@ rule pe_remove_polya_cutadapt:
             config["log_dir"],
             "samples",
             "{sample}",
-            "remove_polya_cutadapt.pe.stderr.log"),
+            current_rule + ".stderr.log"),
         stdout = os.path.join(
             config["log_dir"],
             "samples",
             "{sample}",
-            "remove_polya_cutadapt.pe.stdout.log")
+            current_rule + ".stdout.log")
 
     shell:
         "(cutadapt \
         -j {threads} \
-        -m 10 \
-        -O 1 \
         -a {params.polya_3_mate1} \
         -g {params.polya_5_mate1} \
         -A {params.polya_3_mate2} \
         -G {params.polya_5_mate2} \
+        {params.additional_params} \
         -o {output.reads1} \
         -p {output.reads2} \
         {input.reads1} \
@@ -155,6 +179,7 @@ rule pe_remove_polya_cutadapt:
         1> {log.stdout} 2>{log.stderr}"
 
 
+current_rule = 'pe_map_genome_star'
 rule pe_map_genome_star:
     '''
         Map to genome using STAR
@@ -171,8 +196,8 @@ rule pe_map_genome_star:
                     'index_size',
                     search_id='index',
                     search_value=wildcards.sample),
-                "STAR_index",
-                "chrNameLength.txt"),
+                    "STAR_index",
+                    "chrNameLength.txt"),
         reads1 = os.path.join(
             config["output_dir"],
             "samples",
@@ -213,7 +238,7 @@ rule pe_map_genome_star:
                     'index_size',
                     search_id='index',
                     search_value=wildcards.sample),
-                "STAR_index")),
+                    "STAR_index")),
         outFileNamePrefix = os.path.join(
             config["output_dir"],
             "samples",
@@ -235,6 +260,23 @@ rule pe_map_genome_star:
                 'pass_mode',
                 search_id='index',
                 search_value=wildcards.sample),
+        additional_params = parse_rule_config(
+            rule_config,
+            current_rule=current_rule,
+            immutable=(
+                '--twopassMode',
+                '--genomeDir',
+                '--readFilesIn',
+                '--readFilesCommand',
+                '--outFilterMultimapNmax',
+                '--outFileNamePrefix',
+                '--outSAMattributes',
+                '--outStd',
+                '--outSAMtype',
+                '--outSAMattrRGline',
+                '--alignEndsType',
+                )
+            )
 
     singularity:
         "docker://zavolab/star:2.7.3a-slim"
@@ -246,7 +288,7 @@ rule pe_map_genome_star:
             config["log_dir"],
             "samples",
             "{sample}",
-            "map_genome_star.pe.stderr.log")
+            current_rule + ".stderr.log")
 
     shell:
         "(STAR \
@@ -256,17 +298,18 @@ rule pe_map_genome_star:
         --readFilesIn {input.reads1} {input.reads2} \
         --readFilesCommand zcat \
         --outFilterMultimapNmax {params.multimappers} \
-        --outFilterMultimapScoreRange 0 \
         --outFileNamePrefix {params.outFileNamePrefix} \
         --outSAMattributes All \
         --outStd BAM_SortedByCoordinate \
         --outSAMtype BAM SortedByCoordinate \
-        --outFilterType BySJout \
         --outSAMattrRGline ID:rnaseq_pipeline SM:{params.sample_id} \
-        --alignEndsType {params.soft_clip} > {output.bam};) \
+        --alignEndsType {params.soft_clip} \
+        {params.additional_params} \
+        > {output.bam};) \
         2> {log.stderr}"
 
 
+current_rule = 'pe_quantification_salmon'
 rule pe_quantification_salmon:
     '''
         Quantification at transcript and gene level using Salmon
@@ -298,7 +341,7 @@ rule pe_quantification_salmon:
                     'kmer',
                     search_id='index',
                     search_value=wildcards.sample),
-                "salmon.idx")
+                    "salmon.idx")
 
     output:
         gn_estimates = os.path.join(
@@ -340,19 +383,33 @@ rule pe_quantification_salmon:
             get_sample(
                 'libtype',
                 search_id='index',
-                search_value=wildcards.sample)
+                search_value=wildcards.sample),
+        additional_params = parse_rule_config(
+            rule_config,
+            current_rule=current_rule,
+            immutable=(
+                '--libType',
+                '--fldMean',
+                '--fldSD',
+                '--index',
+                '--geneMap',
+                '-1',
+                '-2',
+                '-o',
+                )
+            )
 
     log:
         stderr = os.path.join(
             config["log_dir"],
             "samples",
             "{sample}",
-            "genome_quantification_salmon.pe.stderr.log"),
+            current_rule + ".stderr.log"),
         stdout = os.path.join(
             config["log_dir"],
             "samples",
             "{sample}",
-            "genome_quantification_salmon.pe.stdout.log"),
+            current_rule + ".stdout.log"),
 
     threads: 6
 
@@ -362,10 +419,8 @@ rule pe_quantification_salmon:
     shell:
         "(salmon quant \
         --libType {params.libType} \
-        --seqBias \
-        --validateMappings \
         --threads {threads} \
-        --writeUnmappedNames \
+        {params.additional_params} \
         --index {input.index} \
         --geneMap {input.gtf} \
         -1 {input.reads1} \
@@ -374,6 +429,7 @@ rule pe_quantification_salmon:
         ) 1> {log.stdout} 2> {log.stderr}"
 
 
+current_rule = 'pe_genome_quantification_kallisto'
 rule pe_genome_quantification_kallisto:
     '''
         Quantification at transcript and gene level using Kallisto
@@ -396,7 +452,7 @@ rule pe_genome_quantification_kallisto:
                     'organism',
                     search_id='index',
                     search_value=wildcards.sample),
-                "kallisto.idx")
+                    "kallisto.idx")
 
     output:
         pseudoalignment = os.path.join(
@@ -424,7 +480,22 @@ rule pe_genome_quantification_kallisto:
             get_sample(
                 'kallisto_directionality',
                 search_id='index',
-                search_value=wildcards.sample)
+                search_value=wildcards.sample),
+        additional_params = parse_rule_config(
+            rule_config,
+            current_rule=current_rule,
+            immutable=(
+                '--single',
+                '-i',
+                '-o',
+                '-l',
+                '-s',
+                '--pseudobam',
+                '--fr-stranded',
+                '--rf-stranded',
+                )
+            )
+
 
     singularity:
         "docker://zavolab/kallisto:0.46.1-slim"
@@ -436,15 +507,16 @@ rule pe_genome_quantification_kallisto:
             config["log_dir"],
             "samples",
             "{sample}",
-            "genome_quantification_kallisto.pe.stderr.log")
+            current_rule + ".stderr.log")
 
     shell:
         "(kallisto quant \
         -i {input.index} \
         -o {params.output_dir} \
-        --pseudobam \
         -t {threads} \
         {params.directionality}-stranded \
+        {params.additional_params} \
+        --pseudobam \
         {input.reads1} {input.reads2} > {output.pseudoalignment}) \
         2> {log.stderr}"
 
diff --git a/workflow/rules/single_end.snakefile.smk b/workflow/rules/single_end.snakefile.smk
index 9563e06..337ae75 100644
--- a/workflow/rules/single_end.snakefile.smk
+++ b/workflow/rules/single_end.snakefile.smk
@@ -1,3 +1,4 @@
+current_rule = 'remove_adapters_cutadapt'
 rule remove_adapters_cutadapt:
     '''
         Remove adapters
@@ -27,7 +28,19 @@ rule remove_adapters_cutadapt:
             get_sample(
                 'fq1_5p',
                 search_id='index',
-                search_value=wildcards.sample)
+                search_value=wildcards.sample),
+        additional_params = parse_rule_config(
+            rule_config,
+            current_rule=current_rule,
+            immutable=(
+                '-a',
+                '-A',
+                '-g',
+                '-G',
+                '-o',
+                '-p',
+                )
+            )
 
     singularity:
         "docker://zavolab/cutadapt:1.16-slim"
@@ -39,24 +52,24 @@ rule remove_adapters_cutadapt:
             config["log_dir"],
             "samples",
             "{sample}",
-            "remove_adapters_cutadapt.se.stderr.log"),
+            current_rule + ".se.stderr.log"),
         stdout = os.path.join(
             config["log_dir"],
             "samples",
             "{sample}",
-            "remove_adapters_cutadapt.se.stdout.log")
+            current_rule + ".se.stdout.log")
     shell:
         "(cutadapt \
         -j {threads} \
-        -m 10 \
-        -n 2 \
         -a {params.adapters_3} \
         -g {params.adapters_5} \
+        {params.additional_params} \
         -o {output.reads} \
         {input.reads}) \
         1> {log.stdout} 2> {log.stderr}"
 
 
+current_rule = 'remove_polya_cutadapt'
 rule remove_polya_cutadapt:
     '''
         Remove ployA  tails
@@ -85,7 +98,19 @@ rule remove_polya_cutadapt:
             get_sample(
                 'fq1_polya_5p',
                 search_id='index',
-                search_value=wildcards.sample)
+                search_value=wildcards.sample),
+        additional_params = parse_rule_config(
+            rule_config,
+            current_rule=current_rule,
+            immutable=(
+                '-a',
+                '-A',
+                '-g',
+                '-G',
+                '-o',
+                '-p',
+                )
+            )
 
     singularity:
         "docker://zavolab/cutadapt:1.16-slim"
@@ -97,25 +122,25 @@ rule remove_polya_cutadapt:
             config["log_dir"],
             "samples",
             "{sample}",
-            "remove_polya_cutadapt.se.stderr.log"),
+            current_rule + ".se.stderr.log"),
         stdout = os.path.join(
             config["log_dir"],
             "samples",
             "{sample}",
-            "remove_polya_cutadapt.se.stdout.log")
+            current_rule + ".se.stdout.log")
 
     shell:
         "(cutadapt \
         -j {threads} \
-        -O 1 \
-        -m 10  \
         -a {params.polya_3} \
         -g {params.polya_5} \
+        {params.additional_params} \
         -o {output.reads} \
         {input.reads};) \
         1> {log.stdout} 2> {log.stderr}"
 
 
+current_rule = 'map_genome_star'
 rule map_genome_star:
     '''
         Map to genome using STAR
@@ -178,7 +203,24 @@ rule map_genome_star:
             get_sample(
                 'pass_mode',
                 search_id='index',
-                search_value=wildcards.sample)
+                search_value=wildcards.sample),
+        additional_params = parse_rule_config(
+            rule_config,
+            current_rule=current_rule,
+            immutable=(
+                '--twopassMode',
+                '--genomeDir',
+                '--readFilesIn',
+                '--readFilesCommand',
+                '--outFilterMultimapNmax',
+                '--outFileNamePrefix',
+                '--outSAMattributes',
+                '--outStd',
+                '--outSAMtype',
+                '--outSAMattrRGline',
+                '--alignEndsType',
+                )
+            )
 
     singularity:
         "docker://zavolab/star:2.7.3a-slim"
@@ -190,7 +232,7 @@ rule map_genome_star:
             config["log_dir"],
             "samples",
             "{sample}",
-            "map_genome_star.se.stderr.log")
+            current_rule + ".se.stderr.log")
 
     shell:
         "(STAR \
@@ -200,17 +242,18 @@ rule map_genome_star:
         --readFilesIn {input.reads} \
         --readFilesCommand zcat \
         --outFilterMultimapNmax {params.multimappers} \
-        --outFilterMultimapScoreRange 0 \
         --outFileNamePrefix {params.outFileNamePrefix} \
         --outSAMattributes All \
         --outStd BAM_SortedByCoordinate \
         --outSAMtype BAM SortedByCoordinate \
-        --outFilterType BySJout \
         --outSAMattrRGline ID:rnaseq_pipeline SM:{params.sample_id} \
-        --alignEndsType {params.soft_clip} > {output.bam};) \
+        --alignEndsType {params.soft_clip} \
+        {params.additional_params} \
+        > {output.bam};) \
         2> {log.stderr}"
 
 
+current_rule = 'quantification_salmon'
 rule quantification_salmon:
     '''
         Quantification at transcript and gene level using Salmon
@@ -289,18 +332,31 @@ rule quantification_salmon:
             get_sample(
                 'sd',
                 search_id='index',
-                search_value=wildcards.sample)
+                search_value=wildcards.sample),
+        additional_params = parse_rule_config(
+            rule_config,
+            current_rule=current_rule,
+            immutable=(
+                '--libType',
+                '--fldMean',
+                '--fldSD',
+                '--index',
+                '--geneMap',
+                '--unmatedReads',
+                '-o',
+                )
+            )
     log:
         stderr = os.path.join(
             config["log_dir"],
             "samples",
             "{sample}",
-            "quantification_salmon.se.stderr.log"),
+            current_rule + ".se.stderr.log"),
         stdout = os.path.join(
             config["log_dir"],
             "samples",
             "{sample}",
-            "quantification_salmon.se.stdout.log")
+            current_rule + ".se.stdout.log")
 
     threads: 12
 
@@ -310,11 +366,10 @@ rule quantification_salmon:
     shell:
         "(salmon quant \
         --libType {params.libType} \
-        --seqBias \
-        --validateMappings \
         --threads {threads} \
         --fldMean {params.fraglen} \
         --fldSD {params.fragsd} \
+        {params.additional_params} \
         --index {input.index} \
         --geneMap {input.gtf} \
         --unmatedReads {input.reads} \
@@ -322,6 +377,7 @@ rule quantification_salmon:
         1> {log.stdout} 2> {log.stderr}"
 
 
+current_rule = 'genome_quantification_kallisto'
 rule genome_quantification_kallisto:
     '''
         Quantification at transcript and gene level using Kallisto
@@ -377,7 +433,21 @@ rule genome_quantification_kallisto:
             get_sample(
                 'kallisto_directionality',
                 search_id='index',
-                search_value=wildcards.sample)
+                search_value=wildcards.sample),
+        additional_params = parse_rule_config(
+            rule_config,
+            current_rule=current_rule,
+            immutable=(
+                '--single',
+                '-i',
+                '-o',
+                '-l',
+                '-s',
+                '--pseudobam',
+                '--fr-stranded',
+                '--rf-stranded',
+                )
+            )
 
     threads: 8
 
@@ -386,21 +456,22 @@ rule genome_quantification_kallisto:
             config["log_dir"],
             "samples",
             "{sample}",
-            "genome_quantification_kallisto.se.stderr.log")
+            current_rule + ".se.stderr.log")
 
     singularity:
         "docker://zavolab/kallisto:0.46.1-slim"
 
     shell:
         "(kallisto quant \
+        --single \
         -i {input.index} \
         -o {params.output_dir} \
-        --single \
         -l {params.fraglen} \
         -s {params.fragsd} \
-        --pseudobam \
         -t {threads} \
         {params.directionality}-stranded \
+        {params.additional_params} \
+        --pseudobam \
         {input.reads} > {output.pseudoalignment};) \
         2> {log.stderr}"
 
diff --git a/workflow/scripts/zarp_multiqc_config.py b/workflow/scripts/zarp_multiqc_config.py
index 0b72f85..40a469e 100644
--- a/workflow/scripts/zarp_multiqc_config.py
+++ b/workflow/scripts/zarp_multiqc_config.py
@@ -98,12 +98,12 @@ module_order:
   - cutadapt:
       name: "Cutadapt: adapter removal"
       path_filters:
-      - "*/*/remove_adapters_cutadapt*.stdout.log"
+      - "*/*/*remove_adapters_cutadapt*.stdout.log"
 
   - cutadapt:
       name: "Cutadapt: polyA tails removal"
       path_filters:
-      - "*/*/remove_polya_cutadapt*.stdout.log"
+      - "*/*/*remove_polya_cutadapt*.stdout.log"
 
   - star:
       path_filters:
@@ -123,7 +123,7 @@ module_order:
 
   - kallisto:
       path_filters:
-      - "*/*/genome_quantification_kallisto*.stderr.log"
+      - "*/*/*genome_quantification_kallisto*.stderr.log"
 
 fn_clean_exts:
   - '.fq1'
-- 
GitLab