From e02c5e7c719dbdbbb162a1f4f154ba33b27eeb0c Mon Sep 17 00:00:00 2001
From: Iris Mestres Pascual <iris.mestrespascual@unibas.ch>
Date: Tue, 17 Jan 2023 17:16:07 +0000
Subject: [PATCH 01/22] Upload Snakefile of the merged workflows

---
 workflow/Snakefile | 1941 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 1941 insertions(+)
 create mode 100644 workflow/Snakefile

diff --git a/workflow/Snakefile b/workflow/Snakefile
new file mode 100644
index 0000000..2a37bcf
--- /dev/null
+++ b/workflow/Snakefile
@@ -0,0 +1,1941 @@
+"""General purpose RNA-Seq analysis pipeline developed by the Zavolan Lab"""
+import os
+import pandas as pd
+import shutil
+import yaml
+from shlex import quote
+from typing import Tuple
+from snakemake.utils import validate
+
+
+configfile: os.path.join(workflow.basedir, "..", "config", "config.yaml")
+
+
+## Preparations
+# Get sample table
+samples_table = pd.read_csv(
+    config["samples"],
+    header=0,
+    index_col=0,
+    comment="#",
+    engine="python",
+    sep="\t",
+)
+
+# dict for translation of "directionality parameters"
+directionality_dict = {
+    "SF": {
+        "kallisto": "--fr-stranded",
+        "alfa": "fr-secondstrand",
+        "alfa_plus": "str1",
+        "alfa_minus": "str2",
+    },
+    "SR": {
+        "kallisto": "--rf-stranded",
+        "alfa": "fr-firststrand",
+        "alfa_plus": "str2",
+        "alfa_minus": "str1",
+    },
+}
+# dict for ALFA output
+alfa_dict = {"MultimappersIncluded": "UniqueMultiple", "UniqueMappers": "Unique"}
+
+# Validate config
+validate(config, os.path.join("..", "resources", "config_schema.json"))
+logger.info(f"Config file after validation: {config}")
+
+# Parse YAML rule config file
+try:
+    with open(config["rule_config"]) as _file:
+        rule_config = yaml.safe_load(_file)
+    logger.info(f"Loaded rule_config from {config['rule_config']}.")
+except TypeError:
+    logger.error(
+        f'No string supplied at field "rule_config": got {type(config["rule_config"])} with content: {config["rule_config"]}'
+    )
+    raise
+except FileNotFoundError:
+    logger.error(
+        f"No rule config file found at {config['rule_config']}. Either provide the file or remove the rule_config parameter from config.yaml!"
+    )
+    raise
+except KeyError:
+    rule_config = {}
+    logger.warning("No rule config specified: using default values for all tools.")
+
+
+def parse_rule_config(
+    rule_config: dict, current_rule: str, immutable: Tuple[str, ...] = ()
+):
+    """Get rule-specific parameters from the rule_config file"""
+
+    # If no rule config file is present, an empty string will be returned
+    if not rule_config:
+        logger.info("No rule config specified: using default values for all tools.")
+        return ""
+    # Same if current rule not specified in rule config
+    if current_rule not in rule_config or not rule_config[current_rule]:
+        logger.info(
+            f"No additional parameters for rule {current_rule} specified: using default settings."
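+            # As an illustration of the shape this parser expects, a minimal
+            # rule_config.yaml could look like the sketch below (the rule names
+            # exist in this Snakefile, but the options and values are
+            # hypothetical examples; an empty string marks a value-less flag):
+            #
+            #   fastqc:
+            #     --kmers: "7"
+            #   multiqc_report:
+            #     --no-ansi: ""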
+ ) + return "" + + # Subset only section for current rule + rule_config = rule_config[current_rule] + + # Build list of parameters and values + params_vals = [] + for param, val in rule_config.items(): + # Do not allow the user to change wiring-critical, fixed arguments, or arguments that are passed through samples table + if param in immutable: + raise ValueError( + f"The following parameter in rule {current_rule} is critical for the pipeline to " + f"function as expected and cannot be modified: {param}" + ) + # Accept only strings; this prevents unintended results potentially + # arising from users entering reserved YAML keywords or nested + # structures (lists, dictionaries) + if isinstance(val, str): + params_vals.append(str(param)) + # Do not include a value for flags (signified by empty strings) + if val: + params_vals.append(val) + else: + raise ValueError( + "Only string values allowed for tool parameters: Found type " + f"'{type(val).__name__}' for value of parameter '{param}'" + ) + # Return quoted string + add_params = " ".join(quote(item) for item in params_vals) + logger.info( + f"User specified additional parameters for rule {current_rule}:\n {add_params}" + ) + return add_params + + +# Global config +localrules: + start, + finish, + rename_star_rpm_for_alfa, + prepare_multiqc_config, + + +# Include subworkflows +include: os.path.join("rules", "common.smk") +include: os.path.join("rules", "paired_end.snakefile.smk") +include: os.path.join("rules", "single_end.snakefile.smk") + + +rule finish: + """ + Rule for collecting outputs + """ + input: + multiqc_report=os.path.join(config["output_dir"], "multiqc_summary"), + bigWig=expand( + os.path.join( + config["output_dir"], + "samples", + "{sample}", + "bigWig", + "{renamed_unique}", + "{sample}_{renamed_unique}_{strand}.bw", + ), + sample=pd.unique(samples_table.index.values), + strand=["plus", "minus"], + renamed_unique=alfa_dict.keys(), + ), + salmon_merge_genes=expand( + os.path.join( + config["output_dir"], + "summary_salmon", + "quantmerge", + "genes_{salmon_merge_on}.tsv", + ), + salmon_merge_on=["tpm", "numreads"], + ), + salmon_merge_transcripts=expand( + os.path.join( + config["output_dir"], + "summary_salmon", + "quantmerge", + "transcripts_{salmon_merge_on}.tsv", + ), + salmon_merge_on=["tpm", "numreads"], + ), + kallisto_merge_transcripts=os.path.join( + config["output_dir"], "summary_kallisto", "transcripts_tpm.tsv" + ), + kallisto_merge_genes=os.path.join( + config["output_dir"], "summary_kallisto", "genes_tpm.tsv" + ), + + +current_rule = "start" + + +rule start: + """ + Get samples + """ + input: + reads=lambda wildcards: expand( + pd.Series(samples_table.loc[wildcards.sample, wildcards.mate]).values + ), + output: + reads=os.path.join( + config["output_dir"], + "samples", + "{sample}", + "start", + "{sample}.{mate}.fastq.gz", + ), + params: + cluster_log_path=config["cluster_log_dir"], + log: + stderr=os.path.join( + config["log_dir"], + "samples", + "{sample}", + f"{current_rule}_{{sample}}.{{mate}}.stderr.log", + ), + stdout=os.path.join( + config["log_dir"], + "samples", + "{sample}", + f"{current_rule}_{{sample}}.{{mate}}.stdout.log", + ), + container: + "docker://ubuntu:focal-20210416" + shell: + "(cat {input.reads} > {output.reads}) \ + 1> {log.stdout} 2> {log.stderr} " + + +current_rule = "fastqc" + + +rule fastqc: + """ + A quality control tool for high throughput sequence data + """ + input: + reads=os.path.join( + config["output_dir"], + "samples", + "{sample}", + "start", + "{sample}.{mate}.fastq.gz", 
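+            # {mate} resolves to the samples-table columns fq1/fq2, which the
+            # `start` rule above concatenates per sample. For orientation, a
+            # minimal samples.tsv row might look like this (columns inferred
+            # from the fields this Snakefile queries; not an exhaustive schema):
+            #
+            #   sample   seqmode  fq1            fq2            organism  libtype
+            #   sample1  pe       sample1_R1.fq  sample1_R2.fq  hsapiens  SR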
+ ), + output: + outdir=directory( + os.path.join( + config["output_dir"], "samples", "{sample}", "fastqc", "{mate}" + ) + ), + params: + cluster_log_path=config["cluster_log_dir"], + additional_params=parse_rule_config( + rule_config, current_rule=current_rule, immutable=("--outdir",) + ), + threads: 1 + resources: + mem_mb=lambda wildcards, attempt: 4096 * attempt, + container: + "docker://quay.io/biocontainers/fastqc:0.11.9--hdfd78af_1" + conda: + os.path.join(workflow.basedir, "envs", "fastqc.yaml") + log: + stderr=os.path.join( + config["log_dir"], + "samples", + "{sample}", + f"{current_rule}_{{mate}}.stderr.log", + ), + stdout=os.path.join( + config["log_dir"], + "samples", + "{sample}", + f"{current_rule}_{{mate}}.stdout.log", + ), + shell: + "(mkdir -p {output.outdir}; \ + fastqc --outdir {output.outdir} \ + --threads {threads} \ + {params.additional_params} \ + {input.reads}) \ + 1> {log.stdout} 2> {log.stderr}" + + +current_rule = "fastqc_trimmed" + + +rule fastqc_trimmed: + """ + A quality control tool for high throughput sequence data + """ + input: + reads=os.path.join( + config["output_dir"], + "samples", + "{sample}", + "{sample}.{mate}.{seqmode}.remove_polya.fastq.gz", + ), + output: + outdir=directory( + os.path.join( + config["output_dir"], + "samples", + "{sample}", + "fastqc_trimmed", + "{mate}_{seqmode}", + ) + ), + params: + cluster_log_path=config["cluster_log_dir"], + additional_params=parse_rule_config( + rule_config, current_rule=current_rule, immutable=("--outdir",) + ), + threads: 1 + container: + "docker://quay.io/biocontainers/fastqc:0.11.9--hdfd78af_1" + conda: + os.path.join(workflow.basedir, "envs", "fastqc.yaml") + log: + stderr=os.path.join( + config["log_dir"], + "samples", + "{sample}", + f"{current_rule}_{{seqmode}}_{{mate}}.stderr.log", + ), + stdout=os.path.join( + config["log_dir"], + "samples", + "{sample}", + f"{current_rule}_{{seqmode}}_{{mate}}.stdout.log", + ), + shell: + "(mkdir -p {output.outdir}; \ + fastqc --outdir {output.outdir} \ + --threads {threads} \ + {params.additional_params} \ + {input.reads}) \ + 1> {log.stdout} 2> {log.stderr}" + + +current_rule = "create_index_star" + + +rule create_index_star: + """ + Create index for STAR alignments + """ + input: + genome=lambda wildcards: os.path.abspath( + get_sample("genome", search_id="organism", search_value=wildcards.organism) + ), + gtf=lambda wildcards: os.path.abspath( + get_sample("gtf", search_id="organism", search_value=wildcards.organism) + ), + output: + chromosome_info=os.path.join( + config["star_indexes"], + "{organism}", + "{index_size}", + "STAR_index", + "chrNameLength.txt", + ), + chromosomes_names=os.path.join( + config["star_indexes"], + "{organism}", + "{index_size}", + "STAR_index", + "chrName.txt", + ), + params: + cluster_log_path=config["cluster_log_dir"], + output_dir=lambda wildcards, output: os.path.dirname(output.chromosome_info), + outFileNamePrefix=os.path.join( + config["star_indexes"], "{organism}", "{index_size}", "STAR_index/STAR_" + ), + sjdbOverhang="{index_size}", + additional_params=parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=( + "--runMode", + "--sjdbOverhang", + "--genomeDir", + "--genomeFastaFiles", + "--outFileNamePrefix", + "--sjdbGTFfile", + ), + ), + container: + "docker://quay.io/biocontainers/star:2.7.8a--h9ee0642_1" + conda: + os.path.join(workflow.basedir, "envs", "STAR.yaml") + threads: 12 + resources: + mem_mb=lambda wildcards, attempt: 32000 * attempt, + log: + stderr=os.path.join( + config["log_dir"], 
f"{current_rule}_{{organism}}_{{index_size}}.stderr.log" + ), + stdout=os.path.join( + config["log_dir"], f"{current_rule}_{{organism}}_{{index_size}}.stdout.log" + ), + shell: + "(mkdir -p {params.output_dir}; \ + chmod -R 777 {params.output_dir}; \ + STAR \ + --runMode genomeGenerate \ + --sjdbOverhang {params.sjdbOverhang} \ + --genomeDir {params.output_dir} \ + --genomeFastaFiles {input.genome} \ + --runThreadN {threads} \ + --outFileNamePrefix {params.outFileNamePrefix} \ + --sjdbGTFfile {input.gtf}) \ + {params.additional_params} \ + 1> {log.stdout} 2> {log.stderr}" + + +current_rule = "extract_transcriptome" + + +rule extract_transcriptome: + """ + Create transcriptome from genome and gene annotations + """ + input: + genome=lambda wildcards: get_sample( + "genome", search_id="organism", search_value=wildcards.organism + ), + gtf=lambda wildcards: get_sample( + "gtf", search_id="organism", search_value=wildcards.organism + ), + output: + transcriptome=temp( + os.path.join( + config["output_dir"], "transcriptome", "{organism}", "transcriptome.fa" + ) + ), + params: + cluster_log_path=config["cluster_log_dir"], + additional_params=parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=( + "-w", + "-g", + ), + ), + container: + "docker://quay.io/biocontainers/gffread:0.12.1--h2e03b76_1" + conda: + os.path.join(workflow.basedir, "envs", "gffread.yaml") + resources: + mem_mb=lambda wildcards, attempt: 4096 * attempt, + log: + stderr=os.path.join(config["log_dir"], f"{current_rule}_{{organism}}.log"), + stdout=os.path.join(config["log_dir"], f"{current_rule}_{{organism}}.log"), + shell: + "(gffread \ + -w {output.transcriptome} \ + -g {input.genome} \ + {params.additional_params} \ + {input.gtf}) \ + 1> {log.stdout} 2> {log.stderr}" + + +current_rule = "concatenate_transcriptome_and_genome" + + +rule concatenate_transcriptome_and_genome: + """ + Concatenate genome and transcriptome + """ + input: + transcriptome=os.path.join( + config["output_dir"], "transcriptome", "{organism}", "transcriptome.fa" + ), + genome=lambda wildcards: get_sample( + "genome", search_id="organism", search_value=wildcards.organism + ), + output: + genome_transcriptome=temp( + os.path.join( + config["output_dir"], + "transcriptome", + "{organism}", + "genome_transcriptome.fa", + ) + ), + params: + cluster_log_path=config["cluster_log_dir"], + container: + "docker://ubuntu:focal-20210416" + resources: + mem_mb=lambda wildcards, attempt: 4096 * attempt, + log: + stderr=os.path.join( + config["log_dir"], f"{current_rule}_{{organism}}.stderr.log" + ), + shell: + "(cat {input.transcriptome} {input.genome} \ + 1> {output.genome_transcriptome}) \ + 2> {log.stderr}" + + +current_rule = "create_index_salmon" + + +rule create_index_salmon: + """ + Create index for Salmon quantification + """ + input: + genome_transcriptome=os.path.join( + config["output_dir"], + "transcriptome", + "{organism}", + "genome_transcriptome.fa", + ), + chr_names=lambda wildcards: os.path.join( + config["star_indexes"], + get_sample("organism"), + get_sample("index_size"), + "STAR_index", + "chrName.txt", + ), + output: + index=directory( + os.path.join( + config["salmon_indexes"], "{organism}", "{kmer}", "salmon.idx" + ) + ), + params: + cluster_log_path=config["cluster_log_dir"], + kmerLen="{kmer}", + additional_params=parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=( + "--transcripts", + "--decoys", + "--index", + "--kmerLen", + ), + ), + container: + 
"docker://quay.io/biocontainers/salmon:1.4.0--h84f40af_1" + conda: + os.path.join(workflow.basedir, "envs", "salmon.yaml") + threads: 8 + resources: + mem_mb=lambda wildcards, attempt: 32000 * attempt, + log: + stderr=os.path.join( + config["log_dir"], f"{current_rule}_{{organism}}_{{kmer}}.stderr.log" + ), + stdout=os.path.join( + config["log_dir"], f"{current_rule}_{{organism}}_{{kmer}}.stdout.log" + ), + shell: + "(salmon index \ + --transcripts {input.genome_transcriptome} \ + --decoys {input.chr_names} \ + --index {output.index} \ + --kmerLen {params.kmerLen} \ + --threads {threads}) \ + {params.additional_params} \ + 1> {log.stdout} 2> {log.stderr}" + + +current_rule = "create_index_kallisto" + + +rule create_index_kallisto: + """ + Create index for Kallisto quantification + """ + input: + transcriptome=os.path.join( + config["output_dir"], "transcriptome", "{organism}", "transcriptome.fa" + ), + output: + index=os.path.join(config["kallisto_indexes"], "{organism}", "kallisto.idx"), + params: + cluster_log_path=config["cluster_log_dir"], + output_dir=lambda wildcards, output: os.path.dirname(output.index), + additional_params=parse_rule_config( + rule_config, current_rule=current_rule, immutable=("-i",) + ), + container: + "docker://quay.io/biocontainers/kallisto:0.46.2--h60f4f9f_2" + conda: + os.path.join(workflow.basedir, "envs", "kallisto.yaml") + resources: + mem_mb=lambda wildcards, attempt: 8192 * attempt, + log: + stderr=os.path.join( + config["log_dir"], f"{current_rule}_{{organism}}.stderr.log" + ), + stdout=os.path.join( + config["log_dir"], f"{current_rule}_{{organism}}.stdout.log" + ), + shell: + "(mkdir -p {params.output_dir}; \ + chmod -R 777 {params.output_dir}; \ + kallisto index \ + {params.additional_params} \ + -i {output.index} \ + {input.transcriptome}) \ + 1> {log.stdout} 2> {log.stderr}" + + +current_rule = "extract_transcripts_as_bed12" + + +rule extract_transcripts_as_bed12: + """ + Convert transcripts to BED12 format + """ + input: + gtf=lambda wildcards: get_sample("gtf"), + output: + bed12=temp( + os.path.join(config["output_dir"], "full_transcripts_protein_coding.bed") + ), + params: + cluster_log_path=config["cluster_log_dir"], + additional_params=parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=( + "--gtf", + "--bed12", + ), + ), + container: + "docker://quay.io/biocontainers/zgtf:0.1.1--pyh5e36f6f_0" + conda: + os.path.join(workflow.basedir, "envs", "zgtf.yaml") + threads: 1 + resources: + mem_mb=lambda wildcards, attempt: 4096 * attempt, + log: + stdout=os.path.join(config["log_dir"], f"{current_rule}.stdout.log"), + stderr=os.path.join(config["log_dir"], f"{current_rule}.stderr.log"), + shell: + "(gtf2bed12 \ + --gtf {input.gtf} \ + --bed12 {output.bed12}); \ + {params.additional_params} \ + 1> {log.stdout} 2> {log.stderr}" + + +current_rule = "sort_genomic_alignment_samtools" + + +rule sort_genomic_alignment_samtools: + """ + Sort genome bamfile using samtools + """ + input: + bam=os.path.join( + config["output_dir"], + "samples", + "{sample}", + "map_genome", + "{sample}.{seqmode}.Aligned.out.bam", + ), + output: + bam=os.path.join( + config["output_dir"], + "samples", + "{sample}", + "map_genome", + "{sample}.{seqmode}.Aligned.sortedByCoord.out.bam", + ), + params: + cluster_log_path=config["cluster_log_dir"], + additional_params=parse_rule_config( + rule_config, current_rule=current_rule, immutable=() + ), + container: + "docker://quay.io/biocontainers/samtools:1.9--h10a08f8_12" + conda: + os.path.join(workflow.basedir, 
"envs", "samtools.yaml") + threads: 8 + resources: + mem_mb=lambda wildcards, attempt: 8000 * attempt, + log: + stderr=os.path.join( + config["log_dir"], + "samples", + "{sample}", + f"{current_rule}.{{seqmode}}.stderr.log", + ), + stdout=os.path.join( + config["log_dir"], + "samples", + "{sample}", + f"{current_rule}.{{seqmode}}.stdout.log", + ), + shell: + "(samtools sort \ + -o {output.bam} \ + -@ {threads} \ + {params.additional_params} \ + {input.bam}) \ + 1> {log.stdout} 2> {log.stderr}" + + +current_rule = "index_genomic_alignment_samtools" + + +rule index_genomic_alignment_samtools: + """ + Index genome bamfile using samtools + """ + input: + bam=os.path.join( + config["output_dir"], + "samples", + "{sample}", + "map_genome", + "{sample}.{seqmode}.Aligned.sortedByCoord.out.bam", + ), + output: + bai=os.path.join( + config["output_dir"], + "samples", + "{sample}", + "map_genome", + "{sample}.{seqmode}.Aligned.sortedByCoord.out.bam.bai", + ), + params: + cluster_log_path=config["cluster_log_dir"], + additional_params=parse_rule_config( + rule_config, current_rule=current_rule, immutable=() + ), + container: + "docker://quay.io/biocontainers/samtools:1.3.1--h1b8c3c0_8" + conda: + os.path.join(workflow.basedir, "envs", "samtools.yaml") + threads: 1 + resources: + mem_mb=lambda wildcards, attempt: 4096 * attempt, + log: + stderr=os.path.join( + config["log_dir"], + "samples", + "{sample}", + f"{current_rule}.{{seqmode}}.stderr.log", + ), + stdout=os.path.join( + config["log_dir"], + "samples", + "{sample}", + f"{current_rule}.{{seqmode}}.stdout.log", + ), + shell: + "(samtools index \ + {params.additional_params} \ + {input.bam} {output.bai};) \ + 1> {log.stdout} 2> {log.stderr}" + + +current_rule = "calculate_TIN_scores" + + +rule calculate_TIN_scores: + """ + Calculate transcript integrity (TIN) score + """ + input: + bam=lambda wildcards: expand( + os.path.join( + config["output_dir"], + "samples", + "{sample}", + "map_genome", + "{sample}.{seqmode}.Aligned.sortedByCoord.out.bam", + ), + sample=wildcards.sample, + seqmode=get_sample( + "seqmode", search_id="index", search_value=wildcards.sample + ), + ), + bai=lambda wildcards: expand( + os.path.join( + config["output_dir"], + "samples", + "{sample}", + "map_genome", + "{sample}.{seqmode}.Aligned.sortedByCoord.out.bam.bai", + ), + sample=wildcards.sample, + seqmode=get_sample( + "seqmode", search_id="index", search_value=wildcards.sample + ), + ), + transcripts_bed12=os.path.join( + config["output_dir"], "full_transcripts_protein_coding.bed" + ), + output: + TIN_score=temp( + os.path.join( + config["output_dir"], "samples", "{sample}", "TIN", "TIN_score.tsv" + ) + ), + params: + cluster_log_path=config["cluster_log_dir"], + sample="{sample}", + additional_params=parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=( + "-i", + "-r", + "--names", + ), + ), + container: + "docker://quay.io/biocontainers/tin-score-calculation:0.6.3--pyh5e36f6f_0" + conda: + os.path.join(workflow.basedir, "envs", "tin-score-calculation.yaml") + threads: 8 + resources: + mem_mb=lambda wildcards, attempt, input: len(input.bam) * 1024 * attempt, + log: + stderr=os.path.join( + config["log_dir"], "samples", "{sample}", f"{current_rule}.log" + ), + shell: + "(calculate-tin.py \ + -i {input.bam} \ + -r {input.transcripts_bed12} \ + --names {params.sample} \ + -p {threads} \ + {params.additional_params} \ + > {output.TIN_score};) 2> {log.stderr}" + + +current_rule = "salmon_quantmerge_genes" + + +rule salmon_quantmerge_genes: + """ + Merge 
+    gene quantifications into a single file
+    """
+    input:
+        salmon_in=expand(
+            os.path.join(
+                config["output_dir"],
+                "samples",
+                "{sample}",
+                "{sample}.salmon.{seqmode}",
+                "quant.sf",
+            ),
+            zip,
+            sample=pd.unique(samples_table.index.values),
+            seqmode=[
+                get_sample("seqmode", search_id="index", search_value=i)
+                for i in pd.unique(samples_table.index.values)
+            ],
+        ),
+    output:
+        salmon_out=os.path.join(
+            config["output_dir"],
+            "summary_salmon",
+            "quantmerge",
+            "genes_{salmon_merge_on}.tsv",
+        ),
+    params:
+        cluster_log_path=config["cluster_log_dir"],
+        salmon_in=expand(
+            os.path.join(
+                config["output_dir"],
+                "samples",
+                "{sample}",
+                "{sample}.salmon.{seqmode}",
+            ),
+            zip,
+            sample=[i for i in pd.unique(samples_table.index.values)],
+            seqmode=[
+                get_sample("seqmode", search_id="index", search_value=i)
+                for i in pd.unique(samples_table.index.values)
+            ],
+        ),
+        sample_name_list=expand(
+            "{sample}", sample=pd.unique(samples_table.index.values)
+        ),
+        salmon_merge_on="{salmon_merge_on}",
+        additional_params=parse_rule_config(
+            rule_config,
+            current_rule=current_rule,
+            immutable=(
+                "--quants",
+                "--genes",
+                "--transcripts",
+                "--names",
+                "--column",
+                "--output",
+            ),
+        ),
+    container:
+        "docker://quay.io/biocontainers/salmon:1.4.0--h84f40af_1"
+    conda:
+        os.path.join(workflow.basedir, "envs", "salmon.yaml")
+    threads: 1
+    resources:
+        mem_mb=lambda wildcards, attempt, input: len(input.salmon_in) * 1024 * attempt,
+    log:
+        stderr=os.path.join(
+            config["log_dir"], f"{current_rule}_{{salmon_merge_on}}.stderr.log"
+        ),
+        stdout=os.path.join(
+            config["log_dir"], f"{current_rule}_{{salmon_merge_on}}.stdout.log"
+        ),
+    shell:
+        "(salmon quantmerge \
+        --quants {params.salmon_in} \
+        --genes \
+        --names {params.sample_name_list} \
+        --column {params.salmon_merge_on} \
+        --output {output.salmon_out} \
+        {params.additional_params}) \
+        1> {log.stdout} 2> {log.stderr}"
+
+
+current_rule = "salmon_quantmerge_transcripts"
+
+
+rule salmon_quantmerge_transcripts:
+    """
+    Merge transcript quantifications into a single file
+    """
+    input:
+        salmon_in=expand(
+            os.path.join(
+                config["output_dir"],
+                "samples",
+                "{sample}",
+                "{sample}.salmon.{seqmode}",
+                "quant.sf",
+            ),
+            zip,
+            sample=[i for i in pd.unique(samples_table.index.values)],
+            seqmode=[
+                get_sample("seqmode", search_id="index", search_value=i)
+                for i in pd.unique(samples_table.index.values)
+            ],
+        ),
+    output:
+        salmon_out=os.path.join(
+            config["output_dir"],
+            "summary_salmon",
+            "quantmerge",
+            "transcripts_{salmon_merge_on}.tsv",
+        ),
+    params:
+        cluster_log_path=config["cluster_log_dir"],
+        salmon_in=expand(
+            os.path.join(
+                config["output_dir"],
+                "samples",
+                "{sample}",
+                "{sample}.salmon.{seqmode}",
+            ),
+            zip,
+            sample=[i for i in pd.unique(samples_table.index.values)],
+            seqmode=[
+                get_sample("seqmode", search_id="index", search_value=i)
+                for i in pd.unique(samples_table.index.values)
+            ],
+        ),
+        sample_name_list=expand(
+            "{sample}", sample=pd.unique(samples_table.index.values)
+        ),
+        salmon_merge_on="{salmon_merge_on}",
+        additional_params=parse_rule_config(
+            rule_config,
+            current_rule=current_rule,
+            immutable=(
+                "--quants",
+                "--genes",
+                "--transcripts",
+                "--names",
+                "--column",
+                "--output",
+            ),
+        ),
+    container:
+        "docker://quay.io/biocontainers/salmon:1.4.0--h84f40af_1"
+    conda:
+        os.path.join(workflow.basedir, "envs", "salmon.yaml")
+    threads: 1
+    resources:
+        mem_mb=lambda wildcards, attempt, input: len(input.salmon_in) * 1000 * attempt,
+    log:
+        stderr=os.path.join(
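+            # The merged table written by `salmon quantmerge` has one row per
+            # transcript (or per gene, in the rule above) and one column per
+            # sample, selected by --column, e.g. (values illustrative):
+            #
+            #   Name             sample1   sample2
+            #   ENST00000000001  12.4      0.0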
config["log_dir"], f"{current_rule}_{{salmon_merge_on}}.stderr.log" + ), + stdout=os.path.join( + config["log_dir"], f"{current_rule}_{{salmon_merge_on}}.stdout.log" + ), + shell: + "(salmon quantmerge \ + --quants {params.salmon_in} \ + --names {params.sample_name_list} \ + --column {params.salmon_merge_on} \ + --output {output.salmon_out}) \ + {params.additional_params} \ + 1> {log.stdout} 2> {log.stderr}" + + +current_rule = "kallisto_merge_genes" + + +rule kallisto_merge_genes: + """ + Merge gene quantifications into single file + """ + input: + pseudoalignment=expand( + os.path.join( + config["output_dir"], + "samples", + "{sample}", + "quant_kallisto", + "{sample}.{seqmode}.kallisto.pseudo.sam", + ), + zip, + sample=[i for i in pd.unique(samples_table.index.values)], + seqmode=[ + get_sample("seqmode", search_id="index", search_value=i) + for i in pd.unique(samples_table.index.values) + ], + ), + gtf=get_sample("gtf"), + output: + gn_tpm=os.path.join(config["output_dir"], "summary_kallisto", "genes_tpm.tsv"), + gn_counts=os.path.join( + config["output_dir"], "summary_kallisto", "genes_counts.tsv" + ), + params: + cluster_log_path=config["cluster_log_dir"], + dir_out=lambda wildcards, output: os.path.dirname(output.gn_counts), + tables=",".join( + expand( + os.path.join( + config["output_dir"], + "samples", + "{sample}", + "quant_kallisto", + "abundance.h5", + ), + sample=[i for i in pd.unique(samples_table.index.values)], + ) + ), + sample_name_list=",".join( + expand("{sample}", sample=pd.unique(samples_table.index.values)) + ), + additional_params=parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=( + "--input", + "--names", + "--txOut", + "--anno", + "--output", + ), + ), + container: + "docker://quay.io/biocontainers/r-merge-kallisto:0.6--hdfd78af_0" + conda: + os.path.join(workflow.basedir, "envs", "r-merge-kallisto.yaml") + threads: 1 + resources: + mem_mb=lambda wildcards, attempt, input: len(input.pseudoalignment) + * 1000 + * attempt, + log: + stderr=os.path.join(config["log_dir"], f"{current_rule}.stderr.log"), + stdout=os.path.join(config["log_dir"], f"{current_rule}.stdout.log"), + shell: + "(merge_kallisto.R \ + --input {params.tables} \ + --names {params.sample_name_list} \ + --txOut FALSE \ + --anno {input.gtf} \ + --output {params.dir_out} \ + {params.additional_params} ) \ + 1> {log.stdout} 2> {log.stderr}" + + +current_rule = "kallisto_merge_transcripts" + + +rule kallisto_merge_transcripts: + """ + Merge transcript quantifications into a single files + """ + input: + pseudoalignment=expand( + os.path.join( + config["output_dir"], + "samples", + "{sample}", + "quant_kallisto", + "{sample}.{seqmode}.kallisto.pseudo.sam", + ), + zip, + sample=[i for i in pd.unique(samples_table.index.values)], + seqmode=[ + get_sample("seqmode", search_id="index", search_value=i) + for i in pd.unique(samples_table.index.values) + ], + ), + output: + tx_tpm=os.path.join( + config["output_dir"], "summary_kallisto", "transcripts_tpm.tsv" + ), + tx_counts=os.path.join( + config["output_dir"], "summary_kallisto", "transcripts_counts.tsv" + ), + params: + cluster_log_path=config["cluster_log_dir"], + dir_out=lambda wildcards, output: os.path.dirname(output.tx_counts), + tables=",".join( + expand( + os.path.join( + config["output_dir"], + "samples", + "{sample}", + "quant_kallisto", + "abundance.h5", + ), + sample=[i for i in pd.unique(samples_table.index.values)], + ) + ), + sample_name_list=",".join( + expand("{sample}", sample=pd.unique(samples_table.index.values)) 
+        ),
+        additional_params=parse_rule_config(
+            rule_config,
+            current_rule=current_rule,
+            immutable=(
+                "--input",
+                "--names",
+                "--txOut",
+                "--output",
+            ),
+        ),
+    container:
+        "docker://quay.io/biocontainers/r-merge-kallisto:0.6--hdfd78af_0"
+    conda:
+        os.path.join(workflow.basedir, "envs", "r-merge-kallisto.yaml")
+    threads: 1
+    resources:
+        mem_mb=lambda wildcards, attempt, input: len(input.pseudoalignment)
+        * 1024
+        * attempt,
+    log:
+        stderr=os.path.join(config["log_dir"], f"{current_rule}.stderr.log"),
+        stdout=os.path.join(config["log_dir"], f"{current_rule}.stdout.log"),
+    shell:
+        "(merge_kallisto.R \
+        --input {params.tables} \
+        --names {params.sample_name_list} \
+        --output {params.dir_out} \
+        {params.additional_params}) \
+        1> {log.stdout} 2> {log.stderr}"
+
+
+current_rule = "pca_salmon"
+
+
+rule pca_salmon:
+    input:
+        tpm=os.path.join(
+            config["output_dir"], "summary_salmon", "quantmerge", "{molecule}_tpm.tsv"
+        ),
+    output:
+        out=directory(
+            os.path.join(config["output_dir"], "zpca", "pca_salmon_{molecule}")
+        ),
+    params:
+        cluster_log_path=config["cluster_log_dir"],
+        additional_params=parse_rule_config(
+            rule_config,
+            current_rule=current_rule,
+            immutable=(
+                "--tpm",
+                "--out",
+            ),
+        ),
+    container:
+        "docker://quay.io/biocontainers/zpca:0.8.3.post1--pyh5e36f6f_0"
+    conda:
+        os.path.join(workflow.basedir, "envs", "zpca.yaml")
+    threads: 1
+    resources:
+        mem_mb=lambda wildcards, attempt: 4096 * attempt,
+    log:
+        stderr=os.path.join(
+            config["log_dir"], f"{current_rule}_{{molecule}}.stderr.log"
+        ),
+        stdout=os.path.join(
+            config["log_dir"], f"{current_rule}_{{molecule}}.stdout.log"
+        ),
+    shell:
+        "(zpca-tpm \
+        --tpm {input.tpm} \
+        --out {output.out} \
+        {params.additional_params}) \
+        1> {log.stdout} 2> {log.stderr}"
+
+
+current_rule = "pca_kallisto"
+
+
+rule pca_kallisto:
+    input:
+        tpm=os.path.join(config["output_dir"], "summary_kallisto", "{molecule}_tpm.tsv"),
+    output:
+        out=directory(
+            os.path.join(config["output_dir"], "zpca", "pca_kallisto_{molecule}")
+        ),
+    params:
+        cluster_log_path=config["cluster_log_dir"],
+        additional_params=parse_rule_config(
+            rule_config,
+            current_rule=current_rule,
+            immutable=(
+                "--tpm",
+                "--out",
+            ),
+        ),
+    container:
+        "docker://quay.io/biocontainers/zpca:0.8.3.post1--pyh5e36f6f_0"
+    conda:
+        os.path.join(workflow.basedir, "envs", "zpca.yaml")
+    threads: 1
+    resources:
+        mem_mb=lambda wildcards, attempt: 4096 * attempt,
+    log:
+        stderr=os.path.join(
+            config["log_dir"], f"{current_rule}_{{molecule}}.stderr.log"
+        ),
+        stdout=os.path.join(
+            config["log_dir"], f"{current_rule}_{{molecule}}.stdout.log"
+        ),
+    shell:
+        "(zpca-tpm \
+        --tpm {input.tpm} \
+        --out {output.out} \
+        {params.additional_params}) \
+        1> {log.stdout} 2> {log.stderr}"
+
+
+current_rule = "star_rpm"
+
+
+rule star_rpm:
+    """
+    Create stranded bedGraph coverage with STAR's RPM normalisation
+    """
+    input:
+        bam=lambda wildcards: expand(
+            os.path.join(
+                config["output_dir"],
+                "samples",
+                "{sample}",
+                "map_genome",
+                "{sample}.{seqmode}.Aligned.sortedByCoord.out.bam",
+            ),
+            sample=wildcards.sample,
+            seqmode=get_sample(
+                "seqmode", search_id="index", search_value=wildcards.sample
+            ),
+        ),
+        bai=lambda wildcards: expand(
+            os.path.join(
+                config["output_dir"],
+                "samples",
+                "{sample}",
+                "map_genome",
+                "{sample}.{seqmode}.Aligned.sortedByCoord.out.bam.bai",
+            ),
+            sample=wildcards.sample,
+            seqmode=get_sample(
+                "seqmode", search_id="index", search_value=wildcards.sample
+            ),
+        ),
+    output:
+        str1=temp(
+            os.path.join(
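+                # STAR with --outWigType bedGraph writes four RPM-normalised
+                # tracks per sample, matched by the four outputs declared here:
+                #   Signal.Unique.str1.out.bg          unique mappers, strand 1
+                #   Signal.UniqueMultiple.str1.out.bg  incl. multimappers, strand 1
+                #   (plus the same pair for str2)
+                # str1/str2 are mapped to plus/minus per library type in
+                # rename_star_rpm_for_alfa below, via directionality_dict.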
config["output_dir"], + "samples", + "{sample}", + "STAR_coverage", + "{sample}_Signal.Unique.str1.out.bg", + ) + ), + str2=temp( + os.path.join( + config["output_dir"], + "samples", + "{sample}", + "STAR_coverage", + "{sample}_Signal.UniqueMultiple.str1.out.bg", + ) + ), + str3=temp( + os.path.join( + config["output_dir"], + "samples", + "{sample}", + "STAR_coverage", + "{sample}_Signal.Unique.str2.out.bg", + ) + ), + str4=temp( + os.path.join( + config["output_dir"], + "samples", + "{sample}", + "STAR_coverage", + "{sample}_Signal.UniqueMultiple.str2.out.bg", + ) + ), + shadow: + "full" + params: + cluster_log_path=config["cluster_log_dir"], + out_dir=lambda wildcards, output: os.path.dirname(output.str1), + prefix=lambda wildcards, output: os.path.join( + os.path.dirname(output.str1), f"{str(wildcards.sample)}_" + ), + additional_params=parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=( + "--runMode", + "--inputBAMfile", + "--outWigType", + "--outFileNamePrefix", + ), + ), + container: + "docker://quay.io/biocontainers/star:2.7.8a--h9ee0642_1" + conda: + os.path.join(workflow.basedir, "envs", "STAR.yaml") + threads: 4 + resources: + mem_mb=lambda wildcards, attempt: 8192 * attempt, + log: + stderr=os.path.join( + config["log_dir"], "samples", "{sample}", f"{current_rule}_.stderr.log" + ), + stdout=os.path.join( + config["log_dir"], "samples", "{sample}", f"{current_rule}_.stdout.log" + ), + shell: + "(mkdir -p {params.out_dir}; \ + chmod -R 777 {params.out_dir}; \ + STAR \ + --runMode inputAlignmentsFromBAM \ + --runThreadN {threads} \ + --inputBAMfile {input.bam} \ + --outWigType bedGraph \ + --outFileNamePrefix {params.prefix}) \ + {params.additional_params} \ + 1> {log.stdout} 2> {log.stderr}" + + +current_rule = "rename_star_rpm_for_alfa" + + +rule rename_star_rpm_for_alfa: + input: + plus=lambda wildcards: expand( + os.path.join( + config["output_dir"], + "samples", + "{sample}", + "STAR_coverage", + "{sample}_Signal.{unique}.{plus}.out.bg", + ), + sample=wildcards.sample, + unique=alfa_dict[wildcards.renamed_unique], + plus=get_directionality( + get_sample( + "libtype", search_id="index", search_value=wildcards.sample + ), + "alfa_plus", + ), + ), + minus=lambda wildcards: expand( + os.path.join( + config["output_dir"], + "samples", + "{sample}", + "STAR_coverage", + "{sample}_Signal.{unique}.{minus}.out.bg", + ), + sample=wildcards.sample, + unique=alfa_dict[wildcards.renamed_unique], + minus=get_directionality( + get_sample( + "libtype", search_id="index", search_value=wildcards.sample + ), + "alfa_minus", + ), + ), + output: + plus=temp( + os.path.join( + config["output_dir"], + "samples", + "{sample}", + "ALFA", + "{renamed_unique}", + "{sample}.{renamed_unique}.plus.bg", + ) + ), + minus=temp( + os.path.join( + config["output_dir"], + "samples", + "{sample}", + "ALFA", + "{renamed_unique}", + "{sample}.{renamed_unique}.minus.bg", + ) + ), + params: + cluster_log_path=config["cluster_log_dir"], + container: + "docker://ubuntu:focal-20210416" + log: + stderr=os.path.join( + config["log_dir"], + "samples", + "{sample}", + f"{current_rule}_{{renamed_unique}}.stderr.log", + ), + stdout=os.path.join( + config["log_dir"], + "samples", + "{sample}", + f"{current_rule}_{{renamed_unique}}.stdout.log", + ), + shell: + "(cp {input.plus} {output.plus}; \ + cp {input.minus} {output.minus};) \ + 1>{log.stdout} 2>{log.stderr}" + + +current_rule = "generate_alfa_index" + + +rule generate_alfa_index: + """ Generate ALFA index files from sorted GTF file """ + input: + 
gtf=lambda wildcards: os.path.abspath( + get_sample("gtf", search_id="organism", search_value=wildcards.organism) + ), + chr_len=os.path.join( + config["star_indexes"], + "{organism}", + "{index_size}", + "STAR_index", + "chrNameLength.txt", + ), + output: + index_stranded=os.path.join( + config["alfa_indexes"], + "{organism}", + "{index_size}", + "ALFA", + "sorted_genes.stranded.ALFA_index", + ), + index_unstranded=os.path.join( + config["alfa_indexes"], + "{organism}", + "{index_size}", + "ALFA", + "sorted_genes.unstranded.ALFA_index", + ), + temp_dir=temp( + directory( + os.path.join( + config["alfa_indexes"], + "{organism}", + "{index_size}", + "ALFA_temp", + ) + ) + ), + params: + cluster_log_path=config["cluster_log_dir"], + genome_index="sorted_genes", + out_dir=lambda wildcards, output: os.path.dirname(output.index_stranded), + additional_params=parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=( + "-a", + "-g", + "--chr_len", + "-o", + ), + ), + container: + "docker://quay.io/biocontainers/alfa:1.1.1--pyh5e36f6f_0" + conda: + os.path.join(workflow.basedir, "envs", "alfa.yaml") + threads: 4 + resources: + mem_mb=lambda wildcards, attempt: 4096 * attempt, + log: + os.path.join( + config["log_dir"], f"{current_rule}_{{organism}}_{{index_size}}.log" + ), + shell: + "(mkdir -p {output.temp_dir}; \ + alfa -a {input.gtf} \ + -g {params.genome_index} \ + --chr_len {input.chr_len} \ + --temp_dir {output.temp_dir} \ + -p {threads} \ + -o {params.out_dir} \ + {params.additional_params}) \ + &> {log}" + + +current_rule = "alfa_qc" + + +rule alfa_qc: + """ + Run ALFA from stranded bedgraph files + """ + input: + plus=lambda wildcards: os.path.join( + config["output_dir"], + "samples", + wildcards.sample, + "ALFA", + wildcards.renamed_unique, + f"{wildcards.sample}.{wildcards.renamed_unique}.plus.bg", + ), + minus=lambda wildcards: os.path.join( + config["output_dir"], + "samples", + wildcards.sample, + "ALFA", + wildcards.renamed_unique, + f"{wildcards.sample}.{wildcards.renamed_unique}.minus.bg", + ), + gtf=lambda wildcards: os.path.join( + config["alfa_indexes"], + get_sample("organism", search_id="index", search_value=wildcards.sample), + get_sample("index_size", search_id="index", search_value=wildcards.sample), + "ALFA", + "sorted_genes.stranded.ALFA_index", + ), + output: + biotypes=temp( + os.path.join( + config["output_dir"], + "samples", + "{sample}", + "ALFA", + "{renamed_unique}", + "ALFA_plots.Biotypes.pdf", + ) + ), + categories=temp( + os.path.join( + config["output_dir"], + "samples", + "{sample}", + "ALFA", + "{renamed_unique}", + "ALFA_plots.Categories.pdf", + ) + ), + table=os.path.join( + config["output_dir"], + "samples", + "{sample}", + "ALFA", + "{renamed_unique}", + "{sample}.ALFA_feature_counts.tsv", + ), + temp_dir=temp( + directory( + os.path.join( + config["output_dir"], + "samples", + "{sample}", + "ALFA", + "{renamed_unique}", + "ALFA_temp", + ) + ) + ), + params: + cluster_log_path=config["cluster_log_dir"], + out_dir=lambda wildcards, output: os.path.dirname(output.biotypes), + genome_index=lambda wildcards, input: os.path.abspath( + os.path.join(os.path.dirname(input.gtf), "sorted_genes") + ), + plus=lambda wildcards, input: os.path.basename(input.plus), + minus=lambda wildcards, input: os.path.basename(input.minus), + name="{sample}", + alfa_orientation=lambda wildcards: get_directionality( + get_sample("libtype", search_id="index", search_value=wildcards.sample), + "alfa", + ), + temp_dir=lambda wildcards, output: os.path.abspath( + 
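+            # The -s value passed in the shell command below comes from
+            # directionality_dict at the top of this file, keyed on the
+            # samples-table 'libtype': e.g. libtype SR -> 'fr-firststrand',
+            # SF -> 'fr-secondstrand'.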
os.path.dirname(output.temp_dir) + ), + additional_params=parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=( + "-g", + "--bedgraph", + "-s", + ), + ), + container: + "docker://quay.io/biocontainers/alfa:1.1.1--pyh5e36f6f_0" + conda: + os.path.join(workflow.basedir, "envs", "alfa.yaml") + resources: + mem_mb=lambda wildcards, attempt: 4096 * attempt, + log: + os.path.join( + config["log_dir"], + "samples", + "{sample}", + f"{current_rule}.{{renamed_unique}}.log", + ), + shell: + "(mkdir -p {output.temp_dir};\ + cd {params.out_dir}; \ + alfa \ + -g {params.genome_index} \ + --bedgraph {params.plus} {params.minus} {params.name} \ + -s {params.alfa_orientation} \ + --temp_dir {params.temp_dir} \ + {params.additional_params}) \ + &> {log}" + + +current_rule = "prepare_multiqc_config" + + +rule prepare_multiqc_config: + """ + Prepare config for the MultiQC + """ + input: + script=os.path.join(workflow.basedir, "scripts", "zarp_multiqc_config.py"), + output: + multiqc_config=os.path.join(config["output_dir"], "multiqc_config.yaml"), + params: + cluster_log_path=config["cluster_log_dir"], + logo_path=config["report_logo"] if "report_logo" in config else "", + multiqc_intro_text=config["report_description"], + url=config["report_url"] if "report_url" in config else "", + author_name=config["author_name"], + author_email=config["author_email"], + additional_params=parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=( + "--config", + "--intro-text", + "--custom-logo", + "--url", + ), + ), + log: + stderr=os.path.join(config["log_dir"], f"{current_rule}.stderr.log"), + stdout=os.path.join(config["log_dir"], f"{current_rule}.stdout.log"), + container: + "docker://quay.io/biocontainers/zavolan-multiqc-plugins:1.3--pyh5e36f6f_0" + conda: + os.path.join(workflow.basedir, "envs", "zavolan-multiqc-plugins.yaml") + resources: + mem_mb=lambda wildcards, attempt: 4096 * attempt, + shell: + "(python {input.script} \ + --config {output.multiqc_config} \ + --intro-text '{params.multiqc_intro_text}' \ + --custom-logo '{params.logo_path}' \ + --url '{params.url}' \ + --author-name '{params.author_name}' \ + --author-email '{params.author_email}' \ + {params.additional_params}) \ + 1> {log.stdout} 2> {log.stderr}" + + +current_rule = "multiqc_report" + + +rule multiqc_report: + """ + Create report with MultiQC + """ + input: + fastqc_se=expand( + os.path.join( + config["output_dir"], "samples", "{sample}", "fastqc", "{mate}" + ), + sample=pd.unique(samples_table.index.values), + mate="fq1", + ), + fastqc_pe=expand( + os.path.join( + config["output_dir"], "samples", "{sample}", "fastqc", "{mate}" + ), + sample=[ + i + for i in pd.unique( + samples_table[samples_table["seqmode"] == "pe"].index.values + ) + ], + mate="fq2", + ), + fastqc_trimmed_se=expand( + os.path.join( + config["output_dir"], + "samples", + "{sample}", + "fastqc_trimmed", + "fq1_{seqmode}", + ), + zip, + sample=pd.unique(samples_table.index.values), + seqmode=[ + get_sample("seqmode", search_id="index", search_value=i) + for i in pd.unique(samples_table.index.values) + ], + ), + fastqc_trimmed_pe=expand( + os.path.join( + config["output_dir"], + "samples", + "{sample}", + "fastqc_trimmed", + "{mate}_pe", + ), + sample=[ + i + for i in pd.unique( + samples_table[samples_table["seqmode"] == "pe"].index.values + ) + ], + mate="fq2", + ), + pseudoalignment=expand( + os.path.join( + config["output_dir"], + "samples", + "{sample}", + "quant_kallisto", + "{sample}.{seqmode}.kallisto.pseudo.sam", + ), + zip, 
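+            # expand(..., zip, ...) pairs the wildcard lists element-wise
+            # instead of taking their product, e.g.
+            #   sample=[s1, s2], seqmode=[pe, se]
+            #   -> s1.pe.kallisto.pseudo.sam, s2.se.kallisto.pseudo.sam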
+ sample=[i for i in pd.unique(samples_table.index.values)], + seqmode=[ + get_sample("seqmode", search_id="index", search_value=i) + for i in pd.unique(samples_table.index.values) + ], + ), + TIN_score=expand( + os.path.join( + config["output_dir"], "samples", "{sample}", "TIN", "TIN_score.tsv" + ), + sample=pd.unique(samples_table.index.values), + ), + tables=lambda wildcards: expand( + os.path.join( + config["output_dir"], + "samples", + "{sample}", + "ALFA", + "{renamed_unique}", + "{sample}.ALFA_feature_counts.tsv", + ), + sample=pd.unique(samples_table.index.values), + renamed_unique=alfa_dict.keys(), + ), + zpca_salmon=expand( + os.path.join(config["output_dir"], "zpca", "pca_salmon_{molecule}"), + molecule=["genes", "transcripts"], + ), + zpca_kallisto=expand( + os.path.join(config["output_dir"], "zpca", "pca_kallisto_{molecule}"), + molecule=["genes", "transcripts"], + ), + multiqc_config=os.path.join(config["output_dir"], "multiqc_config.yaml"), + output: + multiqc_report=directory(os.path.join(config["output_dir"], "multiqc_summary")), + params: + cluster_log_path=config["cluster_log_dir"], + results_dir=lambda wildcards, output: os.path.split(output.multiqc_report)[0], + log_dir=config["log_dir"], + additional_params=parse_rule_config( + rule_config, + current_rule=current_rule, + immutable=( + "--outdir", + "--config", + ), + ), + container: + "docker://quay.io/biocontainers/zavolan-multiqc-plugins:1.3--pyh5e36f6f_0" + conda: + os.path.join(workflow.basedir, "envs", "zavolan-multiqc-plugins.yaml") + resources: + mem_mb=lambda wildcards, attempt: 4096 * attempt, + log: + stderr=os.path.join(config["log_dir"], f"{current_rule}.stderr.log"), + stdout=os.path.join(config["log_dir"], f"{current_rule}.stdout.log"), + shell: + "(multiqc \ + --outdir {output.multiqc_report} \ + --config {input.multiqc_config} \ + {params.additional_params} \ + {params.results_dir} \ + {params.log_dir};) \ + 1> {log.stdout} 2> {log.stderr}" + + +current_rule = "sort_bed_4_big" + + +rule sort_bed_4_big: + """ + sort bedGraphs in order to work with bedGraphtobigWig + """ + input: + bg=os.path.join( + config["output_dir"], + "samples", + "{sample}", + "ALFA", + "{renamed_unique}", + "{sample}.{renamed_unique}.{strand}.bg", + ), + output: + sorted_bg=temp( + os.path.join( + config["output_dir"], + "samples", + "{sample}", + "bigWig", + "{renamed_unique}", + "{sample}_{renamed_unique}_{strand}.sorted.bg", + ) + ), + params: + cluster_log_path=config["cluster_log_dir"], + additional_params=parse_rule_config( + rule_config, current_rule=current_rule, immutable=("-i",) + ), + container: + "docker://quay.io/biocontainers/bedtools:2.27.1--h9a82719_5" + conda: + os.path.join(workflow.basedir, "envs", "bedtools.yaml") + resources: + mem_mb=lambda wildcards, attempt: 2048 * attempt, + log: + stderr=os.path.join( + config["log_dir"], + "samples", + "{sample}", + f"{current_rule}_{{renamed_unique}}_{{strand}}.stderr.log", + ), + shell: + "(sortBed \ + -i {input.bg} \ + {params.additional_params} \ + > {output.sorted_bg};) 2> {log.stderr}" + + +current_rule = "prepare_bigWig" + + +rule prepare_bigWig: + """ + bedGraphtobigWig, for viewing in genome browsers + """ + input: + sorted_bg=os.path.join( + config["output_dir"], + "samples", + "{sample}", + "bigWig", + "{renamed_unique}", + "{sample}_{renamed_unique}_{strand}.sorted.bg", + ), + chr_sizes=lambda wildcards: os.path.join( + config["star_indexes"], + get_sample("organism", search_id="index", search_value=wildcards.sample), + get_sample("index_size", 
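+            # bedGraphToBigWig needs a chromosome-sizes file; the STAR index's
+            # chrNameLength.txt ("name<TAB>length" per line) serves that role
+            # here. Hand-run sketch (hypothetical paths):
+            #   bedGraphToBigWig in.sorted.bg chrNameLength.txt out.bw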
search_id="index", search_value=wildcards.sample), + "STAR_index", + "chrNameLength.txt", + ), + output: + bigWig=os.path.join( + config["output_dir"], + "samples", + "{sample}", + "bigWig", + "{renamed_unique}", + "{sample}_{renamed_unique}_{strand}.bw", + ), + params: + cluster_log_path=config["cluster_log_dir"], + additional_params=parse_rule_config( + rule_config, current_rule=current_rule, immutable=() + ), + container: + "docker://quay.io/biocontainers/ucsc-bedgraphtobigwig:377--h0b8a92a_2" + conda: + os.path.join(workflow.basedir, "envs", "ucsc-bedgraphtobigwig.yaml") + resources: + mem_mb=lambda wildcards, attempt: 1024 * attempt, + log: + stderr=os.path.join( + config["log_dir"], + "samples", + "{sample}", + f"{current_rule}_{{renamed_unique}}_{{strand}}.stderr.log", + ), + stdout=os.path.join( + config["log_dir"], + "samples", + "{sample}", + f"{current_rule}_{{renamed_unique}}_{{strand}}.stdout.log", + ), + shell: + "(bedGraphToBigWig \ + {params.additional_params} \ + {input.sorted_bg} \ + {input.chr_sizes} \ + {output.bigWig};) \ + 1> {log.stdout} 2> {log.stderr}" -- GitLab From 63a7dfb4a4077bbe6345b69b89cba11618bc9a73 Mon Sep 17 00:00:00 2001 From: Iris Mestres Pascual <iris.mestrespascual@unibas.ch> Date: Tue, 17 Jan 2023 17:16:26 +0000 Subject: [PATCH 02/22] Delete Snakefile --- workflow/Snakefile | 1941 -------------------------------------------- 1 file changed, 1941 deletions(-) delete mode 100644 workflow/Snakefile diff --git a/workflow/Snakefile b/workflow/Snakefile deleted file mode 100644 index 2a37bcf..0000000 --- a/workflow/Snakefile +++ /dev/null @@ -1,1941 +0,0 @@ -"""General purpose RNA-Seq analysis pipeline developed by the Zavolan Lab""" -import os -import pandas as pd -import shutil -import yaml -from shlex import quote -from typing import Tuple -from snakemake.utils import validate - - -configfile: os.path.join(workflow.basedir, "..", "config", "config.yaml") - - -## Preparations -# Get sample table -samples_table = pd.read_csv( - config["samples"], - header=0, - index_col=0, - comment="#", - engine="python", - sep="\t", -) - -# dict for translation of "directionality parameters" -directionality_dict = { - "SF": { - "kallisto": "--fr-stranded", - "alfa": "fr-secondstrand", - "alfa_plus": "str1", - "alfa_minus": "str2", - }, - "SR": { - "kallisto": "--rf-stranded", - "alfa": "fr-firststrand", - "alfa_plus": "str2", - "alfa_minus": "str1", - }, -} -# dict for alfa output" -alfa_dict = {"MultimappersIncluded": "UniqueMultiple", "UniqueMappers": "Unique"} - -# Validate config -validate(config, os.path.join("..", "resources", "config_schema.json")) -logger.info(f"Config file after validation: {config}") - -# Parse YAML rule config file -try: - with open(config["rule_config"]) as _file: - rule_config = yaml.safe_load(_file) - logger.info(f"Loaded rule_config from {config['rule_config']}.") -except TypeError: - logger.error( - f'No string supplied at field "rule_config", but: {type(config["rule_config"])} with content: {config["rule_config"]}' - ) - raise -except FileNotFoundError: - logger.error( - f"No rule config file found at {config['rule_config']}. Either provide file or remove rule_config parameter from config.yaml! " - ) - raise -except KeyError: - rule_config = {} - logger.warning(f"No rule config specified: using default values for all tools.") - - -def parse_rule_config( - rule_config: dict, current_rule: str, immutable: Tuple[str, ...] 
-):
-    """Get rule-specific parameters from the rule_config file"""
-
-    # If no rule config file is present, an empty string will be returned
-    if not rule_config:
-        logger.info("No rule config specified: using default values for all tools.")
-        return ""
-    # Same if current rule not specified in rule config
-    if current_rule not in rule_config or not rule_config[current_rule]:
-        logger.info(
-            f"No additional parameters for rule {current_rule} specified: using default settings."
-        )
-        return ""
-
-    # Subset only section for current rule
-    rule_config = rule_config[current_rule]
-
-    # Build list of parameters and values
-    params_vals = []
-    for param, val in rule_config.items():
-        # Do not allow the user to change wiring-critical, fixed arguments, or arguments that are passed through samples table
-        if param in immutable:
-            raise ValueError(
-                f"The following parameter in rule {current_rule} is critical for the pipeline to "
-                f"function as expected and cannot be modified: {param}"
-            )
-        # Accept only strings; this prevents unintended results potentially
-        # arising from users entering reserved YAML keywords or nested
-        # structures (lists, dictionaries)
-        if isinstance(val, str):
-            params_vals.append(str(param))
-            # Do not include a value for flags (signified by empty strings)
-            if val:
-                params_vals.append(val)
-        else:
-            raise ValueError(
-                "Only string values allowed for tool parameters: Found type "
-                f"'{type(val).__name__}' for value of parameter '{param}'"
-            )
-    # Return quoted string
-    add_params = " ".join(quote(item) for item in params_vals)
-    logger.info(
-        f"User specified additional parameters for rule {current_rule}:\n {add_params}"
-    )
-    return add_params
-
-
-# Global config
-localrules:
-    start,
-    finish,
-    rename_star_rpm_for_alfa,
-    prepare_multiqc_config,
-
-
-# Include subworkflows
-include: os.path.join("rules", "common.smk")
-include: os.path.join("rules", "paired_end.snakefile.smk")
-include: os.path.join("rules", "single_end.snakefile.smk")
-
-
-rule finish:
-    """
-    Rule for collecting outputs
-    """
-    input:
-        multiqc_report=os.path.join(config["output_dir"], "multiqc_summary"),
-        bigWig=expand(
-            os.path.join(
-                config["output_dir"],
-                "samples",
-                "{sample}",
-                "bigWig",
-                "{renamed_unique}",
-                "{sample}_{renamed_unique}_{strand}.bw",
-            ),
-            sample=pd.unique(samples_table.index.values),
-            strand=["plus", "minus"],
-            renamed_unique=alfa_dict.keys(),
-        ),
-        salmon_merge_genes=expand(
-            os.path.join(
-                config["output_dir"],
-                "summary_salmon",
-                "quantmerge",
-                "genes_{salmon_merge_on}.tsv",
-            ),
-            salmon_merge_on=["tpm", "numreads"],
-        ),
-        salmon_merge_transcripts=expand(
-            os.path.join(
-                config["output_dir"],
-                "summary_salmon",
-                "quantmerge",
-                "transcripts_{salmon_merge_on}.tsv",
-            ),
-            salmon_merge_on=["tpm", "numreads"],
-        ),
-        kallisto_merge_transcripts=os.path.join(
-            config["output_dir"], "summary_kallisto", "transcripts_tpm.tsv"
-        ),
-        kallisto_merge_genes=os.path.join(
-            config["output_dir"], "summary_kallisto", "genes_tpm.tsv"
-        ),
-
-
-current_rule = "start"
-
-
-rule start:
-    """
-    Get samples
-    """
-    input:
-        reads=lambda wildcards: expand(
-            pd.Series(samples_table.loc[wildcards.sample, wildcards.mate]).values
-        ),
-    output:
-        reads=os.path.join(
-            config["output_dir"],
-            "samples",
-            "{sample}",
-            "start",
-            "{sample}.{mate}.fastq.gz",
-        ),
-    params:
-        cluster_log_path=config["cluster_log_dir"],
-    log:
-        stderr=os.path.join(
-            config["log_dir"],
-            "samples",
-            "{sample}",
-            f"{current_rule}_{{sample}}.{{mate}}.stderr.log",
-        ),
-        stdout=os.path.join(
- config["log_dir"], - "samples", - "{sample}", - f"{current_rule}_{{sample}}.{{mate}}.stdout.log", - ), - container: - "docker://ubuntu:focal-20210416" - shell: - "(cat {input.reads} > {output.reads}) \ - 1> {log.stdout} 2> {log.stderr} " - - -current_rule = "fastqc" - - -rule fastqc: - """ - A quality control tool for high throughput sequence data - """ - input: - reads=os.path.join( - config["output_dir"], - "samples", - "{sample}", - "start", - "{sample}.{mate}.fastq.gz", - ), - output: - outdir=directory( - os.path.join( - config["output_dir"], "samples", "{sample}", "fastqc", "{mate}" - ) - ), - params: - cluster_log_path=config["cluster_log_dir"], - additional_params=parse_rule_config( - rule_config, current_rule=current_rule, immutable=("--outdir",) - ), - threads: 1 - resources: - mem_mb=lambda wildcards, attempt: 4096 * attempt, - container: - "docker://quay.io/biocontainers/fastqc:0.11.9--hdfd78af_1" - conda: - os.path.join(workflow.basedir, "envs", "fastqc.yaml") - log: - stderr=os.path.join( - config["log_dir"], - "samples", - "{sample}", - f"{current_rule}_{{mate}}.stderr.log", - ), - stdout=os.path.join( - config["log_dir"], - "samples", - "{sample}", - f"{current_rule}_{{mate}}.stdout.log", - ), - shell: - "(mkdir -p {output.outdir}; \ - fastqc --outdir {output.outdir} \ - --threads {threads} \ - {params.additional_params} \ - {input.reads}) \ - 1> {log.stdout} 2> {log.stderr}" - - -current_rule = "fastqc_trimmed" - - -rule fastqc_trimmed: - """ - A quality control tool for high throughput sequence data - """ - input: - reads=os.path.join( - config["output_dir"], - "samples", - "{sample}", - "{sample}.{mate}.{seqmode}.remove_polya.fastq.gz", - ), - output: - outdir=directory( - os.path.join( - config["output_dir"], - "samples", - "{sample}", - "fastqc_trimmed", - "{mate}_{seqmode}", - ) - ), - params: - cluster_log_path=config["cluster_log_dir"], - additional_params=parse_rule_config( - rule_config, current_rule=current_rule, immutable=("--outdir",) - ), - threads: 1 - container: - "docker://quay.io/biocontainers/fastqc:0.11.9--hdfd78af_1" - conda: - os.path.join(workflow.basedir, "envs", "fastqc.yaml") - log: - stderr=os.path.join( - config["log_dir"], - "samples", - "{sample}", - f"{current_rule}_{{seqmode}}_{{mate}}.stderr.log", - ), - stdout=os.path.join( - config["log_dir"], - "samples", - "{sample}", - f"{current_rule}_{{seqmode}}_{{mate}}.stdout.log", - ), - shell: - "(mkdir -p {output.outdir}; \ - fastqc --outdir {output.outdir} \ - --threads {threads} \ - {params.additional_params} \ - {input.reads}) \ - 1> {log.stdout} 2> {log.stderr}" - - -current_rule = "create_index_star" - - -rule create_index_star: - """ - Create index for STAR alignments - """ - input: - genome=lambda wildcards: os.path.abspath( - get_sample("genome", search_id="organism", search_value=wildcards.organism) - ), - gtf=lambda wildcards: os.path.abspath( - get_sample("gtf", search_id="organism", search_value=wildcards.organism) - ), - output: - chromosome_info=os.path.join( - config["star_indexes"], - "{organism}", - "{index_size}", - "STAR_index", - "chrNameLength.txt", - ), - chromosomes_names=os.path.join( - config["star_indexes"], - "{organism}", - "{index_size}", - "STAR_index", - "chrName.txt", - ), - params: - cluster_log_path=config["cluster_log_dir"], - output_dir=lambda wildcards, output: os.path.dirname(output.chromosome_info), - outFileNamePrefix=os.path.join( - config["star_indexes"], "{organism}", "{index_size}", "STAR_index/STAR_" - ), - sjdbOverhang="{index_size}", - 
-        additional_params=parse_rule_config(
-            rule_config,
-            current_rule=current_rule,
-            immutable=(
-                "--runMode",
-                "--sjdbOverhang",
-                "--genomeDir",
-                "--genomeFastaFiles",
-                "--outFileNamePrefix",
-                "--sjdbGTFfile",
-            ),
-        ),
-    container:
-        "docker://quay.io/biocontainers/star:2.7.8a--h9ee0642_1"
-    conda:
-        os.path.join(workflow.basedir, "envs", "STAR.yaml")
-    threads: 12
-    resources:
-        mem_mb=lambda wildcards, attempt: 32000 * attempt,
-    log:
-        stderr=os.path.join(
-            config["log_dir"], f"{current_rule}_{{organism}}_{{index_size}}.stderr.log"
-        ),
-        stdout=os.path.join(
-            config["log_dir"], f"{current_rule}_{{organism}}_{{index_size}}.stdout.log"
-        ),
-    shell:
-        "(mkdir -p {params.output_dir}; \
-        chmod -R 777 {params.output_dir}; \
-        STAR \
-        --runMode genomeGenerate \
-        --sjdbOverhang {params.sjdbOverhang} \
-        --genomeDir {params.output_dir} \
-        --genomeFastaFiles {input.genome} \
-        --runThreadN {threads} \
-        --outFileNamePrefix {params.outFileNamePrefix} \
-        --sjdbGTFfile {input.gtf} \
-        {params.additional_params}) \
-        1> {log.stdout} 2> {log.stderr}"
-
-
-current_rule = "extract_transcriptome"
-
-
-rule extract_transcriptome:
-    """
-    Create transcriptome from genome and gene annotations
-    """
-    input:
-        genome=lambda wildcards: get_sample(
-            "genome", search_id="organism", search_value=wildcards.organism
-        ),
-        gtf=lambda wildcards: get_sample(
-            "gtf", search_id="organism", search_value=wildcards.organism
-        ),
-    output:
-        transcriptome=temp(
-            os.path.join(
-                config["output_dir"], "transcriptome", "{organism}", "transcriptome.fa"
-            )
-        ),
-    params:
-        cluster_log_path=config["cluster_log_dir"],
-        additional_params=parse_rule_config(
-            rule_config,
-            current_rule=current_rule,
-            immutable=(
-                "-w",
-                "-g",
-            ),
-        ),
-    container:
-        "docker://quay.io/biocontainers/gffread:0.12.1--h2e03b76_1"
-    conda:
-        os.path.join(workflow.basedir, "envs", "gffread.yaml")
-    resources:
-        mem_mb=lambda wildcards, attempt: 4096 * attempt,
-    log:
-        stderr=os.path.join(config["log_dir"], f"{current_rule}_{{organism}}.stderr.log"),
-        stdout=os.path.join(config["log_dir"], f"{current_rule}_{{organism}}.stdout.log"),
-    shell:
-        "(gffread \
-        -w {output.transcriptome} \
-        -g {input.genome} \
-        {params.additional_params} \
-        {input.gtf}) \
-        1> {log.stdout} 2> {log.stderr}"
-
-
-current_rule = "concatenate_transcriptome_and_genome"
-
-
-rule concatenate_transcriptome_and_genome:
-    """
-    Concatenate genome and transcriptome
-    """
-    input:
-        transcriptome=os.path.join(
-            config["output_dir"], "transcriptome", "{organism}", "transcriptome.fa"
-        ),
-        genome=lambda wildcards: get_sample(
-            "genome", search_id="organism", search_value=wildcards.organism
-        ),
-    output:
-        genome_transcriptome=temp(
-            os.path.join(
-                config["output_dir"],
-                "transcriptome",
-                "{organism}",
-                "genome_transcriptome.fa",
-            )
-        ),
-    params:
-        cluster_log_path=config["cluster_log_dir"],
-    container:
-        "docker://ubuntu:focal-20210416"
-    resources:
-        mem_mb=lambda wildcards, attempt: 4096 * attempt,
-    log:
-        stderr=os.path.join(
-            config["log_dir"], f"{current_rule}_{{organism}}.stderr.log"
-        ),
-    shell:
-        "(cat {input.transcriptome} {input.genome} \
-        1> {output.genome_transcriptome}) \
-        2> {log.stderr}"
-
-
-current_rule = "create_index_salmon"
-
-
-rule create_index_salmon:
-    """
-    Create index for Salmon quantification
-    """
-    input:
-        genome_transcriptome=os.path.join(
-            config["output_dir"],
-            "transcriptome",
-            "{organism}",
-            "genome_transcriptome.fa",
-        ),
-        chr_names=lambda wildcards: os.path.join(
-            config["star_indexes"],
-            get_sample("organism"),
- get_sample("index_size"), - "STAR_index", - "chrName.txt", - ), - output: - index=directory( - os.path.join( - config["salmon_indexes"], "{organism}", "{kmer}", "salmon.idx" - ) - ), - params: - cluster_log_path=config["cluster_log_dir"], - kmerLen="{kmer}", - additional_params=parse_rule_config( - rule_config, - current_rule=current_rule, - immutable=( - "--transcripts", - "--decoys", - "--index", - "--kmerLen", - ), - ), - container: - "docker://quay.io/biocontainers/salmon:1.4.0--h84f40af_1" - conda: - os.path.join(workflow.basedir, "envs", "salmon.yaml") - threads: 8 - resources: - mem_mb=lambda wildcards, attempt: 32000 * attempt, - log: - stderr=os.path.join( - config["log_dir"], f"{current_rule}_{{organism}}_{{kmer}}.stderr.log" - ), - stdout=os.path.join( - config["log_dir"], f"{current_rule}_{{organism}}_{{kmer}}.stdout.log" - ), - shell: - "(salmon index \ - --transcripts {input.genome_transcriptome} \ - --decoys {input.chr_names} \ - --index {output.index} \ - --kmerLen {params.kmerLen} \ - --threads {threads}) \ - {params.additional_params} \ - 1> {log.stdout} 2> {log.stderr}" - - -current_rule = "create_index_kallisto" - - -rule create_index_kallisto: - """ - Create index for Kallisto quantification - """ - input: - transcriptome=os.path.join( - config["output_dir"], "transcriptome", "{organism}", "transcriptome.fa" - ), - output: - index=os.path.join(config["kallisto_indexes"], "{organism}", "kallisto.idx"), - params: - cluster_log_path=config["cluster_log_dir"], - output_dir=lambda wildcards, output: os.path.dirname(output.index), - additional_params=parse_rule_config( - rule_config, current_rule=current_rule, immutable=("-i",) - ), - container: - "docker://quay.io/biocontainers/kallisto:0.46.2--h60f4f9f_2" - conda: - os.path.join(workflow.basedir, "envs", "kallisto.yaml") - resources: - mem_mb=lambda wildcards, attempt: 8192 * attempt, - log: - stderr=os.path.join( - config["log_dir"], f"{current_rule}_{{organism}}.stderr.log" - ), - stdout=os.path.join( - config["log_dir"], f"{current_rule}_{{organism}}.stdout.log" - ), - shell: - "(mkdir -p {params.output_dir}; \ - chmod -R 777 {params.output_dir}; \ - kallisto index \ - {params.additional_params} \ - -i {output.index} \ - {input.transcriptome}) \ - 1> {log.stdout} 2> {log.stderr}" - - -current_rule = "extract_transcripts_as_bed12" - - -rule extract_transcripts_as_bed12: - """ - Convert transcripts to BED12 format - """ - input: - gtf=lambda wildcards: get_sample("gtf"), - output: - bed12=temp( - os.path.join(config["output_dir"], "full_transcripts_protein_coding.bed") - ), - params: - cluster_log_path=config["cluster_log_dir"], - additional_params=parse_rule_config( - rule_config, - current_rule=current_rule, - immutable=( - "--gtf", - "--bed12", - ), - ), - container: - "docker://quay.io/biocontainers/zgtf:0.1.1--pyh5e36f6f_0" - conda: - os.path.join(workflow.basedir, "envs", "zgtf.yaml") - threads: 1 - resources: - mem_mb=lambda wildcards, attempt: 4096 * attempt, - log: - stdout=os.path.join(config["log_dir"], f"{current_rule}.stdout.log"), - stderr=os.path.join(config["log_dir"], f"{current_rule}.stderr.log"), - shell: - "(gtf2bed12 \ - --gtf {input.gtf} \ - --bed12 {output.bed12}); \ - {params.additional_params} \ - 1> {log.stdout} 2> {log.stderr}" - - -current_rule = "sort_genomic_alignment_samtools" - - -rule sort_genomic_alignment_samtools: - """ - Sort genome bamfile using samtools - """ - input: - bam=os.path.join( - config["output_dir"], - "samples", - "{sample}", - "map_genome", - 
"{sample}.{seqmode}.Aligned.out.bam", - ), - output: - bam=os.path.join( - config["output_dir"], - "samples", - "{sample}", - "map_genome", - "{sample}.{seqmode}.Aligned.sortedByCoord.out.bam", - ), - params: - cluster_log_path=config["cluster_log_dir"], - additional_params=parse_rule_config( - rule_config, current_rule=current_rule, immutable=() - ), - container: - "docker://quay.io/biocontainers/samtools:1.9--h10a08f8_12" - conda: - os.path.join(workflow.basedir, "envs", "samtools.yaml") - threads: 8 - resources: - mem_mb=lambda wildcards, attempt: 8000 * attempt, - log: - stderr=os.path.join( - config["log_dir"], - "samples", - "{sample}", - f"{current_rule}.{{seqmode}}.stderr.log", - ), - stdout=os.path.join( - config["log_dir"], - "samples", - "{sample}", - f"{current_rule}.{{seqmode}}.stdout.log", - ), - shell: - "(samtools sort \ - -o {output.bam} \ - -@ {threads} \ - {params.additional_params} \ - {input.bam}) \ - 1> {log.stdout} 2> {log.stderr}" - - -current_rule = "index_genomic_alignment_samtools" - - -rule index_genomic_alignment_samtools: - """ - Index genome bamfile using samtools - """ - input: - bam=os.path.join( - config["output_dir"], - "samples", - "{sample}", - "map_genome", - "{sample}.{seqmode}.Aligned.sortedByCoord.out.bam", - ), - output: - bai=os.path.join( - config["output_dir"], - "samples", - "{sample}", - "map_genome", - "{sample}.{seqmode}.Aligned.sortedByCoord.out.bam.bai", - ), - params: - cluster_log_path=config["cluster_log_dir"], - additional_params=parse_rule_config( - rule_config, current_rule=current_rule, immutable=() - ), - container: - "docker://quay.io/biocontainers/samtools:1.3.1--h1b8c3c0_8" - conda: - os.path.join(workflow.basedir, "envs", "samtools.yaml") - threads: 1 - resources: - mem_mb=lambda wildcards, attempt: 4096 * attempt, - log: - stderr=os.path.join( - config["log_dir"], - "samples", - "{sample}", - f"{current_rule}.{{seqmode}}.stderr.log", - ), - stdout=os.path.join( - config["log_dir"], - "samples", - "{sample}", - f"{current_rule}.{{seqmode}}.stdout.log", - ), - shell: - "(samtools index \ - {params.additional_params} \ - {input.bam} {output.bai};) \ - 1> {log.stdout} 2> {log.stderr}" - - -current_rule = "calculate_TIN_scores" - - -rule calculate_TIN_scores: - """ - Calculate transcript integrity (TIN) score - """ - input: - bam=lambda wildcards: expand( - os.path.join( - config["output_dir"], - "samples", - "{sample}", - "map_genome", - "{sample}.{seqmode}.Aligned.sortedByCoord.out.bam", - ), - sample=wildcards.sample, - seqmode=get_sample( - "seqmode", search_id="index", search_value=wildcards.sample - ), - ), - bai=lambda wildcards: expand( - os.path.join( - config["output_dir"], - "samples", - "{sample}", - "map_genome", - "{sample}.{seqmode}.Aligned.sortedByCoord.out.bam.bai", - ), - sample=wildcards.sample, - seqmode=get_sample( - "seqmode", search_id="index", search_value=wildcards.sample - ), - ), - transcripts_bed12=os.path.join( - config["output_dir"], "full_transcripts_protein_coding.bed" - ), - output: - TIN_score=temp( - os.path.join( - config["output_dir"], "samples", "{sample}", "TIN", "TIN_score.tsv" - ) - ), - params: - cluster_log_path=config["cluster_log_dir"], - sample="{sample}", - additional_params=parse_rule_config( - rule_config, - current_rule=current_rule, - immutable=( - "-i", - "-r", - "--names", - ), - ), - container: - "docker://quay.io/biocontainers/tin-score-calculation:0.6.3--pyh5e36f6f_0" - conda: - os.path.join(workflow.basedir, "envs", "tin-score-calculation.yaml") - threads: 8 - resources: - 
mem_mb=lambda wildcards, attempt, input: len(input.bam) * 1024 * attempt, - log: - stderr=os.path.join( - config["log_dir"], "samples", "{sample}", f"{current_rule}.log" - ), - shell: - "(calculate-tin.py \ - -i {input.bam} \ - -r {input.transcripts_bed12} \ - --names {params.sample} \ - -p {threads} \ - {params.additional_params} \ - > {output.TIN_score};) 2> {log.stderr}" - - -current_rule = "salmon_quantmerge_genes" - - -rule salmon_quantmerge_genes: - """ - Merge gene quantifications into a single file - """ - input: - salmon_in=expand( - os.path.join( - config["output_dir"], - "samples", - "{sample}", - "{sample}.salmon.{seqmode}", - "quant.sf", - ), - zip, - sample=pd.unique(samples_table.index.values), - seqmode=[ - get_sample("seqmode", search_id="index", search_value=i) - for i in pd.unique(samples_table.index.values) - ], - ), - output: - salmon_out=os.path.join( - config["output_dir"], - "summary_salmon", - "quantmerge", - "genes_{salmon_merge_on}.tsv", - ), - params: - cluster_log_path=config["cluster_log_dir"], - salmon_in=expand( - os.path.join( - config["output_dir"], - "samples", - "{sample}", - "{sample}.salmon.{seqmode}", - ), - zip, - sample=[i for i in pd.unique(samples_table.index.values)], - seqmode=[ - get_sample("seqmode", search_id="index", search_value=i) - for i in pd.unique(samples_table.index.values) - ], - ), - sample_name_list=expand( - "{sample}", sample=pd.unique(samples_table.index.values) - ), - salmon_merge_on="{salmon_merge_on}", - additional_params=parse_rule_config( - rule_config, - current_rule=current_rule, - immutable=( - "--quants", - "--genes", - "--transcripts", - "--names", - "--column", - "--output", - ), - ), - container: - "docker://quay.io/biocontainers/salmon:1.4.0--h84f40af_1" - conda: - os.path.join(workflow.basedir, "envs", "salmon.yaml") - threads: 1 - resources: - mem_mb=lambda wildcards, attempt, input: len(input.salmon_in) * 1024 * attempt, - log: - stderr=os.path.join( - config["log_dir"], f"{current_rule}_{{salmon_merge_on}}.stderr.log" - ), - stdout=os.path.join( - config["log_dir"], f"{current_rule}_{{salmon_merge_on}}.stdout.log" - ), - shell: - "(salmon quantmerge \ - --quants {params.salmon_in} \ - --genes \ - --names {params.sample_name_list} \ - --column {params.salmon_merge_on} \ - --output {output.salmon_out};) \ - {params.additional_params} \ - 1> {log.stdout} 2> {log.stderr}" - - -current_rule = "salmon_quantmerge_transcripts" - - -rule salmon_quantmerge_transcripts: - """ - Merge transcript quantifications into a single file - """ - input: - salmon_in=expand( - os.path.join( - config["output_dir"], - "samples", - "{sample}", - "{sample}.salmon.{seqmode}", - "quant.sf", - ), - zip, - sample=[i for i in pd.unique(samples_table.index.values)], - seqmode=[ - get_sample("seqmode", search_id="index", search_value=i) - for i in pd.unique(samples_table.index.values) - ], - ), - output: - salmon_out=os.path.join( - config["output_dir"], - "summary_salmon", - "quantmerge", - "transcripts_{salmon_merge_on}.tsv", - ), - params: - cluster_log_path=config["cluster_log_dir"], - salmon_in=expand( - os.path.join( - config["output_dir"], - "samples", - "{sample}", - "{sample}.salmon.{seqmode}", - ), - zip, - sample=[i for i in pd.unique(samples_table.index.values)], - seqmode=[ - get_sample("seqmode", search_id="index", search_value=i) - for i in pd.unique(samples_table.index.values) - ], - ), - sample_name_list=expand( - "{sample}", sample=pd.unique(samples_table.index.values) - ), - salmon_merge_on="{salmon_merge_on}", - 
additional_params=parse_rule_config( - rule_config, - current_rule=current_rule, - immutable=( - "--quants", - "--genes", - "--transcripts", - "--names", - "--column", - "--output", - ), - ), - container: - "docker://quay.io/biocontainers/salmon:1.4.0--h84f40af_1" - conda: - os.path.join(workflow.basedir, "envs", "salmon.yaml") - threads: 1 - resources: - mem_mb=lambda wildcards, attempt, input: len(input.salmon_in) * 1000 * attempt, - log: - stderr=os.path.join( - config["log_dir"], f"{current_rule}_{{salmon_merge_on}}.stderr.log" - ), - stdout=os.path.join( - config["log_dir"], f"{current_rule}_{{salmon_merge_on}}.stdout.log" - ), - shell: - "(salmon quantmerge \ - --quants {params.salmon_in} \ - --names {params.sample_name_list} \ - --column {params.salmon_merge_on} \ - --output {output.salmon_out}) \ - {params.additional_params} \ - 1> {log.stdout} 2> {log.stderr}" - - -current_rule = "kallisto_merge_genes" - - -rule kallisto_merge_genes: - """ - Merge gene quantifications into single file - """ - input: - pseudoalignment=expand( - os.path.join( - config["output_dir"], - "samples", - "{sample}", - "quant_kallisto", - "{sample}.{seqmode}.kallisto.pseudo.sam", - ), - zip, - sample=[i for i in pd.unique(samples_table.index.values)], - seqmode=[ - get_sample("seqmode", search_id="index", search_value=i) - for i in pd.unique(samples_table.index.values) - ], - ), - gtf=get_sample("gtf"), - output: - gn_tpm=os.path.join(config["output_dir"], "summary_kallisto", "genes_tpm.tsv"), - gn_counts=os.path.join( - config["output_dir"], "summary_kallisto", "genes_counts.tsv" - ), - params: - cluster_log_path=config["cluster_log_dir"], - dir_out=lambda wildcards, output: os.path.dirname(output.gn_counts), - tables=",".join( - expand( - os.path.join( - config["output_dir"], - "samples", - "{sample}", - "quant_kallisto", - "abundance.h5", - ), - sample=[i for i in pd.unique(samples_table.index.values)], - ) - ), - sample_name_list=",".join( - expand("{sample}", sample=pd.unique(samples_table.index.values)) - ), - additional_params=parse_rule_config( - rule_config, - current_rule=current_rule, - immutable=( - "--input", - "--names", - "--txOut", - "--anno", - "--output", - ), - ), - container: - "docker://quay.io/biocontainers/r-merge-kallisto:0.6--hdfd78af_0" - conda: - os.path.join(workflow.basedir, "envs", "r-merge-kallisto.yaml") - threads: 1 - resources: - mem_mb=lambda wildcards, attempt, input: len(input.pseudoalignment) - * 1000 - * attempt, - log: - stderr=os.path.join(config["log_dir"], f"{current_rule}.stderr.log"), - stdout=os.path.join(config["log_dir"], f"{current_rule}.stdout.log"), - shell: - "(merge_kallisto.R \ - --input {params.tables} \ - --names {params.sample_name_list} \ - --txOut FALSE \ - --anno {input.gtf} \ - --output {params.dir_out} \ - {params.additional_params} ) \ - 1> {log.stdout} 2> {log.stderr}" - - -current_rule = "kallisto_merge_transcripts" - - -rule kallisto_merge_transcripts: - """ - Merge transcript quantifications into a single files - """ - input: - pseudoalignment=expand( - os.path.join( - config["output_dir"], - "samples", - "{sample}", - "quant_kallisto", - "{sample}.{seqmode}.kallisto.pseudo.sam", - ), - zip, - sample=[i for i in pd.unique(samples_table.index.values)], - seqmode=[ - get_sample("seqmode", search_id="index", search_value=i) - for i in pd.unique(samples_table.index.values) - ], - ), - output: - tx_tpm=os.path.join( - config["output_dir"], "summary_kallisto", "transcripts_tpm.tsv" - ), - tx_counts=os.path.join( - config["output_dir"], 
"summary_kallisto", "transcripts_counts.tsv" - ), - params: - cluster_log_path=config["cluster_log_dir"], - dir_out=lambda wildcards, output: os.path.dirname(output.tx_counts), - tables=",".join( - expand( - os.path.join( - config["output_dir"], - "samples", - "{sample}", - "quant_kallisto", - "abundance.h5", - ), - sample=[i for i in pd.unique(samples_table.index.values)], - ) - ), - sample_name_list=",".join( - expand("{sample}", sample=pd.unique(samples_table.index.values)) - ), - additional_params=parse_rule_config( - rule_config, - current_rule=current_rule, - immutable=( - "--input", - "--names", - "--txOut", - "--output", - ), - ), - container: - "docker://quay.io/biocontainers/r-merge-kallisto:0.6--hdfd78af_0" - conda: - os.path.join(workflow.basedir, "envs", "r-merge-kallisto.yaml") - threads: 1 - resources: - mem_mb=lambda wildcards, attempt, input: len(input.pseudoalignment) - * 1024 - * attempt, - log: - stderr=os.path.join(config["log_dir"], f"{current_rule}.stderr.log"), - stdout=os.path.join(config["log_dir"], f"{current_rule}.stdout.log"), - shell: - "(merge_kallisto.R \ - --input {params.tables} \ - --names {params.sample_name_list} \ - --output {params.dir_out} \ - {params.additional_params}) \ - 1> {log.stdout} 2> {log.stderr}" - - -current_rule = "pca_salmon" - - -rule pca_salmon: - input: - tpm=os.path.join( - config["output_dir"], "summary_salmon", "quantmerge", "{molecule}_tpm.tsv" - ), - output: - out=directory( - os.path.join(config["output_dir"], "zpca", "pca_salmon_{molecule}") - ), - params: - cluster_log_path=config["cluster_log_dir"], - additional_params=parse_rule_config( - rule_config, - current_rule=current_rule, - immutable=( - "--tpm", - "--out", - ), - ), - container: - "docker://quay.io/biocontainers/zpca:0.8.3.post1--pyh5e36f6f_0" - conda: - os.path.join(workflow.basedir, "envs", "zpca.yaml") - threads: 1 - resources: - mem_mb=lambda wildcards, attempt: 4096 * attempt, - log: - stderr=os.path.join( - config["log_dir"], f"{current_rule}_{{molecule}}.stderr.log" - ), - stdout=os.path.join( - config["log_dir"], f"{current_rule}_{{molecule}}.stdout.log" - ), - shell: - "(zpca-tpm \ - --tpm {input.tpm} \ - --out {output.out} \ - {params.additional_params}) \ - 1> {log.stdout} 2> {log.stderr}" - - -current_rule = "pca_kallisto" - - -rule pca_kallisto: - input: - tpm=os.path.join(config["output_dir"], "summary_kallisto", "{molecule}_tpm.tsv"), - output: - out=directory( - os.path.join(config["output_dir"], "zpca", "pca_kallisto_{molecule}") - ), - params: - cluster_log_path=config["cluster_log_dir"], - additional_params=parse_rule_config( - rule_config, - current_rule=current_rule, - immutable=( - "--tpm", - "--out", - ), - ), - container: - "docker://quay.io/biocontainers/zpca:0.8.3.post1--pyh5e36f6f_0" - conda: - os.path.join(workflow.basedir, "envs", "zpca.yaml") - threads: 1 - resources: - mem_mb=lambda wildcards, attempt: 4096 * attempt, - log: - stderr=os.path.join( - config["log_dir"], f"{current_rule}_{{molecule}}.stderr.log" - ), - stdout=os.path.join( - config["log_dir"], f"{current_rule}_{{molecule}}.stdout.log" - ), - shell: - "(zpca-tpm \ - --tpm {input.tpm} \ - --out {output.out} \ - {params.additional_params}) \ - 1> {log.stdout} 2> {log.stderr}" - - -current_rule = "star_rpm" - - -rule star_rpm: - """ - Create stranded bedgraph coverage with STARs RPM normalisation - """ - input: - bam=lambda wildcards: expand( - os.path.join( - config["output_dir"], - "samples", - "{sample}", - "map_genome", - 
"{sample}.{seqmode}.Aligned.sortedByCoord.out.bam", - ), - sample=wildcards.sample, - seqmode=get_sample( - "seqmode", search_id="index", search_value=wildcards.sample - ), - ), - bai=lambda wildcards: expand( - os.path.join( - config["output_dir"], - "samples", - "{sample}", - "map_genome", - "{sample}.{seqmode}.Aligned.sortedByCoord.out.bam.bai", - ), - sample=wildcards.sample, - seqmode=get_sample( - "seqmode", search_id="index", search_value=wildcards.sample - ), - ), - output: - str1=temp( - os.path.join( - config["output_dir"], - "samples", - "{sample}", - "STAR_coverage", - "{sample}_Signal.Unique.str1.out.bg", - ) - ), - str2=temp( - os.path.join( - config["output_dir"], - "samples", - "{sample}", - "STAR_coverage", - "{sample}_Signal.UniqueMultiple.str1.out.bg", - ) - ), - str3=temp( - os.path.join( - config["output_dir"], - "samples", - "{sample}", - "STAR_coverage", - "{sample}_Signal.Unique.str2.out.bg", - ) - ), - str4=temp( - os.path.join( - config["output_dir"], - "samples", - "{sample}", - "STAR_coverage", - "{sample}_Signal.UniqueMultiple.str2.out.bg", - ) - ), - shadow: - "full" - params: - cluster_log_path=config["cluster_log_dir"], - out_dir=lambda wildcards, output: os.path.dirname(output.str1), - prefix=lambda wildcards, output: os.path.join( - os.path.dirname(output.str1), f"{str(wildcards.sample)}_" - ), - additional_params=parse_rule_config( - rule_config, - current_rule=current_rule, - immutable=( - "--runMode", - "--inputBAMfile", - "--outWigType", - "--outFileNamePrefix", - ), - ), - container: - "docker://quay.io/biocontainers/star:2.7.8a--h9ee0642_1" - conda: - os.path.join(workflow.basedir, "envs", "STAR.yaml") - threads: 4 - resources: - mem_mb=lambda wildcards, attempt: 8192 * attempt, - log: - stderr=os.path.join( - config["log_dir"], "samples", "{sample}", f"{current_rule}_.stderr.log" - ), - stdout=os.path.join( - config["log_dir"], "samples", "{sample}", f"{current_rule}_.stdout.log" - ), - shell: - "(mkdir -p {params.out_dir}; \ - chmod -R 777 {params.out_dir}; \ - STAR \ - --runMode inputAlignmentsFromBAM \ - --runThreadN {threads} \ - --inputBAMfile {input.bam} \ - --outWigType bedGraph \ - --outFileNamePrefix {params.prefix}) \ - {params.additional_params} \ - 1> {log.stdout} 2> {log.stderr}" - - -current_rule = "rename_star_rpm_for_alfa" - - -rule rename_star_rpm_for_alfa: - input: - plus=lambda wildcards: expand( - os.path.join( - config["output_dir"], - "samples", - "{sample}", - "STAR_coverage", - "{sample}_Signal.{unique}.{plus}.out.bg", - ), - sample=wildcards.sample, - unique=alfa_dict[wildcards.renamed_unique], - plus=get_directionality( - get_sample( - "libtype", search_id="index", search_value=wildcards.sample - ), - "alfa_plus", - ), - ), - minus=lambda wildcards: expand( - os.path.join( - config["output_dir"], - "samples", - "{sample}", - "STAR_coverage", - "{sample}_Signal.{unique}.{minus}.out.bg", - ), - sample=wildcards.sample, - unique=alfa_dict[wildcards.renamed_unique], - minus=get_directionality( - get_sample( - "libtype", search_id="index", search_value=wildcards.sample - ), - "alfa_minus", - ), - ), - output: - plus=temp( - os.path.join( - config["output_dir"], - "samples", - "{sample}", - "ALFA", - "{renamed_unique}", - "{sample}.{renamed_unique}.plus.bg", - ) - ), - minus=temp( - os.path.join( - config["output_dir"], - "samples", - "{sample}", - "ALFA", - "{renamed_unique}", - "{sample}.{renamed_unique}.minus.bg", - ) - ), - params: - cluster_log_path=config["cluster_log_dir"], - container: - "docker://ubuntu:focal-20210416" - 
log: - stderr=os.path.join( - config["log_dir"], - "samples", - "{sample}", - f"{current_rule}_{{renamed_unique}}.stderr.log", - ), - stdout=os.path.join( - config["log_dir"], - "samples", - "{sample}", - f"{current_rule}_{{renamed_unique}}.stdout.log", - ), - shell: - "(cp {input.plus} {output.plus}; \ - cp {input.minus} {output.minus};) \ - 1>{log.stdout} 2>{log.stderr}" - - -current_rule = "generate_alfa_index" - - -rule generate_alfa_index: - """ Generate ALFA index files from sorted GTF file """ - input: - gtf=lambda wildcards: os.path.abspath( - get_sample("gtf", search_id="organism", search_value=wildcards.organism) - ), - chr_len=os.path.join( - config["star_indexes"], - "{organism}", - "{index_size}", - "STAR_index", - "chrNameLength.txt", - ), - output: - index_stranded=os.path.join( - config["alfa_indexes"], - "{organism}", - "{index_size}", - "ALFA", - "sorted_genes.stranded.ALFA_index", - ), - index_unstranded=os.path.join( - config["alfa_indexes"], - "{organism}", - "{index_size}", - "ALFA", - "sorted_genes.unstranded.ALFA_index", - ), - temp_dir=temp( - directory( - os.path.join( - config["alfa_indexes"], - "{organism}", - "{index_size}", - "ALFA_temp", - ) - ) - ), - params: - cluster_log_path=config["cluster_log_dir"], - genome_index="sorted_genes", - out_dir=lambda wildcards, output: os.path.dirname(output.index_stranded), - additional_params=parse_rule_config( - rule_config, - current_rule=current_rule, - immutable=( - "-a", - "-g", - "--chr_len", - "-o", - ), - ), - container: - "docker://quay.io/biocontainers/alfa:1.1.1--pyh5e36f6f_0" - conda: - os.path.join(workflow.basedir, "envs", "alfa.yaml") - threads: 4 - resources: - mem_mb=lambda wildcards, attempt: 4096 * attempt, - log: - os.path.join( - config["log_dir"], f"{current_rule}_{{organism}}_{{index_size}}.log" - ), - shell: - "(mkdir -p {output.temp_dir}; \ - alfa -a {input.gtf} \ - -g {params.genome_index} \ - --chr_len {input.chr_len} \ - --temp_dir {output.temp_dir} \ - -p {threads} \ - -o {params.out_dir} \ - {params.additional_params}) \ - &> {log}" - - -current_rule = "alfa_qc" - - -rule alfa_qc: - """ - Run ALFA from stranded bedgraph files - """ - input: - plus=lambda wildcards: os.path.join( - config["output_dir"], - "samples", - wildcards.sample, - "ALFA", - wildcards.renamed_unique, - f"{wildcards.sample}.{wildcards.renamed_unique}.plus.bg", - ), - minus=lambda wildcards: os.path.join( - config["output_dir"], - "samples", - wildcards.sample, - "ALFA", - wildcards.renamed_unique, - f"{wildcards.sample}.{wildcards.renamed_unique}.minus.bg", - ), - gtf=lambda wildcards: os.path.join( - config["alfa_indexes"], - get_sample("organism", search_id="index", search_value=wildcards.sample), - get_sample("index_size", search_id="index", search_value=wildcards.sample), - "ALFA", - "sorted_genes.stranded.ALFA_index", - ), - output: - biotypes=temp( - os.path.join( - config["output_dir"], - "samples", - "{sample}", - "ALFA", - "{renamed_unique}", - "ALFA_plots.Biotypes.pdf", - ) - ), - categories=temp( - os.path.join( - config["output_dir"], - "samples", - "{sample}", - "ALFA", - "{renamed_unique}", - "ALFA_plots.Categories.pdf", - ) - ), - table=os.path.join( - config["output_dir"], - "samples", - "{sample}", - "ALFA", - "{renamed_unique}", - "{sample}.ALFA_feature_counts.tsv", - ), - temp_dir=temp( - directory( - os.path.join( - config["output_dir"], - "samples", - "{sample}", - "ALFA", - "{renamed_unique}", - "ALFA_temp", - ) - ) - ), - params: - cluster_log_path=config["cluster_log_dir"], - out_dir=lambda 
wildcards, output: os.path.dirname(output.biotypes), - genome_index=lambda wildcards, input: os.path.abspath( - os.path.join(os.path.dirname(input.gtf), "sorted_genes") - ), - plus=lambda wildcards, input: os.path.basename(input.plus), - minus=lambda wildcards, input: os.path.basename(input.minus), - name="{sample}", - alfa_orientation=lambda wildcards: get_directionality( - get_sample("libtype", search_id="index", search_value=wildcards.sample), - "alfa", - ), - temp_dir=lambda wildcards, output: os.path.abspath( - os.path.dirname(output.temp_dir) - ), - additional_params=parse_rule_config( - rule_config, - current_rule=current_rule, - immutable=( - "-g", - "--bedgraph", - "-s", - ), - ), - container: - "docker://quay.io/biocontainers/alfa:1.1.1--pyh5e36f6f_0" - conda: - os.path.join(workflow.basedir, "envs", "alfa.yaml") - resources: - mem_mb=lambda wildcards, attempt: 4096 * attempt, - log: - os.path.join( - config["log_dir"], - "samples", - "{sample}", - f"{current_rule}.{{renamed_unique}}.log", - ), - shell: - "(mkdir -p {output.temp_dir};\ - cd {params.out_dir}; \ - alfa \ - -g {params.genome_index} \ - --bedgraph {params.plus} {params.minus} {params.name} \ - -s {params.alfa_orientation} \ - --temp_dir {params.temp_dir} \ - {params.additional_params}) \ - &> {log}" - - -current_rule = "prepare_multiqc_config" - - -rule prepare_multiqc_config: - """ - Prepare config for the MultiQC - """ - input: - script=os.path.join(workflow.basedir, "scripts", "zarp_multiqc_config.py"), - output: - multiqc_config=os.path.join(config["output_dir"], "multiqc_config.yaml"), - params: - cluster_log_path=config["cluster_log_dir"], - logo_path=config["report_logo"] if "report_logo" in config else "", - multiqc_intro_text=config["report_description"], - url=config["report_url"] if "report_url" in config else "", - author_name=config["author_name"], - author_email=config["author_email"], - additional_params=parse_rule_config( - rule_config, - current_rule=current_rule, - immutable=( - "--config", - "--intro-text", - "--custom-logo", - "--url", - ), - ), - log: - stderr=os.path.join(config["log_dir"], f"{current_rule}.stderr.log"), - stdout=os.path.join(config["log_dir"], f"{current_rule}.stdout.log"), - container: - "docker://quay.io/biocontainers/zavolan-multiqc-plugins:1.3--pyh5e36f6f_0" - conda: - os.path.join(workflow.basedir, "envs", "zavolan-multiqc-plugins.yaml") - resources: - mem_mb=lambda wildcards, attempt: 4096 * attempt, - shell: - "(python {input.script} \ - --config {output.multiqc_config} \ - --intro-text '{params.multiqc_intro_text}' \ - --custom-logo '{params.logo_path}' \ - --url '{params.url}' \ - --author-name '{params.author_name}' \ - --author-email '{params.author_email}' \ - {params.additional_params}) \ - 1> {log.stdout} 2> {log.stderr}" - - -current_rule = "multiqc_report" - - -rule multiqc_report: - """ - Create report with MultiQC - """ - input: - fastqc_se=expand( - os.path.join( - config["output_dir"], "samples", "{sample}", "fastqc", "{mate}" - ), - sample=pd.unique(samples_table.index.values), - mate="fq1", - ), - fastqc_pe=expand( - os.path.join( - config["output_dir"], "samples", "{sample}", "fastqc", "{mate}" - ), - sample=[ - i - for i in pd.unique( - samples_table[samples_table["seqmode"] == "pe"].index.values - ) - ], - mate="fq2", - ), - fastqc_trimmed_se=expand( - os.path.join( - config["output_dir"], - "samples", - "{sample}", - "fastqc_trimmed", - "fq1_{seqmode}", - ), - zip, - sample=pd.unique(samples_table.index.values), - seqmode=[ - get_sample("seqmode", 
search_id="index", search_value=i) - for i in pd.unique(samples_table.index.values) - ], - ), - fastqc_trimmed_pe=expand( - os.path.join( - config["output_dir"], - "samples", - "{sample}", - "fastqc_trimmed", - "{mate}_pe", - ), - sample=[ - i - for i in pd.unique( - samples_table[samples_table["seqmode"] == "pe"].index.values - ) - ], - mate="fq2", - ), - pseudoalignment=expand( - os.path.join( - config["output_dir"], - "samples", - "{sample}", - "quant_kallisto", - "{sample}.{seqmode}.kallisto.pseudo.sam", - ), - zip, - sample=[i for i in pd.unique(samples_table.index.values)], - seqmode=[ - get_sample("seqmode", search_id="index", search_value=i) - for i in pd.unique(samples_table.index.values) - ], - ), - TIN_score=expand( - os.path.join( - config["output_dir"], "samples", "{sample}", "TIN", "TIN_score.tsv" - ), - sample=pd.unique(samples_table.index.values), - ), - tables=lambda wildcards: expand( - os.path.join( - config["output_dir"], - "samples", - "{sample}", - "ALFA", - "{renamed_unique}", - "{sample}.ALFA_feature_counts.tsv", - ), - sample=pd.unique(samples_table.index.values), - renamed_unique=alfa_dict.keys(), - ), - zpca_salmon=expand( - os.path.join(config["output_dir"], "zpca", "pca_salmon_{molecule}"), - molecule=["genes", "transcripts"], - ), - zpca_kallisto=expand( - os.path.join(config["output_dir"], "zpca", "pca_kallisto_{molecule}"), - molecule=["genes", "transcripts"], - ), - multiqc_config=os.path.join(config["output_dir"], "multiqc_config.yaml"), - output: - multiqc_report=directory(os.path.join(config["output_dir"], "multiqc_summary")), - params: - cluster_log_path=config["cluster_log_dir"], - results_dir=lambda wildcards, output: os.path.split(output.multiqc_report)[0], - log_dir=config["log_dir"], - additional_params=parse_rule_config( - rule_config, - current_rule=current_rule, - immutable=( - "--outdir", - "--config", - ), - ), - container: - "docker://quay.io/biocontainers/zavolan-multiqc-plugins:1.3--pyh5e36f6f_0" - conda: - os.path.join(workflow.basedir, "envs", "zavolan-multiqc-plugins.yaml") - resources: - mem_mb=lambda wildcards, attempt: 4096 * attempt, - log: - stderr=os.path.join(config["log_dir"], f"{current_rule}.stderr.log"), - stdout=os.path.join(config["log_dir"], f"{current_rule}.stdout.log"), - shell: - "(multiqc \ - --outdir {output.multiqc_report} \ - --config {input.multiqc_config} \ - {params.additional_params} \ - {params.results_dir} \ - {params.log_dir};) \ - 1> {log.stdout} 2> {log.stderr}" - - -current_rule = "sort_bed_4_big" - - -rule sort_bed_4_big: - """ - sort bedGraphs in order to work with bedGraphtobigWig - """ - input: - bg=os.path.join( - config["output_dir"], - "samples", - "{sample}", - "ALFA", - "{renamed_unique}", - "{sample}.{renamed_unique}.{strand}.bg", - ), - output: - sorted_bg=temp( - os.path.join( - config["output_dir"], - "samples", - "{sample}", - "bigWig", - "{renamed_unique}", - "{sample}_{renamed_unique}_{strand}.sorted.bg", - ) - ), - params: - cluster_log_path=config["cluster_log_dir"], - additional_params=parse_rule_config( - rule_config, current_rule=current_rule, immutable=("-i",) - ), - container: - "docker://quay.io/biocontainers/bedtools:2.27.1--h9a82719_5" - conda: - os.path.join(workflow.basedir, "envs", "bedtools.yaml") - resources: - mem_mb=lambda wildcards, attempt: 2048 * attempt, - log: - stderr=os.path.join( - config["log_dir"], - "samples", - "{sample}", - f"{current_rule}_{{renamed_unique}}_{{strand}}.stderr.log", - ), - shell: - "(sortBed \ - -i {input.bg} \ - {params.additional_params} \ - > 
{output.sorted_bg};) 2> {log.stderr}" - - -current_rule = "prepare_bigWig" - - -rule prepare_bigWig: - """ - bedGraphtobigWig, for viewing in genome browsers - """ - input: - sorted_bg=os.path.join( - config["output_dir"], - "samples", - "{sample}", - "bigWig", - "{renamed_unique}", - "{sample}_{renamed_unique}_{strand}.sorted.bg", - ), - chr_sizes=lambda wildcards: os.path.join( - config["star_indexes"], - get_sample("organism", search_id="index", search_value=wildcards.sample), - get_sample("index_size", search_id="index", search_value=wildcards.sample), - "STAR_index", - "chrNameLength.txt", - ), - output: - bigWig=os.path.join( - config["output_dir"], - "samples", - "{sample}", - "bigWig", - "{renamed_unique}", - "{sample}_{renamed_unique}_{strand}.bw", - ), - params: - cluster_log_path=config["cluster_log_dir"], - additional_params=parse_rule_config( - rule_config, current_rule=current_rule, immutable=() - ), - container: - "docker://quay.io/biocontainers/ucsc-bedgraphtobigwig:377--h0b8a92a_2" - conda: - os.path.join(workflow.basedir, "envs", "ucsc-bedgraphtobigwig.yaml") - resources: - mem_mb=lambda wildcards, attempt: 1024 * attempt, - log: - stderr=os.path.join( - config["log_dir"], - "samples", - "{sample}", - f"{current_rule}_{{renamed_unique}}_{{strand}}.stderr.log", - ), - stdout=os.path.join( - config["log_dir"], - "samples", - "{sample}", - f"{current_rule}_{{renamed_unique}}_{{strand}}.stdout.log", - ), - shell: - "(bedGraphToBigWig \ - {params.additional_params} \ - {input.sorted_bg} \ - {input.chr_sizes} \ - {output.bigWig};) \ - 1> {log.stdout} 2> {log.stderr}" -- GitLab From 83496c92db17f7e4e428e111a5f28602f02879bd Mon Sep 17 00:00:00 2001 From: Iris Mestres Pascual <iris.mestrespascual@unibas.ch> Date: Tue, 17 Jan 2023 17:17:58 +0000 Subject: [PATCH 03/22] Add "rules" directory with the three subworkflows --- workflow/rules/.gitkeep | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 workflow/rules/.gitkeep diff --git a/workflow/rules/.gitkeep b/workflow/rules/.gitkeep new file mode 100644 index 0000000..e69de29 -- GitLab From 0200484018381c28158eb6e05f82a29c14d1321b Mon Sep 17 00:00:00 2001 From: Iris Mestres Pascual <iris.mestrespascual@unibas.ch> Date: Tue, 17 Jan 2023 17:18:39 +0000 Subject: [PATCH 04/22] Snakefile of the map subworkflow --- workflow/rules/map.smk | 1017 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1017 insertions(+) create mode 100644 workflow/rules/map.smk diff --git a/workflow/rules/map.smk b/workflow/rules/map.smk new file mode 100644 index 0000000..f5ef040 --- /dev/null +++ b/workflow/rules/map.smk @@ -0,0 +1,1017 @@ +############################################################################### +# (c) 2020 Paula Iborra, Zavolan Lab, Biozentrum, University of Basel +# (@) paula.iborradetoledo@unibas.ch / paula.iborra@alumni.esci.upf.edu +# +# Workflow to map small RNA-seq reads (e.g. from miRNA sequencing libraries). 
+###############################################################################
+#
+# USAGE:
+# snakemake \
+# --snakefile="map.smk" \
+# --cores 4 \
+# --use-singularity \
+# --singularity-args "--bind $PWD/../" \
+# --printshellcmds \
+# --rerun-incomplete \
+# --verbose
+#
+###############################################################################
+import os
+
+configfile: "config.yaml"
+
+# Rules that are cheap to execute and need no cluster resources are run
+# locally (localrules)
+localrules:
+    finish_map,
+
+###############################################################################
+### Finish rule
+###############################################################################
+
+
+rule finish_map:
+    input:
+        maps=expand(
+            os.path.join(
+                config["output_dir"],
+                "{sample}",
+                "convertedSortedMappings_{sample}.bam.bai",
+            ),
+            sample=config["sample"],
+        ),
+
+###############################################################################
+### Uncompress fastq files
+###############################################################################
+
+
+rule uncompress_zipped_files:
+    input:
+        reads=os.path.join(config["map_input_dir"], "{sample}.{format}.gz"),
+    output:
+        reads=os.path.join(
+            config["output_dir"], "{sample}", "{format}", "reads.{format}"
+        ),
+    params:
+        cluster_log=os.path.join(
+            config["cluster_log"],
+            "uncompress_zipped_files_{sample}_{format}.log",
+        ),
+    log:
+        os.path.join(
+            config["local_log"],
+            "uncompress_zipped_files_{sample}_{format}.log",
+        ),
+    singularity:
+        "docker://zavolab/ubuntu:18.04"
+    shell:
+        "(zcat {input.reads} > {output.reads}) &> {log}"
+
+
+###############################################################################
+### Quality filter
+###############################################################################
+
+
+rule fastq_quality_filter:
+    input:
+        reads=os.path.join(
+            config["output_dir"], "{sample}", "fastq", "reads.fastq"
+        ),
+    output:
+        reads=os.path.join(
+            config["output_dir"], "{sample}", "fastq", "filtered_reads.fastq"
+        ),
+    params:
+        cluster_log=os.path.join(
+            config["cluster_log"], "fastq_quality_filter_{sample}.log"
+        ),
+        p=config["p_value"],
+        q=config["q_value"],
+    log:
+        os.path.join(config["local_log"], "fastq_quality_filter_{sample}.log"),
+    singularity:
+        "docker://zavolab/fastx:0.0.14"
+    shell:
+        "(fastq_quality_filter \
+        -v \
+        -q {params.q} \
+        -p {params.p} \
+        -i {input.reads} \
+        > {output.reads} \
+        ) &> {log}"
+
+
+###############################################################################
+### Convert fastq to fasta
+###############################################################################
+
+
+rule fastq_to_fasta:
+    input:
+        reads=os.path.join(
+            config["output_dir"], "{sample}", "fastq", "filtered_reads.fastq"
+        ),
+    output:
+        reads=os.path.join(
+            config["output_dir"], "{sample}", "fastq", "reads.fa"
+        ),
+    params:
+        cluster_log=os.path.join(
+            config["cluster_log"], "fastq_to_fasta_{sample}.log"
+        ),
+    log:
+        os.path.join(config["local_log"], "fastq_to_fasta_{sample}.log"),
+    singularity:
+        "docker://zavolab/fastx:0.0.14"
+    shell:
+        "(fastq_to_fasta -r -n -i {input.reads} > {output.reads}) &> {log}"
+
+
+###############################################################################
+### Format fasta file
+###############################################################################
+
+
+rule fasta_formatter:
+    input:
+        reads=lambda wildcards: os.path.join(
+            config["output_dir"],
+            wildcards.sample,
+            config[wildcards.sample]["format"],
+            "reads.fa",
+        ),
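
For reference, the fastq_quality_filter step above keeps a read only if at least p percent of its bases reach Phred quality q. A small, dependency-free sketch of roughly the same criterion:

    def passes_quality_filter(qual: str, min_q: int = 20, min_pct: int = 80) -> bool:
        # Phred scores, assuming the Sanger/Illumina 1.8+ ASCII offset of 33.
        scores = [ord(c) - 33 for c in qual]
        good = sum(1 for s in scores if s >= min_q)
        return good * 100 >= min_pct * len(scores)

    # A 10-base read with nine bases at Q40 and one at Q2 passes at p=80, q=20.
    assert passes_quality_filter("IIIIIIIII#", min_q=20, min_pct=80)
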
output: + reads=os.path.join(config["output_dir"], "{sample}", "formatted.fasta"), + params: + cluster_log=os.path.join( + config["cluster_log"], "fasta_formatter_{sample}.log" + ), + log: + os.path.join(config["local_log"], "fasta_formatter_{sample}.log"), + singularity: + "docker://zavolab/fastx:0.0.14" + shell: + "(fasta_formatter -w 0 -i {input.reads} > {output.reads}) &> {log}" + + +############################################################################### +### Remove adapters +############################################################################### + + +rule cutadapt: + input: + reads=os.path.join(config["output_dir"], "{sample}", "formatted.fasta"), + output: + reads=os.path.join(config["output_dir"], "{sample}", "cut.fasta"), + params: + cluster_log=os.path.join( + config["cluster_log"], "cutadapt_{sample}.log" + ), + adapter=lambda wildcards: config[wildcards.sample]["adapter"], + error_rate=config["error_rate"], + minimum_length=config["minimum_length"], + overlap=config["overlap"], + max_n=config["max_n"], + log: + os.path.join(config["local_log"], "cutadapt_{sample}.log"), + resources: + threads=8, + singularity: + "docker://zavolab/cutadapt:1.16" + shell: + "(cutadapt \ + -a {params.adapter} \ + --error-rate {params.error_rate} \ + --minimum-length {params.minimum_length} \ + --overlap {params.overlap} \ + --trim-n \ + --max-n {params.max_n} \ + --cores {resources.threads} \ + -o {output.reads} {input.reads}) &> {log}" + + +############################################################################### +### Collapse identical reads +############################################################################### + + +rule fastx_collapser: + input: + reads=os.path.join(config["output_dir"], "{sample}", "cut.fasta"), + output: + reads=os.path.join(config["output_dir"], "{sample}", "collapsed.fasta"), + params: + cluster_log=os.path.join( + config["cluster_log"], "fastx_collapser_{sample}.log" + ), + log: + os.path.join(config["local_log"], "fastx_collapser_{sample}.log"), + singularity: + "docker://zavolab/fastx:0.0.14" + shell: + "(fastx_collapser -i {input.reads} > {output.reads}) &> {log}" + + +############################################################################### +### Segemehl genome mapping +############################################################################### + + +rule mapping_genome_segemehl: + input: + reads=os.path.join(config["output_dir"], "{sample}", "collapsed.fasta"), + genome=config["genome"], + genome_index_segemehl=config["genome_index_segemehl"], + output: + gmap=os.path.join( + config["output_dir"], "{sample}", "segemehlGenome_map.sam" + ), + params: + cluster_log=os.path.join( + config["cluster_log"], "mapping_genome_segemehl_{sample}.log" + ), + log: + os.path.join( + config["local_log"], "mapping_genome_segemehl_{sample}.log" + ), + resources: + mem=50, + time=12, + threads=8, + singularity: + "docker://zavolab/segemehl:0.2.0" + shell: + "(segemehl.x \ + -i {input.genome_index_segemehl} \ + -d {input.genome} \ + -t {threads} \ + -q {input.reads} \ + -outfile {output.gmap} \ + ) &> {log}" + + +############################################################################### +### Segemehl transcriptome mapping +############################################################################### + + +rule mapping_transcriptome_segemehl: + input: + reads=os.path.join(config["output_dir"], "{sample}", "collapsed.fasta"), + transcriptome=config["transcriptome"], + transcriptome_index_segemehl=config["transcriptome_index_segemehl"], + 
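
Both segemehl mapping rules consume collapsed.fasta, the output of fastx_collapser, which merges identical reads and records the copy number in the header (">rank-count", e.g. ">1-5" for the most frequent sequence seen five times; the exact format is an assumption here). The sam_uncollapse.pl step further down relies on that suffix to restore one alignment per original read. A sketch of the collapsing idea:

    from collections import Counter

    def collapse_reads(seqs):
        # Yield (header, sequence) pairs in fastx_collapser-style ">rank-count" form.
        for rank, (seq, n) in enumerate(Counter(seqs).most_common(), start=1):
            yield f">{rank}-{n}", seq

    print(list(collapse_reads(["ACGT", "ACGT", "TTTT"])))
    # [('>1-2', 'ACGT'), ('>2-1', 'TTTT')]
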
output: + tmap=os.path.join( + config["output_dir"], "{sample}", "segemehlTranscriptome_map.sam" + ), + params: + cluster_log=os.path.join( + config["cluster_log"], + "mapping_transcriptome_segemehl_{sample}.log", + ), + log: + os.path.join( + config["local_log"], "mapping_transcriptome_segemehl_{sample}.log" + ), + resources: + mem=10, + time=12, + threads=8, + singularity: + "docker://zavolab/segemehl:0.2.0" + shell: + "(segemehl.x \ + -i {input.transcriptome_index_segemehl} \ + -d {input.transcriptome} \ + -t {threads} \ + -q {input.reads} \ + -outfile {output.tmap} \ + ) &> {log}" + + +############################################################################### +### Filter fasta for oligomap mapping +############################################################################### + + +rule filter_fasta_for_oligomap: + input: + reads=os.path.join(config["output_dir"], "{sample}", "collapsed.fasta"), + script=os.path.join(config["scripts_dir"], "validation_fasta.py"), + output: + reads=os.path.join( + config["output_dir"], "{sample}", "filtered_for_oligomap.fasta" + ), + params: + cluster_log=os.path.join( + config["cluster_log"], "filter_fasta_for_oligomap_{sample}.log" + ), + max_length_reads=config["max_length_reads"], + log: + os.path.join( + config["local_log"], "filter_fasta_for_oligomap_{sample}.log" + ), + singularity: + "docker://zavolab/python:3.6.5" + shell: + "(python {input.script} \ + -r {params.max_length_reads} \ + -i {input.reads} \ + -o {output.reads} \ + ) &> {log}" + + +############################################################################### +### Oligomap genome mapping +############################################################################### + + +rule mapping_genome_oligomap: + input: + reads=os.path.join( + config["output_dir"], "{sample}", "filtered_for_oligomap.fasta" + ), + target=config["genome"], + output: + gmap=os.path.join( + config["output_dir"], "{sample}", "oligoGenome_map.fa" + ), + report=os.path.join( + config["output_dir"], "{sample}", "oligoGenome_report.txt" + ), + params: + cluster_log=os.path.join( + config["cluster_log"], "mapping_genome_oligomap_{sample}.log" + ), + log: + os.path.join( + config["local_log"], "mapping_genome_oligomap_{sample}.log" + ), + resources: + mem=50, + time=6, + threads=8, + singularity: + "docker://zavolab/oligomap:1.0" + shell: + "(oligomap \ + {input.target} \ + {input.reads} \ + -r {output.report} \ + > {output.gmap} \ + ) &> {log}" + + +############################################################################### +### Oligomap genome sorting +############################################################################### + + +rule sort_genome_oligomap: + input: + tmap=os.path.join( + config["output_dir"], "{sample}", "oligoGenome_map.fa" + ), + report=os.path.join( + config["output_dir"], "{sample}", "oligoGenome_report.txt" + ), + script=os.path.join(config["scripts_dir"], "blocksort.sh"), + output: + sort=os.path.join( + config["output_dir"], "{sample}", "oligoGenome_sorted.fa" + ), + params: + cluster_log=os.path.join( + config["cluster_log"], "sorting_genome_oligomap_{sample}.log" + ), + log: + os.path.join( + config["local_log"], "sorting_genome_oligomap_{sample}.log" + ), + resources: + threads=8, + time=6, + shell: + "(bash {input.script} \ + {input.tmap} \ + {resources.threads} \ + {output.sort} \ + ) &> {log}" + + +############################################################################### +### Oligomap genome mapping output to SAM 
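
Oligomap is only applied to short reads: the filter_fasta_for_oligomap rule above drops anything longer than max_length_reads before mapping. A trivial stand-in for that filter (the real validation_fasta.py script may do more, such as validating sequences):

    def filter_by_length(records, max_len: int = 30):
        # records: iterable of (header, sequence) tuples from a FASTA parser
        return [(h, s) for h, s in records if len(s) <= max_len]

    assert filter_by_length([(">1-2", "ACGT"), (">2-1", "A" * 50)]) == [(">1-2", "ACGT")]
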
+###############################################################################
+
+
+rule oligomap_genome_toSAM:
+    input:
+        report=os.path.join(
+            config["output_dir"], "{sample}", "oligoGenome_report.txt"
+        ),
+        sort=os.path.join(
+            config["output_dir"], "{sample}", "oligoGenome_sorted.fa"
+        ),
+        script=os.path.join(
+            config["scripts_dir"], "oligomapOutputToSam_nhfiltered.py"
+        ),
+    output:
+        gmap=os.path.join(
+            config["output_dir"], "{sample}", "oligoGenome_converted.sam"
+        ),
+    params:
+        cluster_log=os.path.join(
+            config["cluster_log"], "oligomap_genome_toSAM_{sample}.log"
+        ),
+        nh=config["nh"],
+    log:
+        os.path.join(config["local_log"], "oligomap_genome_toSAM_{sample}.log"),
+    resources:
+        time=1,
+        queue=1,
+    singularity:
+        "docker://zavolab/python:3.6.5"
+    shell:
+        "(python {input.script} \
+        -i {input.sort} \
+        -n {params.nh} \
+        > {output.gmap}) &> {log}"
+
+
+###############################################################################
+### Oligomap transcriptome mapping
+###############################################################################
+
+
+rule mapping_transcriptome_oligomap:
+    input:
+        reads=os.path.join(
+            config["output_dir"], "{sample}", "filtered_for_oligomap.fasta"
+        ),
+        target=config["transcriptome"],
+    output:
+        tmap=os.path.join(
+            config["output_dir"], "{sample}", "oligoTranscriptome_map.fa"
+        ),
+        report=os.path.join(
+            config["output_dir"], "{sample}", "oligoTranscriptome_report.txt"
+        ),
+    params:
+        cluster_log=os.path.join(
+            config["cluster_log"],
+            "mapping_transcriptome_oligomap_{sample}.log",
+        ),
+    log:
+        os.path.join(
+            config["local_log"], "mapping_transcriptome_oligomap_{sample}.log"
+        ),
+    resources:
+        mem=10,
+        time=6,
+        threads=8,
+    singularity:
+        "docker://zavolab/oligomap:1.0"
+    shell:
+        "(oligomap \
+        {input.target} \
+        {input.reads} \
+        -s \
+        -r {output.report} \
+        > {output.tmap} \
+        ) &> {log}"
+
+
+###############################################################################
+### Oligomap transcriptome sorting
+###############################################################################
+
+
+rule sort_transcriptome_oligomap:
+    input:
+        tmap=os.path.join(
+            config["output_dir"], "{sample}", "oligoTranscriptome_map.fa"
+        ),
+        report=os.path.join(
+            config["output_dir"], "{sample}", "oligoTranscriptome_report.txt"
+        ),
+        script=os.path.join(config["scripts_dir"], "blocksort.sh"),
+    output:
+        sort=os.path.join(
+            config["output_dir"], "{sample}", "oligoTranscriptome_sorted.fa"
+        ),
+    params:
+        cluster_log=os.path.join(
+            config["cluster_log"],
+            "sorting_transcriptome_oligomap_{sample}.log",
+        ),
+    log:
+        os.path.join(
+            config["local_log"], "sorting_transcriptome_oligomap_{sample}.log"
+        ),
+    resources:
+        threads=8,
+    shell:
+        "(bash {input.script} \
+        {input.tmap} \
+        {resources.threads} \
+        {output.sort} \
+        ) &> {log}"
+
+
+###############################################################################
+### Oligomap transcriptome mapping output to SAM
+###############################################################################
+
+
+rule oligomap_transcriptome_toSAM:
+    input:
+        report=os.path.join(
+            config["output_dir"], "{sample}", "oligoTranscriptome_report.txt"
+        ),
+        sort=os.path.join(
+            config["output_dir"], "{sample}", "oligoTranscriptome_sorted.fa"
+        ),
+        script=os.path.join(
+            config["scripts_dir"], "oligomapOutputToSam_nhfiltered.py"
+        ),
+    output:
+        tmap=os.path.join(
+            config["output_dir"],
+            "{sample}",
+            "oligoTranscriptome_converted.sam",
+        ),
+    params:
+        cluster_log=os.path.join(
+            config["cluster_log"],
+            "oligomap_transcriptome_toSAM_{sample}.log"
+        ),
+        nh=config["nh"],
+    log:
+        os.path.join(
+            config["local_log"], "oligomap_transcriptome_toSAM_{sample}.log"
+        ),
+    singularity:
+        "docker://zavolab/python:3.6.5"
+    shell:
+        "(python {input.script} \
+        -i {input.sort} \
+        -n {params.nh} \
+        > {output.tmap} \
+        ) &> {log}"
+
+
+###############################################################################
+### Merge genome mappings
+###############################################################################
+
+
+rule merge_genome_maps:
+    input:
+        gmap1=os.path.join(
+            config["output_dir"], "{sample}", "segemehlGenome_map.sam"
+        ),
+        gmap2=os.path.join(
+            config["output_dir"], "{sample}", "oligoGenome_converted.sam"
+        ),
+    output:
+        gmaps=os.path.join(
+            config["output_dir"], "{sample}", "GenomeMappings.sam"
+        ),
+    params:
+        cluster_log=os.path.join(
+            config["cluster_log"], "merge_genome_maps_{sample}.log"
+        ),
+    log:
+        os.path.join(config["local_log"], "merge_genome_maps_{sample}.log"),
+    singularity:
+        "docker://zavolab/ubuntu:18.04"
+    shell:
+        "(cat {input.gmap1} {input.gmap2} > {output.gmaps}) &> {log}"
+
+
+###############################################################################
+### Merge transcriptome mappings
+###############################################################################
+
+
+rule merge_transcriptome_maps:
+    input:
+        tmap1=os.path.join(
+            config["output_dir"], "{sample}", "segemehlTranscriptome_map.sam"
+        ),
+        tmap2=os.path.join(
+            config["output_dir"],
+            "{sample}",
+            "oligoTranscriptome_converted.sam",
+        ),
+    output:
+        tmaps=os.path.join(
+            config["output_dir"], "{sample}", "TranscriptomeMappings.sam"
+        ),
+    params:
+        cluster_log=os.path.join(
+            config["cluster_log"], "merge_transcriptome_maps_{sample}.log"
+        ),
+    log:
+        os.path.join(
+            config["local_log"], "merge_transcriptome_maps_{sample}.log"
+        ),
+    singularity:
+        "docker://zavolab/ubuntu:18.04"
+    shell:
+        "(cat {input.tmap1} {input.tmap2} > {output.tmaps}) &> {log}"
+
+
+###############################################################################
+### Filter NH genome
+###############################################################################
+
+
+rule nh_filter_genome:
+    input:
+        gmaps=os.path.join(
+            config["output_dir"], "{sample}", "GenomeMappings.sam"
+        ),
+        script=os.path.join(config["scripts_dir"], "nh_filter.py"),
+    output:
+        gmaps=os.path.join(
+            config["output_dir"], "{sample}", "nhfiltered_GenomeMappings.sam"
+        ),
+    params:
+        cluster_log=os.path.join(
+            config["cluster_log"], "nh_filter_genome_{sample}.log"
+        ),
+        nh=config["nh"],
+    log:
+        os.path.join(config["local_log"], "nh_filter_genome_{sample}.log"),
+    singularity:
+        "docker://zavolab/python:3.6.5"
+    shell:
+        "(python {input.script} \
+        {input.gmaps} \
+        {params.nh} \
+        {output.gmaps} \
+        ) &> {log}"
+
+
+###############################################################################
+### Filter NH transcriptome
+###############################################################################
+
+
+rule filter_nh_transcriptome:
+    input:
+        tmaps=os.path.join(
+            config["output_dir"], "{sample}", "TranscriptomeMappings.sam"
+        ),
+        script=os.path.join(config["scripts_dir"], "nh_filter.py"),
+    output:
+        tmaps=os.path.join(
+            config["output_dir"],
+            "{sample}",
+            "nhfiltered_TranscriptomeMappings.sam",
+        ),
+    params:
+        cluster_log=os.path.join(
+            config["cluster_log"], "filter_nh_transcriptome_{sample}.log"
+        ),
+        nh=config["nh"],
+    log:
+        os.path.join(
+            config["local_log"], "filter_nh_transcriptome_{sample}.log"
+        ),
+    singularity:
+        "docker://zavolab/python:3.6.5"
+    shell:
+        "(python {input.script} \
+        {input.tmaps} \
+        {params.nh} \
+        {output.tmaps} \
+        ) &> {log}"
+
+
+###############################################################################
+### Remove headers from genome mappings
+###############################################################################
+
+
+rule remove_headers_genome:
+    input:
+        gmap=os.path.join(
+            config["output_dir"], "{sample}", "nhfiltered_GenomeMappings.sam"
+        ),
+    output:
+        gmap=os.path.join(
+            config["output_dir"], "{sample}", "noheader_GenomeMappings.sam"
+        ),
+    params:
+        cluster_log=os.path.join(
+            config["cluster_log"], "remove_headers_genome_{sample}.log"
+        ),
+    log:
+        os.path.join(config["local_log"], "remove_headers_genome_{sample}.log"),
+    singularity:
+        "docker://zavolab/samtools:1.8"
+    shell:
+        "(samtools view {input.gmap} > {output.gmap}) &> {log}"
+
+
+###############################################################################
+### Remove headers from transcriptome mappings
+###############################################################################
+
+
+rule remove_headers_transcriptome:
+    input:
+        tmap=os.path.join(
+            config["output_dir"],
+            "{sample}",
+            "nhfiltered_TranscriptomeMappings.sam",
+        ),
+    output:
+        tmap=os.path.join(
+            config["output_dir"],
+            "{sample}",
+            "noheader_TranscriptomeMappings.sam",
+        ),
+    params:
+        cluster_log=os.path.join(
+            config["cluster_log"], "remove_headers_transcriptome_{sample}.log"
+        ),
+    log:
+        os.path.join(
+            config["local_log"], "remove_headers_transcriptome_{sample}.log"
+        ),
+    singularity:
+        "docker://zavolab/samtools:1.8"
+    shell:
+        "(samtools view {input.tmap} > {output.tmap}) &> {log}"
+
+
+###############################################################################
+### Transcriptome to genome coordinates
+###############################################################################
+
+
+rule trans_to_gen:
+    input:
+        tmap=os.path.join(
+            config["output_dir"],
+            "{sample}",
+            "noheader_TranscriptomeMappings.sam",
+        ),
+        script=os.path.join(config["scripts_dir"], "sam_trx_to_sam_gen.pl"),
+        exons=config["exons"],
+    output:
+        genout=os.path.join(config["output_dir"], "{sample}", "TransToGen.sam"),
+    params:
+        cluster_log=os.path.join(
+            config["cluster_log"], "trans_to_gen_{sample}.log"
+        ),
+    log:
+        os.path.join(config["local_log"], "trans_to_gen_{sample}.log"),
+    singularity:
+        "docker://zavolab/perl:5.28"
+    shell:
+        "(perl {input.script} \
+        --in {input.tmap} \
+        --exons {input.exons} \
+        --out {output.genout} \
+        ) &> {log}"
+
+
+###############################################################################
+### Concatenate genome and transcriptome mappings
+###############################################################################
+
+
+rule cat_mapping:
+    input:
+        gmap1=os.path.join(config["output_dir"], "{sample}", "TransToGen.sam"),
+        gmap2=os.path.join(
+            config["output_dir"], "{sample}", "noheader_GenomeMappings.sam"
+        ),
+    output:
+        catmaps=os.path.join(
+            config["output_dir"], "{sample}", "catMappings.sam"
+        ),
+    params:
+        cluster_log=os.path.join(
+            config["cluster_log"], "cat_mapping_{sample}.log"
+        ),
+    log:
+        os.path.join(config["local_log"], "cat_mapping_{sample}.log"),
+    singularity:
+        "docker://zavolab/ubuntu:18.04"
+    shell:
+        "(cat {input.gmap1} {input.gmap2} > {output.catmaps}) &> {log}"
+
+
+###############################################################################
+### Add header
+###############################################################################
+
+
+rule add_header:
+    input:
header=config["header_of_collapsed_fasta"], + catmaps=os.path.join( + config["output_dir"], "{sample}", "catMappings.sam" + ), + output: + concatenate=os.path.join( + config["output_dir"], + "{sample}", + "concatenated_header_catMappings.sam", + ), + params: + cluster_log=os.path.join( + config["cluster_log"], "add_header_{sample}.log" + ), + log: + os.path.join(config["local_log"], "add_header_{sample}.log"), + singularity: + "docker://zavolab/ubuntu:18.04" + shell: + "(cat {input.header} {input.catmaps} > {output.concatenate}) &> {log}" + + +############################################################################### +### Sort mapped file by IDs +############################################################################### + + +rule sort_id: + input: + concatenate=os.path.join( + config["output_dir"], + "{sample}", + "concatenated_header_catMappings.sam", + ), + output: + sort=os.path.join( + config["output_dir"], "{sample}", "header_sorted_catMappings.sam" + ), + params: + cluster_log=os.path.join(config["cluster_log"], "sort_id_{sample}.log"), + log: + os.path.join(config["local_log"], "sort_id_{sample}.log"), + singularity: + "docker://zavolab/samtools:1.8" + shell: + "(samtools sort -n -o {output.sort} {input.concatenate}) &> {log}" + + +############################################################################### +### Remove inferior mappings (keeping multimappers) +############################################################################### + + +rule remove_inferiors: + input: + sort=os.path.join( + config["output_dir"], "{sample}", "header_sorted_catMappings.sam" + ), + script=os.path.join( + config["scripts_dir"], + "sam_remove_duplicates_inferior_alignments_multimappers.1_5.pl", + ), + output: + remove_inf=os.path.join( + config["output_dir"], "{sample}", "removeInferiors.sam" + ), + params: + cluster_log=os.path.join( + config["cluster_log"], "remove_inferiors_{sample}.log" + ), + log: + os.path.join(config["local_log"], "remove_inferiors_{sample}.log"), + resources: + mem=15, + threads=4, + singularity: + "docker://zavolab/perl:5.28" + shell: + "(perl {input.script} \ + --print-header \ + --keep-mm \ + --in {input.sort} \ + --out {output.remove_inf} \ + ) &> {log}" + + +############################################################################### +### Uncollapse reads +############################################################################### + + +rule uncollapse_reads: + input: + maps=os.path.join( + config["output_dir"], "{sample}", "removeInferiors.sam" + ), + script=os.path.join(config["scripts_dir"], "sam_uncollapse.pl"), + output: + maps=os.path.join( + config["output_dir"], "{sample}", "uncollapsedMappings.sam" + ), + params: + cluster_log=os.path.join( + config["cluster_log"], "uncollapse_reads_{sample}.log" + ), + log: + os.path.join(config["local_log"], "uncollapse_reads_{sample}.log"), + singularity: + "docker://zavolab/perl:5.28" + shell: + "(perl {input.script} \ + --suffix \ + --in {input.maps} \ + --out {output.maps} \ + ) &> {log}" + + +############################################################################### +### Convert SAM to BAM +############################################################################### + + +rule convert_to_bam: + input: + maps=os.path.join( + config["output_dir"], "{sample}", "uncollapsedMappings.sam" + ), + output: + maps=os.path.join( + config["output_dir"], "{sample}", "mappingsConverted.bam" + ), + params: + cluster_log=os.path.join( + config["cluster_log"], "convert_to_bam_{sample}.log" + ), + log: + 
os.path.join(config["local_log"], "convert_to_bam_{sample}.log"), + singularity: + "docker://zavolab/samtools:1.8" + shell: + "(samtools view -b {input.maps} > {output.maps}) &> {log}" + + +############################################################################### +### Sort by coordinate position +############################################################################### + + +rule sort_by_position: + input: + maps=os.path.join( + config["output_dir"], "{sample}", "mappingsConverted.bam" + ), + output: + maps=os.path.join( + config["output_dir"], + "{sample}", + "convertedSortedMappings_{sample}.bam", + ), + params: + cluster_log=os.path.join( + config["cluster_log"], "sort_by_position_{sample}.log" + ), + log: + os.path.join(config["local_log"], "sort_by_position_{sample}.log"), + singularity: + "docker://zavolab/samtools:1.8" + shell: + "(samtools sort {input.maps} > {output.maps}) &> {log}" + + +############################################################################### +### Create bam index +############################################################################### + + +rule index_bam: + input: + maps=os.path.join( + config["output_dir"], + "{sample}", + "convertedSortedMappings_{sample}.bam", + ), + output: + maps=os.path.join( + config["output_dir"], + "{sample}", + "convertedSortedMappings_{sample}.bam.bai", + ), + params: + cluster_log=os.path.join( + config["cluster_log"], "index_bam_{sample}.log" + ), + log: + os.path.join(config["local_log"], "index_bam_{sample}.log"), + singularity: + "docker://zavolab/samtools:1.8" + shell: + "(samtools index -b {input.maps} > {output.maps}) &> {log}" -- GitLab From c9a00a2155a0a1a957da8d77a6cfdd4ef899738b Mon Sep 17 00:00:00 2001 From: Iris Mestres Pascual <iris.mestrespascual@unibas.ch> Date: Tue, 17 Jan 2023 17:19:19 +0000 Subject: [PATCH 05/22] Snakefile of the "prepare" subworkflow --- workflow/rules/prepare.smk | 751 +++++++++++++++++++++++++++++++++++++ 1 file changed, 751 insertions(+) create mode 100644 workflow/rules/prepare.smk diff --git a/workflow/rules/prepare.smk b/workflow/rules/prepare.smk new file mode 100644 index 0000000..b254a90 --- /dev/null +++ b/workflow/rules/prepare.smk @@ -0,0 +1,751 @@ +############################################################################### +# (c) 2020 Paula Iborra, Zavolan Lab, Biozentrum, University of Basel +# (@) paula.iborradetoledo@unibas.ch / paula.iborra@alumni.esci.upf.edu +# +# Snakemake workflow to download and prepare the necessary files +# for smallRNA-seq related workflows. 
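+# Outputs collected by the "finish_prepare" rule below: segemehl genome and
+# transcriptome indices, exon annotations (BED), a SAM header for the
+# collapsed reads, and miRNA/isomiR annotations (BED).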
+# +############################################################################### +# +# USAGE: +# snakemake \ +# --snakefile="prepare.smk" \ +# --cores 4 \ +# --use-singularity \ +# --singularity-args "--bind $PWD/../" \ +# --printshellcmds \ +# --rerun-incomplete \ +# --verbose +# +################################################################################ +import os + +configfile: "config.yaml" + +# Rules that require internet connection for downloading files are included +# in the localrules +localrules: + finish_prepare, + genome_process, + filter_anno_gtf, + mirna_anno, + dict_chr, + + +############################################################################### +### Finish rule +############################################################################### + + +rule finish_prepare: + input: + idx_transcriptome=expand( + os.path.join( + config["output_dir"], + "{organism}", + "transcriptome_index_segemehl.idx", + ), + organism=config["organism"], + ), + idx_genome=expand( + os.path.join( + config["output_dir"], + "{organism}", + "genome_index_segemehl.idx", + ), + organism=config["organism"], + ), + exons=expand( + os.path.join( + config["output_dir"], + "{organism}", + "exons.bed", + ), + organism=config["organism"], + ), + header=expand( + os.path.join( + config["output_dir"], + "{organism}", + "headerOfCollapsedFasta.sam", + ), + organism=config["organism"], + ), + mirnafilt=expand( + os.path.join( + config["output_dir"], + "{organism}", + "mirna_filtered.bed", + ), + organism=config["organism"], + ), + isomirs=expand( + os.path.join( + config["output_dir"], + "{organism}", + "isomirs_annotation.bed", + ), + organism=config["organism"], + ), + + + +############################################################################### +### Download and process genome IDs +############################################################################### +rule genome_process: + input: + script=os.path.join(config["scripts_dir"], "genome_process.sh"), + output: + genome=os.path.join( + config["output_dir"], "{organism}", "genome.processed.fa" + ), + params: + url=lambda wildcards: config[wildcards.organism]["genome_url"], + dir_out=os.path.join(config["output_dir"], "{organism}"), + log: + os.path.join(config["local_log"], "{organism}", "genome_process.log"), + singularity: + "docker://zavolab/ubuntu:18.04" + shell: + "(bash {input.script} {params.dir_out} {log} {params.url})" + + +############################################################################### +### Download and filter gtf by transcript_level +############################################################################### + + +rule filter_anno_gtf: + input: + script=os.path.join(config["scripts_dir"], "filter_anno_gtf.sh"), + output: + gtf=os.path.join( + config["output_dir"], + "{organism}", + "gene_annotations.filtered.gtf", + ), + params: + url=lambda wildcards: config[wildcards.organism]["gtf_url"], + dir_out=os.path.join(config["output_dir"], "{organism}"), + log: + os.path.join(config["local_log"], "{organism}", "filter_anno_gtf.log"), + singularity: + "docker://zavolab/ubuntu:18.04" + shell: + "(bash {input.script} {params.dir_out} {log} {params.url}) &> {log}" + + +############################################################################### +### Extract transcriptome sequences in FASTA from genome. 
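+### (gffread assembles each transcript's spliced exon sequence from the
+### processed genome FASTA and the filtered GTF.)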
+############################################################################### + + +rule extract_transcriptome_seqs: + input: + genome=os.path.join( + config["output_dir"], "{organism}", "genome.processed.fa" + ), + gtf=os.path.join( + config["output_dir"], + "{organism}", + "gene_annotations.filtered.gtf", + ), + output: + fasta=os.path.join( + config["output_dir"], "{organism}", "transcriptome.fa" + ), + params: + cluster_log=os.path.join( + config["cluster_log"], + "{organism}", + "extract_transcriptome_seqs.log", + ), + log: + os.path.join( + config["local_log"], + "{organism}", + "extract_transcriptome_seqs.log", + ), + singularity: + "docker://zavolab/cufflinks:2.2.1" + shell: + "(gffread -w {output.fasta} -g {input.genome} {input.gtf}) &> {log}" + + +############################################################################### +### Trim transcript IDs from FASTA file +############################################################################### + + +rule trim_fasta: + input: + fasta=os.path.join( + config["output_dir"], "{organism}", "transcriptome.fa" + ), + script=os.path.join(config["scripts_dir"], "validation_fasta.py"), + output: + fasta=os.path.join( + config["output_dir"], "{organism}", "transcriptome_idtrim.fa" + ), + params: + cluster_log=os.path.join( + config["cluster_log"], "{organism}", "trim_fasta.log" + ), + log: + os.path.join(config["local_log"], "{organism}", "trim_fasta.log"), + singularity: + "docker://zavolab/ubuntu:18.04" + shell: + """(awk \ + -F" " \ + "/^>/ {{print \$1; next}} 1" \ + {input.fasta} \ + > {output.fasta} \ + ) &> {log}""" + + +############################################################################### +### Generate segemehl index for transcripts +############################################################################### + + +rule generate_segemehl_index_transcriptome: + input: + fasta=os.path.join( + config["output_dir"], "{organism}", "transcriptome_idtrim.fa" + ), + output: + idx=os.path.join( + config["output_dir"], + "{organism}", + "transcriptome_index_segemehl.idx", + ), + params: + cluster_log=os.path.join( + config["cluster_log"], + "{organism}", + "generate_segemehl_index_transcriptome.log", + ), + log: + os.path.join( + config["local_log"], + "{organism}", + "generate_segemehl_index_transcriptome.log", + ), + resources: + mem=10, + threads=8, + time=6, + singularity: + "docker://zavolab/segemehl:0.2.0" + shell: + "(segemehl.x -x {output.idx} -d {input.fasta}) &> {log}" + + +############################################################################### +### Generate segemehl index for genome +############################################################################### + + +rule generate_segemehl_index_genome: + input: + genome=os.path.join( + config["output_dir"], "{organism}", "genome.processed.fa" + ), + output: + idx=os.path.join( + config["output_dir"], "{organism}", "genome_index_segemehl.idx" + ), + params: + cluster_log=os.path.join( + config["cluster_log"], + "{organism}", + "generate_segemehl_index_genome.log", + ), + log: + os.path.join( + config["local_log"], + "{organism}", + "generate_segemehl_index_genome.log", + ), + resources: + mem=60, + threads=8, + time=6, + singularity: + "docker://zavolab/segemehl:0.2.0" + shell: + "(segemehl.x -x {output.idx} -d {input.genome}) &> {log}" + + +############################################################################### +### GTF file of exons (genomic coordinates) +############################################################################### + + +rule 
get_exons_gtf: + input: + gtf=os.path.join( + config["output_dir"], + "{organism}", + "gene_annotations.filtered.gtf", + ), + script=os.path.join(config["scripts_dir"], "get_lines_w_pattern.sh"), + output: + exons=os.path.join(config["output_dir"], "{organism}", "exons.gtf"), + params: + cluster_log=os.path.join( + config["cluster_log"], "{organism}", "get_exons_gtf.log" + ), + log: + os.path.join(config["local_log"], "{organism}", "get_exons_gtf.log"), + singularity: + "docker://zavolab/ubuntu:18.04" + shell: + "(bash \ + {input.script} \ + -f {input.gtf} \ + -c 3 \ + -p exon \ + -o {output.exons} \ + ) &> {log}" + + +############################################################################### +### Convert GTF file of exons to BED file +############################################################################### + + +rule gtftobed: + input: + exons=os.path.join(config["output_dir"], "{organism}", "exons.gtf"), + script=os.path.join(config["scripts_dir"], "gtf_exons_bed.1.1.2.R"), + output: + exons=os.path.join(config["output_dir"], "{organism}", "exons.bed"), + params: + cluster_log=os.path.join( + config["cluster_log"], "{organism}", "gtftobed.log" + ), + log: + os.path.join(config["local_log"], "{organism}", "gtftobed.log"), + singularity: + "docker://zavolab/r-zavolab:3.5.1" + shell: + "(Rscript \ + {input.script} \ + --gtf {input.exons} \ + -o {output.exons} \ + ) &> {log}" + + +############################################################################### +### Create header for SAM file +############################################################################### + + +rule create_header_genome: + input: + genome=os.path.join( + config["output_dir"], "{organism}", "genome.processed.fa" + ), + output: + header=os.path.join( + config["output_dir"], "{organism}", "headerOfCollapsedFasta.sam" + ), + params: + cluster_log=os.path.join( + config["cluster_log"], "{organism}", "create_header_genome.log" + ), + log: + os.path.join( + config["local_log"], "{organism}", "create_header_genome.log" + ), + singularity: + "docker://zavolab/samtools:1.8" + shell: + "(samtools dict -o {output.header} --uri=NA {input.genome}) &> {log}" + + +############################################################################### +### Download miRNA annotation +############################################################################### + + +rule mirna_anno: + input: + genome=os.path.join( + config["output_dir"], "{organism}", "genome.processed.fa" + ), + output: + anno=os.path.join( + config["output_dir"], "{organism}", "raw", "mirna.gff3" + ), + params: + anno=lambda wildcards: config[wildcards.organism]["mirna_url"], + cluster_log=os.path.join( + config["cluster_log"], "{organism}", "mirna_anno.log" + ), + log: + os.path.join(config["local_log"], "{organism}", "mirna_anno.log"), + singularity: + "docker://zavolab/ubuntu:18.04" + shell: + "(wget {params.anno} -O {output.anno}) &> {log}" + + +############################################################################### +### Download dictionary mapping chr +############################################################################### + + +rule dict_chr: + input: + genome=os.path.join( + config["output_dir"], "{organism}", "genome.processed.fa" + ), + output: + map_chr=os.path.join( + config["output_dir"], "{organism}", "UCSC2ensembl.txt" + ), + params: + map_chr=lambda wildcards: config[wildcards.organism]["map_chr_url"], + cluster_log=os.path.join( + config["cluster_log"], "{organism}", "dict_chr.log" + ), + log: + 
os.path.join(config["local_log"], "{organism}", "dict_chr.log"),
+    singularity:
+        "docker://zavolab/ubuntu:18.04"
+    shell:
+        "(wget {params.map_chr} -O {output.map_chr}) &> {log}"
+
+
+###############################################################################
+### Mapping chromosome names, UCSC <-> ENSEMBL
+###############################################################################
+
+
+rule map_chr_names:
+    input:
+        anno=os.path.join(
+            config["output_dir"], "{organism}", "raw", "mirna.gff3"
+        ),
+        script=os.path.join(config["scripts_dir"], "map_chromosomes.pl"),
+        map_chr=os.path.join(
+            config["output_dir"], "{organism}", "UCSC2ensembl.txt"
+        ),
+    output:
+        gff=os.path.join(
+            config["output_dir"], "{organism}", "mirna_chr_mapped.gff3"
+        ),
+    params:
+        cluster_log=os.path.join(
+            config["cluster_log"], "{organism}", "map_chr_names.log"
+        ),
+        column=lambda wildcards: config[wildcards.organism]["column"],
+        delimiter=lambda wildcards: config[wildcards.organism]["delimiter"],
+    log:
+        os.path.join(config["local_log"], "{organism}", "map_chr_names.log"),
+    singularity:
+        "docker://zavolab/perl:5.28"
+    shell:
+        "(perl {input.script} \
+        {input.anno} \
+        {params.column} \
+        {params.delimiter} \
+        {input.map_chr} \
+        {output.gff} \
+        ) &> {log}"
+
+
+###############################################################################
+### Filtering _1 miR IDs
+###############################################################################
+
+
+rule filter_mir_1_anno:
+    input:
+        gff=os.path.join(
+            config["output_dir"], "{organism}", "mirna_chr_mapped.gff3"
+        ),
+    output:
+        gff=os.path.join(
+            config["output_dir"], "{organism}", "mirna_filtered.gff3"
+        ),
+    params:
+        script=os.path.join(config["scripts_dir"], "filter_mir_1_anno.sh"),
+        cluster_log=os.path.join(
+            config["cluster_log"], "{organism}", "filter_mir_1_anno.log"
+        ),
+    log:
+        os.path.join(
+            config["local_log"], "{organism}", "filter_mir_1_anno.log"
+        ),
+    singularity:
+        "docker://zavolab/ubuntu:18.04"
+    shell:
+        "(bash {params.script} -f {input.gff} -o {output.gff}) &> {log}"
+
+
+###############################################################################
+### GFF to BED (improves intersect memory efficiency by allowing use of -sorted)
+###############################################################################
+
+
+rule gfftobed:
+    input:
+        gff=os.path.join(
+            config["output_dir"], "{organism}", "mirna_filtered.gff3"
+        ),
+    output:
+        bed=os.path.join(
+            config["output_dir"], "{organism}", "mirna_filtered.bed"
+        ),
+    params:
+        cluster_log=os.path.join(
+            config["cluster_log"], "{organism}", "gfftobed.log"
+        ),
+        out_dir=os.path.join(config["output_dir"]),
+    log:
+        os.path.join(config["local_log"], "{organism}", "gfftobed.log"),
+    singularity:
+        "docker://zavolab/bedops:2.4.35"
+    shell:
+        "(convert2bed -i gff < {input.gff} \
+        --sort-tmpdir={params.out_dir} \
+        > {output.bed} \
+        ) &> {log}"
+
+
+###############################################################################
+### Index genome fasta file
+###############################################################################
+
+
+rule create_index_fasta:
+    input:
+        genome=os.path.join(
+            config["output_dir"], "{organism}", "genome.processed.fa"
+        ),
+    output:
+        genome=os.path.join(
+            config["output_dir"], "{organism}", "genome.processed.fa.fai"
+        ),
+    params:
+        cluster_log=os.path.join(
+            config["cluster_log"], "{organism}", "create_index_fasta.log"
+        ),
+    log:
+        os.path.join(
+            config["local_log"], "{organism}", "create_index_fasta.log"
+        ),
+    singularity:
+        
"docker://zavolab/samtools:1.8" + shell: + "(samtools faidx {input.genome}) &> {log}" + + +############################################################################### +### Extract chromosome length +############################################################################### + + +rule extract_chr_len: + input: + genome=os.path.join( + config["output_dir"], "{organism}", "genome.processed.fa.fai" + ), + output: + chrsize=os.path.join( + config["output_dir"], "{organism}", "chr_size.txt" + ), + params: + cluster_log=os.path.join( + config["cluster_log"], "{organism}", "extract_chr_len.log" + ), + log: + os.path.join(config["local_log"], "{organism}", "extract_chr_len.log"), + singularity: + "docker://zavolab/ubuntu:18.04" + shell: + "(cut -f1,2 {input.genome} > {output.chrsize}) &> {log}" + + +############################################################################### +### Extract mature miRNA +############################################################################### + + +rule filter_mature_mirs: + input: + bed=os.path.join( + config["output_dir"], "{organism}", "mirna_filtered.bed" + ), + output: + bed=os.path.join( + config["output_dir"], "{organism}", "mirna_mature_filtered.bed" + ), + params: + cluster_log=os.path.join( + config["cluster_log"], "{organism}", "filter_mature_mirs.log" + ), + precursor="miRNA_primary_transcript", + log: + os.path.join( + config["local_log"], "{organism}", "filter_mature_mirs.log" + ), + singularity: + "docker://zavolab/ubuntu:18.04" + shell: + "(grep -v {params.precursor} {input.bed} > {output.bed}) &> {log}" + + +############################################################################### +### Create isomirs annotation file from mature miRNA +############################################################################### + + +rule iso_anno: + input: + bed=os.path.join( + config["output_dir"], "{organism}", "mirna_mature_filtered.bed" + ), + chrsize=os.path.join( + config["output_dir"], "{organism}", "chr_size.txt" + ), + output: + bed=os.path.join( + config["output_dir"], + "{organism}", + "iso_anno_5p{bp_5p}_3p{bp_3p}.bed", + ), + params: + cluster_log=os.path.join( + config["cluster_log"], + "{organism}", + "iso_anno_5p{bp_5p}_3p{bp_3p}.log", + ), + bp_5p=lambda wildcards: wildcards.bp_5p, + bp_3p=lambda wildcards: wildcards.bp_3p, + log: + os.path.join( + config["local_log"], + "{organism}", + "iso_anno_5p{bp_5p}_3p{bp_3p}.log", + ), + singularity: + "docker://zavolab/bedtools:2.28.0" + shell: + "(bedtools slop \ + -i {input.bed} \ + -g {input.chrsize} \ + -l {params.bp_5p} \ + -r {params.bp_3p} \ + > {output.bed} \ + ) &> {log}" + + +############################################################################### +### Change miRNA names to isomirs names +############################################################################### + + +rule iso_anno_rename: + input: + bed=os.path.join( + config["output_dir"], + "{organism}", + "iso_anno_5p{bp_5p}_3p{bp_3p}.bed", + ), + output: + bed=os.path.join( + config["output_dir"], + "{organism}", + "iso_anno_rename_5p{bp_5p}_3p{bp_3p}.bed", + ), + params: + cluster_log=os.path.join( + config["cluster_log"], + "{organism}", + "iso_anno_rename_5p{bp_5p}_3p{bp_3p}.log", + ), + bp_5p=lambda wildcards: wildcards.bp_5p, + bp_3p=lambda wildcards: wildcards.bp_3p, + log: + os.path.join( + config["local_log"], + "{organism}", + "iso_anno_rename_5p{bp_5p}_3p{bp_3p}.log", + ), + singularity: + "docker://zavolab/ubuntu:18.04" + shell: + "(sed \ + 's/;Derives/_5p{params.bp_5p}_3p{params.bp_3p};Derives/' \ 
+ {input.bed} \ + > {output.bed} \ + ) &> {log}" + + +############################################################################### +### Concatenate all isomirs annotation files +############################################################################### + + +rule iso_anno_concat: + input: + bed=lambda wildcards: expand( + os.path.join( + config["output_dir"], + "{organism}", + "iso_anno_rename_5p{bp_5p}_3p{bp_3p}.bed", + ), + organism=config["organism"], + bp_3p=config["bp_3p"], + bp_5p=config["bp_5p"], + ), + output: + bed=os.path.join( + config["output_dir"], "{organism}", "iso_anno_concat.bed" + ), + params: + cluster_log=os.path.join( + config["cluster_log"], "{organism}", "iso_anno_concat.log" + ), + prefix=os.path.join( + config["output_dir"], "{organism}", "iso_anno_rename" + ), + log: + os.path.join(config["local_log"], "{organism}", "iso_anno_concat.log"), + singularity: + "docker://zavolab/ubuntu:18.04" + shell: + "(cat {params.prefix}* > {output.bed}) &> {log}" + + +############################################################################### +### Remove non changing isomirs (5p0_3p0) +############################################################################### + + +rule iso_anno_final: + input: + bed=os.path.join( + config["output_dir"], "{organism}", "iso_anno_concat.bed" + ), + output: + bed=os.path.join( + config["output_dir"], "{organism}", "isomirs_annotation.bed" + ), + params: + cluster_log=os.path.join( + config["cluster_log"], "{organism}", "iso_anno_final.log" + ), + pattern="5p0_3p0", + log: + os.path.join(config["local_log"], "{organism}", "iso_anno_final.log"), + singularity: + "docker://zavolab/ubuntu:18.04" + shell: + "(grep -v '{params.pattern}' {input.bed} > {output.bed}) &> {log}" \ No newline at end of file -- GitLab From 2e3a966c12c7e853e3fd865d1087183ec0a40421 Mon Sep 17 00:00:00 2001 From: Iris Mestres Pascual <iris.mestrespascual@unibas.ch> Date: Tue, 17 Jan 2023 17:19:58 +0000 Subject: [PATCH 06/22] Snakefile of the "quantify" subworkflow --- workflow/rules/quantify.smk | 335 ++++++++++++++++++++++++++++++++++++ 1 file changed, 335 insertions(+) create mode 100644 workflow/rules/quantify.smk diff --git a/workflow/rules/quantify.smk b/workflow/rules/quantify.smk new file mode 100644 index 0000000..6eabb23 --- /dev/null +++ b/workflow/rules/quantify.smk @@ -0,0 +1,335 @@ +############################################################################### +# (c) 2020 Paula Iborra, Zavolan Lab, Biozentrum, University of Basel +# (@) paula.iborradetoledo@unibas.ch / paula.iborra@alumni.esci.upf.edu +# +# Pipeline to quantify miRNAs, including isomiRs, from miRNA-seq alignments. 
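+# Per-sample counts are merged into one table per miRNA type by the
+# "merge_tables" rule at the end of this file.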
+###############################################################################
+#
+# USAGE:
+# snakemake \
+#    --snakefile="quantify.smk" \
+#    --cores 4 \
+#    --use-singularity \
+#    --singularity-args "--bind $PWD/../" \
+#    --printshellcmds \
+#    --rerun-incomplete \
+#    --verbose
+#
+################################################################################
+
+import os
+
+configfile: "config.yaml"
+
+# Rules that require internet connection for downloading files are included
+# in the localrules
+localrules:
+    finish_quantify,
+
+
+###############################################################################
+### Finish rule
+###############################################################################
+
+
+rule finish_quantify:
+    input:
+        table1=expand(
+            os.path.join(
+                config["output_dir"],
+                "TABLES",
+                "counts.{mir}.tab",
+            ),
+            mir=config["mir_list"],
+        ),
+        table2=os.path.join(
+            config["output_dir"],
+            "TABLES",
+            "counts.isomirs.tab",
+        ),
+
+###############################################################################
+### BAM to BED
+###############################################################################
+
+
+rule bamtobed:
+    input:
+        alignment=os.path.join(
+            config["quantify_input_dir"],
+            "{sample}",
+            "convertedSortedMappings_{sample}.bam",
+        ),
+    output:
+        alignment=os.path.join(
+            config["output_dir"], "{sample}", "alignments.bed12"
+        ),
+    params:
+        cluster_log=os.path.join(
+            config["cluster_log"], "bamtobed_{sample}.log"
+        ),
+    log:
+        os.path.join(config["local_log"], "bamtobed_{sample}.log"),
+    singularity:
+        "docker://zavolab/bedtools:2.27.0"
+    shell:
+        "(bedtools bamtobed \
+        -bed12 \
+        -tag NH \
+        -i {input.alignment} \
+        > {output.alignment} \
+        ) &> {log}"
+
+
+###############################################################################
+### Sort alignments
+###############################################################################
+
+
+rule sort_alignments:
+    input:
+        alignment=os.path.join(
+            config["output_dir"], "{sample}", "alignments.bed12"
+        ),
+    output:
+        alignment=os.path.join(
+            config["output_dir"], "{sample}", "sorted.alignments.bed12"
+        ),
+    params:
+        cluster_log=os.path.join(
+            config["cluster_log"], "sortalignment_{sample}.log"
+        ),
+    log:
+        os.path.join(config["local_log"], "sortalignment_{sample}.log"),
+    resources:
+        mem=4,
+        threads=8,
+    singularity:
+        "docker://zavolab/ubuntu:18.04"
+    shell:
+        "(sort \
+        -k1,1 \
+        -k2,2n \
+        {input.alignment} \
+        > {output.alignment} \
+        ) &> {log}"
+
+
+###############################################################################
+### miRNAs intersection
+###############################################################################
+
+
+rule intersect_mirna:
+    input:
+        alignment=os.path.join(
+            config["output_dir"], "{sample}", "sorted.alignments.bed12"
+        ),
+        mirna=config["mirnas_anno"],
+    output:
+        intersect=os.path.join(
+            config["output_dir"], "{sample}", "intersect_mirna.bed"
+        ),
+    params:
+        cluster_log=os.path.join(
+            config["cluster_log"], "intersection_mirna_{sample}.log"
+        ),
+    log:
+        os.path.join(config["local_log"], "intersection_mirna_{sample}.log"),
+    singularity:
+        "docker://zavolab/bedtools:2.27.0"
+    shell:
+        "(bedtools intersect \
+        -wao \
+        -s \
+        -F 1 \
+        -sorted \
+        -b {input.alignment} \
+        -a {input.mirna} \
+        > {output.intersect} \
+        ) &> {log}"
+
+
+###############################################################################
+### isomiRs intersection
+###############################################################################
+
+
+# rule 
intersect_isomirs: +# input: +# alignment=os.path.join( +# config["output_dir"], "{sample}", "sorted.alignments.bed12" +# ), +# isomirs=config["isomirs_anno"], +# output: +# intersect=os.path.join( +# config["output_dir"], "{sample}", "intersect_isomirs.bed" +# ), +# params: +# cluster_log=os.path.join( +# config["cluster_log"], "intersection_isomirs_{sample}.log" +# ), +# log: +# os.path.join(config["local_log"], "intersection_isomirs_{sample}.log"), +# singularity: +# "docker://zavolab/bedtools:2.27.0" +# shell: +# "(bedtools intersect \ +# -wao \ +# -s \ +# -F 1 \ +# -sorted \ +# -b {input.alignment} \ +# -a {input.isomirs} \ +# > {output.intersect} \ +# ) &> {log}" + + +############################################################################### +### miRNAs counting table - miRNA +############################################################################### + + +rule quant_mirna: + input: + intersect=os.path.join( + config["output_dir"], "{sample}", "intersect_mirna.bed" + ), + script=os.path.join(config["scripts_dir"], "mirna_quantification.py"), + output: + table=os.path.join( + config["output_dir"], "TABLES", "miRNA_counts_{sample}" + ), + params: + cluster_log=os.path.join( + config["cluster_log"], "quant_mirna_miRNA_{sample}.log" + ), + prefix=os.path.join( + config["output_dir"], "TABLES", "miRNA_counts_{sample}" + ), + log: + os.path.join(config["local_log"], "quant_mirna_miRNA_{sample}.log"), + singularity: + "docker://zavolab/python:3.6.5" + shell: + "(python \ + {input.script} \ + -i {input.intersect} \ + --uniq=miRNA \ + -p={params.prefix} \ + ) &> {log}" + + +############################################################################### +### miRNAs counting table - miRNA_primary +############################################################################### + + +rule quant_mirna_pri: + input: + intersect=os.path.join( + config["output_dir"], "{sample}", "intersect_mirna.bed" + ), + script=os.path.join(config["scripts_dir"], "mirna_quantification.py"), + output: + table=os.path.join( + config["output_dir"], + "TABLES", + "miRNA_primary_transcript_counts_{sample}", + ), + params: + cluster_log=os.path.join( + config["cluster_log"], + "quant_mirna_miRNA_primary_transcript_{sample}.log", + ), + prefix=os.path.join( + config["output_dir"], + "TABLES", + "miRNA_primary_transcript_counts_{sample}", + ), + log: + os.path.join( + config["local_log"], + "quant_mirna_miRNA_primary_transcript_{sample}.log", + ), + singularity: + "docker://zavolab/python:3.6.5" + shell: + "(python \ + {input.script} \ + -i {input.intersect} \ + --uniq=miRNA_primary_transcript \ + -p={params.prefix} \ + ) &> {log}" + + +############################################################################### +### isomiRs counting table +############################################################################### + + +# rule quant_isomirs: +# input: +# intersect=os.path.join( +# config["output_dir"], "{sample}", "intersect_isomirs.bed" +# ), +# script=os.path.join(config["scripts_dir"], "mirna_quantification.py"), +# output: +# table=os.path.join( +# config["output_dir"], "TABLES", "isomirs_counts_{sample}" +# ), +# params: +# cluster_log=os.path.join( +# config["cluster_log"], "quant_isomirs_{sample}.log" +# ), +# prefix=os.path.join( +# config["output_dir"], "TABLES", "isomirs_counts_{sample}" +# ), +# log: +# os.path.join(config["local_log"], "quant_isomirs_{sample}.log"), +# singularity: +# "docker://zavolab/python:3.6.5" +# shell: +# "(python \ +# {input.script} \ +# -i {input.intersect} \ +# 
--uniq=miRNA \
+#        -p={params.prefix} \
+#        ) &> {log}"
+
+
+###############################################################################
+### Merge counting tables for all samples by mature/primary/isomirs forms.
+###############################################################################
+
+
+rule merge_tables:
+    input:
+        table=expand(
+            os.path.join(
+                config["output_dir"], "TABLES", "{mir}_counts_{sample}"
+            ),
+            sample=config["sample"],
+            mir=config["mir_list"],
+        ),
+        script=os.path.join(config["scripts_dir"], "merge_tables.R"),
+    output:
+        table=os.path.join(config["output_dir"], "TABLES", "counts.{mir}.tab"),
+    params:
+        cluster_log=os.path.join(
+            config["cluster_log"], "merge_tables_{mir}.log"
+        ),
+        prefix="{mir}_counts_",
+        input_dir=os.path.join(config["output_dir"], "TABLES"),
+    log:
+        os.path.join(config["local_log"], "merge_tables_{mir}.log"),
+    singularity:
+        "docker://zavolab/r-tidyverse:3.5.3"
+    shell:
+        "(Rscript \
+        {input.script} \
+        --input_dir {params.input_dir} \
+        --output_file {output.table} \
+        --prefix {params.prefix} \
+        --verbose \
+        ) &> {log}"
\ No newline at end of file
-- 
GitLab


From ed1083842770f7524d9066bb4f9b0c908cd59d88 Mon Sep 17 00:00:00 2001
From: Iris Mestres Pascual <iris.mestrespascual@unibas.ch>
Date: Tue, 17 Jan 2023 17:20:45 +0000
Subject: [PATCH 07/22] Main snakefile (merged workflows)

---
 workflow/Snakefile | 108 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 108 insertions(+)
 create mode 100644 workflow/Snakefile

diff --git a/workflow/Snakefile b/workflow/Snakefile
new file mode 100644
index 0000000..9467cd3
--- /dev/null
+++ b/workflow/Snakefile
@@ -0,0 +1,108 @@
+###############################################################################
+# (c) 2020 Paula Iborra, Zavolan Lab, Biozentrum, University of Basel
+# (@) paula.iborradetoledo@unibas.ch / paula.iborra@alumni.esci.upf.edu
+#
+# Snakemake workflow to:
+# - download and prepare the necessary files for smallRNA-seq related workflows.
+# - map small RNA-seq reads (e.g. from miRNA sequencing libraries)
+# - quantify miRNAs, including isomiRs, from miRNA-seq alignments.
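+#   (The three steps are chained via the "include:" statements below.)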
+#
+###############################################################################
+#
+# USAGE:
+# snakemake \
+#    --use-singularity \
+#    --singularity-args "--bind $PWD/../" \
+#    --cores 4 \
+#    --printshellcmds \
+#    --rerun-incomplete \
+#    --verbose
+#
+################################################################################
+
+import os
+
+###############################################################################
+### Including subworkflows
+###############################################################################
+
+include: os.path.join("rules", "prepare.smk")
+include: os.path.join("rules", "map.smk")
+include: os.path.join("rules", "quantify.smk")
+
+###############################################################################
+### Finish rule
+###############################################################################
+
+
+rule finish:
+    input:
+        idx_transcriptome=expand(
+            os.path.join(
+                config["output_dir"],
+                "{organism}",
+                "transcriptome_index_segemehl.idx",
+            ),
+            organism=config["organism"],
+        ),
+        idx_genome=expand(
+            os.path.join(
+                config["output_dir"],
+                "{organism}",
+                "genome_index_segemehl.idx",
+            ),
+            organism=config["organism"],
+        ),
+        exons=expand(
+            os.path.join(
+                config["output_dir"],
+                "{organism}",
+                "exons.bed",
+            ),
+            organism=config["organism"],
+        ),
+        header=expand(
+            os.path.join(
+                config["output_dir"],
+                "{organism}",
+                "headerOfCollapsedFasta.sam",
+            ),
+            organism=config["organism"],
+        ),
+        mirnafilt=expand(
+            os.path.join(
+                config["output_dir"],
+                "{organism}",
+                "mirna_filtered.bed",
+            ),
+            organism=config["organism"],
+        ),
+        isomirs=expand(
+            os.path.join(
+                config["output_dir"],
+                "{organism}",
+                "isomirs_annotation.bed",
+            ),
+            organism=config["organism"],
+        ),
+        maps=expand(
+            os.path.join(
+                config["output_dir"],
+                "{sample}",
+                "convertedSortedMappings_{sample}.bam.bai",
+            ),
+            sample=config["sample"],
+        ),
+        table1=expand(
+            os.path.join(
+                config["output_dir"],
+                "TABLES",
+                "counts.{mir}.tab"
+            ),
+            mir=config["mir_list"],
+        ),
+        table2=os.path.join(
+            config["output_dir"],
+            "TABLES",
+            "counts.isomirs.tab"
+        ),
\ No newline at end of file
-- 
GitLab


From 47e2399fa403cb6e3286b2c064d36b1580924771 Mon Sep 17 00:00:00 2001
From: Iris Mestres Pascual <iris.mestrespascual@unibas.ch>
Date: Tue, 17 Jan 2023 17:21:53 +0000
Subject: [PATCH 08/22] Template of the config.yaml

---
 workflow/config.yaml | 106 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 106 insertions(+)
 create mode 100644 workflow/config.yaml

diff --git a/workflow/config.yaml b/workflow/config.yaml
new file mode 100644
index 0000000..25e52d7
--- /dev/null
+++ b/workflow/config.yaml
@@ -0,0 +1,106 @@
+---
+#### GLOBAL PARAMETERS #####
+
+# Directories
+# Usually there is no need to change these
+map_input_dir: "path/to/map_input_directory" # For the mapping workflow
+quantify_input_dir: "path/to/quantify_input_directory" # For the quantify workflow
+output_dir: "results"
+scripts_dir: "../scripts"
+local_log: "logs/local"
+cluster_log: "logs/cluster"
+
+# Inputs information
+sample: ["sample_1", "sample_2"] # put all sample names, separated by comma
+
+#######################################################################################################
+####
+#### PREPARE PARAMETERS
+####
+#######################################################################################################
+
+# Isomirs annotation file
+# Number of base pairs to add/subtract from 5' (start) and 3' (end) coordinates.
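+# For example, bp_5p: [-1, 0, 1] with bp_3p: [0] would generate one isomiR
+# annotation per combination, with the start coordinate contracted or extended
+# by one nt and the end coordinate left unchanged (coordinates are expanded
+# with "bedtools slop" in the "iso_anno" rule of the prepare subworkflow).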
+bp_5p: [0] # array of numbers, e.g., [-2,-1,0,+1], to include 2 upstream and 1 downstream nts +bp_3p: [0] # array of numbers, e.g., [-2,-1,0,+1], to include 2 upstream and 1 downstream nts + +# List of inputs +organism: ["org/pre"] # e.g., ["homo_sapiens/GRCh38.100", "mus_musculus/GRCm37.98"] +# this string specifies a path, and the "/" is important for this +# "pre" specifies the assembly version + + +#### PARAMETERS SPECIFIC TO INPUTS ##### + +org/pre: # One section for each list item in "organism"; entry should match precisely what +# is in the "organism" section above, one entry per list item above, omitting the "" + # URLs to genome, gene & miRNA annotations + genome_url: # FTP/HTTP URL to gzipped genome in FASTA format, Ensembl style + # e.g. "ftp://ftp.ensembl.org/pub/release-106/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_sm.primary_assembly.fa.gz" + gtf_url: # FTP/HTTP URL to gzipped gene annotations in GTF format, Ensembl style + # e.g. "ftp://ftp.ensembl.org/pub/release-106/gtf/homo_sapiens/Homo_sapiens.GRCh38.106.chr.gtf.gz" + mirna_url: # FTP/HTTP URL to unzipped microRNA annotations in GFF format, miRBase style + # e.g. "https://www.mirbase.org/ftp/CURRENT/genomes/hsa.gff3" + + # Chromosome name mappings between UCSC <-> Ensembl + # Other organisms available at: https://github.com/dpryan79/ChromosomeMappings + map_chr_url: # FTP/HTTP URL to mapping table + # e.g. "https://raw.githubusercontent.com/dpryan79/ChromosomeMappings/master/GRCh38_UCSC2ensembl.txt" + # Chromosome name mapping parameters: + column: 1 # Column number from input file where to change chromosome name + delimiter: "TAB" # Delimiter of the input file + +####################################################################################################### +#### +#### MAP PARAMETERS +#### +####################################################################################################### + +# Resources: genome, transcriptome, genes, miRs +# All of these are produced by the "prepare" workflow +genome: "path/to/genome.processed.fa" +gtf: "path/to/gene_annotations.filtered.gtf" +transcriptome: "path/to/transcriptome_idtrim.fa" +transcriptome_index_segemehl: "path/to/transcriptome_index_segemehl.idx" +genome_index_segemehl: "path/to/genome_index_segemehl.idx" +exons: "path/to/exons.bed" +header_of_collapsed_fasta: "path/to/headerOfCollapsedFasta.sam" + +# Tool parameters: quality filter +q_value: 10 # Q (Phred) score; minimum quality score to keep +p_value: 50 # minimum % of bases that must have Q quality + +# Tool parameters: adapter removal +error_rate: 0.1 # fraction of allowed errors +minimum_length: 15 # discard processed reads shorter than the indicated length +overlap: 3 # minimum overlap length of adapter and read to trim the bases +max_n: 0 # discard reads containing more than the indicated number of N bases + +# Tool parameters: mapping +max_length_reads: 30 # maximum length of processed reads to map with oligomap +nh: 100 # discard reads with more mappings than the indicated number + + +#### PARAMETERS SPECIFIC TO INPUTS #### + +sample_1: # one section per list item in "sample"; names have to match + adapter: "XXXXXXXXXXXXXXXXXXXX" # 3' adapter sequence to trim + format: "fa" # file format; currently supported: "fa" + + +####################################################################################################### +#### +#### QUANTIFY PARAMETERS +#### +####################################################################################################### + + +# Types of miRNAs to 
quantify +# Remove miRNA types you are not interested in +mir_list: ["miRNA", "miRNA_primary_transcript", "isomirs"] + +# Resources: miR annotations, chromosome name mappings +# All of these are produced by the "prepare" workflow +mirnas_anno: "path/to/mirna_filtered.bed" +isomirs_anno: "path/to/isomirs_annotation.bed" +... \ No newline at end of file -- GitLab From 1fc05e9b0b8e07d78bc6b6b87100647f03eac033 Mon Sep 17 00:00:00 2001 From: Iris Mestres Pascual <iris.mestrespascual@unibas.ch> Date: Tue, 17 Jan 2023 17:24:18 +0000 Subject: [PATCH 09/22] Delete Snakefile --- workflow/map/Snakefile | 1007 ---------------------------------------- 1 file changed, 1007 deletions(-) delete mode 100644 workflow/map/Snakefile diff --git a/workflow/map/Snakefile b/workflow/map/Snakefile deleted file mode 100644 index aace1f1..0000000 --- a/workflow/map/Snakefile +++ /dev/null @@ -1,1007 +0,0 @@ -############################################################################### -# (c) 2020 Paula Iborra, Zavolan Lab, Biozentrum, University of Basel -# (@) paula.iborradetoledo@unibas.ch / paula.iborra@alumni.esci.upf.edu -# -# Workflow to map small RNA-seq reads (e.g. from miRNA sequencing libraries). -############################################################################### - -import os - - -# Rules that require internet connection for downloading files are included -# in the localrules -localrules: - finish, - - -############################################################################### -### Finish rule -############################################################################### - - -rule finish: - input: - maps=expand( - os.path.join( - config["output_dir"], - "{sample}", - "convertedSortedMappings_{sample}.bam.bai", - ), - sample=config["sample"], - ), - - -############################################################################### -### Uncompress fastq files -############################################################################### - - -rule uncompress_zipped_files: - input: - reads=os.path.join(config["input_dir"], "{sample}.{format}.gz"), - output: - reads=os.path.join( - config["output_dir"], "{sample}", "{format}", "reads.{format}" - ), - params: - cluster_log=os.path.join( - config["cluster_log"], - "uncompress_zipped_files_{sample}_{format}.log", - ), - log: - os.path.join( - config["local_log"], - "uncompress_zipped_files_{sample}_{format}.log", - ), - singularity: - "docker://zavolab/ubuntu:18.04" - shell: - "(zcat {input.reads} > {output.reads}) &> {log}" - - -############################################################################### -### Quality filter -############################################################################### - - -rule fastq_quality_filter: - input: - reads=os.path.join( - config["output_dir"], "{sample}", "fastq", "reads.fastq" - ), - output: - reads=os.path.join( - config["output_dir"], "{sample}", "fastq", "filtered_reads.fastq" - ), - params: - cluster_log=os.path.join( - config["cluster_log"], "fastq_quality_filter_{sample}.log" - ), - p=config["p_value"], - q=config["q_value"], - log: - os.path.join(config["local_log"], "fastq_quality_filter_{sample}.log"), - singularity: - "docker://zavolab/fastx:0.0.14" - shell: - "(fastq_quality_filter \ - -v \ - -q {params.q} \ - -p {params.p} \ - -i {input.reads} \ - > {output.reads} \ - ) &> {log}" - - -############################################################################### -### Convert fastq to fasta -############################################################################### - - 
-rule fastq_to_fasta: - input: - reads=os.path.join( - config["output_dir"], "{sample}", "fastq", "filtered_reads.fastq" - ), - output: - reads=os.path.join( - config["output_dir"], "{sample}", "fastq", "reads.fa" - ), - params: - cluster_log=os.path.join( - config["cluster_log"], "fastq_to_fasta_{sample}.log" - ), - log: - os.path.join(config["local_log"], "fastq_to_fasta_{sample}.log"), - singularity: - "docker://zavolab/fastx:0.0.14" - shell: - "(fastq_to_fasta -r -n -i {input.reads} > {output.reads}) &> {log}" - - -############################################################################### -### Format fasta file -############################################################################### - - -rule fasta_formatter: - input: - reads=lambda wildcards: os.path.join( - config["output_dir"], - wildcards.sample, - config[wildcards.sample]["format"], - "reads.fa", - ), - output: - reads=os.path.join(config["output_dir"], "{sample}", "formatted.fasta"), - params: - cluster_log=os.path.join( - config["cluster_log"], "fasta_formatter_{sample}.log" - ), - log: - os.path.join(config["local_log"], "fasta_formatter_{sample}.log"), - singularity: - "docker://zavolab/fastx:0.0.14" - shell: - "(fasta_formatter -w 0 -i {input.reads} > {output.reads}) &> {log}" - - -############################################################################### -### Remove adapters -############################################################################### - - -rule cutadapt: - input: - reads=os.path.join(config["output_dir"], "{sample}", "formatted.fasta"), - output: - reads=os.path.join(config["output_dir"], "{sample}", "cut.fasta"), - params: - cluster_log=os.path.join( - config["cluster_log"], "cutadapt_{sample}.log" - ), - adapter=lambda wildcards: config[wildcards.sample]["adapter"], - error_rate=config["error_rate"], - minimum_length=config["minimum_length"], - overlap=config["overlap"], - max_n=config["max_n"], - log: - os.path.join(config["local_log"], "cutadapt_{sample}.log"), - resources: - threads=8, - singularity: - "docker://zavolab/cutadapt:1.16" - shell: - "(cutadapt \ - -a {params.adapter} \ - --error-rate {params.error_rate} \ - --minimum-length {params.minimum_length} \ - --overlap {params.overlap} \ - --trim-n \ - --max-n {params.max_n} \ - --cores {resources.threads} \ - -o {output.reads} {input.reads}) &> {log}" - - -############################################################################### -### Collapse identical reads -############################################################################### - - -rule fastx_collapser: - input: - reads=os.path.join(config["output_dir"], "{sample}", "cut.fasta"), - output: - reads=os.path.join(config["output_dir"], "{sample}", "collapsed.fasta"), - params: - cluster_log=os.path.join( - config["cluster_log"], "fastx_collapser_{sample}.log" - ), - log: - os.path.join(config["local_log"], "fastx_collapser_{sample}.log"), - singularity: - "docker://zavolab/fastx:0.0.14" - shell: - "(fastx_collapser -i {input.reads} > {output.reads}) &> {log}" - - -############################################################################### -### Segemehl genome mapping -############################################################################### - - -rule mapping_genome_segemehl: - input: - reads=os.path.join(config["output_dir"], "{sample}", "collapsed.fasta"), - genome=config["genome"], - genome_index_segemehl=config["genome_index_segemehl"], - output: - gmap=os.path.join( - config["output_dir"], "{sample}", "segemehlGenome_map.sam" - ), - params: - 
cluster_log=os.path.join( - config["cluster_log"], "mapping_genome_segemehl_{sample}.log" - ), - log: - os.path.join( - config["local_log"], "mapping_genome_segemehl_{sample}.log" - ), - resources: - mem=50, - time=12, - threads=8, - singularity: - "docker://zavolab/segemehl:0.2.0" - shell: - "(segemehl.x \ - -i {input.genome_index_segemehl} \ - -d {input.genome} \ - -t {threads} \ - -q {input.reads} \ - -outfile {output.gmap} \ - ) &> {log}" - - -############################################################################### -### Segemehl transcriptome mapping -############################################################################### - - -rule mapping_transcriptome_segemehl: - input: - reads=os.path.join(config["output_dir"], "{sample}", "collapsed.fasta"), - transcriptome=config["transcriptome"], - transcriptome_index_segemehl=config["transcriptome_index_segemehl"], - output: - tmap=os.path.join( - config["output_dir"], "{sample}", "segemehlTranscriptome_map.sam" - ), - params: - cluster_log=os.path.join( - config["cluster_log"], - "mapping_transcriptome_segemehl_{sample}.log", - ), - log: - os.path.join( - config["local_log"], "mapping_transcriptome_segemehl_{sample}.log" - ), - resources: - mem=10, - time=12, - threads=8, - singularity: - "docker://zavolab/segemehl:0.2.0" - shell: - "(segemehl.x \ - -i {input.transcriptome_index_segemehl} \ - -d {input.transcriptome} \ - -t {threads} \ - -q {input.reads} \ - -outfile {output.tmap} \ - ) &> {log}" - - -############################################################################### -### Filter fasta for oligomap mapping -############################################################################### - - -rule filter_fasta_for_oligomap: - input: - reads=os.path.join(config["output_dir"], "{sample}", "collapsed.fasta"), - script=os.path.join(config["scripts_dir"], "validation_fasta.py"), - output: - reads=os.path.join( - config["output_dir"], "{sample}", "filtered_for_oligomap.fasta" - ), - params: - cluster_log=os.path.join( - config["cluster_log"], "filter_fasta_for_oligomap_{sample}.log" - ), - max_length_reads=config["max_length_reads"], - log: - os.path.join( - config["local_log"], "filter_fasta_for_oligomap_{sample}.log" - ), - singularity: - "docker://zavolab/python:3.6.5" - shell: - "(python {input.script} \ - -r {params.max_length_reads} \ - -i {input.reads} \ - -o {output.reads} \ - ) &> {log}" - - -############################################################################### -### Oligomap genome mapping -############################################################################### - - -rule mapping_genome_oligomap: - input: - reads=os.path.join( - config["output_dir"], "{sample}", "filtered_for_oligomap.fasta" - ), - target=config["genome"], - output: - gmap=os.path.join( - config["output_dir"], "{sample}", "oligoGenome_map.fa" - ), - report=os.path.join( - config["output_dir"], "{sample}", "oligoGenome_report.txt" - ), - params: - cluster_log=os.path.join( - config["cluster_log"], "mapping_genome_oligomap_{sample}.log" - ), - log: - os.path.join( - config["local_log"], "mapping_genome_oligomap_{sample}.log" - ), - resources: - mem=50, - time=6, - threads=8, - singularity: - "docker://zavolab/oligomap:1.0" - shell: - "(oligomap \ - {input.target} \ - {input.reads} \ - -r {output.report} \ - > {output.gmap} \ - ) &> {log}" - - -############################################################################### -### Oligomap genome sorting -############################################################################### - - 
-rule sort_genome_oligomap: - input: - tmap=os.path.join( - config["output_dir"], "{sample}", "oligoGenome_map.fa" - ), - report=os.path.join( - config["output_dir"], "{sample}", "oligoGenome_report.txt" - ), - script=os.path.join(config["scripts_dir"], "blocksort.sh"), - output: - sort=os.path.join( - config["output_dir"], "{sample}", "oligoGenome_sorted.fa" - ), - params: - cluster_log=os.path.join( - config["cluster_log"], "sorting_genome_oligomap_{sample}.log" - ), - log: - os.path.join( - config["local_log"], "sorting_genome_oligomap_{sample}.log" - ), - resources: - threads=8, - time=6, - shell: - "(bash {input.script} \ - {input.tmap} \ - {resources.threads} \ - {output.sort} \ - ) &> {log}" - - -############################################################################### -### Oligomap genome mapping output to SAM -############################################################################### - - -rule oligomap_genome_toSAM: - input: - report=os.path.join( - config["output_dir"], "{sample}", "oligoGenome_report.txt" - ), - sort=os.path.join( - config["output_dir"], "{sample}", "oligoGenome_sorted.fa" - ), - script=os.path.join( - config["scripts_dir"], "oligomapOutputToSam_nhfiltered.py" - ), - output: - gmap=os.path.join( - config["output_dir"], "{sample}", "oligoGenome_converted.sam" - ), - params: - cluster_log=os.path.join( - config["cluster_log"], "oligomap_genome_toSAM_{sample}.log" - ), - nh=config["nh"], - log: - os.path.join(config["local_log"], "oligomap_genome_toSAM_{sample}.log"), - resources: - time=1, - queue=1, - singularity: - "docker://zavolab/python:3.6.5" - shell: - "(python {input.script} \ - -i {input.sort} \ - -n {params.nh} \ - > {output.gmap}) &> {log}" - - -############################################################################### -### Oligomap trancriptome mapping -############################################################################### - - -rule mapping_transcriptome_oligomap: - input: - reads=os.path.join( - config["output_dir"], "{sample}", "filtered_for_oligomap.fasta" - ), - target=config["transcriptome"], - output: - tmap=os.path.join( - config["output_dir"], "{sample}", "oligoTranscriptome_map.fa" - ), - report=os.path.join( - config["output_dir"], "{sample}", "oligoTranscriptome_report.txt" - ), - params: - cluster_log=os.path.join( - config["cluster_log"], - "mapping_transcriptome_oligomap_{sample}.log", - ), - log: - os.path.join( - config["local_log"], "mapping_transcriptome_oligomap_{sample}.log" - ), - resources: - mem=10, - time=6, - threads=8, - singularity: - "docker://zavolab/oligomap:1.0" - shell: - "(oligomap \ - {input.target} \ - {input.reads} \ - -s \ - -r {output.report} \ - > {output.tmap} \ - ) &> {log}" - - -############################################################################### -### Oligomap trancriptome sorting -############################################################################### - - -rule sort_transcriptome_oligomap: - input: - tmap=os.path.join( - config["output_dir"], "{sample}", "oligoTranscriptome_map.fa" - ), - report=os.path.join( - config["output_dir"], "{sample}", "oligoTranscriptome_report.txt" - ), - script=os.path.join(config["scripts_dir"], "blocksort.sh"), - output: - sort=os.path.join( - config["output_dir"], "{sample}", "oligoTranscriptome_sorted.fa" - ), - params: - cluster_log=os.path.join( - config["cluster_log"], - "sorting_transcriptome_oligomap_{sample}.log", - ), - log: - os.path.join( - config["local_log"], "sorting_transcriptome_oligomap_{sample}.log" - ), - resources: - 
threads=8, - shell: - "(bash {input.script} \ - {input.tmap} \ - {resources.threads} \ - {output.sort} \ - ) &> {log}" - - -############################################################################### -### Oligomap transcriptome mapping ouput to SAM -############################################################################### - - -rule oligomap_transcriptome_toSAM: - input: - report=os.path.join( - config["output_dir"], "{sample}", "oligoTranscriptome_report.txt" - ), - sort=os.path.join( - config["output_dir"], "{sample}", "oligoTranscriptome_sorted.fa" - ), - script=os.path.join( - config["scripts_dir"], "oligomapOutputToSam_nhfiltered.py" - ), - output: - tmap=os.path.join( - config["output_dir"], - "{sample}", - "oligoTranscriptome_converted.sam", - ), - params: - cluster_log=os.path.join( - config["cluster_log"], "oligomap_transcriptome_toSAM_{sample}.log" - ), - nh=config["nh"], - log: - os.path.join( - config["local_log"], "oligomap_transcriptome_toSAM_{sample}.log" - ), - singularity: - "docker://zavolab/python:3.6.5" - shell: - "(python {input.script} \ - -i {input.sort} \ - -n {params.nh} \ - > {output.tmap} \ - ) &> {log}" - - -############################################################################### -### Merge genome mappings -############################################################################### - - -rule merge_genome_maps: - input: - gmap1=os.path.join( - config["output_dir"], "{sample}", "segemehlGenome_map.sam" - ), - gmap2=os.path.join( - config["output_dir"], "{sample}", "oligoGenome_converted.sam" - ), - output: - gmaps=os.path.join( - config["output_dir"], "{sample}", "GenomeMappings.sam" - ), - params: - cluster_log=os.path.join( - config["cluster_log"], "merge_genome_maps_{sample}.log" - ), - log: - os.path.join(config["local_log"], "merge_genome_maps_{sample}.log"), - singularity: - "docker://zavolab/ubuntu:18.04" - shell: - "(cat {input.gmap1} {input.gmap2} > {output.gmaps}) &> {log}" - - -############################################################################### -### Merge trancriptome mappings -############################################################################### - - -rule merge_transcriptome_maps: - input: - tmap1=os.path.join( - config["output_dir"], "{sample}", "segemehlTranscriptome_map.sam" - ), - tmap2=os.path.join( - config["output_dir"], - "{sample}", - "oligoTranscriptome_converted.sam", - ), - output: - tmaps=os.path.join( - config["output_dir"], "{sample}", "TranscriptomeMappings.sam" - ), - params: - cluster_log=os.path.join( - config["cluster_log"], "merge_transcriptome_maps_{sample}.log" - ), - log: - os.path.join( - config["local_log"], "merge_transcriptome_maps_{sample}.log" - ), - singularity: - "docker://zavolab/ubuntu:18.04" - shell: - "(cat {input.tmap1} {input.tmap2} > {output.tmaps}) &> {log}" - - -############################################################################### -### Filter NH genome -############################################################################### - - -rule nh_filter_genome: - input: - gmaps=os.path.join( - config["output_dir"], "{sample}", "GenomeMappings.sam" - ), - script=os.path.join(config["scripts_dir"], "nh_filter.py"), - output: - gmaps=os.path.join( - config["output_dir"], "{sample}", "nhfiltered_GenomeMappings.sam" - ), - params: - cluster_log=os.path.join( - config["cluster_log"], "nh_filter_genome_{sample}.log" - ), - nh=config["nh"], - log: - os.path.join(config["local_log"], "nh_filter_genome_{sample}.log"), - singularity: - "docker://zavolab/python:3.6.5" - 
shell: - "(python {input.script} \ - {input.gmaps} \ - {params.nh} \ - {output.gmaps} \ - ) &> {log}" - - -############################################################################### -### Filter NH transcriptome -############################################################################### - - -rule filter_nh_transcriptome: - input: - tmaps=os.path.join( - config["output_dir"], "{sample}", "TranscriptomeMappings.sam" - ), - script=os.path.join(config["scripts_dir"], "nh_filter.py"), - output: - tmaps=os.path.join( - config["output_dir"], - "{sample}", - "nhfiltered_TranscriptomeMappings.sam", - ), - params: - cluster_log=os.path.join( - config["cluster_log"], "filter_nh_transcriptome_{sample}.log" - ), - nh=config["nh"], - log: - os.path.join( - config["local_log"], "filter_nh_transcriptome_{sample}.log" - ), - singularity: - "docker://zavolab/python:3.6.5" - shell: - "(python {input.script} \ - {input.tmaps} \ - {params.nh} \ - {output.tmaps} \ - ) &> {log}" - - -############################################################################### -### Remove header genome mappings -############################################################################### - - -rule remove_headers_genome: - input: - gmap=os.path.join( - config["output_dir"], "{sample}", "nhfiltered_GenomeMappings.sam" - ), - output: - gmap=os.path.join( - config["output_dir"], "{sample}", "noheader_GenomeMappings.sam" - ), - params: - cluster_log=os.path.join( - config["cluster_log"], "remove_headers_genome_{sample}.log" - ), - log: - os.path.join(config["local_log"], "remove_headers_genome_{sample}.log"), - singularity: - "docker://zavolab/samtools:1.8" - shell: - "samtools view {input.gmap} > {output.gmap}" - - -############################################################################### -### Remove header transcriptome mappings -############################################################################### - - -rule remove_headers_transcriptome: - input: - tmap=os.path.join( - config["output_dir"], - "{sample}", - "nhfiltered_TranscriptomeMappings.sam", - ), - output: - tmap=os.path.join( - config["output_dir"], - "{sample}", - "noheader_TranscriptomeMappings.sam", - ), - params: - cluster_log=os.path.join( - config["cluster_log"], "remove_headers_transcriptome_{sample}.log" - ), - log: - os.path.join( - config["local_log"], "remove_headers_transcriptome_{sample}.log" - ), - singularity: - "docker://zavolab/samtools:1.8" - shell: - "samtools view {input.tmap} > {output.tmap}" - - -############################################################################### -### Transcriptome to genome coordinates -############################################################################### - - -rule trans_to_gen: - input: - tmap=os.path.join( - config["output_dir"], - "{sample}", - "noheader_TranscriptomeMappings.sam", - ), - script=os.path.join(config["scripts_dir"], "sam_trx_to_sam_gen.pl"), - exons=config["exons"], - output: - genout=os.path.join(config["output_dir"], "{sample}", "TransToGen.sam"), - params: - cluster_log=os.path.join( - config["cluster_log"], "trans_to_gen_{sample}.log" - ), - log: - os.path.join(config["local_log"], "trans_to_gen_{sample}.log"), - singularity: - "docker://zavolab/perl:5.28" - shell: - "(perl {input.script} \ - --in {input.tmap} \ - --exons {input.exons} \ - --out {output.genout} \ - ) &> {log}" - - -############################################################################### -### Concatenate genome and trancriptome mappings 
-############################################################################### - - -rule cat_mapping: - input: - gmap1=os.path.join(config["output_dir"], "{sample}", "TransToGen.sam"), - gmap2=os.path.join( - config["output_dir"], "{sample}", "noheader_GenomeMappings.sam" - ), - output: - catmaps=os.path.join( - config["output_dir"], "{sample}", "catMappings.sam" - ), - params: - cluster_log=os.path.join( - config["cluster_log"], "cat_mapping_{sample}.log" - ), - log: - os.path.join(config["local_log"], "cat_mapping_{sample}.log"), - singularity: - "docker://zavolab/ubuntu:18.04" - shell: - "(cat {input.gmap1} {input.gmap2} > {output.catmaps}) &> {log}" - - -############################################################################### -### Add header -############################################################################### - - -rule add_header: - input: - header=config["header_of_collapsed_fasta"], - catmaps=os.path.join( - config["output_dir"], "{sample}", "catMappings.sam" - ), - output: - concatenate=os.path.join( - config["output_dir"], - "{sample}", - "concatenated_header_catMappings.sam", - ), - params: - cluster_log=os.path.join( - config["cluster_log"], "add_header_{sample}.log" - ), - log: - os.path.join(config["local_log"], "add_header_{sample}.log"), - singularity: - "docker://zavolab/ubuntu:18.04" - shell: - "(cat {input.header} {input.catmaps} > {output.concatenate}) &> {log}" - - -############################################################################### -### Sort mapped file by IDs -############################################################################### - - -rule sort_id: - input: - concatenate=os.path.join( - config["output_dir"], - "{sample}", - "concatenated_header_catMappings.sam", - ), - output: - sort=os.path.join( - config["output_dir"], "{sample}", "header_sorted_catMappings.sam" - ), - params: - cluster_log=os.path.join(config["cluster_log"], "sort_id_{sample}.log"), - log: - os.path.join(config["local_log"], "sort_id_{sample}.log"), - singularity: - "docker://zavolab/samtools:1.8" - shell: - "(samtools sort -n -o {output.sort} {input.concatenate}) &> {log}" - - -############################################################################### -### Remove inferior mappings (keeping multimappers) -############################################################################### - - -rule remove_inferiors: - input: - sort=os.path.join( - config["output_dir"], "{sample}", "header_sorted_catMappings.sam" - ), - script=os.path.join( - config["scripts_dir"], - "sam_remove_duplicates_inferior_alignments_multimappers.1_5.pl", - ), - output: - remove_inf=os.path.join( - config["output_dir"], "{sample}", "removeInferiors.sam" - ), - params: - cluster_log=os.path.join( - config["cluster_log"], "remove_inferiors_{sample}.log" - ), - log: - os.path.join(config["local_log"], "remove_inferiors_{sample}.log"), - resources: - mem=15, - threads=4, - singularity: - "docker://zavolab/perl:5.28" - shell: - "(perl {input.script} \ - --print-header \ - --keep-mm \ - --in {input.sort} \ - --out {output.remove_inf} \ - ) &> {log}" - - -############################################################################### -### Uncollapse reads -############################################################################### - - -rule uncollapse_reads: - input: - maps=os.path.join( - config["output_dir"], "{sample}", "removeInferiors.sam" - ), - script=os.path.join(config["scripts_dir"], "sam_uncollapse.pl"), - output: - maps=os.path.join( - config["output_dir"], "{sample}", 
"uncollapsedMappings.sam" - ), - params: - cluster_log=os.path.join( - config["cluster_log"], "uncollapse_reads_{sample}.log" - ), - log: - os.path.join(config["local_log"], "uncollapse_reads_{sample}.log"), - singularity: - "docker://zavolab/perl:5.28" - shell: - "(perl {input.script} \ - --suffix \ - --in {input.maps} \ - --out {output.maps} \ - ) &> {log}" - - -############################################################################### -### Convert SAM to BAM -############################################################################### - - -rule convert_to_bam: - input: - maps=os.path.join( - config["output_dir"], "{sample}", "uncollapsedMappings.sam" - ), - output: - maps=os.path.join( - config["output_dir"], "{sample}", "mappingsConverted.bam" - ), - params: - cluster_log=os.path.join( - config["cluster_log"], "convert_to_bam_{sample}.log" - ), - log: - os.path.join(config["local_log"], "convert_to_bam_{sample}.log"), - singularity: - "docker://zavolab/samtools:1.8" - shell: - "(samtools view -b {input.maps} > {output.maps}) &> {log}" - - -############################################################################### -### Sort by coordinate position -############################################################################### - - -rule sort_by_position: - input: - maps=os.path.join( - config["output_dir"], "{sample}", "mappingsConverted.bam" - ), - output: - maps=os.path.join( - config["output_dir"], - "{sample}", - "convertedSortedMappings_{sample}.bam", - ), - params: - cluster_log=os.path.join( - config["cluster_log"], "sort_by_position_{sample}.log" - ), - log: - os.path.join(config["local_log"], "sort_by_position_{sample}.log"), - singularity: - "docker://zavolab/samtools:1.8" - shell: - "(samtools sort {input.maps} > {output.maps}) &> {log}" - - -############################################################################### -### Create bam index -############################################################################### - - -rule index_bam: - input: - maps=os.path.join( - config["output_dir"], - "{sample}", - "convertedSortedMappings_{sample}.bam", - ), - output: - maps=os.path.join( - config["output_dir"], - "{sample}", - "convertedSortedMappings_{sample}.bam.bai", - ), - params: - cluster_log=os.path.join( - config["cluster_log"], "index_bam_{sample}.log" - ), - log: - os.path.join(config["local_log"], "index_bam_{sample}.log"), - singularity: - "docker://zavolab/samtools:1.8" - shell: - "(samtools index -b {input.maps} > {output.maps}) &> {log}" -- GitLab From 657972b93e30a3427b51066be1f69b3819d36e13 Mon Sep 17 00:00:00 2001 From: Iris Mestres Pascual <iris.mestrespascual@unibas.ch> Date: Tue, 17 Jan 2023 17:24:32 +0000 Subject: [PATCH 10/22] Delete Snakefile --- workflow/prepare/Snakefile | 731 ------------------------------------- 1 file changed, 731 deletions(-) delete mode 100644 workflow/prepare/Snakefile diff --git a/workflow/prepare/Snakefile b/workflow/prepare/Snakefile deleted file mode 100644 index c16afb8..0000000 --- a/workflow/prepare/Snakefile +++ /dev/null @@ -1,731 +0,0 @@ -############################################################################### -# (c) 2020 Paula Iborra, Zavolan Lab, Biozentrum, University of Basel -# (@) paula.iborradetoledo@unibas.ch / paula.iborra@alumni.esci.upf.edu -# -# Snakemake workflow to download and prepare the necessary files for -# smallRNA-seq related workflows. 
-############################################################################### - -import os - - -# Rules that require internet connection for downloading files are included -# in the localrules -localrules: - finish, - genome_process, - filter_anno_gtf, - mirna_anno, - dict_chr, - - -############################################################################### -### Finish rule -############################################################################### - - -rule finish: - input: - idx_transcriptome=expand( - os.path.join( - config["output_dir"], - "{organism}", - "transcriptome_index_segemehl.idx", - ), - organism=config["organism"], - ), - idx_genome=expand( - os.path.join( - config["output_dir"], - "{organism}", - "genome_index_segemehl.idx", - ), - organism=config["organism"], - ), - exons=expand( - os.path.join(config["output_dir"], "{organism}", "exons.bed"), - organism=config["organism"], - ), - header=expand( - os.path.join( - config["output_dir"], - "{organism}", - "headerOfCollapsedFasta.sam", - ), - organism=config["organism"], - ), - mirnafilt=expand( - os.path.join( - config["output_dir"], "{organism}", "mirna_filtered.bed" - ), - organism=config["organism"], - ), - isomirs=expand( - os.path.join( - config["output_dir"], "{organism}", "isomirs_annotation.bed" - ), - organism=config["organism"], - ), - - -############################################################################### -### Download and process genome IDs -############################################################################### - - -rule genome_process: - input: - script=os.path.join(config["scripts_dir"], "genome_process.sh"), - output: - genome=os.path.join( - config["output_dir"], "{organism}", "genome.processed.fa" - ), - params: - url=lambda wildcards: config[wildcards.organism]["genome_url"], - dir_out=os.path.join(config["output_dir"], "{organism}"), - log: - os.path.join(config["local_log"], "{organism}", "genome_process.log"), - singularity: - "docker://zavolab/ubuntu:18.04" - shell: - "(bash {input.script} {params.dir_out} {log} {params.url})" - - -############################################################################### -### Download and filter gtf by transcript_level -############################################################################### - - -rule filter_anno_gtf: - input: - script=os.path.join(config["scripts_dir"], "filter_anno_gtf.sh"), - output: - gtf=os.path.join( - config["output_dir"], - "{organism}", - "gene_annotations.filtered.gtf", - ), - params: - url=lambda wildcards: config[wildcards.organism]["gtf_url"], - dir_out=os.path.join(config["output_dir"], "{organism}"), - log: - os.path.join(config["local_log"], "{organism}", "filter_anno_gtf.log"), - singularity: - "docker://zavolab/ubuntu:18.04" - shell: - "(bash {input.script} {params.dir_out} {log} {params.url}) &> {log}" - - -############################################################################### -### Extract transcriptome sequences in FASTA from genome. 
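The rule below delegates this extraction to gffread -w, which writes one FASTA record per transcript by stitching its exon sequences together. A simplified sketch of that core operation, using pysam for indexed FASTA access, follows; it assumes pre-parsed exon intervals, whereas gffread itself parses the GTF and handles many more cases.

import pysam

COMPLEMENT = str.maketrans("ACGTNacgtn", "TGCANtgcan")

def transcript_sequence(fasta_path, chrom, exons, strand):
    """Concatenate exon sequences ((start, end) half-open genome
    intervals in genomic order) and reverse-complement the result
    for minus-strand transcripts."""
    with pysam.FastaFile(fasta_path) as fasta:
        seq = "".join(fasta.fetch(chrom, start, end) for start, end in exons)
    if strand == "-":
        seq = seq.translate(COMPLEMENT)[::-1]
    return seq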
-############################################################################### - - -rule extract_transcriptome_seqs: - input: - genome=os.path.join( - config["output_dir"], "{organism}", "genome.processed.fa" - ), - gtf=os.path.join( - config["output_dir"], - "{organism}", - "gene_annotations.filtered.gtf", - ), - output: - fasta=os.path.join( - config["output_dir"], "{organism}", "transcriptome.fa" - ), - params: - cluster_log=os.path.join( - config["cluster_log"], - "{organism}", - "extract_transcriptome_seqs.log", - ), - log: - os.path.join( - config["local_log"], - "{organism}", - "extract_transcriptome_seqs.log", - ), - singularity: - "docker://zavolab/cufflinks:2.2.1" - shell: - "(gffread -w {output.fasta} -g {input.genome} {input.gtf}) &> {log}" - - -############################################################################### -### Trim transcript IDs from FASTA file -############################################################################### - - -rule trim_fasta: - input: - fasta=os.path.join( - config["output_dir"], "{organism}", "transcriptome.fa" - ), - script=os.path.join(config["scripts_dir"], "validation_fasta.py"), - output: - fasta=os.path.join( - config["output_dir"], "{organism}", "transcriptome_idtrim.fa" - ), - params: - cluster_log=os.path.join( - config["cluster_log"], "{organism}", "trim_fasta.log" - ), - log: - os.path.join(config["local_log"], "{organism}", "trim_fasta.log"), - singularity: - "docker://zavolab/ubuntu:18.04" - shell: - """(awk \ - -F" " \ - "/^>/ {{print \$1; next}} 1" \ - {input.fasta} \ - > {output.fasta} \ - ) &> {log}""" - - -############################################################################### -### Generate segemehl index for transcripts -############################################################################### - - -rule generate_segemehl_index_transcriptome: - input: - fasta=os.path.join( - config["output_dir"], "{organism}", "transcriptome_idtrim.fa" - ), - output: - idx=os.path.join( - config["output_dir"], - "{organism}", - "transcriptome_index_segemehl.idx", - ), - params: - cluster_log=os.path.join( - config["cluster_log"], - "{organism}", - "generate_segemehl_index_transcriptome.log", - ), - log: - os.path.join( - config["local_log"], - "{organism}", - "generate_segemehl_index_transcriptome.log", - ), - resources: - mem=10, - threads=8, - time=6, - singularity: - "docker://zavolab/segemehl:0.2.0" - shell: - "(segemehl.x -x {output.idx} -d {input.fasta}) &> {log}" - - -############################################################################### -### Generate segemehl index for genome -############################################################################### - - -rule generate_segemehl_index_genome: - input: - genome=os.path.join( - config["output_dir"], "{organism}", "genome.processed.fa" - ), - output: - idx=os.path.join( - config["output_dir"], "{organism}", "genome_index_segemehl.idx" - ), - params: - cluster_log=os.path.join( - config["cluster_log"], - "{organism}", - "generate_segemehl_index_genome.log", - ), - log: - os.path.join( - config["local_log"], - "{organism}", - "generate_segemehl_index_genome.log", - ), - resources: - mem=60, - threads=8, - time=6, - singularity: - "docker://zavolab/segemehl:0.2.0" - shell: - "(segemehl.x -x {output.idx} -d {input.genome}) &> {log}" - - -############################################################################### -### GTF file of exons (genomic coordinates) -############################################################################### - - -rule 
get_exons_gtf: - input: - gtf=os.path.join( - config["output_dir"], - "{organism}", - "gene_annotations.filtered.gtf", - ), - script=os.path.join(config["scripts_dir"], "get_lines_w_pattern.sh"), - output: - exons=os.path.join(config["output_dir"], "{organism}", "exons.gtf"), - params: - cluster_log=os.path.join( - config["cluster_log"], "{organism}", "get_exons_gtf.log" - ), - log: - os.path.join(config["local_log"], "{organism}", "get_exons_gtf.log"), - singularity: - "docker://zavolab/ubuntu:18.04" - shell: - "(bash \ - {input.script} \ - -f {input.gtf} \ - -c 3 \ - -p exon \ - -o {output.exons} \ - ) &> {log}" - - -############################################################################### -### Convert GTF file of exons to BED file -############################################################################### - - -rule gtftobed: - input: - exons=os.path.join(config["output_dir"], "{organism}", "exons.gtf"), - script=os.path.join(config["scripts_dir"], "gtf_exons_bed.1.1.2.R"), - output: - exons=os.path.join(config["output_dir"], "{organism}", "exons.bed"), - params: - cluster_log=os.path.join( - config["cluster_log"], "{organism}", "gtftobed.log" - ), - log: - os.path.join(config["local_log"], "{organism}", "gtftobed.log"), - singularity: - "docker://zavolab/r-zavolab:3.5.1" - shell: - "(Rscript \ - {input.script} \ - --gtf {input.exons} \ - -o {output.exons} \ - ) &> {log}" - - -############################################################################### -### Create header for SAM file -############################################################################### - - -rule create_header_genome: - input: - genome=os.path.join( - config["output_dir"], "{organism}", "genome.processed.fa" - ), - output: - header=os.path.join( - config["output_dir"], "{organism}", "headerOfCollapsedFasta.sam" - ), - params: - cluster_log=os.path.join( - config["cluster_log"], "{organism}", "create_header_genome.log" - ), - log: - os.path.join( - config["local_log"], "{organism}", "create_header_genome.log" - ), - singularity: - "docker://zavolab/samtools:1.8" - shell: - "(samtools dict -o {output.header} --uri=NA {input.genome}) &> {log}" - - -############################################################################### -### Download miRNA annotation -############################################################################### - - -rule mirna_anno: - input: - genome=os.path.join( - config["output_dir"], "{organism}", "genome.processed.fa" - ), - output: - anno=os.path.join( - config["output_dir"], "{organism}", "raw", "mirna.gff3" - ), - params: - anno=lambda wildcards: config[wildcards.organism]["mirna_url"], - cluster_log=os.path.join( - config["cluster_log"], "{organism}", "mirna_anno.log" - ), - log: - os.path.join(config["local_log"], "{organism}", "mirna_anno.log"), - singularity: - "docker://zavolab/ubuntu:18.04" - shell: - "(wget {params.anno} -O {output.anno}) &> {log}" - - -############################################################################### -### Download dictionary mapping chr -############################################################################### - - -rule dict_chr: - input: - genome=os.path.join( - config["output_dir"], "{organism}", "genome.processed.fa" - ), - output: - map_chr=os.path.join( - config["output_dir"], "{organism}", "UCSC2ensembl.txt" - ), - params: - map_chr=lambda wildcards: config[wildcards.organism]["map_chr_url"], - cluster_log=os.path.join( - config["cluster_log"], "{organism}", "dict_chr.log" - ), - log: - 
os.path.join(config["local_log"], "{organism}", "dict_chr.log"), - singularity: - "docker://zavolab/ubuntu:18.04" - shell: - "(wget {params.map_chr} -O {output.map_chr}) &> {log}" - - -############################################################################### -### Mapping chromosomes names, UCSC <-> ENSEMBL -############################################################################### - - -rule map_chr_names: - input: - anno=os.path.join( - config["output_dir"], "{organism}", "raw", "mirna.gff3" - ), - script=os.path.join(config["scripts_dir"], "map_chromosomes.pl"), - map_chr=os.path.join( - config["output_dir"], "{organism}", "UCSC2ensembl.txt" - ), - output: - gff=os.path.join( - config["output_dir"], "{organism}", "mirna_chr_mapped.gff3" - ), - params: - cluster_log=os.path.join( - config["cluster_log"], "{organism}", "map_chr_names.log" - ), - column=lambda wildcards: config[wildcards.organism]["column"], - delimiter=lambda wildcards: config[wildcards.organism]["delimiter"], - log: - os.path.join(config["local_log"], "{organism}", "map_chr_names.log"), - singularity: - "docker://zavolab/perl:5.28" - shell: - "(perl {input.script} \ - {input.anno} \ - {params.column} \ - {params.delimiter} \ - {input.map_chr} \ - {output.gff} \ - ) &> {log}" - - -############################################################################### -### Filtering _1 miR IDs -############################################################################### - - -rule filter_mir_1_anno: - input: - gff=os.path.join( - config["output_dir"], "{organism}", "mirna_chr_mapped.gff3" - ), - output: - gff=os.path.join( - config["output_dir"], "{organism}", "mirna_filtered.gff3" - ), - params: - script=os.path.join(config["scripts_dir"], "filter_mir_1_anno.sh"), - cluster_log=os.path.join( - config["cluster_log"], "{organism}", "filter_mir_1_anno.log" - ), - log: - os.path.join( - config["local_log"], "{organism}", "filter_mir_1_anno.log" - ), - singularity: - "docker://zavolab/ubuntu:18.04" - shell: - "(bash {params.script} -f {input.gff} -o {output.gff}) &> {log}" - - -############################################################################### -### GFF to BED (improve intersect memory efficient allowing to use -sorted) -############################################################################### - - -rule gfftobed: - input: - gff=os.path.join( - config["output_dir"], "{organism}", "mirna_filtered.gff3" - ), - output: - bed=os.path.join( - config["output_dir"], "{organism}", "mirna_filtered.bed" - ), - params: - cluster_log=os.path.join( - config["cluster_log"], "{organism}", "gfftobed.log" - ), - out_dir=os.path.join(config["output_dir"]), - log: - os.path.join(config["local_log"], "{organism}", "gfftobed.log"), - singularity: - "docker://zavolab/bedops:2.4.35" - shell: - "(convert2bed -i gff < {input.gff} \ - --sort-tmpdir={params.out_dir} \ - > {output.bed} \ - ) &> {log}" - - -############################################################################### -### Index genome fasta file -############################################################################### - - -rule create_index_fasta: - input: - genome=os.path.join( - config["output_dir"], "{organism}", "genome.processed.fa" - ), - output: - genome=os.path.join( - config["output_dir"], "{organism}", "genome.processed.fa.fai" - ), - params: - cluster_log=os.path.join( - config["cluster_log"], "{organism}", "create_index_fasta.log" - ), - log: - os.path.join( - config["local_log"], "{organism}", "create_index_fasta.log" - ), - singularity: - 
"docker://zavolab/samtools:1.8" - shell: - "(samtools faidx {input.genome}) &> {log}" - - -############################################################################### -### Extract chromosome length -############################################################################### - - -rule extract_chr_len: - input: - genome=os.path.join( - config["output_dir"], "{organism}", "genome.processed.fa.fai" - ), - output: - chrsize=os.path.join( - config["output_dir"], "{organism}", "chr_size.txt" - ), - params: - cluster_log=os.path.join( - config["cluster_log"], "{organism}", "extract_chr_len.log" - ), - log: - os.path.join(config["local_log"], "{organism}", "extract_chr_len.log"), - singularity: - "docker://zavolab/ubuntu:18.04" - shell: - "(cut -f1,2 {input.genome} > {output.chrsize}) &> {log}" - - -############################################################################### -### Extract mature miRNA -############################################################################### - - -rule filter_mature_mirs: - input: - bed=os.path.join( - config["output_dir"], "{organism}", "mirna_filtered.bed" - ), - output: - bed=os.path.join( - config["output_dir"], "{organism}", "mirna_mature_filtered.bed" - ), - params: - cluster_log=os.path.join( - config["cluster_log"], "{organism}", "filter_mature_mirs.log" - ), - precursor="miRNA_primary_transcript", - log: - os.path.join( - config["local_log"], "{organism}", "filter_mature_mirs.log" - ), - singularity: - "docker://zavolab/ubuntu:18.04" - shell: - "(grep -v {params.precursor} {input.bed} > {output.bed}) &> {log}" - - -############################################################################### -### Create isomirs annotation file from mature miRNA -############################################################################### - - -rule iso_anno: - input: - bed=os.path.join( - config["output_dir"], "{organism}", "mirna_mature_filtered.bed" - ), - chrsize=os.path.join( - config["output_dir"], "{organism}", "chr_size.txt" - ), - output: - bed=os.path.join( - config["output_dir"], - "{organism}", - "iso_anno_5p{bp_5p}_3p{bp_3p}.bed", - ), - params: - cluster_log=os.path.join( - config["cluster_log"], - "{organism}", - "iso_anno_5p{bp_5p}_3p{bp_3p}.log", - ), - bp_5p=lambda wildcards: wildcards.bp_5p, - bp_3p=lambda wildcards: wildcards.bp_3p, - log: - os.path.join( - config["local_log"], - "{organism}", - "iso_anno_5p{bp_5p}_3p{bp_3p}.log", - ), - singularity: - "docker://zavolab/bedtools:2.28.0" - shell: - "(bedtools slop \ - -i {input.bed} \ - -g {input.chrsize} \ - -l {params.bp_5p} \ - -r {params.bp_3p} \ - > {output.bed} \ - ) &> {log}" - - -############################################################################### -### Change miRNA names to isomirs names -############################################################################### - - -rule iso_anno_rename: - input: - bed=os.path.join( - config["output_dir"], - "{organism}", - "iso_anno_5p{bp_5p}_3p{bp_3p}.bed", - ), - output: - bed=os.path.join( - config["output_dir"], - "{organism}", - "iso_anno_rename_5p{bp_5p}_3p{bp_3p}.bed", - ), - params: - cluster_log=os.path.join( - config["cluster_log"], - "{organism}", - "iso_anno_rename_5p{bp_5p}_3p{bp_3p}.log", - ), - bp_5p=lambda wildcards: wildcards.bp_5p, - bp_3p=lambda wildcards: wildcards.bp_3p, - log: - os.path.join( - config["local_log"], - "{organism}", - "iso_anno_rename_5p{bp_5p}_3p{bp_3p}.log", - ), - singularity: - "docker://zavolab/ubuntu:18.04" - shell: - "(sed \ - 's/;Derives/_5p{params.bp_5p}_3p{params.bp_3p};Derives/' \ 
- {input.bed} \ - > {output.bed} \ - ) &> {log}" - - -############################################################################### -### Concatenate all isomirs annotation files -############################################################################### - - -rule iso_anno_concat: - input: - bed=lambda wildcards: expand( - os.path.join( - config["output_dir"], - "{organism}", - "iso_anno_rename_5p{bp_5p}_3p{bp_3p}.bed", - ), - organism=config["organism"], - bp_3p=config["bp_3p"], - bp_5p=config["bp_5p"], - ), - output: - bed=os.path.join( - config["output_dir"], "{organism}", "iso_anno_concat.bed" - ), - params: - cluster_log=os.path.join( - config["cluster_log"], "{organism}", "iso_anno_concat.log" - ), - prefix=os.path.join( - config["output_dir"], "{organism}", "iso_anno_rename" - ), - log: - os.path.join(config["local_log"], "{organism}", "iso_anno_concat.log"), - singularity: - "docker://zavolab/ubuntu:18.04" - shell: - "(cat {params.prefix}* > {output.bed}) &> {log}" - - -############################################################################### -### Remove non changing isomirs (5p0_3p0) -############################################################################### - - -rule iso_anno_final: - input: - bed=os.path.join( - config["output_dir"], "{organism}", "iso_anno_concat.bed" - ), - output: - bed=os.path.join( - config["output_dir"], "{organism}", "isomirs_annotation.bed" - ), - params: - cluster_log=os.path.join( - config["cluster_log"], "{organism}", "iso_anno_final.log" - ), - pattern="5p0_3p0", - log: - os.path.join(config["local_log"], "{organism}", "iso_anno_final.log"), - singularity: - "docker://zavolab/ubuntu:18.04" - shell: - "(grep -v '{params.pattern}' {input.bed} > {output.bed}) &> {log}" -- GitLab From 0217211515a584f01a37b1baf182d2a85dba1fd9 Mon Sep 17 00:00:00 2001 From: Iris Mestres Pascual <iris.mestrespascual@unibas.ch> Date: Tue, 17 Jan 2023 17:24:43 +0000 Subject: [PATCH 11/22] Delete Snakefile --- workflow/quantify/Snakefile | 317 ------------------------------------ 1 file changed, 317 deletions(-) delete mode 100644 workflow/quantify/Snakefile diff --git a/workflow/quantify/Snakefile b/workflow/quantify/Snakefile deleted file mode 100644 index b30667d..0000000 --- a/workflow/quantify/Snakefile +++ /dev/null @@ -1,317 +0,0 @@ -############################################################################### -# (c) 2020 Paula Iborra, Zavolan Lab, Biozentrum, University of Basel -# (@) paula.iborradetoledo@unibas.ch / paula.iborra@alumni.esci.upf.edu -# -# Pipeline to quantify miRNAs, including isomiRs, from miRNA-seq alignments. 
-############################################################################### - -import os - - -# Rules that require internet connection for downloading files are included -# in the localrules -localrules: - finish, - - -############################################################################### -### Finish rule -############################################################################### - - -rule finish: - input: - table1=expand( - os.path.join(config["output_dir"], "TABLES", "counts.{mir}.tab"), - mir=config["mir_list"], - ), - table2=os.path.join( - config["output_dir"], "TABLES", "counts.isomirs.tab" - ), - - -############################################################################### -### BAM to BED -############################################################################### - - -rule bamtobed: - input: - alignment=os.path.join( - config["input_dir"], - "{sample}", - "convertedSortedMappings_{sample}.bam", - ), - output: - alignment=os.path.join( - config["output_dir"], "{sample}", "alignments.bed12" - ), - params: - cluster_log=os.path.join( - config["cluster_log"], "bamtobed_{sample}.log" - ), - log: - os.path.join(config["local_log"], "bamtobed_{sample}.log"), - singularity: - "docker://zavolab/bedtools:2.27.0" - shell: - "(bedtools bamtobed \ - -bed12 \ - -tag NH \ - -i {input.alignment} \ - > {output.alignment} \ - ) &> {log}" - - -############################################################################### -### Sort alignments -############################################################################### - - -rule sort_alignments: - input: - alignment=os.path.join( - config["output_dir"], "{sample}", "alignments.bed12" - ), - output: - alignment=os.path.join( - config["output_dir"], "{sample}", "sorted.alignments.bed12" - ), - params: - cluster_log=os.path.join( - config["cluster_log"], "sortalignment_{sample}.log" - ), - log: - os.path.join(config["local_log"], "sortalignment_{sample}.log"), - resources: - mem=4, - threads=8, - singularity: - "docker://zavolab/ubuntu:18.04" - shell: - "(sort \ - -k1,1 \ - -k2,2n \ - {input.alignment} \ - > {output.alignment} \ - ) &> {log}" - - -############################################################################### -### miRNAs intersection -############################################################################### - - -rule intersect_mirna: - input: - alignment=os.path.join( - config["output_dir"], "{sample}", "sorted.alignments.bed12" - ), - mirna=config["mirnas_anno"], - output: - intersect=os.path.join( - config["output_dir"], "{sample}", "intersect_mirna.bed" - ), - params: - cluster_log=os.path.join( - config["cluster_log"], "intersection_mirna_{sample}.log" - ), - log: - os.path.join(config["local_log"], "intersection_mirna_{sample}.log"), - singularity: - "docker://zavolab/bedtools:2.27.0" - shell: - "(bedtools intersect \ - -wao \ - -s \ - -F 1 \ - -sorted \ - -b {input.alignment} \ - -a {input.mirna} \ - > {output.intersect} \ - ) &> {log}" - - -############################################################################### -### isomiRs intersection -############################################################################### - - -# rule intersect_isomirs: -# input: -# alignment=os.path.join( -# config["output_dir"], "{sample}", "sorted.alignments.bed12" -# ), -# isomirs=config["isomirs_anno"], -# output: -# intersect=os.path.join( -# config["output_dir"], "{sample}", "intersect_isomirs.bed" -# ), -# params: -# cluster_log=os.path.join( -# config["cluster_log"], 
"intersection_isomirs_{sample}.log" -# ), -# log: -# os.path.join(config["local_log"], "intersection_isomirs_{sample}.log"), -# singularity: -# "docker://zavolab/bedtools:2.27.0" -# shell: -# "(bedtools intersect \ -# -wao \ -# -s \ -# -F 1 \ -# -sorted \ -# -b {input.alignment} \ -# -a {input.isomirs} \ -# > {output.intersect} \ -# ) &> {log}" - - -############################################################################### -### miRNAs counting table - miRNA -############################################################################### - - -rule quant_mirna: - input: - intersect=os.path.join( - config["output_dir"], "{sample}", "intersect_mirna.bed" - ), - script=os.path.join(config["scripts_dir"], "mirna_quantification.py"), - output: - table=os.path.join( - config["output_dir"], "TABLES", "miRNA_counts_{sample}" - ), - params: - cluster_log=os.path.join( - config["cluster_log"], "quant_mirna_miRNA_{sample}.log" - ), - prefix=os.path.join( - config["output_dir"], "TABLES", "miRNA_counts_{sample}" - ), - log: - os.path.join(config["local_log"], "quant_mirna_miRNA_{sample}.log"), - singularity: - "docker://zavolab/python:3.6.5" - shell: - "(python \ - {input.script} \ - -i {input.intersect} \ - --uniq=miRNA \ - -p={params.prefix} \ - ) &> {log}" - - -############################################################################### -### miRNAs counting table - miRNA_primary -############################################################################### - - -rule quant_mirna_pri: - input: - intersect=os.path.join( - config["output_dir"], "{sample}", "intersect_mirna.bed" - ), - script=os.path.join(config["scripts_dir"], "mirna_quantification.py"), - output: - table=os.path.join( - config["output_dir"], - "TABLES", - "miRNA_primary_transcript_counts_{sample}", - ), - params: - cluster_log=os.path.join( - config["cluster_log"], - "quant_mirna_miRNA_primary_transcript_{sample}.log", - ), - prefix=os.path.join( - config["output_dir"], - "TABLES", - "miRNA_primary_transcript_counts_{sample}", - ), - log: - os.path.join( - config["local_log"], - "quant_mirna_miRNA_primary_transcript_{sample}.log", - ), - singularity: - "docker://zavolab/python:3.6.5" - shell: - "(python \ - {input.script} \ - -i {input.intersect} \ - --uniq=miRNA_primary_transcript \ - -p={params.prefix} \ - ) &> {log}" - - -############################################################################### -### isomiRs counting table -############################################################################### - - -# rule quant_isomirs: -# input: -# intersect=os.path.join( -# config["output_dir"], "{sample}", "intersect_isomirs.bed" -# ), -# script=os.path.join(config["scripts_dir"], "mirna_quantification.py"), -# output: -# table=os.path.join( -# config["output_dir"], "TABLES", "isomirs_counts_{sample}" -# ), -# params: -# cluster_log=os.path.join( -# config["cluster_log"], "quant_isomirs_{sample}.log" -# ), -# prefix=os.path.join( -# config["output_dir"], "TABLES", "isomirs_counts_{sample}" -# ), -# log: -# os.path.join(config["local_log"], "quant_isomirs_{sample}.log"), -# singularity: -# "docker://zavolab/python:3.6.5" -# shell: -# "(python \ -# {input.script} \ -# -i {input.intersect} \ -# --uniq=miRNA \ -# -p={params.prefix} \ -# ) &> {log}" - - -############################################################################### -### Merge counting tables for all samples by mature/primary/isomirs forms. 
-############################################################################### - - -rule merge_tables: - input: - table=expand( - os.path.join( - config["output_dir"], "TABLES", "{mir}_counts_{sample}" - ), - sample=config["sample"], - mir=config["mir_list"], - ), - script=os.path.join(config["scripts_dir"], "merge_tables.R"), - output: - table=os.path.join(config["output_dir"], "TABLES", "counts.{mir}.tab"), - params: - cluster_log=os.path.join( - config["cluster_log"], "merge_tables_{mir}.log" - ), - prefix="{mir}_counts_", - input_dir=os.path.join(config["output_dir"], "TABLES"), - log: - os.path.join(config["local_log"], "merge_tables_{mir}.log"), - singularity: - "docker://zavolab/r-tidyverse:3.5.3" - shell: - "(Rscript \ - {input.script} \ - --input_dir {params.input_dir} \ - --output_file {output.table} \ - --prefix {params.prefix} \ - --verbose \ - ) &> {log}" -- GitLab From 0e26684698a3555a328c919a936f4a3f1c2b8842 Mon Sep 17 00:00:00 2001 From: Iris Mestres Pascual <iris.mestrespascual@unibas.ch> Date: Tue, 17 Jan 2023 17:24:57 +0000 Subject: [PATCH 12/22] Delete config_map.yaml --- test/config_map.yaml | 44 -------------------------------------------- 1 file changed, 44 deletions(-) delete mode 100644 test/config_map.yaml diff --git a/test/config_map.yaml b/test/config_map.yaml deleted file mode 100644 index 4ad569e..0000000 --- a/test/config_map.yaml +++ /dev/null @@ -1,44 +0,0 @@ ---- -#### GLOBAL PARAMETERS #### - -# Directories -# Usually there is no need to change these -output_dir: "results/" -local_log: "logs/local" -cluster_log: "logs/cluster" -scripts_dir: "../scripts" - -# Resources: genome, transcriptome, genes, miRs -# All of these are produced by the "prepare" workflow -genome: "results/homo_sapiens/chrY/genome.processed.fa" -gtf: "results/homo_sapiens/chrY/gene_annotations.filtered.gtf" -transcriptome: "results/homo_sapiens/chrY/transcriptome_idtrim.fa" -transcriptome_index_segemehl: "results/homo_sapiens/chrY/transcriptome_index_segemehl.idx" -genome_index_segemehl: "results/homo_sapiens/chrY/genome_index_segemehl.idx" -exons: "results/homo_sapiens/chrY/exons.bed" -header_of_collapsed_fasta: "results/homo_sapiens/chrY/headerOfCollapsedFasta.sam" - -# Tool parameters: quality filter -q_value: 10 -p_value: 50 - -# Tool parameters: adapter removal -error_rate: 0.1 -minimum_length: 15 -overlap: 3 -max_n: 0 - -# Tool parameters: mapping -max_length_reads: 30 -nh: 100 - -# Inputs information -input_dir: "test_files" -sample: ["test_lib"] - -#### PARAMETERS SPECIFIC TO INPUTS #### - -test_lib: - adapter: "AACTGTAGGCACCATCAAT" - format: "fa" -... -- GitLab From 489f15ce3fbb638fa205bf32f4b019c02708c9d7 Mon Sep 17 00:00:00 2001 From: Iris Mestres Pascual <iris.mestrespascual@unibas.ch> Date: Tue, 17 Jan 2023 17:25:04 +0000 Subject: [PATCH 13/22] Delete config_prepare.yaml --- test/config_prepare.yaml | 33 --------------------------------- 1 file changed, 33 deletions(-) delete mode 100644 test/config_prepare.yaml diff --git a/test/config_prepare.yaml b/test/config_prepare.yaml deleted file mode 100644 index a74dba6..0000000 --- a/test/config_prepare.yaml +++ /dev/null @@ -1,33 +0,0 @@ ---- -#### GLOBAL PARAMETERS ##### - -# Directories -# Usually there is no need to change these -output_dir: "results" -scripts_dir: "../scripts" -local_log: "logs/local" -cluster_log: "logs/cluster" - -# Isomirs annotation file -# Number of base pairs to add/substract from 5' (start) and 3' (end) coordinates. 
-bp_5p: [-1, 0, +1] -bp_3p: [-1, 0, +1] - -# List of inputs -organism: ["homo_sapiens/chrY"] - -#### PARAMETERS SPECIFIC TO INPUTS ##### - -homo_sapiens/chrY: - # URLs to genome, gene & miRNA annotations - genome_url: "ftp://ftp.ensembl.org/pub/release-98/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_sm.chromosome.Y.fa.gz" - gtf_url: "ftp://ftp.ensembl.org/pub/release-98/gtf/homo_sapiens/Homo_sapiens.GRCh38.98.gtf.gz" - mirna_url: "https://www.mirbase.org/ftp/CURRENT/genomes/hsa.gff3" - - # Chromosome name mappings between UCSC <-> Ensembl - # Other organisms available at: https://github.com/dpryan79/ChromosomeMappings - map_chr_url: "https://raw.githubusercontent.com/dpryan79/ChromosomeMappings/master/GRCh38_UCSC2ensembl.txt" - # Chromosome name mapping parameters: - column: 1 - delimiter: "TAB" -... -- GitLab From 0d35e6e163dba066e5aff30a93f6a7acad184a83 Mon Sep 17 00:00:00 2001 From: Iris Mestres Pascual <iris.mestrespascual@unibas.ch> Date: Tue, 17 Jan 2023 17:25:10 +0000 Subject: [PATCH 14/22] Delete config_quantify.yaml --- test/config_quantify.yaml | 23 ----------------------- 1 file changed, 23 deletions(-) delete mode 100644 test/config_quantify.yaml diff --git a/test/config_quantify.yaml b/test/config_quantify.yaml deleted file mode 100644 index 691cedf..0000000 --- a/test/config_quantify.yaml +++ /dev/null @@ -1,23 +0,0 @@ ---- -#### GLOBAL PARAMETERS #### - -# Directories -# Usually there is no need to change these -output_dir: "results" -scripts_dir: "../scripts" -local_log: "logs/local" -cluster_log: "logs/cluster" - -# Types of miRNAs to quantify -#mir_list: ["miRNA", "miRNA_primary_transcript", "isomirs"] -mir_list: ["miRNA", "miRNA_primary_transcript"] - -# Resources: miR annotations, chromosome name mappings -# All of these are produced by the "prepare" workflow -mirnas_anno: "results/homo_sapiens/chrY/mirna_filtered.bed" -isomirs_anno: "results/homo_sapiens/chrY/isomirs_annotation.bed" - -# Inputs information -input_dir: "results" -sample: ["test_lib"] # put all samples, separated by comma -... -- GitLab From c848b44fa7d00b9a327b22fcd0691b745d6c7b23 Mon Sep 17 00:00:00 2001 From: Iris Mestres Pascual <iris.mestrespascual@unibas.ch> Date: Tue, 17 Jan 2023 17:25:51 +0000 Subject: [PATCH 15/22] Upload "config.yaml" file for the merged workflow --- test/config.yaml | 98 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 test/config.yaml diff --git a/test/config.yaml b/test/config.yaml new file mode 100644 index 0000000..98b4422 --- /dev/null +++ b/test/config.yaml @@ -0,0 +1,98 @@ +--- +#### GLOBAL PARAMETERS ##### + +# Directories +# Usually there is no need to change these +map_input_dir: "test_files" # For the mapping workflow +quantify_input_dir: "results" # For the quantify workflow +output_dir: "results" +scripts_dir: "../scripts" +local_log: "logs/local" +cluster_log: "logs/cluster" + +# Inputs information +sample: ["test_lib"] + +####################################################################################################### +#### +#### PREPARE PARAMETERS +#### +####################################################################################################### + +# Isomirs annotation +# Number of base pairs to add/subtract from 5' (start) and 3' (end) coordinates.
+bp_5p: [-1, 0, +1] +bp_3p: [-1, 0, +1] + +# List of inputs +organism: ["homo_sapiens/chrY"] + +#### PARAMETERS SPECIFIC TO INPUTS ##### + +homo_sapiens/chrY: + # URLs to genome, gene & miRNA annotations + genome_url: "ftp://ftp.ensembl.org/pub/release-98/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_sm.chromosome.Y.fa.gz" + gtf_url: "ftp://ftp.ensembl.org/pub/release-98/gtf/homo_sapiens/Homo_sapiens.GRCh38.98.gtf.gz" + mirna_url: "https://www.mirbase.org/ftp/CURRENT/genomes/hsa.gff3" + + # Chromosome name mappings between UCSC <-> Ensembl + # Other organisms available at: https://github.com/dpryan79/ChromosomeMappings + map_chr_url: "https://raw.githubusercontent.com/dpryan79/ChromosomeMappings/master/GRCh38_UCSC2ensembl.txt" + # Chromosome name mapping parameters: + column: 1 + delimiter: "TAB" + +####################################################################################################### +#### +#### MAP PARAMETERS +#### +####################################################################################################### + +# Resources: genome, transcriptome, genes, miRs +# All of these are produced by the "prepare" workflow + +genome: "results/homo_sapiens/chrY/genome.processed.fa" +gtf: "results/homo_sapiens/chrY/gene_annotations.filtered.gtf" +transcriptome: "results/homo_sapiens/chrY/transcriptome_idtrim.fa" +transcriptome_index_segemehl: "results/homo_sapiens/chrY/transcriptome_index_segemehl.idx" +genome_index_segemehl: "results/homo_sapiens/chrY/genome_index_segemehl.idx" +exons: "results/homo_sapiens/chrY/exons.bed" +header_of_collapsed_fasta: "results/homo_sapiens/chrY/headerOfCollapsedFasta.sam" + +# Tool parameters: quality filter +q_value: 10 +p_value: 50 + +# Tool parameters: adapter removal +error_rate: 0.1 +minimum_length: 15 +overlap: 3 +max_n: 0 + +# Tool parameters: mapping +max_length_reads: 30 +nh: 100 + + +#### PARAMETERS SPECIFIC TO INPUTS #### + +test_lib: + adapter: "AACTGTAGGCACCATCAAT" + format: "fa" + +####################################################################################################### +#### +#### QUANTIFY PARAMETERS +#### +####################################################################################################### + + +# Types of miRNAs to quantify +#mir_list: ["miRNA", "miRNA_primary_transcript", "isomirs"] +mir_list: ["miRNA", "miRNA_primary_transcript"] + +# Resources: miR annotations, chromosome name mappings +# All of these are produced by the "prepare" workflow +mirnas_anno: "results/homo_sapiens/chrY/mirna_filtered.bed" +isomirs_anno: "results/homo_sapiens/chrY/isomirs_annotation.bed" +... 
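The bp_5p/bp_3p lists in the PREPARE section of this new config parameterise the iso_anno rule, which generates shifted copies of each mature miRNA interval with bedtools slop -l {bp_5p} -r {bp_3p}. Since the rule does not pass -s, the offsets are applied in genome orientation (subtracted from the start, added to the end) irrespective of the feature's strand. A plain-Python sketch of that interval arithmetic, clamped to chromosome bounds as slop clamps against the -g genome file:

def slop(start, end, bp_5p, bp_3p, chrom_size):
    """Mimic `bedtools slop -l bp_5p -r bp_3p` without -s on a 0-based
    half-open interval; negative values shrink instead of extend."""
    new_start = max(0, start - bp_5p)
    new_end = min(chrom_size, end + bp_3p)
    return new_start, new_end

# The nine (bp_5p, bp_3p) combinations from [-1, 0, +1] x [-1, 0, +1]
# yield nine shifted annotations per miRNA; the unshifted 5p0_3p0 copy
# is removed again by the iso_anno_final rule.
assert slop(100, 122, 1, -1, 5000) == (99, 121)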
\ No newline at end of file -- GitLab From 5f7e5d53b19af368e5b29304f26762acb16eb788 Mon Sep 17 00:00:00 2001 From: Iris Mestres Pascual <iris.mestrespascual@unibas.ch> Date: Tue, 17 Jan 2023 17:36:03 +0000 Subject: [PATCH 16/22] Replace test_workflow_local.sh --- test/test_workflow_local.sh | 47 +++++-------------------------------- 1 file changed, 6 insertions(+), 41 deletions(-) diff --git a/test/test_workflow_local.sh b/test/test_workflow_local.sh index e59a3f4..b8efa82 100755 --- a/test/test_workflow_local.sh +++ b/test/test_workflow_local.sh @@ -16,56 +16,21 @@ user_dir=$PWD script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)" cd $script_dir -# Run test: prepare workflow +# Run test snakemake \ - --snakefile="../workflow/prepare/Snakefile" \ - --configfile="config_prepare.yaml" \ + --snakefile="../workflow/Snakefile" \ + --cores 4 \ --use-singularity \ --singularity-args "--bind ${PWD}/../" \ - --cores=4 \ --printshellcmds \ --rerun-incomplete \ --verbose -# Run test: map workflow -snakemake \ - --snakefile="../workflow/map/Snakefile" \ - --configfile="config_map.yaml" \ - --use-singularity \ - --singularity-args "--bind ${PWD}/../" \ - --cores=4 \ - --printshellcmds \ - --rerun-incomplete \ - --verbose - -# Run test: quantify workflow -snakemake \ - --snakefile="../workflow/quantify/Snakefile" \ - --configfile="config_quantify.yaml" \ - --use-singularity \ - --singularity-args "--bind ${PWD}/../" \ - --cores=4 \ - --printshellcmds \ - --rerun-incomplete \ - --verbose - -# Snakemake report: prepare workflow -snakemake \ - --snakefile="../workflow/prepare/Snakefile" \ - --configfile="config_prepare.yaml" \ - --report="snakemake_report_prepare.html" - -# Snakemake report: map workflow -snakemake \ - --snakefile="../workflow/map/Snakefile" \ - --configfile="config_map.yaml" \ - --report="snakemake_report_map.html" -# Snakemake report: quantify workflow +# Snakemake report snakemake \ - --snakefile="../workflow/quantify/Snakefile" \ - --configfile="config_quantify.yaml" \ - --report="snakemake_report_quantify.html" + --snakefile="../workflow/Snakefile" \ + --report="snakemake_report.html" # Check md5 sum of some output files find results/ -type f -name \*\.gz -exec gunzip '{}' \; -- GitLab From 08ecdc9a45398e8f562c8b6c7c3c5512c9834eb7 Mon Sep 17 00:00:00 2001 From: Iris Mestres Pascual <iris.mestrespascual@unibas.ch> Date: Tue, 17 Jan 2023 17:36:46 +0000 Subject: [PATCH 17/22] Replace test_workflow_slurm.sh --- test/test_workflow_slurm.sh | 72 ++++--------------------------------- 1 file changed, 6 insertions(+), 66 deletions(-) diff --git a/test/test_workflow_slurm.sh b/test/test_workflow_slurm.sh index 0c50de3..a1e1925 100755 --- a/test/test_workflow_slurm.sh +++ b/test/test_workflow_slurm.sh @@ -21,56 +21,10 @@ mkdir -p logs/cluster/{homo_sapiens/chrY,test_lib} mkdir -p logs/local/{homo_sapiens/chrY,test_lib} mkdir -p results/{homo_sapiens/chrY,test_lib} -# Run test: prepare workflow +# Run test snakemake \ - --snakefile="../workflow/prepare/Snakefile" \ - --configfile="config_prepare.yaml" \ - --cluster-config="cluster.json" \ - --cluster "sbatch \ - --cpus-per-task={cluster.threads} \ - --mem={cluster.mem} \ - --qos={cluster.queue} \ - --time={cluster.time} \ - --export=JOB_NAME={rule} \ - -o {params.cluster_log} \ - -p scicore \ - --open-mode=append" \ - --jobscript="../jobscript.sh" \ - --jobs=20 \ - --use-singularity \ - --singularity-args="--no-home --bind ${PWD}/../" \ - --cores=256 \ - --printshellcmds \ - --rerun-incomplete \ - --verbose - -# Run test: 
map workflow -snakemake \ - --snakefile="../workflow/map/Snakefile" \ - --configfile="config_map.yaml" \ - --cluster-config="cluster.json" \ - --cluster "sbatch \ - --cpus-per-task={cluster.threads} \ - --mem={cluster.mem} \ - --qos={cluster.queue} \ - --time={cluster.time} \ - --export=JOB_NAME={rule} \ - -o {params.cluster_log} \ - -p scicore \ - --open-mode=append" \ - --jobscript="../jobscript.sh" \ - --jobs=20 \ - --use-singularity \ - --singularity-args="--no-home --bind ${PWD}/../" \ --cores=256 \ - --printshellcmds \ - --rerun-incomplete \ - --verbose - -# Run test: quantify workflow -snakemake \ - --snakefile="../workflow/quantify/Snakefile" \ - --configfile="config_quantify.yaml" \ --cluster-config="cluster.json" \ --cluster "sbatch \ --cpus-per-task={cluster.threads} \ --mem={cluster.mem} \ --qos={cluster.queue} \ --time={cluster.time} \ --export=JOB_NAME={rule} \ -o {params.cluster_log} \ -p scicore \ --open-mode=append" \ --jobscript="../jobscript.sh" \ --jobs=20 \ --use-singularity \ - --singularity-args="--no-home --bind ${PWD}/../" \ - --cores=256 \ + --singularity-args="--bind ${PWD}/../" \ --printshellcmds \ --rerun-incomplete \ --verbose -# Snakemake report: prepare workflow -snakemake \ - --snakefile="../workflow/prepare/Snakefile" \ - --configfile="config_prepare.yaml" \ - --report="snakemake_report_prepare.html" - -# Snakemake report: map workflow -snakemake \ - --snakefile="../workflow/map/Snakefile" \ - --configfile="config_map.yaml" \ - --report="snakemake_report_map.html" - -# Snakemake report: quantify workflow +# Snakemake report snakemake \ - --snakefile="../workflow/quantify/Snakefile" \ - --configfile="config_quantify.yaml" \ - --report="snakemake_report_quantify.html" + --snakefile="../workflow/Snakefile" \ + --report="snakemake_report.html" # Check md5 sum of some output files find results/ -type f -name \*\.gz -exec gunzip '{}' \; -- GitLab From 37f9af9be3734f2e84e63535439555d5a602a628 Mon Sep 17 00:00:00 2001 From: Iris Mestres Pascual <iris.mestrespascual@unibas.ch> Date: Tue, 17 Jan 2023 17:38:48 +0000 Subject: [PATCH 18/22] Replace test_rule_graph.sh --- test/test_rule_graph.sh | 28 ++++------------------ 1 file changed, 4 insertions(+), 24 deletions(-) diff --git a/test/test_rule_graph.sh b/test/test_rule_graph.sh index 3b0b235..66ca559 100755 --- a/test/test_rule_graph.sh +++ b/test/test_rule_graph.sh @@ -16,32 +16,12 @@ user_dir=$PWD script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)" cd $script_dir -# Run test: prepare workflow +# Run test snakemake \ - --snakefile="../workflow/prepare/Snakefile" \ - --configfile="config_prepare.yaml" \ + --snakefile="../workflow/Snakefile" \ + --configfile="config.yaml" \ --rulegraph \ --printshellcmds \ --dryrun \ --verbose \ - | dot -Tsvg > "../images/rule_graph_prepare.svg" - -# Run test: map workflow -snakemake \ - --snakefile="../workflow/map/Snakefile" \ - --configfile="config_map.yaml" \ - --rulegraph \ - --printshellcmds \ - --dryrun \ - --verbose \ - | dot -Tsvg > "../images/rule_graph_map.svg" - -# Run test: quantify workflow -snakemake \ - --snakefile="../workflow/quantify/Snakefile" \ - --configfile="config_quantify.yaml" \ - --rulegraph \ - --printshellcmds \ - --dryrun \ - --verbose \ - | dot -Tsvg > "../images/rule_graph_quantify.svg" + | dot -Tsvg > "../images/rule_graph.svg" -- GitLab From cb3756a133705ca00479ed71d9bd13747e886e86 Mon Sep 17 00:00:00 2001 From: Iris Mestres Pascual <iris.mestrespascual@unibas.ch> Date: Tue, 17 Jan 2023 17:40:04 +0000 Subject: [PATCH 19/22] Replace test_dag.sh --- test/test_dag.sh | 28 ++++------------------------ 1 file
changed, 4 insertions(+), 24 deletions(-) diff --git a/test/test_dag.sh b/test/test_dag.sh index f472b15..de93730 100755 --- a/test/test_dag.sh +++ b/test/test_dag.sh @@ -16,32 +16,12 @@ user_dir=$PWD script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)" cd $script_dir -# Run test: prepare workflow +# Run test snakemake \ - --snakefile="../workflow/prepare/Snakefile" \ - --configfile="config_prepare.yaml" \ + --snakefile="../workflow/Snakefile" \ + --configfile="config.yaml" \ --dag \ --printshellcmds \ --dryrun \ --verbose \ - | dot -Tsvg > "../images/workflow_dag_prepare.svg" - -# Run test: map workflow -snakemake \ - --snakefile="../workflow/map/Snakefile" \ - --configfile="config_map.yaml" \ - --dag \ - --printshellcmds \ - --dryrun \ - --verbose \ - | dot -Tsvg > "../images/workflow_dag_map.svg" - -# Run test: quantify workflow -snakemake \ - --snakefile="../workflow/quantify/Snakefile" \ - --configfile="config_quantify.yaml" \ - --dag \ - --printshellcmds \ - --dryrun \ - --verbose \ - | dot -Tsvg > "../images/workflow_dag_quantify.svg" + | dot -Tsvg > "../images/workflow_dag.svg" \ No newline at end of file -- GitLab From 1d90d9ebead171a0d70d67168ad7ec935a9e9bec Mon Sep 17 00:00:00 2001 From: Alex Kanitz <alexander.kanitz@alumni.ethz.ch> Date: Thu, 19 Jan 2023 15:25:33 +0100 Subject: [PATCH 20/22] simple change to trigger CI --- test/test_workflow_local.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_workflow_local.sh b/test/test_workflow_local.sh index b8efa82..8c87878 100755 --- a/test/test_workflow_local.sh +++ b/test/test_workflow_local.sh @@ -5,6 +5,7 @@ cleanup () { rc=$? cd $user_dir echo "Exit status: $rc" + rm -rf .snakemake } trap cleanup EXIT -- GitLab From e8e8dec3010f5fc26290a9dbffd45509b166cdad Mon Sep 17 00:00:00 2001 From: Alex Kanitz <alexander.kanitz@alumni.ethz.ch> Date: Fri, 20 Jan 2023 13:02:17 +0100 Subject: [PATCH 21/22] temporarily deactivate CI --- .gitlab-ci.yml | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 8a55175..69d0806 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,18 +1,18 @@ -default: - tags: - - shell - -before_script: - - mamba env create --force -f environment.yml - - conda activate mirflowz - - echo $CONDA_DEFAULT_ENV - - snakemake --version - -test: - script: - - bash test/test_workflow_local.sh - - bash test/test_dag.sh - - bash test/test_rule_graph.sh - -after_script: - - bash test/test_cleanup.sh +# default: +# tags: +# - shell +# +# before_script: +# - mamba env create --force -f environment.yml +# - conda activate mirflowz +# - echo $CONDA_DEFAULT_ENV +# - snakemake --version +# +# test: +# script: +# - bash test/test_workflow_local.sh +# - bash test/test_dag.sh +# - bash test/test_rule_graph.sh +# +# after_script: +# - bash test/test_cleanup.sh -- GitLab From b3c38a2164067729386329bd48cb6afd03977228 Mon Sep 17 00:00:00 2001 From: Alex Kanitz <alexander.kanitz@alumni.ethz.ch> Date: Fri, 20 Jan 2023 13:02:53 +0100 Subject: [PATCH 22/22] temporarily deactivate CI --- .gitlab-ci.yml | 18 ------------------ 1 file changed, 18 deletions(-) delete mode 100644 .gitlab-ci.yml diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml deleted file mode 100644 index 69d0806..0000000 --- a/.gitlab-ci.yml +++ /dev/null @@ -1,18 +0,0 @@ -# default: -# tags: -# - shell -# -# before_script: -# - mamba env create --force -f environment.yml -# - conda activate mirflowz -# - echo $CONDA_DEFAULT_ENV -# - snakemake --version -# -# 
test: -# script: -# - bash test/test_workflow_local.sh -# - bash test/test_dag.sh -# - bash test/test_rule_graph.sh -# -# after_script: -# - bash test/test_cleanup.sh -- GitLab
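Both replaced test scripts end by gunzipping the compressed outputs and checking md5 sums of selected result files; the hunks above show only the gunzip step of that block. A generic sketch of such a verification against an md5sum-style manifest follows; the manifest file name here is hypothetical.

import hashlib
import sys

def verify_md5(manifest="expected_output.md5"):
    """Compare files against a manifest of '<hex digest>  <path>' lines,
    as produced by `md5sum`; return True only if every file matches."""
    ok = True
    with open(manifest) as handle:
        for line in handle:
            digest, path = line.split(maxsplit=1)
            path = path.strip()
            with open(path, "rb") as data:
                if hashlib.md5(data.read()).hexdigest() != digest:
                    print(f"md5 mismatch: {path}", file=sys.stderr)
                    ok = False
    return ok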