Skip to content
Snippets Groups Projects
Snakefile 35.2 KiB
Newer Older
        alfa_orientation = lambda wildcards:
            [samples_table.loc[
                wildcards.sample, "alfa_directionality"]],
        genome_index = lambda wildcards, input:
            os.path.abspath(
                os.path.join(
                    os.path.dirname(input.gtf),
                    "sorted_genes")),
Dominik Burri's avatar
Dominik Burri committed
        name = "{sample}"

    singularity:
        "docker://zavolab/alfa:1.1.1-slim"
Dominik Burri's avatar
Dominik Burri committed

        os.path.join(
            config["log_dir"],
            "samples",
            "{sample}",
            "alfa_qc.{unique}.log")
Dominik Burri's avatar
Dominik Burri committed

    shell:
        "(cd {params.out_dir}; \
        alfa \
        -g {params.genome_index} \
        --bedgraph {params.plus} {params.minus} {params.name} \
        -s {params.alfa_orientation}) &> {log}"
# cd {params.out_dir};
Dominik Burri's avatar
Dominik Burri committed
rule alfa_qc_all_samples:
    '''
        Run ALFA from stranded bedgraph files on all samples
    '''
Dominik Burri's avatar
Dominik Burri committed
    input:
        tables = lambda wildcards:
            expand(
                os.path.join(
                    config["output_dir"],
                    "samples",
                    "{sample}",
                    "ALFA",
                    "{unique}",
                    "{sample}.ALFA_feature_counts.tsv"),
                sample=samples_table.index.values,
                unique=wildcards.unique)
Dominik Burri's avatar
Dominik Burri committed
    output:
        biotypes = os.path.join(
            config["output_dir"],
            "ALFA",
            "{unique}",
Dominik Burri's avatar
Dominik Burri committed
            "ALFA_plots.Biotypes.pdf"),
        categories = os.path.join(
            config["output_dir"],
            "ALFA",
            "{unique}",
Dominik Burri's avatar
Dominik Burri committed
            "ALFA_plots.Categories.pdf")

    params:
        out_dir = lambda wildcards, output:
            os.path.dirname(output.biotypes)
Dominik Burri's avatar
Dominik Burri committed

    log:
        os.path.join(
            config["log_dir"],
            "alfa_qc_all_samples.{unique}.log")
Dominik Burri's avatar
Dominik Burri committed

    singularity:
        "docker://zavolab/alfa:1.1.1-slim"
Dominik Burri's avatar
Dominik Burri committed

    shell:
        "(alfa -c {input.tables} -o {params.out_dir}) &> {log}"
rule alfa_concat_results:
    input:
        expand(
            os.path.join(
                config["output_dir"],
                "ALFA",
                "{unique}",
                "ALFA_plots.{annotation}.pdf"),
            unique=["Unique", "UniqueMultiple"],
            annotation=["Categories", "Biotypes"])
        os.path.join(
            config["output_dir"],
            "ALFA",
            "ALFA_plots_mqc.png")

    params:
        density = 300

    log:
        os.path.join(
            config["log_dir"],
            "alfa_qc_all_samples.concat.log")

    singularity:
        "docker://zavolab/imagemagick:7.0.8"
        "(convert -append -density {params.density} \
            {input} {output}) &> {log}"


rule prepare_multiqc_config:
    '''
        Prepare config for the MultiQC
    '''
    input:
        script = os.path.join(
            workflow.basedir,
            "workflow",
            "scripts",
            "rhea_multiqc_config.py")

    output:
        multiqc_config = os.path.join(
            config["output_dir"],
            "multiqc_config.yaml")

    params:
        logo_path = config['logo'],
        multiqc_intro_text = config['multiqc_intro_text'],
        url = config['multiqc_url']

    log:
        stderr = os.path.join(
            config["log_dir"],
            "prepare_multiqc_config.stderr.log"),
        stdout = os.path.join(
            config["log_dir"],
            "prepare_multiqc_config.stdout.log")

    shell:
        "(python {input.script} \
        --config {output.multiqc_config} \
        --intro-text '{params.multiqc_intro_text}' \
        --custom-logo {params.logo_path} \
        --url '{params.url}') \
        1> {log.stdout} 2> {log.stderr}"
rule multiqc_report:
BIOPZ-Bak Maciej's avatar
BIOPZ-Bak Maciej committed
    '''
        Create report with MultiQC
BIOPZ-Bak Maciej's avatar
BIOPZ-Bak Maciej committed
    '''
    input:
        fastqc_se = expand(
BIOPZ-Bak Maciej's avatar
BIOPZ-Bak Maciej committed
            os.path.join(
                config['output_dir'],
                "samples",
BIOPZ-Bak Maciej's avatar
BIOPZ-Bak Maciej committed
                "{sample}",
                "fastqc",
                "{mate}"),
            sample=samples_table.index.values,
            mate="fq1"),

        fastqc_pe = expand(
            os.path.join(
                config['output_dir'],
                "samples",
                "{sample}",
                "fastqc",
                "{mate}"),
            sample=[i for i in list(
                samples_table[samples_table['seqmode'] == 'pe'].index.values)],
            mate="fq2"),

BIOPZ-Bak Maciej's avatar
BIOPZ-Bak Maciej committed
        pseudoalignment = expand(
            os.path.join(
                config['output_dir'],
                "samples",
BIOPZ-Bak Maciej's avatar
BIOPZ-Bak Maciej committed
                "{sample}",
                "quant_kallisto",
                "{sample}.{seqmode}.kallisto.pseudo.sam"),
BIOPZ-Bak Maciej's avatar
BIOPZ-Bak Maciej committed
            zip,
            sample=[i for i in list(samples_table.index.values)],
            seqmode=[samples_table.loc[i, 'seqmode']
                     for i in list(samples_table.index.values)]),
BIOPZ-Bak Maciej's avatar
BIOPZ-Bak Maciej committed
        TIN_boxplot_PNG = os.path.join(
            config['output_dir'],
            "TIN_scores_boxplot_mqc.png"),
BIOPZ-Bak Maciej's avatar
BIOPZ-Bak Maciej committed
        TIN_boxplot_PDF = os.path.join(
            config['output_dir'],
            "TIN_scores_boxplot_mqc.pdf"),
        alfa_concat_out = os.path.join(
BIOPZ-Bak Maciej's avatar
BIOPZ-Bak Maciej committed
            config["output_dir"],
            "ALFA",
            "ALFA_plots_mqc.png"),
BIOPZ-Bak Maciej's avatar
BIOPZ-Bak Maciej committed

        multiqc_config = os.path.join(
            "multiqc_config.yaml")

    output:
        multiqc_report = directory(
            os.path.join(
                config["output_dir"],
                "multiqc_summary"))

BIOPZ-Bak Maciej's avatar
BIOPZ-Bak Maciej committed
    params:
        results_dir = os.path.join(
            config["output_dir"]),
        log_dir = config["log_dir"]

BIOPZ-Bak Maciej's avatar
BIOPZ-Bak Maciej committed
    log:
        stderr = os.path.join(
            config["log_dir"],
            "multiqc_report.stderr.log"),
        stdout = os.path.join(
            config["log_dir"],
            "multiqc_report.stdout.log")
BIOPZ-Bak Maciej's avatar
BIOPZ-Bak Maciej committed

    singularity:
        "docker://ewels/multiqc:1.7"
BIOPZ-Bak Maciej's avatar
BIOPZ-Bak Maciej committed

    shell:
        "(multiqc \
        --outdir {output.multiqc_report} \
        --config {input.multiqc_config} \
        {params.results_dir} \
        {params.log_dir};) \
        1> {log.stdout} 2> {log.stderr}"
BIOPZ-Bak Maciej's avatar
BIOPZ-Bak Maciej committed

rule sort_bed_4_big:
BIOPZ-Bak Maciej's avatar
BIOPZ-Bak Maciej committed
    '''
        sort bedGraphs in order to work with bedGraphtobigWig
BIOPZ-Bak Maciej's avatar
BIOPZ-Bak Maciej committed
    '''
    input:
        bg = os.path.join(
            "samples",
            "{sample}",
            "ALFA",
            "{unique}",
            "{sample}.{unique}.{strand}.bg")

BIOPZ-Bak Maciej's avatar
BIOPZ-Bak Maciej committed
    output:
        sorted_bg = os.path.join(
            config["output_dir"],
            "samples",
            "{sample}",
            "bigWig",
            "{unique}",
            "{sample}_{unique}_{strand}.sorted.bg")

    singularity:
        "docker://cjh4zavolab/bedtools:2.27"
BIOPZ-Bak Maciej's avatar
BIOPZ-Bak Maciej committed
    log:
        stderr = os.path.join(
            config["log_dir"],
            "samples",
            "{sample}",
            "sort_bg_{unique}_{strand}.stderr.log")

    shell:
        "(sortBed \
         -i {input.bg} \
         > {output.sorted_bg};) 2> {log.stderr}"

rule prepare_bigWig:
BIOPZ-Bak Maciej's avatar
BIOPZ-Bak Maciej committed
    '''
        bedGraphtobigWig, for viewing in genome browsers
BIOPZ-Bak Maciej's avatar
BIOPZ-Bak Maciej committed
    '''
    input:
        sorted_bg = os.path.join(
            config["output_dir"],
            "samples",
            "{sample}",
            "bigWig",
            "{unique}",
            "{sample}_{unique}_{strand}.sorted.bg"),
        chr_sizes = lambda wildcards:
            os.path.join(
                config['star_indexes'],
                samples_table.loc[wildcards.sample, "organism"],
                str(samples_table.loc[wildcards.sample, "index_size"]),
                "STAR_index",
                "chrNameLength.txt")

BIOPZ-Bak Maciej's avatar
BIOPZ-Bak Maciej committed
    output:
        bigWig = os.path.join(
            config["output_dir"],
            "samples",
            "{sample}",
            "bigWig",
            "{unique}",
            "{sample}_{unique}_{strand}.bw")

    singularity:
        "docker://zavolab/bedgraphtobigwig:4-slim"
BIOPZ-Bak Maciej's avatar
BIOPZ-Bak Maciej committed
    log:
        stderr = os.path.join(
            config["log_dir"],
            "samples",
            "{sample}",
            "bigwig_{unique}_{strand}.stderr.log"),
        stdout = os.path.join(
            config["log_dir"],
            "samples",
            "{sample}",
            "bigwig_{unique}_{strand}.stdout.log")
BIOPZ-Bak Maciej's avatar
BIOPZ-Bak Maciej committed
    shell:
        "(bedGraphToBigWig \
         {input.sorted_bg} \
         {input.chr_sizes} \
         {output.bigWig};) \
         1> {log.stdout} 2> {log.stderr}"