Skip to content
Snippets Groups Projects
single_end.snakefile.smk 11.1 KiB
Newer Older
# Trim adapters from single-end reads with cutadapt (see the shell command).
# NOTE(review): no `input:`, `output:`, `params:`, `singularity:` or `log:`
# keywords are visible in this rule, and the first two assignments both bind
# `reads` -- presumably directive lines were lost; confirm against the
# upstream file before relying on this text.
rule remove_adapters_cutadapt:
        # Raw reads: <output_dir>/samples/{sample}/start/{sample}.fq1.fastq.gz
        reads = os.path.join(
            config["output_dir"],
            "samples",
            "{sample}",
            "start",
            "{sample}.fq1.fastq.gz")
        # Adapter-trimmed reads, wrapped in temp(); presumably the
        # {output.reads} target written by `-o` in the shell command.
        reads = temp(os.path.join(
            "samples",
            "{sample}.se.remove_adapters_mate1.fastq.gz"))
            # Per-sample lookups of the 'fq1_3p' and 'fq1_5p' fields through
            # get_sample(); presumably these feed {params.adapters_3} and
            # {params.adapters_5} -- the binding lines are not visible here.
            get_sample(
                'fq1_3p',
                search_id='index',
                search_value=wildcards.sample),
            get_sample(
                'fq1_5p',
                search_id='index',
                search_value=wildcards.sample)
# NOTE(review): the next two lines look like web-page blame residue rather
# than Snakemake code -- confirm and remove.
Alex Kanitz's avatar
Alex Kanitz committed
        # Container image URI for cutadapt 1.16.
        "docker://zavolab/cutadapt:1.16-slim"
            # Path fragments of the stderr and stdout log files.
            "samples",
            "remove_adapters_cutadapt.se.stderr.log"),
            "samples",
            "remove_adapters_cutadapt.se.stdout.log")
    # cutadapt call: {threads} worker cores (-j), minimum read length 10 (-m),
    # 3' adapter (-a), 5' adapter (-g), output file (-o), then the input reads.
    # NOTE(review): the command string below contains two residue lines and
    # has no visible closing quote or log redirection -- confirm.
    shell:
        "(cutadapt \
        -j {threads} \
        -m 10 \
        -a {params.adapters_3} \
        -g {params.adapters_5} \
        -o {output.reads} \
BIOPZ-Bak Maciej's avatar
BIOPZ-Bak Maciej committed
        {input.reads}) \
            "samples",
            "{sample}.se.remove_adapters_mate1.fastq.gz")
        # Fragment of a poly(A)-trimming rule (log names below say
        # "remove_polya_cutadapt"). NOTE(review): the `rule ...:` header and
        # the `input:`/`output:`/`params:`/`log:` keywords are not visible;
        # the two path lines immediately above presumably belong to this
        # rule's input -- confirm against the upstream file.
        # Poly(A)-trimmed reads, wrapped in temp().
        reads = temp(os.path.join(
            "samples",
            "{sample}.se.remove_polya_mate1.fastq.gz"))
            # Lookup of the 'fq1_polya_3p' field; presumably bound to
            # params.polya_3 (binding line not visible).
            get_sample(
                'fq1_polya_3p',
                search_id='index',
                search_value=wildcards.sample),
        # 5' poly(A) sequence for this sample, resolved per wildcard.
        polya_5 = lambda wildcards:
            get_sample(
                'fq1_polya_5p',
                search_id='index',
                search_value=wildcards.sample)
# NOTE(review): the next two lines look like web-page blame residue rather
# than Snakemake code -- confirm and remove.
Alex Kanitz's avatar
Alex Kanitz committed
        # Container image URI for cutadapt 1.16.
        "docker://zavolab/cutadapt:1.16-slim"
            # Path fragments of the stderr and stdout log files.
            "samples",
            "remove_polya_cutadapt.se.stderr.log"),
            "samples",
            "remove_polya_cutadapt.se.stdout.log")
    # cutadapt call: {threads} worker cores (-j), minimum adapter overlap 1
    # (-O), minimum read length 10 (-m), 3'/5' poly(A) sequences (-a/-g).
    # NOTE(review): no `-o {output.reads}` option, closing quote or log
    # redirection is visible in the command string -- confirm.
    shell:
        "(cutadapt \
        -j {threads} \
        -O 1 \
        -m 10  \
        -a {params.polya_3} \
        -g {params.polya_5} \
        {input.reads};) \
    # Inputs, parameters and log path of a STAR alignment rule (the log name
    # below says "map_genome_star"). NOTE(review): the `rule ...:` header and
    # the `output:`, `singularity:` and `log:` keywords are not visible --
    # confirm against the upstream file.
    input:
        # chrNameLength.txt inside the STAR index directory selected by the
        # sample's 'organism' and 'index_size' fields.
        index = lambda wildcards:
            os.path.join(
                config["star_indexes"],
                get_sample('organism', search_id='index', search_value=wildcards.sample),
                get_sample('index_size', search_id='index', search_value=wildcards.sample),
                "STAR_index",
                "chrNameLength.txt"),
        # Poly(A)-trimmed reads to align.
        reads = os.path.join(
            config["output_dir"],
            "samples",
            "{sample}.se.remove_polya_mate1.fastq.gz")
            # Presumably the rule's outputs (sorted BAM and STAR's final log);
            # the `output:` keyword and the names they bind to are not visible.
            "samples",
            "{sample}.se.Aligned.sortedByCoord.out.bam"),
            "samples",
            "{sample}.se.Log.final.out")
    params:
        # Sample wildcard, used for the read-group SM tag in the shell command.
        sample_id = "{sample}",
        # Absolute path handed to --genomeDir.
        # NOTE(review): the parentheses of this expression are not balanced in
        # the visible text -- lines appear to be missing.
        index = lambda wildcards:
            os.path.abspath(os.path.join(
                get_sample('organism', search_id='index', search_value=wildcards.sample),
                get_sample('index_size', search_id='index', search_value=wildcards.sample),
        # Prefix handed to --outFileNamePrefix.
        outFileNamePrefix = os.path.join(
            "samples",
            "{sample}.se."),
        # Value handed to --outFilterMultimapNmax.
        multimappers = lambda wildcards:
            get_sample(
                'multimappers',
                search_id='index',
                search_value=wildcards.sample),
        # Value handed to --alignEndsType.
        soft_clip = lambda wildcards:
            get_sample(
                'soft_clip',
                search_id='index',
                search_value=wildcards.sample),
        # Value handed to the two-pass mode option.
        pass_mode = lambda wildcards:
            get_sample(
                'pass_mode',
                search_id='index',
                search_value=wildcards.sample)
# NOTE(review): the next two lines look like web-page blame residue rather
# than Snakemake code -- confirm and remove.
Alex Kanitz's avatar
Alex Kanitz committed
        # Container image URI for STAR 2.7.3a.
        "docker://zavolab/star:2.7.3a-slim"
            # Path fragment of the stderr log, presumably {log.stderr}.
            "samples",
            "map_genome_star.se.stderr.log")
    shell:
        "(STAR \
        -- twopassMode {params.pass_mode} \
        --runThreadN {threads} \
        --genomeDir {params.index} \
        --readFilesIn {input.reads} \
        --readFilesCommand zcat \
        --outFilterMultimapNmax {params.multimappers} \
        --outFilterMultimapScoreRange 0 \
        --outFileNamePrefix {params.outFileNamePrefix} \
        --outSAMattributes All \
        --outStd BAM_SortedByCoordinate \
        --outSAMtype BAM SortedByCoordinate \
        --outFilterType BySJout \
        --outSAMattrRGline ID:rnaseq_pipeline SM:{params.sample_id} \
        --alignEndsType {params.soft_clip} > {output.bam};) \
        2> {log.stderr}"
    '''
        Quantification at transcript and gene level using Salmon
    '''
    # NOTE(review): the `rule ...:` header, the `input:` and `log:` keywords
    # and several bindings (input.reads, input.gtf, params.libType,
    # params.fraglen, params.fragsd) are referenced by the shell command but
    # are not visible in this fragment -- confirm against the upstream file.
            # Presumably the poly(A)-trimmed reads bound to input.reads.
            "samples",
            "{sample}.se.remove_polya_mate1.fastq.gz"),
        # Salmon index directory selected by the sample's 'organism' and
        # 'kmer' fields.
        # NOTE(review): the parentheses below are not balanced in the visible
        # text -- lines appear to be missing.
        index = lambda wildcards:
            os.path.join(
                config["salmon_indexes"],
                get_sample(
                    'organism',
                    search_id='index',
                    search_value=wildcards.sample),
                get_sample(
                    'kmer',
                    search_id='index',
                    search_value=wildcards.sample),
            os.path.abspath(get_sample(
                search_value=wildcards.sample))
    output:
        # NOTE(review): gn_estimates and tr_estimates are built without
        # config["output_dir"] and the "{sample}" directory, unlike meta_info
        # and flenDist below -- confirm whether path components were lost.
        gn_estimates = os.path.join(
            "samples",
            "{sample}.salmon.se",
            "quant.genes.sf"),
        tr_estimates = os.path.join(
            "samples",
            "{sample}.salmon.se",
            "quant.sf"),
        meta_info = os.path.join(
            config["output_dir"],
            "samples",
            "{sample}",
            "{sample}.salmon.se",
            "aux_info",
            "meta_info.json"),
        flenDist = os.path.join(
            config["output_dir"],
            "samples",
            "{sample}",
            "{sample}.salmon.se",
            "libParams",
            "flenDist.txt")

    # Snakemake shadow-directory mode "minimal".
    shadow: "minimal"
    params:
        # Directory passed to salmon's -o option.
        output_dir = os.path.join(
            "samples",
            "{sample}.salmon.se"),
            # Lookups of the 'libtype', 'mean' and 'sd' fields; presumably
            # bound to params.libType, params.fraglen and params.fragsd
            # (binding lines not visible).
            get_sample(
                'libtype',
                search_id='index',
                search_value=wildcards.sample),
            get_sample(
                'mean',
                search_id='index',
                search_value=wildcards.sample),
            get_sample(
                'sd',
                search_id='index',
                search_value=wildcards.sample)
            # Path fragments of the stderr and stdout log files.
            "samples",
            "quantification_salmon.se.stderr.log"),
            "samples",
            "quantification_salmon.se.stdout.log")
    # Container image URI for salmon 1.1.0.
    singularity:
        "docker://zavolab/salmon:1.1.0-slim"
    # Runs `salmon quant` on the unmated (single-end) reads and writes into
    # {params.output_dir}; stdout and stderr go to the two log files.
    shell:
        "(salmon quant \
        --libType {params.libType} \
        --seqBias \
        --validateMappings \
        --threads {threads} \
        --fldMean {params.fraglen} \
        --fldSD {params.fragsd} \
        --index {input.index} \
        --geneMap {input.gtf} \
        --unmatedReads {input.reads} \
        -o {params.output_dir};) \
        1> {log.stdout} 2> {log.stderr}"

# Single-end quantification with `kallisto quant` (see the shell command).
# NOTE(review): the `input:`, `log:` and `singularity:` keywords and the
# bindings for params.fragsd and params.directionality are referenced or
# implied but not visible in this rule -- confirm against the upstream file.
rule genome_quantification_kallisto:
    '''
        Quantification at transcript and gene level using Kallisto
    '''
            # Presumably the poly(A)-trimmed reads bound to input.reads.
            "samples",
            "{sample}.se.remove_polya_mate1.fastq.gz"),
        # Kallisto index location selected by the sample's 'organism' field.
        # NOTE(review): the os.path.join(...) call is not closed in the
        # visible text -- lines appear to be missing.
        index = lambda wildcards:
            os.path.join(
                config["kallisto_indexes"],
                get_sample(
                    'organism',
                    search_id='index',
                    search_value=wildcards.sample),
    output:
        # NOTE(review): pseudoalignment is built without config["output_dir"]
        # and the "{sample}" directory, unlike abundances below -- confirm
        # whether path components were lost.
        pseudoalignment = os.path.join(
            "samples",
            "{sample}.se.kallisto.pseudo.sam"),
        abundances = os.path.join(
            config["output_dir"],
            "samples",
            "{sample}",
            "quant_kallisto",
            "abundance.h5")

    # Snakemake shadow-directory mode "minimal".
    shadow: "minimal"
    params:
        # Directory passed to kallisto's -o option.
        output_dir = os.path.join(
            "samples",
            "{sample}",
            "quant_kallisto"),
        # Value passed to -l in the shell command.
        fraglen = lambda wildcards:
            get_sample(
                'mean',
                search_id='index',
                search_value=wildcards.sample),
            # Lookups of the 'sd' and 'kallisto_directionality' fields;
            # presumably bound to params.fragsd and params.directionality
            # (binding lines not visible).
            get_sample(
                'sd',
                search_id='index',
                search_value=wildcards.sample),
            get_sample(
                'kallisto_directionality',
                search_id='index',
                search_value=wildcards.sample)
            # Path fragment of the stderr log, presumably {log.stderr}.
            "samples",
            "genome_quantification_kallisto.se.stderr.log")
# NOTE(review): the next two lines look like web-page blame residue rather
# than Snakemake code -- confirm and remove.
Alex Kanitz's avatar
Alex Kanitz committed
        # Container image URI for kallisto 0.46.1.
        "docker://zavolab/kallisto:0.46.1-slim"
    # kallisto call in single-end mode (--single) with fragment length -l and
    # standard deviation -s; --pseudobam requests pseudoalignment output.
    # NOTE(review): the command string below contains two residue lines, its
    # opening "(" is never closed, and neither {input.reads} nor a redirect
    # into {output.pseudoalignment} is visible -- confirm.
    shell:
        "(kallisto quant \
        -i {input.index} \
        -o {params.output_dir} \
        --single \
        -l {params.fraglen} \
        -s {params.fragsd} \
        --pseudobam \
        {params.directionality}-stranded \
Dominik Burri's avatar
Dominik Burri committed
        2> {log.stderr}"