Commit ff0a39fa, authored by BIOPZ-Katsantoni Maria, committed by BIOPZ-Gypas Foivos

Separate stdout and stderr logs for the majority of rules. Closes #76 and #79

parent f31261b5
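
The change follows a single pattern throughout: each rule's unnamed log: entry is split into named stderr and stdout files, and the shell command's combined &> {log} redirection is replaced by separate 1> {log.stdout} 2> {log.stderr} redirections. A minimal before/after sketch of that pattern (hypothetical rule name, tool invocation, and abbreviated paths; not copied verbatim from the diff):

# Before: both streams go to a single log file
rule some_rule:
    log:
        os.path.join(config["log_dir"], "some_rule.log")
    shell:
        "(some_tool --input {input} --output {output}) &> {log}"

# After: stdout and stderr are kept in separate files
rule some_rule:
    log:
        stderr = os.path.join(config["log_dir"], "some_rule.stderr.log"),
        stdout = os.path.join(config["log_dir"], "some_rule.stdout.log")
    shell:
        "(some_tool --input {input} --output {output}) \
        1> {log.stdout} 2> {log.stderr}"

The updated rules, as of this commit, are shown below; @@ hunk headers mark regions where unchanged lines are collapsed in the diff view.
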
@@ -24,135 +24,138 @@
os.makedirs(
    os.path.join(
        os.getcwd(),
        config['log_dir'],
    ),
    exist_ok=True)

if cluster_config:
    os.makedirs(
        os.path.join(
            os.getcwd(),
            os.path.dirname(cluster_config['__default__']['out']),
        ),
        exist_ok=True)

# Include subworkflows
include: os.path.join("workflow", "rules", "paired_end.snakefile.smk")
include: os.path.join("workflow", "rules", "single_end.snakefile.smk")

# Final rule
rule finish:
    """
    Rule for collecting outputs
    """
    input:
        outdir1 = expand(
            os.path.join(
                config['output_dir'],
                "{seqmode}",
                "{sample}",
                "mate1_fastqc"),
            zip,
            sample=[i for i in list(samples_table.index.values)],
            seqmode=[samples_table.loc[i, 'seqmode']
                     for i in list(samples_table.index.values)]),
        salmon_gn_estimates = expand(
            os.path.join(
                config['output_dir'],
                "{seqmode}",
                "{sample}",
                "salmon_quant",
                "quant.genes.sf"),
            zip,
            sample=[i for i in list(samples_table.index.values)],
            seqmode=[samples_table.loc[i, 'seqmode']
                     for i in list(samples_table.index.values)]),
        pseudoalignment = expand(
            os.path.join(
                config['output_dir'],
                "{seqmode}",
                "{sample}",
                "quant_kallisto",
                "{sample}.kallisto.pseudo.sam"),
            zip,
            sample=[i for i in list(samples_table.index.values)],
            seqmode=[samples_table.loc[i, 'seqmode']
                     for i in list(samples_table.index.values)]),
        TIN_score = expand(
            os.path.join(
                config['output_dir'],
                "{seqmode}",
                "{sample}",
                "TIN",
                "TIN_score.tsv"),
            zip,
            sample=[i for i in list(samples_table.index.values)],
            seqmode=[samples_table.loc[i, 'seqmode']
                     for i in list(samples_table.index.values)]),
        salmon_merge_genes = expand(
            os.path.join(
                config["output_dir"],
                "summary_salmon",
                "quantmerge",
                "genes_{salmon_merge_on}.tsv"),
            salmon_merge_on=["tpm", "numreads"]),
        salmon_merge_transcripts = expand(
            os.path.join(
                config["output_dir"],
                "summary_salmon",
                "quantmerge",
                "transcripts_{salmon_merge_on}.tsv"),
            salmon_merge_on=["tpm", "numreads"])

rule create_index_star:
    """
    Create index for STAR alignments
    """
    input:
        genome = lambda wildcards:
            samples_table['genome']
            [samples_table['organism'] == wildcards.organism]
            [0],
        gtf = lambda wildcards:
            samples_table['gtf']
            [samples_table['organism'] == wildcards.organism]
            [0]
    output:
        chromosome_info = os.path.join(
            config['star_indexes'],
            "{organism}",
            "{index_size}",
            "STAR_index",
            "chrNameLength.txt"),
        chromosomes_names = os.path.join(
            config['star_indexes'],
            "{organism}",
            "{index_size}",
            "STAR_index",
            "chrName.txt")
    params:
        output_dir = os.path.join(
            config['star_indexes'],
            "{organism}",
            "{index_size}",
            "STAR_index"),
        outFileNamePrefix = os.path.join(
            config['star_indexes'],
            "{organism}",
            "{index_size}",
            "STAR_index/STAR_"),
        sjdbOverhang = "{index_size}"
    singularity:
        "docker://zavolab/star:2.7.3a-slim"
    threads: 12
    log:
        stderr = os.path.join(
            config['log_dir'],
            "{organism}_{index_size}_create_index_star.stderr.log"),
        stdout = os.path.join(
            config['log_dir'],
            "{organism}_{index_size}_create_index_star.stdout.log")
    shell:
        "(mkdir -p {params.output_dir}; \
        chmod -R 777 {params.output_dir}; \
@@ -163,223 +166,292 @@ rule create_index_star:
        --genomeFastaFiles {input.genome} \
        --runThreadN {threads} \
        --outFileNamePrefix {params.outFileNamePrefix} \
        --sjdbGTFfile {input.gtf}) \
        1> {log.stdout} 2> {log.stderr}"

rule create_index_salmon:
    """
    Create index for Salmon quantification
    """
    input:
        transcriptome = lambda wildcards:
            samples_table['tr_fasta_filtered']
            [samples_table['organism'] == wildcards.organism]
            [0]
    output:
        index = directory(
            os.path.join(
                config['salmon_indexes'],
                "{organism}",
                "{kmer}",
                "salmon.idx"))
    params:
        kmerLen = "{kmer}"
    singularity:
        "docker://zavolab/salmon:1.1.0-slim"
    log:
        stderr = os.path.join(
            config['log_dir'],
            "{organism}_{kmer}_create_index_salmon.stderr.log"),
        stdout = os.path.join(
            config['log_dir'],
            "{organism}_{kmer}_create_index_salmon.stdout.log")
    threads: 8
    shell:
        "(salmon index \
        --transcripts {input.transcriptome} \
        --index {output.index} \
        --kmerLen {params.kmerLen} \
        --threads {threads}) \
        1> {log.stdout} 2> {log.stderr}"

rule create_index_kallisto:
    """
    Create index for Kallisto quantification
    """
    input:
        transcriptome = lambda wildcards:
            samples_table['tr_fasta_filtered']
            [samples_table['organism'] == wildcards.organism]
            [0]
    output:
        index = os.path.join(
            config['kallisto_indexes'],
            "{organism}",
            "kallisto.idx")
    params:
        output_dir = os.path.join(
            config['kallisto_indexes'],
            "{organism}")
    singularity:
        "docker://zavolab/kallisto:0.46.1-slim"
    log:
        stderr = os.path.join(
            config['log_dir'],
            "{organism}_create_index_kallisto.stderr.log"),
        stdout = os.path.join(
            config['log_dir'],
            "{organism}_create_index_kallisto.stdout.log")
    shell:
        "(mkdir -p {params.output_dir}; \
        chmod -R 777 {params.output_dir}; \
        kallisto index -i {output.index} {input.transcriptome}) \
        1> {log.stdout} 2> {log.stderr}"

rule extract_transcripts_as_bed12:
    """
    Convert transcripts to BED12 format
    """
    input:
        gtf = lambda wildcards:
            samples_table['gtf']
            [0]
    output:
        bed12 = os.path.join(
            config['output_dir'],
            "full_transcripts_protein_coding.bed")
    singularity:
        "docker://zavolab/gtf_transcript_type_to_bed12:0.1.0-slim"
    threads: 1
    log:
        stderr = os.path.join(
            config['log_dir'],
            "extract_transcripts_as_bed12.stderr.log")
    shell:
        "(gtf_transcript_type_to_bed12.pl \
        --anno={input.gtf} \
        --type=protein_coding > {output.bed12}) \
        2> {log.stderr}"

rule calculate_TIN_scores:
    """
    Calculate transcript integrity (TIN) score
    """
    input:
        bai = os.path.join(
            config['output_dir'],
            "{seqmode}",
            "{sample}",
            "map_genome",
            "{sample}_Aligned.sortedByCoord.out.bam.bai"),
        transcripts_bed12 = os.path.join(
            config['output_dir'],
            "full_transcripts_protein_coding.bed")
    output:
        TIN_score = os.path.join(
            config['output_dir'],
            "{seqmode}",
            "{sample}",
            "TIN",
            "TIN_score.tsv")
    params:
        bam = os.path.join(
            config['output_dir'],
            "{seqmode}",
            "{sample}",
            "map_genome",
            "{sample}_Aligned.sortedByCoord.out.bam"),
        sample = "{sample}"
    log:
        stderr = os.path.join(
            config['log_dir'],
            "{seqmode}",
            "{sample}",
            "calculate_TIN_scores.log")
    threads: 8
    singularity:
        "docker://zavolab/tin_score_calculation:0.1.0-slim"
    shell:
        "(tin_score_calculation.py \
        -i {params.bam} \
        -r {input.transcripts_bed12} \
        -c 0 \
        --names {params.sample} \
        -n 100 > {output.TIN_score}) 2> {log.stderr}"

rule salmon_quantmerge_genes:
    '''
    Merge gene quantifications into a single file
    '''
    input:
        salmon_in = expand(
            os.path.join(
                config["output_dir"],
                "{seqmode}",
                "{sample}",
                "salmon_quant",
                "quant.genes.sf"),
            zip,
            sample=list(samples_table.index.values),
            seqmode=list(samples_table["seqmode"]))
    output:
        salmon_out = os.path.join(
            config["output_dir"],
            "summary_salmon",
            "quantmerge",
            "genes_{salmon_merge_on}.tsv")
    params:
        salmon_dir = expand(
            os.path.join(
                config["output_dir"],
                "{seqmode}",
                "{sample}",
                "salmon_quant"),
            zip,
            sample=list(samples_table.index.values),
            seqmode=list(samples_table["seqmode"])),
        sample_name_list = expand(
            "{sample}",
            sample=list(samples_table.index.values)),
        salmon_merge_on = "{salmon_merge_on}"
    log:
        stderr = os.path.join(
            config["log_dir"],
            "salmon_quantmerge_genes_{salmon_merge_on}.stderr.log"),
        stdout = os.path.join(
            config["log_dir"],
            "salmon_quantmerge_genes_{salmon_merge_on}.stdout.log")
    threads: 1
    singularity:
        "docker://zavolab/salmon:1.1.0-slim"
    shell:
        "(salmon quantmerge \
        --quants {params.salmon_dir} \
        --genes \
        --names {params.sample_name_list} \
        --column {params.salmon_merge_on} \
        --output {output.salmon_out}) \
        1> {log.stdout} 2> {log.stderr}"

rule salmon_quantmerge_transcripts:
    '''
    Merge transcript quantifications into a single file
    '''
    input:
        salmon_in = expand(
            os.path.join(
                config["output_dir"],
                "{seqmode}",
                "{sample}",
                "salmon_quant",
                "quant.sf"),
            zip,
            sample=list(samples_table.index.values),
            seqmode=list(samples_table["seqmode"])),
    output:
        salmon_out = os.path.join(
            config["output_dir"],
            "summary_salmon",
            "quantmerge",
            "transcripts_{salmon_merge_on}.tsv")
    params:
        salmon_dir = expand(
            os.path.join(
                config["output_dir"],
                "{seqmode}",
                "{sample}",
                "salmon_quant"),
            zip,
            sample=list(samples_table.index.values),
            seqmode=list(samples_table["seqmode"])),
        sample_name_list = expand(
            "{sample}",
            sample=list(samples_table.index.values)),
        salmon_merge_on = "{salmon_merge_on}"
    log:
        stderr = os.path.join(
            config["log_dir"],
            "salmon_quantmerge_transcripts_{salmon_merge_on}.stderr.log"),
        stdout = os.path.join(
            config["log_dir"],
            "salmon_quantmerge_transcripts_{salmon_merge_on}.stdout.log")
    threads: 1
    singularity:
        "docker://zavolab/salmon:1.1.0-slim"
    shell:
        "(salmon quantmerge \
        --quants {params.salmon_dir} \
        --names {params.sample_name_list} \
        --column {params.salmon_merge_on} \
        --output {output.salmon_out}) \
        1> {log.stdout} 2> {log.stderr}"

# workflow/rules/paired_end.snakefile.smk

rule pe_fastqc:
    '''
    A quality control tool for high throughput sequence data
    '''
    input:
        reads1 = lambda wildcards:
            samples_table.loc[wildcards.sample, "fq1"],
        reads2 = lambda wildcards:
            samples_table.loc[wildcards.sample, "fq2"]
    output:
        outdir1 = directory(os.path.join(
            config["output_dir"],
            "paired_end",
            "{sample}",
            "mate1_fastqc")),
        outdir2 = directory(os.path.join(
            config["output_dir"],
            "paired_end",
            "{sample}",
            "mate2_fastqc"))
    threads: 2
    singularity:
        "docker://zavolab/fastqc:0.11.9-slim"
    log:
        stderr = os.path.join(
            config["log_dir"],
            "paired_end",
            "{sample}",
            "fastqc.stderr.log"),
        stdout = os.path.join(
            config["log_dir"],
            "paired_end",
            "{sample}",
            "fastqc.stdout.log")
    shell:
        "(mkdir -p {output.outdir1}; \
        mkdir -p {output.outdir2}; \
        fastqc --outdir {output.outdir1} {input.reads1}; \
        fastqc --outdir {output.outdir2} {input.reads2}) \
        1> {log.stdout} 2> {log.stderr}"

rule pe_remove_adapters_cutadapt:
    '''
    Remove adapters
    '''
    input:
        reads1 = lambda wildcards:
            samples_table.loc[wildcards.sample, "fq1"],
        reads2 = lambda wildcards:
            samples_table.loc[wildcards.sample, "fq2"]
    output:
        reads1 = os.path.join(
            config["output_dir"],
            "paired_end",
            "{sample}",
            "{sample}.remove_adapters_mate1.fastq.gz"),
        reads2 = os.path.join(
            config["output_dir"],
            "paired_end",
            "{sample}",
            "{sample}.remove_adapters_mate2.fastq.gz")
    params:
        adapter_3_mate1 = lambda wildcards:
            samples_table.loc[wildcards.sample, 'fq1_3p'],
@@ -47,11 +77,24 @@ rule pe_remove_adapters_cutadapt:
            samples_table.loc[wildcards.sample, 'fq2_3p'],
        adapter_5_mate2 = lambda wildcards:
            samples_table.loc[wildcards.sample, 'fq2_5p']
    singularity:
        "docker://zavolab/cutadapt:1.16-slim"
    threads: 8
    log:
        stderr = os.path.join(
            config["log_dir"],
            "paired_end",
            "{sample}",
            "remove_adapters_cutadapt.stderr.log"),
        stdout = os.path.join(
            config["log_dir"],
            "paired_end",
            "{sample}",
            "remove_adapters_cutadapt.stdout.log")
    shell:
        "(cutadapt \
        -e 0.1 \
@@ -66,11 +109,14 @@ rule pe_remove_adapters_cutadapt:
        -o {output.reads1} \
        -p {output.reads2} \
        {input.reads1} \
        {input.reads2}) \
        1> {log.stdout} 2> {log.stderr}"

rule pe_remove_polya_cutadapt:
    '''
    Remove polyA tails
    '''
    input:
        reads1 = os.path.join(
            config["output_dir"],
@@ -82,6 +128,7 @@ rule pe_remove_polya_cutadapt:
            "paired_end",
            "{sample}",
            "{sample}.remove_adapters_mate2.fastq.gz")
    output:
        reads1 = os.path.join(
            config["output_dir"],
@@ -93,18 +140,32 @@ rule pe_remove_polya_cutadapt:
            "paired_end",
            "{sample}",
            "{sample}.remove_polya_mate2.fastq.gz")
    params:
        polya_3_mate1 = lambda wildcards:
            samples_table.loc[wildcards.sample, 'fq1_polya'],
        polya_3_mate2 = lambda wildcards:
            samples_table.loc[wildcards.sample, 'fq2_polya']
    singularity:
        "docker://zavolab/cutadapt:1.16-slim"
    threads: 8
    log:
        stderr = os.path.join(
            config["log_dir"],
            "paired_end",
            "{sample}",
            "remove_polya_cutadapt.stderr.log"),
        stdout = os.path.join(
            config["log_dir"],
            "paired_end",
            "{sample}",
            "remove_polya_cutadapt.stdout.log")
    shell:
        "(cutadapt \
        --match-read-wildcards \
        -j {threads} \
        --pair-filter=both \
@@ -118,11 +179,14 @@ rule pe_remove_polya_cutadapt:
        -o {output.reads1} \
        -p {output.reads2} \
        {input.reads1} \
        {input.reads2}) \
        1> {log.stdout} 2> {log.stderr}"

rule pe_map_genome_star:
    '''
    Map to genome using STAR
    '''
    input:
        index = lambda wildcards:
            os.path.join(
@@ -141,6 +205,7 @@ rule pe_map_genome_star:
            "paired_end",
            "{sample}",
            "{sample}.remove_polya_mate2.fastq.gz")
    output:
        bam = os.path.join(
            config["output_dir"],
@@ -154,6 +219,7 @@ rule pe_map_genome_star:
            "{sample}",
            "map_genome",
            "{sample}_Log.final.out")
    params:
        sample_id = "{sample}",
        index = lambda wildcards:
@@ -181,7 +247,11 @@ rule pe_map_genome_star:
    threads: 12
    log:
        stderr = os.path.join(
            config["log_dir"],
            "paired_end",
            "{sample}",
            "map_genome_star.stderr.log")
    shell:
        "(STAR \
@@ -204,11 +274,14 @@ rule pe_map_genome_star:
        --outFilterType BySJout \
        --outReadsUnmapped None \
        --outSAMattrRGline ID:rnaseq_pipeline SM:{params.sample_id} \
        --alignEndsType {params.soft_clip} > {output.bam};) \
        2> {log.stderr}"

rule pe_index_genomic_alignment_samtools:
    '''
    Index the genomic alignment
    '''
    input:
        bam = os.path.join(
            config["output_dir"],
@@ -223,16 +296,31 @@ rule pe_index_genomic_alignment_samtools:
            "{sample}",
            "map_genome",
            "{sample}_Aligned.sortedByCoord.out.bam.bai"),
    singularity:
        "docker://zavolab/samtools:1.10-slim"
    log:
        stderr = os.path.join(
            config["log_dir"],
            "paired_end",
            "{sample}",
            "index_genomic_alignment_samtools.stderr.log"),
        stdout = os.path.join(
            config["log_dir"],
            "paired_end",
            "{sample}",
            "index_genomic_alignment_samtools.stdout.log")
    shell:
        "(samtools index {input.bam} {output.bai};) \
        1> {log.stdout} 2> {log.stderr}"

rule pe_quantification_salmon:
    '''
    Quantification at transcript and gene level using Salmon
    '''
    input:
        reads1 = os.path.join(
            config["output_dir"],
@@ -252,7 +340,8 @@ rule pe_quantification_salmon:
            str(samples_table.loc[wildcards.sample, "organism"]),
            str(samples_table.loc[wildcards.sample, "kmer"]),
            "salmon.idx")
    output:
        gn_estimates = os.path.join(
            config["output_dir"],
            "paired_end",
@@ -265,6 +354,7 @@ rule pe_quantification_salmon:
            "{sample}",
            "salmon_quant",
            "quant.sf")
    params:
        output_dir = os.path.join(
            config["output_dir"],
@@ -273,11 +363,24 @@ rule pe_quantification_salmon:
            "salmon_quant"),
        libType = lambda wildcards:
            samples_table.loc[wildcards.sample, 'libtype']
    log:
        stderr = os.path.join(
            config["log_dir"],
            "paired_end",
            "{sample}",
            "genome_quantification_salmon.stderr.log"),
        stdout = os.path.join(
            config["log_dir"],
            "paired_end",
            "{sample}",
            "genome_quantification_salmon.stdout.log"),
    threads: 6
    singularity:
        "docker://zavolab/salmon:1.1.0-slim"
    shell:
        "(salmon quant \
        --libType {params.libType} \
@@ -289,11 +392,13 @@ rule pe_quantification_salmon:
        --geneMap {input.gtf} \
        -1 {input.reads1} \
        -2 {input.reads2} \
        -o {params.output_dir}) 1> {log.stdout} 2> {log.stderr}"

rule pe_genome_quantification_kallisto:
    '''
    Quantification at transcript and gene level using Kallisto
    '''
    input:
        reads1 = os.path.join(
            config["output_dir"],
@@ -310,6 +415,7 @@ rule pe_genome_quantification_kallisto:
            config["kallisto_indexes"],
            samples_table.loc[wildcards.sample, 'organism'],
            "kallisto.idx")
    output:
        pseudoalignment = os.path.join(
            config["output_dir"],
@@ -317,24 +423,33 @@ rule pe_genome_quantification_kallisto:
            "{sample}",
            "quant_kallisto",
            "{sample}.kallisto.pseudo.sam")
    params:
        output_dir = os.path.join(
            config["output_dir"],
            "paired_end",
            "{sample}",
            "quant_kallisto"),
        directionality = lambda wildcards:
            samples_table.loc[wildcards.sample, "kallisto_directionality"]
    singularity:
        "docker://zavolab/kallisto:0.46.1-slim"
    threads: 8
    log:
        stderr = os.path.join(
            config["log_dir"],
            "paired_end",
            "{sample}",
            "genome_quantification_kallisto.stderr.log")
    shell:
        "(kallisto quant \
        -i {input.index} \
        -o {params.output_dir} \
        --pseudobam \
        {params.directionality} \
        {input.reads1} {input.reads2} > {output.pseudoalignment}) \
        2> {log.stderr}"

# workflow/rules/single_end.snakefile.smk

import os

rule fastqc:
    '''
    A quality control tool for high throughput sequence data.
    '''
    input:
        reads = lambda wildcards:
            samples_table.loc[wildcards.sample, "fq1"]
    output:
        outdir = directory(os.path.join(
            config["output_dir"],
            "single_end",
            "{sample}",
            "mate1_fastqc"))
    params:
        seqmode = lambda wildcards:
            samples_table.loc[wildcards.sample, "seqmode"]
    singularity:
        "docker://zavolab/fastqc:0.11.9-slim"
    log:
        stderr = os.path.join(
            config["log_dir"],
            "single_end",
            "{sample}",
            "fastqc.stderr.log"),
        stdout = os.path.join(
            config["log_dir"],
            "single_end",
            "{sample}",
            "fastqc.stdout.log")
    shell:
        "(mkdir -p {output.outdir}; \
        fastqc \
        --outdir {output.outdir} \
        {input.reads};) \
        1> {log.stdout} 2> {log.stderr}"

rule remove_adapters_cutadapt:
    '''
    Remove adapters
    '''
    input:
        reads = lambda wildcards:
            samples_table.loc[wildcards.sample, "fq1"]
    output:
        reads = os.path.join(
            config["output_dir"],
            "single_end",
            "{sample}",
            "{sample}.remove_adapters_mate1.fastq.gz")
    params:
        adapters_3 = lambda wildcards:
            samples_table.loc[wildcards.sample, 'fq1_3p'],
        adapters_5 = lambda wildcards:
            samples_table.loc[wildcards.sample, 'fq1_5p']
    singularity:
        "docker://zavolab/cutadapt:1.16-slim"
    threads: 8
    log:
        stderr = os.path.join(
            config["log_dir"],
            "single_end",
            "{sample}",
            "remove_adapters_cutadapt.stderr.log"),
        stdout = os.path.join(
            config["log_dir"],
            "single_end",
            "{sample}",
            "remove_adapters_cutadapt.stdout.log")
    shell:
        "(cutadapt \
        -e 0.1 \
@@ -45,23 +89,49 @@ rule remove_adapters_cutadapt:
        -a {params.adapters_3} \
        -g {params.adapters_5} \
        -o {output.reads} \
        {input.reads};) \
        1> {log.stdout} 2> {log.stderr}"

rule remove_polya_cutadapt:
    '''
    Remove polyA tails
    '''
    input:
        reads = os.path.join(
            config["output_dir"],
            "single_end",
            "{sample}",
            "{sample}.remove_adapters_mate1.fastq.gz")
    output:
        reads = os.path.join(
            config["output_dir"],
            "single_end",
            "{sample}",
            "{sample}.remove_polya_mate1.fastq.gz")
    params:
        polya_3 = lambda wildcards:
            samples_table.loc[wildcards.sample, "fq1_polya"]
    singularity:
        "docker://zavolab/cutadapt:1.16-slim"
    threads: 8
    log:
        stderr = os.path.join(
            config["log_dir"],
            "single_end",
            "{sample}",
            "remove_polya_cutadapt.stderr.log"),
        stdout = os.path.join(
            config["log_dir"],
            "single_end",
            "{sample}",
            "remove_polya_cutadapt.stdout.log")
    shell:
        "(cutadapt \
        --match-read-wildcards \
@@ -73,51 +143,75 @@ rule remove_polya_cutadapt:
        -m 10 \
        -a {params.polya_3} \
        -o {output.reads} \
        {input.reads}) \
        1> {log.stdout} 2> {log.stderr}"

rule map_genome_star:
    '''
    Map to genome using STAR
    '''
    input:
        index = lambda wildcards:
            os.path.join(
                config["star_indexes"],
                str(samples_table.loc[wildcards.sample, "organism"]),
                str(samples_table.loc[wildcards.sample, "index_size"]),
                "STAR_index",
                "chrNameLength.txt"),
        reads = os.path.join(
            config["output_dir"],
            "single_end",
            "{sample}",
            "{sample}.remove_polya_mate1.fastq.gz")
    output:
        bam = os.path.join(
            config["output_dir"],
            "single_end",
            "{sample}",
            "map_genome",
            "{sample}_Aligned.sortedByCoord.out.bam"),
        logfile = os.path.join(
            config["output_dir"],
            "single_end",
            "{sample}",
            "map_genome",
            "{sample}_Log.final.out")
    params:
        sample_id = "{sample}",
        index = lambda wildcards:
            os.path.join(
                config["star_indexes"],
                str(samples_table.loc[wildcards.sample, "organism"]),
                str(samples_table.loc[wildcards.sample, "index_size"]),
                "STAR_index"),
        outFileNamePrefix = os.path.join(
            config["output_dir"],
            "single_end",
            "{sample}",
            "map_genome",
            "{sample}_"),
        multimappers = lambda wildcards:
            samples_table.loc[wildcards.sample, "multimappers"],
        soft_clip = lambda wildcards:
            samples_table.loc[wildcards.sample, "soft_clip"],
        pass_mode = lambda wildcards:
            samples_table.loc[wildcards.sample, "pass_mode"],
    singularity:
        "docker://zavolab/star:2.7.3a-slim"
    threads: 12
    log:
        stderr = os.path.join(
            config["log_dir"],
            "single_end",
            "{sample}",
            "map_genome_star.stderr.log")
    shell:
        "(STAR \
        --runMode alignReads \
@@ -139,39 +233,61 @@ rule map_genome_star:
        --outFilterType BySJout \
        --outReadsUnmapped None \
        --outSAMattrRGline ID:rcrunch SM:{params.sample_id} \
        --alignEndsType {params.soft_clip} > {output.bam};) \
        2> {log.stderr}"

rule index_genomic_alignment_samtools:
    '''
    Index genome bamfile using samtools
    '''
    input:
        bam = os.path.join(
            config["output_dir"],
            "single_end",
            "{sample}",
            "map_genome",
            "{sample}_Aligned.sortedByCoord.out.bam")
    output:
        bai = os.path.join(
            config["output_dir"],
            "single_end",
            "{sample}",
            "map_genome",
            "{sample}_Aligned.sortedByCoord.out.bam.bai")
    singularity:
        "docker://zavolab/samtools:1.10-slim"
    threads: 1
    log:
        stderr = os.path.join(
            config["log_dir"],
            "single_end",
            "{sample}",
            "index_genomic_alignment_samtools.stderr.log"),
        stdout = os.path.join(
            config["log_dir"],
            "single_end",
            "{sample}",
            "index_genomic_alignment_samtools.stdout.log")
    shell:
        "(samtools index {input.bam} {output.bai};) \
        1> {log.stdout} 2> {log.stderr}"

rule quantification_salmon:
    '''
    Quantification at transcript and gene level using Salmon
    '''
    input:
        reads = os.path.join(
            config["output_dir"],
            "single_end",
            "{sample}",
            "{sample}.remove_polya_mate1.fastq.gz"),
        index = lambda wildcards:
            os.path.join(
@@ -179,33 +295,49 @@ rule quantification_salmon:
                str(samples_table.loc[wildcards.sample, "organism"]),
                str(samples_table.loc[wildcards.sample, "kmer"]),
                "salmon.idx"),
        gtf = lambda wildcards:
            samples_table.loc[wildcards.sample, "gtf_filtered"]
    output:
        gn_estimates = os.path.join(
            config["output_dir"],
            "single_end",
            "{sample}",
            "salmon_quant",
            "quant.genes.sf"),
        tr_estimates = os.path.join(
            config["output_dir"],
            "single_end",
            "{sample}",
            "salmon_quant",
            "quant.sf")
    params:
        output_dir = os.path.join(
            config["output_dir"],
            "single_end",
            "{sample}",
            "salmon_quant"),
        libType = lambda wildcards:
            samples_table.loc[wildcards.sample, "libtype"]
    log:
        stderr = os.path.join(
            config["log_dir"],
            "single_end",
            "{sample}",
            "quantification_salmon.stderr.log"),
        stdout = os.path.join(
            config["log_dir"],
            "single_end",
            "{sample}",
            "quantification_salmon.stdout.log")
    threads: 12
    singularity:
        "docker://zavolab/salmon:1.1.0-slim"
    shell:
        "(salmon quant \
        --libType {params.libType} \
@@ -216,43 +348,59 @@ rule quantification_salmon:
        --index {input.index} \
        --geneMap {input.gtf} \
        --unmatedReads {input.reads} \
        -o {params.output_dir};) \
        1> {log.stdout} 2> {log.stderr}"

rule genome_quantification_kallisto:
    '''
    Quantification at transcript and gene level using Kallisto
    '''
    input:
        reads = os.path.join(
            config["output_dir"],
            "single_end",
            "{sample}",
            "{sample}.remove_polya_mate1.fastq.gz"),
        index = lambda wildcards:
            os.path.join(
                config["kallisto_indexes"],
                samples_table.loc[wildcards.sample, "organism"],
                "kallisto.idx")
    output:
        pseudoalignment = os.path.join(
            config["output_dir"],
            "single_end",
            "{sample}",
            "quant_kallisto",
            "{sample}.kallisto.pseudo.sam")
    params:
        output_dir = os.path.join(
            config["output_dir"],
            "single_end",
            "{sample}",
            "quant_kallisto"),
        fraglen = lambda wildcards:
            samples_table.loc[wildcards.sample, 'mean'],
        fragsd = lambda wildcards:
            samples_table.loc[wildcards.sample, 'sd'],
        directionality = lambda wildcards:
            samples_table.loc[wildcards.sample, 'kallisto_directionality']
    threads: 8
    log:
        stderr = os.path.join(
            config["log_dir"],
            "single_end",
            "{sample}",
            "genome_quantification_kallisto.stderr.log")
    singularity:
        "docker://zavolab/kallisto:0.46.1-slim"
    shell:
        "(kallisto quant \
        -i {input.index} \
@@ -262,5 +410,5 @@ rule genome_quantification_kallisto:
        -s {params.fragsd} \
        --pseudobam \
        {params.directionality} \
        {input.reads} > {output.pseudoalignment};) \
        2> {log.stderr}"