Skip to content
Snippets Groups Projects
Commit ff0a39fa authored by BIOPZ-Katsantoni Maria's avatar BIOPZ-Katsantoni Maria Committed by BIOPZ-Gypas Foivos
Browse files

Separate stdout and stderr logs for the majority of rules. Closes #76 and #79

parent f31261b5
No related branches found
No related tags found
No related merge requests found
......@@ -24,135 +24,138 @@ os.makedirs(
os.getcwd(),
config['log_dir'],
),
exist_ok=True,
)
exist_ok=True)
if cluster_config:
os.makedirs(
os.path.join(
os.getcwd(),
os.path.dirname(cluster_config['__default__']['out']),
),
exist_ok=True,
)
exist_ok=True)
# Include subworkflows
include: os.path.join("workflow", "rules", "paired_end.snakefile.smk")
include: os.path.join("workflow", "rules", "single_end.snakefile.smk")
# Final rule
rule finish:
"""Rule for collecting outputs"""
"""
Rule for collecting outputs
"""
input:
outdir1 = expand(
os.path.join(
config['output_dir'],
"{seqmode}",
"{sample}",
"mate1_fastqc",
),
"mate1_fastqc"),
zip,
sample=[i for i in list(samples_table.index.values)],
seqmode=[
samples_table.loc[i, 'seqmode']
for i in list(samples_table.index.values)
]
),
seqmode=[samples_table.loc[i, 'seqmode']
for i in list(samples_table.index.values)]),
salmon_gn_estimates = expand(
os.path.join(
config['output_dir'],
"{seqmode}",
"{sample}",
"salmon_quant",
"quant.genes.sf",
),
"quant.genes.sf"),
zip,
sample=[i for i in list(samples_table.index.values)],
seqmode=[
samples_table.loc[i, 'seqmode']
for i in list(samples_table.index.values)
]
),
seqmode=[samples_table.loc[i, 'seqmode']
for i in list(samples_table.index.values)]),
pseudoalignment = expand(
os.path.join(
config['output_dir'],
"{seqmode}",
"{sample}",
"quant_kallisto",
"{sample}.kallisto.pseudo.sam",
),
"{sample}.kallisto.pseudo.sam"),
zip,
sample=[i for i in list(samples_table.index.values)],
seqmode=[
samples_table.loc[i, 'seqmode']
for i in list(samples_table.index.values)
]
),
seqmode=[samples_table.loc[i, 'seqmode']
for i in list(samples_table.index.values)]),
TIN_score = expand(
os.path.join(
config['output_dir'],
"{seqmode}",
"{sample}",
"TIN",
"TIN_score.tsv",
),
"TIN_score.tsv"),
zip,
sample=[i for i in list(samples_table.index.values)],
seqmode=[
samples_table.loc[i, 'seqmode']
for i in list(samples_table.index.values)
]
),
salmon_merge_genes = expand(os.path.join(config["output_dir"], "summary_salmon", "quantmerge", "genes_{salmon_merge_on}.tsv"), salmon_merge_on=["tpm", "numreads"]),
salmon_merge_transcripts = expand(os.path.join(config["output_dir"], "summary_salmon", "quantmerge", "transcripts_{salmon_merge_on}.tsv"), salmon_merge_on=["tpm", "numreads"])
seqmode=[samples_table.loc[i, 'seqmode']
for i in list(samples_table.index.values)]),
salmon_merge_genes = expand(
os.path.join(
config["output_dir"],
"summary_salmon",
"quantmerge",
"genes_{salmon_merge_on}.tsv"),
salmon_merge_on=["tpm", "numreads"]),
salmon_merge_transcripts = expand(
os.path.join(
config["output_dir"],
"summary_salmon",
"quantmerge",
"transcripts_{salmon_merge_on}.tsv"),
salmon_merge_on=["tpm", "numreads"])
rule create_index_star:
"""Create index for STAR alignments"""
"""
Create index for STAR alignments
"""
input:
genome = lambda wildcards:
samples_table['genome'][
samples_table['organism'] == wildcards.organism
][0],
samples_table['genome']
[samples_table['organism'] == wildcards.organism]
[0],
gtf = lambda wildcards:
samples_table['gtf'][
samples_table['organism'] == wildcards.organism
][0],
samples_table['gtf']
[samples_table['organism'] == wildcards.organism]
[0]
output:
chromosome_info = os.path.join(
config['star_indexes'],
"{organism}",
"{index_size}",
"STAR_index",
"chrNameLength.txt",
),
"chrNameLength.txt"),
chromosomes_names = os.path.join(
config['star_indexes'],
"{organism}",
"{index_size}",
"STAR_index",
"chrName.txt",
),
"chrName.txt")
params:
output_dir = os.path.join(
config['star_indexes'],
"{organism}",
"{index_size}",
"STAR_index",
),
"STAR_index"),
outFileNamePrefix = os.path.join(
config['star_indexes'],
"{organism}",
"{index_size}",
"STAR_index/STAR_",
),
sjdbOverhang = "{index_size}",
"STAR_index/STAR_"),
sjdbOverhang = "{index_size}"
singularity:
"docker://zavolab/star:2.7.3a-slim"
threads: 12
log:
os.path.join(
stderr = os.path.join(
config['log_dir'],
"{organism}_{index_size}_create_index_star.log",
)
"{organism}_{index_size}_create_index_star.stderr.log"),
stdout = os.path.join(
config['log_dir'],
"{organism}_{index_size}_create_index_star.stdout.log")
shell:
"(mkdir -p {params.output_dir}; \
chmod -R 777 {params.output_dir}; \
......@@ -163,223 +166,292 @@ rule create_index_star:
--genomeFastaFiles {input.genome} \
--runThreadN {threads} \
--outFileNamePrefix {params.outFileNamePrefix} \
--sjdbGTFfile {input.gtf}) &> {log}"
--sjdbGTFfile {input.gtf}) \
1> {log.stdout} 2> {log.stderr}"
rule create_index_salmon:
"""Create index for Salmon quantification"""
"""
Create index for Salmon quantification
"""
input:
transcriptome = lambda wildcards:
samples_table['tr_fasta_filtered'][
samples_table['organism'] == wildcards.organism
][0]
samples_table['tr_fasta_filtered']
[samples_table['organism'] == wildcards.organism]
[0]
output:
index = directory(
os.path.join(
config['salmon_indexes'],
"{organism}",
"{kmer}",
"salmon.idx",
)
),
"salmon.idx"))
params:
kmerLen = "{kmer}",
kmerLen = "{kmer}"
singularity:
"docker://zavolab/salmon:1.1.0-slim"
log:
os.path.join(
stderr = os.path.join(
config['log_dir'],
"{organism}_{kmer}_create_index_salmon.log"
)
"{organism}_{kmer}_create_index_salmon.stderr.log"),
stdout = os.path.join(
config['log_dir'],
"{organism}_{kmer}_create_index_salmon.stdout.log")
threads: 8
shell:
"(salmon index \
--transcripts {input.transcriptome} \
--index {output.index} \
--kmerLen {params.kmerLen} \
--threads {threads}) &> {log}"
--threads {threads}) \
1> {log.stdout} 2> {log.stderr}"
rule create_index_kallisto:
"""Create index for Kallisto quantification"""
"""
Create index for Kallisto quantification
"""
input:
transcriptome = lambda wildcards:
samples_table['tr_fasta_filtered'][
samples_table['organism'] == wildcards.organism
][0],
samples_table['tr_fasta_filtered']
[samples_table['organism'] == wildcards.organism]
[0]
output:
index = os.path.join(
config['kallisto_indexes'],
"{organism}",
"kallisto.idx",
),
"kallisto.idx")
params:
output_dir = os.path.join(
config['kallisto_indexes'],
"{organism}",
),
"{organism}")
singularity:
"docker://zavolab/kallisto:0.46.1-slim"
log:
os.path.join(
stderr = os.path.join(
config['log_dir'],
"{organism}_create_index_kallisto.log"
)
"{organism}_create_index_kallisto.stderr.log"),
stdout = os.path.join(
config['log_dir'],
"{organism}_create_index_kallisto.stdout.log")
shell:
"(mkdir -p {params.output_dir}; \
chmod -R 777 {params.output_dir}; \
kallisto index -i {output.index} {input.transcriptome}) &> {log}"
kallisto index -i {output.index} {input.transcriptome}) \
1> {log.stdout} 2> {log.stderr}"
rule extract_transcripts_as_bed12:
"""Convert transcripts to BED12 format"""
"""
Convert transcripts to BED12 format
"""
input:
gtf = lambda wildcards:
samples_table['gtf'][0],
samples_table['gtf']
[0]
output:
bed12 = os.path.join(
config['output_dir'],
"full_transcripts_protein_coding.bed",
),
"full_transcripts_protein_coding.bed")
singularity:
"docker://zavolab/gtf_transcript_type_to_bed12:0.1.0-slim"
threads: 1
log:
os.path.join(
stderr = os.path.join(
config['log_dir'],
"extract_transcripts_as_bed12.log",
)
"extract_transcripts_as_bed12.stderr.log")
shell:
"gtf_transcript_type_to_bed12.pl \
"(gtf_transcript_type_to_bed12.pl \
--anno={input.gtf} \
--type=protein_coding \
1> {output.bed12} \
2> {log}"
--type=protein_coding > {output.bed12}); \
2> {log.stderr}"
rule calculate_TIN_scores:
"""Caluclate transcript integrity (TIN) score"""
"""
Caluclate transcript integrity (TIN) score
"""
input:
bai = os.path.join(
config['output_dir'],
"{seqmode}",
"{sample}",
"map_genome",
"{sample}_Aligned.sortedByCoord.out.bam.bai"
),
"{sample}_Aligned.sortedByCoord.out.bam.bai"),
transcripts_bed12 = os.path.join(
config['output_dir'],
"full_transcripts_protein_coding.bed"
),
"full_transcripts_protein_coding.bed")
output:
TIN_score = os.path.join(
config['output_dir'],
"{seqmode}",
"{sample}",
"TIN",
"TIN_score.tsv",
),
"TIN_score.tsv")
params:
bam = os.path.join(
config['output_dir'],
"{seqmode}",
"{sample}",
"map_genome",
"{sample}_Aligned.sortedByCoord.out.bam"
),
sample = "{sample}",
"{sample}_Aligned.sortedByCoord.out.bam"),
sample = "{sample}"
log:
os.path.join(
stderr = os.path.join(
config['log_dir'],
"{seqmode}",
"{sample}",
"calculate_TIN_scores.log",
)
"calculate_TIN_scores.log")
threads: 8
singularity:
"docker://zavolab/tin_score_calculation:0.1.0-slim"
shell:
"tin_score_calculation.py \
"(tin_score_calculation.py \
-i {params.bam} \
-r {input.transcripts_bed12} \
-c 0 \
--names {params.sample} \
-n 100 \
1> {output.TIN_score} \
2> {log}"
-n 100 > {output.TIN_score};) 2> {log.stderr}"
rule salmon_quantmerge_genes:
''' Merge gene quantifications into a single file. '''
'''
Merge gene quantifications into a single file
'''
input:
salmon_in = expand(os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"salmon_quant",
"quant.genes.sf"),
salmon_in = expand(
os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"salmon_quant",
"quant.genes.sf"),
zip,
sample= list(samples_table.index.values),
seqmode= list(samples_table["seqmode"])),
sample=list(samples_table.index.values),
seqmode=list(samples_table["seqmode"]))
output:
salmon_out = os.path.join(config["output_dir"], "summary_salmon", "quantmerge", "genes_{salmon_merge_on}.tsv")
params:
salmon_dir = expand(os.path.join(
salmon_out = os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"salmon_quant"),
"summary_salmon",
"quantmerge",
"genes_{salmon_merge_on}.tsv")
params:
salmon_dir = expand(
os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"salmon_quant"),
zip,
sample= list(samples_table.index.values),
seqmode= list(samples_table["seqmode"])),
sample_name_list = expand("{sample}", sample= list(samples_table.index.values)),
sample=list(samples_table.index.values),
seqmode=list(samples_table["seqmode"])),
sample_name_list = expand(
"{sample}",
sample=list(samples_table.index.values)),
salmon_merge_on = "{salmon_merge_on}"
log:
os.path.join(config["log_dir"], "salmon_quantmerge_genes_{salmon_merge_on}.log")
threads: 1
stderr = os.path.join(
config["log_dir"],
"salmon_quantmerge_genes_{salmon_merge_on}.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"salmon_quantmerge_genes_{salmon_merge_on}.stdout.log")
threads: 1
singularity:
"docker://zavolab/salmon:1.1.0-slim"
shell:
"(salmon quantmerge \
--quants {params.salmon_dir} \
--genes \
--names {params.sample_name_list} \
--column {params.salmon_merge_on} \
--output {output.salmon_out}) &> {log}"
--output {output.salmon_out};) \
1> {log.stdout} 2> {log.stderr}"
rule salmon_quantmerge_transcripts:
''' Merge gene quantifications into a single file. '''
'''
Merge gene quantifications into a single file
'''
input:
salmon_in = expand(os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"salmon_quant",
"quant.sf"),
salmon_in = expand(
os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"salmon_quant",
"quant.sf"),
zip,
sample= list(samples_table.index.values),
seqmode= list(samples_table["seqmode"])),
sample=list(samples_table.index.values),
seqmode=list(samples_table["seqmode"])),
output:
salmon_out = os.path.join(config["output_dir"], "summary_salmon", "quantmerge", "transcripts_{salmon_merge_on}.tsv")
params:
salmon_dir = expand(os.path.join(
salmon_out = os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"salmon_quant"),
"summary_salmon",
"quantmerge",
"transcripts_{salmon_merge_on}.tsv")
params:
salmon_dir = expand(
os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"salmon_quant"),
zip,
sample= list(samples_table.index.values),
seqmode= list(samples_table["seqmode"])),
sample_name_list = expand("{sample}", sample= list(samples_table.index.values)),
sample=list(samples_table.index.values),
seqmode=list(samples_table["seqmode"])),
sample_name_list = expand(
"{sample}",
sample=list(samples_table.index.values)),
salmon_merge_on = "{salmon_merge_on}"
log:
os.path.join(config["log_dir"], "salmon_quantmerge_transcripts_{salmon_merge_on}.log")
threads: 1
stderr = os.path.join(
config["log_dir"],
"salmon_quantmerge_transcripts_{salmon_merge_on}.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"salmon_quantmerge_transcripts_{salmon_merge_on}.stdout.log")
threads: 1
singularity:
"docker://zavolab/salmon:1.1.0-slim"
shell:
"(salmon quantmerge \
--quants {params.salmon_dir} \
--names {params.sample_name_list} \
--column {params.salmon_merge_on} \
--output {output.salmon_out}) &> {log}"
--output {output.salmon_out}) \
1> {log.stdout} 2> {log.stderr}"
rule pe_fastqc:
'''A quality control tool for high throughput sequence data'''
'''
A quality control tool for high throughput sequence data
'''
input:
reads1 = lambda wildcards: samples_table.loc[wildcards.sample, "fq1"],
reads2 = lambda wildcards: samples_table.loc[wildcards.sample, "fq2"]
reads1 = lambda wildcards:
samples_table.loc[wildcards.sample, "fq1"],
reads2 = lambda wildcards:
samples_table.loc[wildcards.sample, "fq2"]
output:
outdir1 = directory(os.path.join(config["output_dir"],"paired_end", "{sample}", "mate1_fastqc")),
outdir2 = directory(os.path.join(config["output_dir"],"paired_end", "{sample}", "mate2_fastqc"))
threads:
2
outdir1 = directory(os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"mate1_fastqc")),
outdir2 = directory(os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"mate2_fastqc"))
threads: 2
singularity:
"docker://zavolab/fastqc:0.11.9-slim"
log:
os.path.join(config["log_dir"],"paired_end", "{sample}", "fastqc.log")
stderr = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"fastqc.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"fastqc.stdout.log")
shell:
"(mkdir -p {output.outdir1}; \
mkdir -p {output.outdir2}; \
fastqc --outdir {output.outdir1} {input.reads1} & \
fastqc --outdir {output.outdir2} {input.reads2}) &> {log}"
fastqc --outdir {output.outdir1} {input.reads1}; \
fastqc --outdir {output.outdir2} {input.reads2}); \
1> {log.stdout} 2> {log.stderr}"
rule pe_remove_adapters_cutadapt:
'''Remove adapters'''
'''
Remove adapters
'''
input:
reads1 = lambda wildcards: samples_table.loc[wildcards.sample, "fq1"],
reads2 = lambda wildcards: samples_table.loc[wildcards.sample, "fq2"]
reads1 = lambda wildcards:
samples_table.loc[wildcards.sample, "fq1"],
reads2 = lambda wildcards:
samples_table.loc[wildcards.sample, "fq2"]
output:
reads1 = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"{sample}.remove_adapters_mate1.fastq.gz"),
reads2 = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"{sample}.remove_adapters_mate2.fastq.gz")
params:
adapter_3_mate1 = lambda wildcards:
samples_table.loc[wildcards.sample, 'fq1_3p'],
......@@ -47,11 +77,24 @@ rule pe_remove_adapters_cutadapt:
samples_table.loc[wildcards.sample, 'fq2_3p'],
adapter_5_mate2 = lambda wildcards:
samples_table.loc[wildcards.sample, 'fq2_5p']
singularity:
"docker://zavolab/cutadapt:1.16-slim"
threads: 8
log:
os.path.join( config["log_dir"], "paired_end", "{sample}", "remove_adapters_cutadapt.log")
stderr = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"remove_adapters_cutadapt.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"remove_adapters_cutadapt.stdout.log")
shell:
"(cutadapt \
-e 0.1 \
......@@ -66,11 +109,14 @@ rule pe_remove_adapters_cutadapt:
-o {output.reads1} \
-p {output.reads2} \
{input.reads1} \
{input.reads2}) &> {log}"
{input.reads2}); \
1> {log.stdout} 2>{log.stderr}"
rule pe_remove_polya_cutadapt:
'''Remove polyA tails'''
'''
Remove polyA tails
'''
input:
reads1 = os.path.join(
config["output_dir"],
......@@ -82,6 +128,7 @@ rule pe_remove_polya_cutadapt:
"paired_end",
"{sample}",
"{sample}.remove_adapters_mate2.fastq.gz")
output:
reads1 = os.path.join(
config["output_dir"],
......@@ -93,18 +140,32 @@ rule pe_remove_polya_cutadapt:
"paired_end",
"{sample}",
"{sample}.remove_polya_mate2.fastq.gz")
params:
polya_3_mate1 = lambda wildcards:
samples_table.loc[wildcards.sample, 'fq1_polya'],
polya_3_mate2 = lambda wildcards:
samples_table.loc[wildcards.sample, 'fq2_polya'],
samples_table.loc[wildcards.sample, 'fq2_polya']
singularity:
"docker://zavolab/cutadapt:1.16-slim"
threads: 8
log:
os.path.join( config["log_dir"], "paired_end", "{sample}", "remove_polya_cutadapt.log")
stderr = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"remove_polya_cutadapt.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"remove_adapters_cutadapt.stdout.log")
shell:
'(cutadapt \
"(cutadapt \
--match-read-wildcards \
-j {threads} \
--pair-filter=both \
......@@ -118,11 +179,14 @@ rule pe_remove_polya_cutadapt:
-o {output.reads1} \
-p {output.reads2} \
{input.reads1} \
{input.reads2}) &> {log}'
{input.reads2};) \
1> {log.stdout} 2>{log.stderr}"
rule pe_map_genome_star:
'''Map to genome using STAR'''
'''
Map to genome using STAR
'''
input:
index = lambda wildcards:
os.path.join(
......@@ -141,6 +205,7 @@ rule pe_map_genome_star:
"paired_end",
"{sample}",
"{sample}.remove_polya_mate2.fastq.gz")
output:
bam = os.path.join(
config["output_dir"],
......@@ -154,6 +219,7 @@ rule pe_map_genome_star:
"{sample}",
"map_genome",
"{sample}_Log.final.out")
params:
sample_id = "{sample}",
index = lambda wildcards:
......@@ -181,7 +247,11 @@ rule pe_map_genome_star:
threads: 12
log:
os.path.join( config["log_dir"], "paired_end", "{sample}", "map_genome_star.log")
stderr = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"map_genome_star.stderr.log")
shell:
"(STAR \
......@@ -204,11 +274,14 @@ rule pe_map_genome_star:
--outFilterType BySJout \
--outReadsUnmapped None \
--outSAMattrRGline ID:rnaseq_pipeline SM:{params.sample_id} \
--alignEndsType {params.soft_clip} > {output.bam};) &> {log}"
--alignEndsType {params.soft_clip} > {output.bam};) \
2> {log.stderr}"
rule pe_index_genomic_alignment_samtools:
'''Index the genomic alignment'''
'''
Index the genomic alignment
'''
input:
bam = os.path.join(
config["output_dir"],
......@@ -223,16 +296,31 @@ rule pe_index_genomic_alignment_samtools:
"{sample}",
"map_genome",
"{sample}_Aligned.sortedByCoord.out.bam.bai"),
singularity:
"docker://zavolab/samtools:1.10-slim"
log:
os.path.join( config["log_dir"], "paired_end", "{sample}", "index_genomic_alignment_samtools.log")
stderr = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"index_genomic_alignment_samtools.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"index_genomic_alignment_samtools.stdout.log")
shell:
"(samtools index {input.bam} {output.bai};) &> {log}"
"(samtools index {input.bam} {output.bai};) \
1> {log.stdout} 2> {log.stderr}"
rule pe_quantification_salmon:
'''Quantification at transcript and gene level using Salmon'''
'''
Quantification at transcript and gene level using Salmon
'''
input:
reads1 = os.path.join(
config["output_dir"],
......@@ -252,7 +340,8 @@ rule pe_quantification_salmon:
str(samples_table.loc[wildcards.sample, "organism"]),
str(samples_table.loc[wildcards.sample, "kmer"]),
"salmon.idx")
output:
output:
gn_estimates = os.path.join(
config["output_dir"],
"paired_end",
......@@ -265,6 +354,7 @@ rule pe_quantification_salmon:
"{sample}",
"salmon_quant",
"quant.sf")
params:
output_dir = os.path.join(
config["output_dir"],
......@@ -273,11 +363,24 @@ rule pe_quantification_salmon:
"salmon_quant"),
libType = lambda wildcards:
samples_table.loc[wildcards.sample, 'libtype']
log:
os.path.join(config["log_dir"], "paired_end", "{sample}", "genome_quantification_salmon.log")
threads: 6
stderr = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"genome_quantification_salmon.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"genome_quantification_salmon.stdout.log"),
threads: 6
singularity:
"docker://zavolab/salmon:1.1.0-slim"
shell:
"(salmon quant \
--libType {params.libType} \
......@@ -289,11 +392,13 @@ rule pe_quantification_salmon:
--geneMap {input.gtf} \
-1 {input.reads1} \
-2 {input.reads2} \
-o {params.output_dir}) &> {log}"
-o {params.output_dir}) 1> {log.stdout} 2> {log.stderr}"
rule pe_genome_quantification_kallisto:
'''Quantification at transcript and gene level using Kallisto'''
'''
Quantification at transcript and gene level using Kallisto
'''
input:
reads1 = os.path.join(
config["output_dir"],
......@@ -310,6 +415,7 @@ rule pe_genome_quantification_kallisto:
config["kallisto_indexes"],
samples_table.loc[wildcards.sample, 'organism'],
"kallisto.idx")
output:
pseudoalignment = os.path.join(
config["output_dir"],
......@@ -317,24 +423,33 @@ rule pe_genome_quantification_kallisto:
"{sample}",
"quant_kallisto",
"{sample}.kallisto.pseudo.sam")
params:
output_dir = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"quant_kallisto"),
config["output_dir"],
"paired_end",
"{sample}",
"quant_kallisto"),
directionality = lambda wildcards:
samples_table.loc[wildcards.sample, "kallisto_directionality"]
singularity:
"docker://zavolab/kallisto:0.46.1-slim"
threads: 8
threads: 8
log:
os.path.join(config["log_dir"], "paired_end", "{sample}", "genome_quantification_kallisto.log")
stderr = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"genome_quantification_kallisto.stderr.log")
shell:
"(kallisto quant \
-i {input.index} \
-o {params.output_dir} \
--pseudobam \
{params.directionality} \
{input.reads1} {input.reads2} > {output.pseudoalignment}) &> {log}"
{input.reads1} {input.reads2} > {output.pseudoalignment}) \
2> {log.stderr}"
import os
rule fastqc:
''' A quality control tool for high throughput sequence data. '''
'''
A quality control tool for high throughput sequence data.
'''
input:
reads = lambda wildcards: samples_table.loc[wildcards.sample, "fq1"],
reads = lambda wildcards:
samples_table.loc[wildcards.sample, "fq1"]
output:
outdir = directory(os.path.join(config["output_dir"], "single_end", "{sample}", "mate1_fastqc"))
outdir = directory(os.path.join(
config["output_dir"],
"single_end",
"{sample}",
"mate1_fastqc"))
params:
seqmode= lambda wildcards: samples_table.loc[wildcards.sample, "seqmode"]
seqmode = lambda wildcards:
samples_table.loc[wildcards.sample, "seqmode"]
singularity:
"docker://zavolab/fastqc:0.11.9-slim"
log:
os.path.join(config["log_dir"], "single_end", "{sample}", "fastqc.log")
stderr = os.path.join(
config["log_dir"],
"single_end",
"{sample}",
"fastqc.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"single_end",
"{sample}",
"fastqc.stdout.log")
shell:
"(mkdir -p {output.outdir}; \
fastqc \
--outdir {output.outdir} \
{input.reads}) &> {log}"
{input.reads};) \
1> {log.stdout} 2> {log.stderr}"
rule remove_adapters_cutadapt:
''' Remove adapters '''
'''
Remove adapters
'''
input:
reads = lambda wildcards: samples_table.loc[wildcards.sample, "fq1"]
reads = lambda wildcards:
samples_table.loc[wildcards.sample, "fq1"]
output:
reads = os.path.join(config["output_dir"], "single_end", "{sample}", "{sample}.remove_adapters_mate1.fastq.gz")
reads = os.path.join(
config["output_dir"],
"single_end",
"{sample}",
"{sample}.remove_adapters_mate1.fastq.gz")
params:
adapters_3 = lambda wildcards:
adapters_3 = lambda wildcards:
samples_table.loc[wildcards.sample, 'fq1_3p'],
adapters_5 = lambda wildcards:
adapters_5 = lambda wildcards:
samples_table.loc[wildcards.sample, 'fq1_5p']
singularity:
"docker://zavolab/cutadapt:1.16-slim"
threads: 8
log:
os.path.join(config["log_dir"], "single_end", "{sample}", "remove_adapters_cutadapt.log")
stderr = os.path.join(
config["log_dir"],
"single_end",
"{sample}",
"remove_adapters_cutadapt.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"single_end",
"{sample}",
"remove_adapters_cutadapt.stdout.log")
shell:
"(cutadapt \
-e 0.1 \
......@@ -45,23 +89,49 @@ rule remove_adapters_cutadapt:
-a {params.adapters_3} \
-g {params.adapters_5} \
-o {output.reads} \
{input.reads}) &> {log}"
{input.reads};) \
1> {log.stdout} 2> {log.stderr}"
rule remove_polya_cutadapt:
''' Remove ployA tails'''
'''
Remove ployA tails
'''
input:
reads = os.path.join(config["output_dir"], "single_end", "{sample}", "{sample}.remove_adapters_mate1.fastq.gz")
reads = os.path.join(
config["output_dir"],
"single_end",
"{sample}",
"{sample}.remove_adapters_mate1.fastq.gz")
output:
reads = os.path.join(config["output_dir"], "single_end", "{sample}", "{sample}.remove_polya_mate1.fastq.gz")
reads = os.path.join(
config["output_dir"],
"single_end",
"{sample}",
"{sample}.remove_polya_mate1.fastq.gz")
params:
polya_3 = lambda wildcards:
polya_3 = lambda wildcards:
samples_table.loc[wildcards.sample, "fq1_polya"]
singularity:
"docker://zavolab/cutadapt:1.16-slim"
threads: 8
log:
os.path.join(config["log_dir"], "single_end", "{sample}", "remove_polya_cutadapt.log")
stderr = os.path.join(
config["log_dir"],
"single_end",
"{sample}",
"remove_polya_cutadapt.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"single_end",
"{sample}",
"remove_polya_cutadapt.stdout.log")
shell:
"(cutadapt \
--match-read-wildcards \
......@@ -73,51 +143,75 @@ rule remove_polya_cutadapt:
-m 10 \
-a {params.polya_3} \
-o {output.reads} \
{input.reads}) &> {log}"
{input.reads}); \
1> {log.stdout} 2> {log.stderr}"
rule map_genome_star:
''' Map to genome using STAR. '''
'''
Map to genome using STAR
'''
input:
index = lambda wildcards:
os.path.join(
config["star_indexes"],
str(samples_table.loc[wildcards.sample, "organism"]),
str(samples_table.loc[wildcards.sample, "index_size"]),
"STAR_index","chrNameLength.txt"),
reads = os.path.join(config["output_dir"], "single_end", "{sample}", "{sample}.remove_polya_mate1.fastq.gz")
str(samples_table.loc[wildcards.sample, "index_size"]),
"STAR_index",
"chrNameLength.txt"),
reads = os.path.join(
config["output_dir"],
"single_end",
"{sample}",
"{sample}.remove_polya_mate1.fastq.gz")
output:
bam = os.path.join(config["output_dir"], "single_end",
"{sample}",
"map_genome",
bam = os.path.join(
config["output_dir"],
"single_end",
"{sample}",
"map_genome",
"{sample}_Aligned.sortedByCoord.out.bam"),
logfile = os.path.join(config["output_dir"], "single_end",
"{sample}",
"map_genome",
logfile = os.path.join(
config["output_dir"],
"single_end",
"{sample}",
"map_genome",
"{sample}_Log.final.out")
params:
sample_id = "{sample}",
index = lambda wildcards:
os.path.join(
config["star_indexes"],
str(samples_table.loc[wildcards.sample, "organism"]),
str(samples_table.loc[wildcards.sample, "index_size"]),
str(samples_table.loc[wildcards.sample, "index_size"]),
"STAR_index"),
outFileNamePrefix = os.path.join(
config["output_dir"],
"single_end",
"{sample}", "map_genome", "{sample}_"),
config["output_dir"],
"single_end",
"{sample}",
"map_genome",
"{sample}_"),
multimappers = lambda wildcards:
samples_table.loc[wildcards.sample, "multimappers"],
soft_clip = lambda wildcards:
samples_table.loc[wildcards.sample, "soft_clip"],
pass_mode = lambda wildcards:
samples_table.loc[wildcards.sample, "pass_mode"],
singularity:
"docker://zavolab/star:2.7.3a-slim"
threads: 12
log:
os.path.join(config["log_dir"], "single_end", "{sample}", "map_genome_star.log")
stderr = os.path.join(
config["log_dir"],
"single_end",
"{sample}",
"map_genome_star.stderr.log")
shell:
"(STAR \
--runMode alignReads \
......@@ -139,39 +233,61 @@ rule map_genome_star:
--outFilterType BySJout \
--outReadsUnmapped None \
--outSAMattrRGline ID:rcrunch SM:{params.sample_id} \
--alignEndsType {params.soft_clip} > {output.bam};) &> {log}"
--alignEndsType {params.soft_clip} > {output.bam};) \
2> {log.stderr}"
rule index_genomic_alignment_samtools:
'''Index genome bamfile using samtools.'''
'''
Index genome bamfile using samtools
'''
input:
bam = os.path.join(config["output_dir"],
"single_end",
"{sample}",
"map_genome",
bam = os.path.join(
config["output_dir"],
"single_end",
"{sample}",
"map_genome",
"{sample}_Aligned.sortedByCoord.out.bam")
output:
bai = os.path.join(config["output_dir"],
"single_end",
"{sample}",
"map_genome",
bai = os.path.join(
config["output_dir"],
"single_end",
"{sample}",
"map_genome",
"{sample}_Aligned.sortedByCoord.out.bam.bai")
singularity:
"docker://zavolab/samtools:1.10-slim"
threads: 1
log:
os.path.join(config["log_dir"], "single_end", "{sample}", "index_genomic_alignment_samtools.log")
stderr = os.path.join(
config["log_dir"],
"single_end",
"{sample}",
"index_genomic_alignment_samtools.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"single_end",
"{sample}",
"index_genomic_alignment_samtools.stdout.log")
shell:
"(samtools index {input.bam} {output.bai};) &> {log}"
"(samtools index {input.bam} {output.bai};) \
1> {log.stdout} 2> {log.stderr}"
rule quantification_salmon:
''' Quantification at transcript and gene level using Salmon. '''
'''
Quantification at transcript and gene level using Salmon
'''
input:
reads = os.path.join(
config["output_dir"],
"single_end",
"{sample}",
config["output_dir"],
"single_end",
"{sample}",
"{sample}.remove_polya_mate1.fastq.gz"),
index = lambda wildcards:
os.path.join(
......@@ -179,33 +295,49 @@ rule quantification_salmon:
str(samples_table.loc[wildcards.sample, "organism"]),
str(samples_table.loc[wildcards.sample, "kmer"]),
"salmon.idx"),
gtf = lambda wildcards: samples_table.loc[wildcards.sample, "gtf_filtered"]
gtf = lambda wildcards:
samples_table.loc[wildcards.sample, "gtf_filtered"]
output:
gn_estimates = os.path.join(
config["output_dir"],
"single_end",
"{sample}",
config["output_dir"],
"single_end",
"{sample}",
"salmon_quant",
"quant.genes.sf"),
tr_estimates = os.path.join(
config["output_dir"],
"single_end",
"{sample}",
config["output_dir"],
"single_end",
"{sample}",
"salmon_quant",
"quant.sf")
params:
output_dir = os.path.join(
config["output_dir"],
"single_end",
"{sample}",
config["output_dir"],
"single_end",
"{sample}",
"salmon_quant"),
libType = lambda wildcards:
samples_table.loc[wildcards.sample, "libtype"]
log:
os.path.join(config["log_dir"], "single_end", "{sample}", "quantification_salmon.log")
threads: 12
stderr = os.path.join(
config["log_dir"],
"single_end",
"{sample}",
"quantification_salmon.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"single_end",
"{sample}",
"quantification_salmon.stdout.log")
threads: 12
singularity:
"docker://zavolab/salmon:1.1.0-slim"
shell:
"(salmon quant \
--libType {params.libType} \
......@@ -216,43 +348,59 @@ rule quantification_salmon:
--index {input.index} \
--geneMap {input.gtf} \
--unmatedReads {input.reads} \
-o {params.output_dir}) &> {log}"
-o {params.output_dir};) \
1> {log.stdout} 2> {log.stderr}"
rule genome_quantification_kallisto:
''' Quantification at transcript and gene level using Kallisto. '''
'''
Quantification at transcript and gene level using Kallisto
'''
input:
reads = os.path.join(
config["output_dir"],
"single_end",
"{sample}",
config["output_dir"],
"single_end",
"{sample}",
"{sample}.remove_polya_mate1.fastq.gz"),
index = lambda wildcards:
os.path.join(
config["kallisto_indexes"],
samples_table.loc[wildcards.sample, "organism"],
"kallisto.idx")
output:
pseudoalignment = os.path.join(
config["output_dir"],
"single_end",
"{sample}",
config["output_dir"],
"single_end",
"{sample}",
"quant_kallisto",
"{sample}.kallisto.pseudo.sam")
params:
output_dir = os.path.join(
config["output_dir"],
"single_end",
"{sample}",
"quant_kallisto"),
fraglen = lambda wildcards: samples_table.loc[wildcards.sample, 'mean'],
fragsd = lambda wildcards: samples_table.loc[wildcards.sample, 'sd'],
directionality = lambda wildcards: samples_table.loc[wildcards.sample, 'kallisto_directionality']
threads: 8
config["output_dir"],
"single_end",
"{sample}",
"quant_kallisto"),
fraglen = lambda wildcards:
samples_table.loc[wildcards.sample, 'mean'],
fragsd = lambda wildcards:
samples_table.loc[wildcards.sample, 'sd'],
directionality = lambda wildcards:
samples_table.loc[wildcards.sample, 'kallisto_directionality']
threads: 8
log:
os.path.join(config["log_dir"], "single_end", "{sample}", "genome_quantification_kallisto.log.log")
stderr = os.path.join(
config["log_dir"],
"single_end",
"{sample}",
"genome_quantification_kallisto.stderr.log")
singularity:
"docker://zavolab/kallisto:0.46.1-slim"
shell:
"(kallisto quant \
-i {input.index} \
......@@ -262,5 +410,5 @@ rule genome_quantification_kallisto:
-s {params.fragsd} \
--pseudobam \
{params.directionality} \
{input.reads} > {output.pseudoalignment}) &> {log}"
{input.reads} > {output.pseudoalignment};) \
2> {log.stderr}"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment