Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
Z
ZARP
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
zavolan_group
pipelines
ZARP
Commits
ff0a39fa
Commit
ff0a39fa
authored
5 years ago
by
BIOPZ-Katsantoni Maria
Committed by
BIOPZ-Gypas Foivos
5 years ago
Browse files
Options
Downloads
Patches
Plain Diff
Separate stdout and stderr logs for the majority of rules. Closes
#76
and
#79
parent
f31261b5
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
Snakefile
+222
-150
222 additions, 150 deletions
Snakefile
workflow/rules/paired_end.snakefile.smk
+157
-42
157 additions, 42 deletions
workflow/rules/paired_end.snakefile.smk
workflow/rules/single_end.snakefile.smk
+229
-81
229 additions, 81 deletions
workflow/rules/single_end.snakefile.smk
with
608 additions
and
273 deletions
Snakefile
+
222
−
150
View file @
ff0a39fa
...
...
@@ -24,135 +24,138 @@ os.makedirs(
os.getcwd(),
config['log_dir'],
),
exist_ok=True
,
)
exist_ok=True
)
if cluster_config:
os.makedirs(
os.path.join(
os.getcwd(),
os.path.dirname(cluster_config['__default__']['out']),
),
exist_ok=True,
)
exist_ok=True)
# Include subworkflows
include: os.path.join("workflow", "rules", "paired_end.snakefile.smk")
include: os.path.join("workflow", "rules", "single_end.snakefile.smk")
# Final rule
rule finish:
"""Rule for collecting outputs"""
"""
Rule for collecting outputs
"""
input:
outdir1 = expand(
os.path.join(
config['output_dir'],
"{seqmode}",
"{sample}",
"mate1_fastqc",
),
"mate1_fastqc"),
zip,
sample=[i for i in list(samples_table.index.values)],
seqmode=[
samples_table.loc[i, 'seqmode']
for i in list(samples_table.index.values)
]
),
seqmode=[samples_table.loc[i, 'seqmode']
for i in list(samples_table.index.values)]),
salmon_gn_estimates = expand(
os.path.join(
config['output_dir'],
"{seqmode}",
"{sample}",
"salmon_quant",
"quant.genes.sf",
),
"quant.genes.sf"),
zip,
sample=[i for i in list(samples_table.index.values)],
seqmode=[
samples_table.loc[i, 'seqmode']
for i in list(samples_table.index.values)
]
),
seqmode=[samples_table.loc[i, 'seqmode']
for i in list(samples_table.index.values)]),
pseudoalignment = expand(
os.path.join(
config['output_dir'],
"{seqmode}",
"{sample}",
"quant_kallisto",
"{sample}.kallisto.pseudo.sam",
),
"{sample}.kallisto.pseudo.sam"),
zip,
sample=[i for i in list(samples_table.index.values)],
seqmode=[
samples_table.loc[i, 'seqmode']
for i in list(samples_table.index.values)
]
),
seqmode=[samples_table.loc[i, 'seqmode']
for i in list(samples_table.index.values)]),
TIN_score = expand(
os.path.join(
config['output_dir'],
"{seqmode}",
"{sample}",
"TIN",
"TIN_score.tsv",
),
"TIN_score.tsv"),
zip,
sample=[i for i in list(samples_table.index.values)],
seqmode=[
samples_table.loc[i, 'seqmode']
for i in list(samples_table.index.values)
]
),
salmon_merge_genes = expand(os.path.join(config["output_dir"], "summary_salmon", "quantmerge", "genes_{salmon_merge_on}.tsv"), salmon_merge_on=["tpm", "numreads"]),
salmon_merge_transcripts = expand(os.path.join(config["output_dir"], "summary_salmon", "quantmerge", "transcripts_{salmon_merge_on}.tsv"), salmon_merge_on=["tpm", "numreads"])
seqmode=[samples_table.loc[i, 'seqmode']
for i in list(samples_table.index.values)]),
salmon_merge_genes = expand(
os.path.join(
config["output_dir"],
"summary_salmon",
"quantmerge",
"genes_{salmon_merge_on}.tsv"),
salmon_merge_on=["tpm", "numreads"]),
salmon_merge_transcripts = expand(
os.path.join(
config["output_dir"],
"summary_salmon",
"quantmerge",
"transcripts_{salmon_merge_on}.tsv"),
salmon_merge_on=["tpm", "numreads"])
rule create_index_star:
"""Create index for STAR alignments"""
"""
Create index for STAR alignments
"""
input:
genome = lambda wildcards:
samples_table['genome']
[
samples_table['organism'] == wildcards.organism
]
[0],
samples_table['genome']
[
samples_table['organism'] == wildcards.organism
]
[0],
gtf = lambda wildcards:
samples_table['gtf'][
samples_table['organism'] == wildcards.organism
][0],
samples_table['gtf']
[samples_table['organism'] == wildcards.organism]
[0]
output:
chromosome_info = os.path.join(
config['star_indexes'],
"{organism}",
"{index_size}",
"STAR_index",
"chrNameLength.txt",
),
"chrNameLength.txt"),
chromosomes_names = os.path.join(
config['star_indexes'],
"{organism}",
"{index_size}",
"STAR_index",
"chrName.txt"
,
),
"chrName.txt"
)
params:
output_dir = os.path.join(
config['star_indexes'],
"{organism}",
"{index_size}",
"STAR_index",
),
"STAR_index"),
outFileNamePrefix = os.path.join(
config['star_indexes'],
"{organism}",
"{index_size}",
"STAR_index/STAR_",
),
sjdbOverhang = "{index_size}",
"STAR_index/STAR_"
)
,
sjdbOverhang = "{index_size}"
singularity:
"docker://zavolab/star:2.7.3a-slim"
threads: 12
log:
os.path.join(
stderr =
os.path.join(
config['log_dir'],
"{organism}_{index_size}_create_index_star.log",
)
"{organism}_{index_size}_create_index_star.stderr.log"),
stdout = os.path.join(
config['log_dir'],
"{organism}_{index_size}_create_index_star.stdout.log")
shell:
"(mkdir -p {params.output_dir}; \
chmod -R 777 {params.output_dir}; \
...
...
@@ -163,223 +166,292 @@ rule create_index_star:
--genomeFastaFiles {input.genome} \
--runThreadN {threads} \
--outFileNamePrefix {params.outFileNamePrefix} \
--sjdbGTFfile {input.gtf}) &> {log}"
--sjdbGTFfile {input.gtf}) \
1> {log.stdout} 2> {log.stderr}"
rule create_index_salmon:
"""Create index for Salmon quantification"""
"""
Create index for Salmon quantification
"""
input:
transcriptome = lambda wildcards:
samples_table['tr_fasta_filtered'][
samples_table['organism'] == wildcards.organism
][0]
samples_table['tr_fasta_filtered']
[samples_table['organism'] == wildcards.organism]
[0]
output:
index = directory(
os.path.join(
config['salmon_indexes'],
"{organism}",
"{kmer}",
"salmon.idx",
)
),
"salmon.idx"))
params:
kmerLen = "{kmer}",
kmerLen = "{kmer}"
singularity:
"docker://zavolab/salmon:1.1.0-slim"
log:
os.path.join(
stderr =
os.path.join(
config['log_dir'],
"{organism}_{kmer}_create_index_salmon.log"
)
"{organism}_{kmer}_create_index_salmon.stderr.log"),
stdout = os.path.join(
config['log_dir'],
"{organism}_{kmer}_create_index_salmon.stdout.log")
threads: 8
shell:
"(salmon index \
--transcripts {input.transcriptome} \
--index {output.index} \
--kmerLen {params.kmerLen} \
--threads {threads}) &> {log}"
--threads {threads}) \
1> {log.stdout} 2> {log.stderr}"
rule create_index_kallisto:
"""Create index for Kallisto quantification"""
"""
Create index for Kallisto quantification
"""
input:
transcriptome = lambda wildcards:
samples_table['tr_fasta_filtered'][
samples_table['organism'] == wildcards.organism
][0],
samples_table['tr_fasta_filtered']
[samples_table['organism'] == wildcards.organism]
[0]
output:
index = os.path.join(
config['kallisto_indexes'],
"{organism}",
"kallisto.idx"
,
),
"kallisto.idx"
)
params:
output_dir = os.path.join(
config['kallisto_indexes'],
"{organism}"
,
),
"{organism}"
)
singularity:
"docker://zavolab/kallisto:0.46.1-slim"
log:
os.path.join(
stderr =
os.path.join(
config['log_dir'],
"{organism}_create_index_kallisto.log"
)
"{organism}_create_index_kallisto.stderr.log"),
stdout = os.path.join(
config['log_dir'],
"{organism}_create_index_kallisto.stdout.log")
shell:
"(mkdir -p {params.output_dir}; \
chmod -R 777 {params.output_dir}; \
kallisto index -i {output.index} {input.transcriptome}) &> {log}"
kallisto index -i {output.index} {input.transcriptome}) \
1> {log.stdout} 2> {log.stderr}"
rule extract_transcripts_as_bed12:
"""Convert transcripts to BED12 format"""
"""
Convert transcripts to BED12 format
"""
input:
gtf = lambda wildcards:
samples_table['gtf'][0],
samples_table['gtf']
[0]
output:
bed12 = os.path.join(
config['output_dir'],
"full_transcripts_protein_coding.bed"
,
),
"full_transcripts_protein_coding.bed"
)
singularity:
"docker://zavolab/gtf_transcript_type_to_bed12:0.1.0-slim"
threads: 1
log:
os.path.join(
stderr =
os.path.join(
config['log_dir'],
"extract_transcripts_as_bed12.log"
,
)
"extract_transcripts_as_bed12.
stderr.
log"
)
shell:
"gtf_transcript_type_to_bed12.pl \
"
(
gtf_transcript_type_to_bed12.pl \
--anno={input.gtf} \
--type=protein_coding \
1> {output.bed12} \
2> {log}"
--type=protein_coding > {output.bed12}); \
2> {log.stderr}"
rule calculate_TIN_scores:
"""Caluclate transcript integrity (TIN) score"""
"""
Caluclate transcript integrity (TIN) score
"""
input:
bai = os.path.join(
config['output_dir'],
"{seqmode}",
"{sample}",
"map_genome",
"{sample}_Aligned.sortedByCoord.out.bam.bai"
),
"{sample}_Aligned.sortedByCoord.out.bam.bai"),
transcripts_bed12 = os.path.join(
config['output_dir'],
"full_transcripts_protein_coding.bed"
),
"full_transcripts_protein_coding.bed"
)
output:
TIN_score = os.path.join(
config['output_dir'],
"{seqmode}",
"{sample}",
"TIN",
"TIN_score.tsv"
,
),
"TIN_score.tsv"
)
params:
bam = os.path.join(
config['output_dir'],
"{seqmode}",
"{sample}",
"map_genome",
"{sample}_Aligned.sortedByCoord.out.bam"
),
sample = "{sample}",
"{sample}_Aligned.sortedByCoord.out.bam"
),
sample = "{sample}"
log:
os.path.join(
stderr =
os.path.join(
config['log_dir'],
"{seqmode}",
"{sample}",
"calculate_TIN_scores.log"
,
)
"calculate_TIN_scores.log"
)
threads: 8
singularity:
"docker://zavolab/tin_score_calculation:0.1.0-slim"
shell:
"tin_score_calculation.py \
"
(
tin_score_calculation.py \
-i {params.bam} \
-r {input.transcripts_bed12} \
-c 0 \
--names {params.sample} \
-n 100 \
1> {output.TIN_score} \
2> {log}"
-n 100 > {output.TIN_score};) 2> {log.stderr}"
rule salmon_quantmerge_genes:
''' Merge gene quantifications into a single file. '''
'''
Merge gene quantifications into a single file
'''
input:
salmon_in = expand(os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"salmon_quant",
"quant.genes.sf"),
salmon_in = expand(
os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"salmon_quant",
"quant.genes.sf"),
zip,
sample= list(samples_table.index.values),
seqmode= list(samples_table["seqmode"])),
sample=list(samples_table.index.values),
seqmode=list(samples_table["seqmode"]))
output:
salmon_out = os.path.join(config["output_dir"], "summary_salmon", "quantmerge", "genes_{salmon_merge_on}.tsv")
params:
salmon_dir = expand(os.path.join(
salmon_out = os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"salmon_quant"),
"summary_salmon",
"quantmerge",
"genes_{salmon_merge_on}.tsv")
params:
salmon_dir = expand(
os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"salmon_quant"),
zip,
sample= list(samples_table.index.values),
seqmode= list(samples_table["seqmode"])),
sample_name_list = expand("{sample}", sample= list(samples_table.index.values)),
sample=list(samples_table.index.values),
seqmode=list(samples_table["seqmode"])),
sample_name_list = expand(
"{sample}",
sample=list(samples_table.index.values)),
salmon_merge_on = "{salmon_merge_on}"
log:
os.path.join(config["log_dir"], "salmon_quantmerge_genes_{salmon_merge_on}.log")
threads: 1
stderr = os.path.join(
config["log_dir"],
"salmon_quantmerge_genes_{salmon_merge_on}.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"salmon_quantmerge_genes_{salmon_merge_on}.stdout.log")
threads: 1
singularity:
"docker://zavolab/salmon:1.1.0-slim"
shell:
"(salmon quantmerge \
--quants {params.salmon_dir} \
--genes \
--names {params.sample_name_list} \
--column {params.salmon_merge_on} \
--output {output.salmon_out}) &> {log}"
--output {output.salmon_out};) \
1> {log.stdout} 2> {log.stderr}"
rule salmon_quantmerge_transcripts:
''' Merge gene quantifications into a single file. '''
'''
Merge gene quantifications into a single file
'''
input:
salmon_in = expand(os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"salmon_quant",
"quant.sf"),
salmon_in = expand(
os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"salmon_quant",
"quant.sf"),
zip,
sample= list(samples_table.index.values),
seqmode= list(samples_table["seqmode"])),
sample=list(samples_table.index.values),
seqmode=list(samples_table["seqmode"])),
output:
salmon_out = os.path.join(config["output_dir"], "summary_salmon", "quantmerge", "transcripts_{salmon_merge_on}.tsv")
params:
salmon_dir = expand(os.path.join(
salmon_out = os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"salmon_quant"),
"summary_salmon",
"quantmerge",
"transcripts_{salmon_merge_on}.tsv")
params:
salmon_dir = expand(
os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"salmon_quant"),
zip,
sample= list(samples_table.index.values),
seqmode= list(samples_table["seqmode"])),
sample_name_list = expand("{sample}", sample= list(samples_table.index.values)),
sample=list(samples_table.index.values),
seqmode=list(samples_table["seqmode"])),
sample_name_list = expand(
"{sample}",
sample=list(samples_table.index.values)),
salmon_merge_on = "{salmon_merge_on}"
log:
os.path.join(config["log_dir"], "salmon_quantmerge_transcripts_{salmon_merge_on}.log")
threads: 1
stderr = os.path.join(
config["log_dir"],
"salmon_quantmerge_transcripts_{salmon_merge_on}.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"salmon_quantmerge_transcripts_{salmon_merge_on}.stdout.log")
threads: 1
singularity:
"docker://zavolab/salmon:1.1.0-slim"
shell:
"(salmon quantmerge \
--quants {params.salmon_dir} \
--names {params.sample_name_list} \
--column {params.salmon_merge_on} \
--output {output.salmon_out}) &> {log}"
--output {output.salmon_out}) \
1> {log.stdout} 2> {log.stderr}"
This diff is collapsed.
Click to expand it.
workflow/rules/paired_end.snakefile.smk
+
157
−
42
View file @
ff0a39fa
rule pe_fastqc:
'''A quality control tool for high throughput sequence data'''
'''
A quality control tool for high throughput sequence data
'''
input:
reads1 = lambda wildcards: samples_table.loc[wildcards.sample, "fq1"],
reads2 = lambda wildcards: samples_table.loc[wildcards.sample, "fq2"]
reads1 = lambda wildcards:
samples_table.loc[wildcards.sample, "fq1"],
reads2 = lambda wildcards:
samples_table.loc[wildcards.sample, "fq2"]
output:
outdir1 = directory(os.path.join(config["output_dir"],"paired_end", "{sample}", "mate1_fastqc")),
outdir2 = directory(os.path.join(config["output_dir"],"paired_end", "{sample}", "mate2_fastqc"))
threads:
2
outdir1 = directory(os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"mate1_fastqc")),
outdir2 = directory(os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"mate2_fastqc"))
threads: 2
singularity:
"docker://zavolab/fastqc:0.11.9-slim"
log:
os.path.join(config["log_dir"],"paired_end", "{sample}", "fastqc.log")
stderr = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"fastqc.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"fastqc.stdout.log")
shell:
"(mkdir -p {output.outdir1}; \
mkdir -p {output.outdir2}; \
fastqc --outdir {output.outdir1} {input.reads1} & \
fastqc --outdir {output.outdir2} {input.reads2}) &> {log}"
fastqc --outdir {output.outdir1} {input.reads1}; \
fastqc --outdir {output.outdir2} {input.reads2}); \
1> {log.stdout} 2> {log.stderr}"
rule pe_remove_adapters_cutadapt:
'''Remove adapters'''
'''
Remove adapters
'''
input:
reads1 = lambda wildcards: samples_table.loc[wildcards.sample, "fq1"],
reads2 = lambda wildcards: samples_table.loc[wildcards.sample, "fq2"]
reads1 = lambda wildcards:
samples_table.loc[wildcards.sample, "fq1"],
reads2 = lambda wildcards:
samples_table.loc[wildcards.sample, "fq2"]
output:
reads1 = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"{sample}.remove_adapters_mate1.fastq.gz"),
reads2 = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"{sample}.remove_adapters_mate2.fastq.gz")
params:
adapter_3_mate1 = lambda wildcards:
samples_table.loc[wildcards.sample, 'fq1_3p'],
...
...
@@ -47,11 +77,24 @@ rule pe_remove_adapters_cutadapt:
samples_table.loc[wildcards.sample, 'fq2_3p'],
adapter_5_mate2 = lambda wildcards:
samples_table.loc[wildcards.sample, 'fq2_5p']
singularity:
"docker://zavolab/cutadapt:1.16-slim"
threads: 8
log:
os.path.join( config["log_dir"], "paired_end", "{sample}", "remove_adapters_cutadapt.log")
stderr = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"remove_adapters_cutadapt.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"remove_adapters_cutadapt.stdout.log")
shell:
"(cutadapt \
-e 0.1 \
...
...
@@ -66,11 +109,14 @@ rule pe_remove_adapters_cutadapt:
-o {output.reads1} \
-p {output.reads2} \
{input.reads1} \
{input.reads2}) &> {log}"
{input.reads2}); \
1> {log.stdout} 2>{log.stderr}"
rule pe_remove_polya_cutadapt:
'''Remove polyA tails'''
'''
Remove polyA tails
'''
input:
reads1 = os.path.join(
config["output_dir"],
...
...
@@ -82,6 +128,7 @@ rule pe_remove_polya_cutadapt:
"paired_end",
"{sample}",
"{sample}.remove_adapters_mate2.fastq.gz")
output:
reads1 = os.path.join(
config["output_dir"],
...
...
@@ -93,18 +140,32 @@ rule pe_remove_polya_cutadapt:
"paired_end",
"{sample}",
"{sample}.remove_polya_mate2.fastq.gz")
params:
polya_3_mate1 = lambda wildcards:
samples_table.loc[wildcards.sample, 'fq1_polya'],
polya_3_mate2 = lambda wildcards:
samples_table.loc[wildcards.sample, 'fq2_polya'],
samples_table.loc[wildcards.sample, 'fq2_polya']
singularity:
"docker://zavolab/cutadapt:1.16-slim"
threads: 8
log:
os.path.join( config["log_dir"], "paired_end", "{sample}", "remove_polya_cutadapt.log")
stderr = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"remove_polya_cutadapt.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"remove_adapters_cutadapt.stdout.log")
shell:
'
(cutadapt \
"
(cutadapt \
--match-read-wildcards \
-j {threads} \
--pair-filter=both \
...
...
@@ -118,11 +179,14 @@ rule pe_remove_polya_cutadapt:
-o {output.reads1} \
-p {output.reads2} \
{input.reads1} \
{input.reads2}) &> {log}'
{input.reads2};) \
1> {log.stdout} 2>{log.stderr}"
rule pe_map_genome_star:
'''Map to genome using STAR'''
'''
Map to genome using STAR
'''
input:
index = lambda wildcards:
os.path.join(
...
...
@@ -141,6 +205,7 @@ rule pe_map_genome_star:
"paired_end",
"{sample}",
"{sample}.remove_polya_mate2.fastq.gz")
output:
bam = os.path.join(
config["output_dir"],
...
...
@@ -154,6 +219,7 @@ rule pe_map_genome_star:
"{sample}",
"map_genome",
"{sample}_Log.final.out")
params:
sample_id = "{sample}",
index = lambda wildcards:
...
...
@@ -181,7 +247,11 @@ rule pe_map_genome_star:
threads: 12
log:
os.path.join( config["log_dir"], "paired_end", "{sample}", "map_genome_star.log")
stderr = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"map_genome_star.stderr.log")
shell:
"(STAR \
...
...
@@ -204,11 +274,14 @@ rule pe_map_genome_star:
--outFilterType BySJout \
--outReadsUnmapped None \
--outSAMattrRGline ID:rnaseq_pipeline SM:{params.sample_id} \
--alignEndsType {params.soft_clip} > {output.bam};) &> {log}"
--alignEndsType {params.soft_clip} > {output.bam};) \
2> {log.stderr}"
rule pe_index_genomic_alignment_samtools:
'''Index the genomic alignment'''
'''
Index the genomic alignment
'''
input:
bam = os.path.join(
config["output_dir"],
...
...
@@ -223,16 +296,31 @@ rule pe_index_genomic_alignment_samtools:
"{sample}",
"map_genome",
"{sample}_Aligned.sortedByCoord.out.bam.bai"),
singularity:
"docker://zavolab/samtools:1.10-slim"
log:
os.path.join( config["log_dir"], "paired_end", "{sample}", "index_genomic_alignment_samtools.log")
stderr = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"index_genomic_alignment_samtools.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"index_genomic_alignment_samtools.stdout.log")
shell:
"(samtools index {input.bam} {output.bai};) &> {log}"
"(samtools index {input.bam} {output.bai};) \
1> {log.stdout} 2> {log.stderr}"
rule pe_quantification_salmon:
'''Quantification at transcript and gene level using Salmon'''
'''
Quantification at transcript and gene level using Salmon
'''
input:
reads1 = os.path.join(
config["output_dir"],
...
...
@@ -252,7 +340,8 @@ rule pe_quantification_salmon:
str(samples_table.loc[wildcards.sample, "organism"]),
str(samples_table.loc[wildcards.sample, "kmer"]),
"salmon.idx")
output:
output:
gn_estimates = os.path.join(
config["output_dir"],
"paired_end",
...
...
@@ -265,6 +354,7 @@ rule pe_quantification_salmon:
"{sample}",
"salmon_quant",
"quant.sf")
params:
output_dir = os.path.join(
config["output_dir"],
...
...
@@ -273,11 +363,24 @@ rule pe_quantification_salmon:
"salmon_quant"),
libType = lambda wildcards:
samples_table.loc[wildcards.sample, 'libtype']
log:
os.path.join(config["log_dir"], "paired_end", "{sample}", "genome_quantification_salmon.log")
threads: 6
stderr = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"genome_quantification_salmon.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"genome_quantification_salmon.stdout.log"),
threads: 6
singularity:
"docker://zavolab/salmon:1.1.0-slim"
shell:
"(salmon quant \
--libType {params.libType} \
...
...
@@ -289,11 +392,13 @@ rule pe_quantification_salmon:
--geneMap {input.gtf} \
-1 {input.reads1} \
-2 {input.reads2} \
-o {params.output_dir})
&> {log
}"
-o {params.output_dir})
1> {log.stdout} 2> {log.stderr
}"
rule pe_genome_quantification_kallisto:
'''Quantification at transcript and gene level using Kallisto'''
'''
Quantification at transcript and gene level using Kallisto
'''
input:
reads1 = os.path.join(
config["output_dir"],
...
...
@@ -310,6 +415,7 @@ rule pe_genome_quantification_kallisto:
config["kallisto_indexes"],
samples_table.loc[wildcards.sample, 'organism'],
"kallisto.idx")
output:
pseudoalignment = os.path.join(
config["output_dir"],
...
...
@@ -317,24 +423,33 @@ rule pe_genome_quantification_kallisto:
"{sample}",
"quant_kallisto",
"{sample}.kallisto.pseudo.sam")
params:
output_dir = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"quant_kallisto"),
config["output_dir"],
"paired_end",
"{sample}",
"quant_kallisto"),
directionality = lambda wildcards:
samples_table.loc[wildcards.sample, "kallisto_directionality"]
singularity:
"docker://zavolab/kallisto:0.46.1-slim"
threads: 8
threads: 8
log:
os.path.join(config["log_dir"], "paired_end", "{sample}", "genome_quantification_kallisto.log")
stderr = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"genome_quantification_kallisto.stderr.log")
shell:
"(kallisto quant \
-i {input.index} \
-o {params.output_dir} \
--pseudobam \
{params.directionality} \
{input.reads1} {input.reads2} > {output.pseudoalignment})
&> {log}"
{input.reads1} {input.reads2} > {output.pseudoalignment})
\
2> {log.stderr}"
This diff is collapsed.
Click to expand it.
workflow/rules/single_end.snakefile.smk
+
229
−
81
View file @
ff0a39fa
import os
rule fastqc:
''' A quality control tool for high throughput sequence data. '''
'''
A quality control tool for high throughput sequence data.
'''
input:
reads = lambda wildcards: samples_table.loc[wildcards.sample, "fq1"],
reads = lambda wildcards:
samples_table.loc[wildcards.sample, "fq1"]
output:
outdir = directory(os.path.join(config["output_dir"], "single_end", "{sample}", "mate1_fastqc"))
outdir = directory(os.path.join(
config["output_dir"],
"single_end",
"{sample}",
"mate1_fastqc"))
params:
seqmode= lambda wildcards: samples_table.loc[wildcards.sample, "seqmode"]
seqmode = lambda wildcards:
samples_table.loc[wildcards.sample, "seqmode"]
singularity:
"docker://zavolab/fastqc:0.11.9-slim"
log:
os.path.join(config["log_dir"], "single_end", "{sample}", "fastqc.log")
stderr = os.path.join(
config["log_dir"],
"single_end",
"{sample}",
"fastqc.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"single_end",
"{sample}",
"fastqc.stdout.log")
shell:
"(mkdir -p {output.outdir}; \
fastqc \
--outdir {output.outdir} \
{input.reads}) &> {log}"
{input.reads};) \
1> {log.stdout} 2> {log.stderr}"
rule remove_adapters_cutadapt:
''' Remove adapters '''
'''
Remove adapters
'''
input:
reads = lambda wildcards: samples_table.loc[wildcards.sample, "fq1"]
reads = lambda wildcards:
samples_table.loc[wildcards.sample, "fq1"]
output:
reads = os.path.join(config["output_dir"], "single_end", "{sample}", "{sample}.remove_adapters_mate1.fastq.gz")
reads = os.path.join(
config["output_dir"],
"single_end",
"{sample}",
"{sample}.remove_adapters_mate1.fastq.gz")
params:
adapters_3 = lambda wildcards:
adapters_3 = lambda wildcards:
samples_table.loc[wildcards.sample, 'fq1_3p'],
adapters_5 = lambda wildcards:
adapters_5 = lambda wildcards:
samples_table.loc[wildcards.sample, 'fq1_5p']
singularity:
"docker://zavolab/cutadapt:1.16-slim"
threads: 8
log:
os.path.join(config["log_dir"], "single_end", "{sample}", "remove_adapters_cutadapt.log")
stderr = os.path.join(
config["log_dir"],
"single_end",
"{sample}",
"remove_adapters_cutadapt.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"single_end",
"{sample}",
"remove_adapters_cutadapt.stdout.log")
shell:
"(cutadapt \
-e 0.1 \
...
...
@@ -45,23 +89,49 @@ rule remove_adapters_cutadapt:
-a {params.adapters_3} \
-g {params.adapters_5} \
-o {output.reads} \
{input.reads}) &> {log}"
{input.reads};) \
1> {log.stdout} 2> {log.stderr}"
rule remove_polya_cutadapt:
''' Remove ployA tails'''
'''
Remove ployA tails
'''
input:
reads = os.path.join(config["output_dir"], "single_end", "{sample}", "{sample}.remove_adapters_mate1.fastq.gz")
reads = os.path.join(
config["output_dir"],
"single_end",
"{sample}",
"{sample}.remove_adapters_mate1.fastq.gz")
output:
reads = os.path.join(config["output_dir"], "single_end", "{sample}", "{sample}.remove_polya_mate1.fastq.gz")
reads = os.path.join(
config["output_dir"],
"single_end",
"{sample}",
"{sample}.remove_polya_mate1.fastq.gz")
params:
polya_3 = lambda wildcards:
polya_3 = lambda wildcards:
samples_table.loc[wildcards.sample, "fq1_polya"]
singularity:
"docker://zavolab/cutadapt:1.16-slim"
threads: 8
log:
os.path.join(config["log_dir"], "single_end", "{sample}", "remove_polya_cutadapt.log")
stderr = os.path.join(
config["log_dir"],
"single_end",
"{sample}",
"remove_polya_cutadapt.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"single_end",
"{sample}",
"remove_polya_cutadapt.stdout.log")
shell:
"(cutadapt \
--match-read-wildcards \
...
...
@@ -73,51 +143,75 @@ rule remove_polya_cutadapt:
-m 10 \
-a {params.polya_3} \
-o {output.reads} \
{input.reads}) &> {log}"
{input.reads}); \
1> {log.stdout} 2> {log.stderr}"
rule map_genome_star:
''' Map to genome using STAR. '''
'''
Map to genome using STAR
'''
input:
index = lambda wildcards:
os.path.join(
config["star_indexes"],
str(samples_table.loc[wildcards.sample, "organism"]),
str(samples_table.loc[wildcards.sample, "index_size"]),
"STAR_index","chrNameLength.txt"),
reads = os.path.join(config["output_dir"], "single_end", "{sample}", "{sample}.remove_polya_mate1.fastq.gz")
str(samples_table.loc[wildcards.sample, "index_size"]),
"STAR_index",
"chrNameLength.txt"),
reads = os.path.join(
config["output_dir"],
"single_end",
"{sample}",
"{sample}.remove_polya_mate1.fastq.gz")
output:
bam = os.path.join(config["output_dir"], "single_end",
"{sample}",
"map_genome",
bam = os.path.join(
config["output_dir"],
"single_end",
"{sample}",
"map_genome",
"{sample}_Aligned.sortedByCoord.out.bam"),
logfile = os.path.join(config["output_dir"], "single_end",
"{sample}",
"map_genome",
logfile = os.path.join(
config["output_dir"],
"single_end",
"{sample}",
"map_genome",
"{sample}_Log.final.out")
params:
sample_id = "{sample}",
index = lambda wildcards:
os.path.join(
config["star_indexes"],
str(samples_table.loc[wildcards.sample, "organism"]),
str(samples_table.loc[wildcards.sample, "index_size"]),
str(samples_table.loc[wildcards.sample, "index_size"]),
"STAR_index"),
outFileNamePrefix = os.path.join(
config["output_dir"],
"single_end",
"{sample}", "map_genome", "{sample}_"),
config["output_dir"],
"single_end",
"{sample}",
"map_genome",
"{sample}_"),
multimappers = lambda wildcards:
samples_table.loc[wildcards.sample, "multimappers"],
soft_clip = lambda wildcards:
samples_table.loc[wildcards.sample, "soft_clip"],
pass_mode = lambda wildcards:
samples_table.loc[wildcards.sample, "pass_mode"],
singularity:
"docker://zavolab/star:2.7.3a-slim"
threads: 12
log:
os.path.join(config["log_dir"], "single_end", "{sample}", "map_genome_star.log")
stderr = os.path.join(
config["log_dir"],
"single_end",
"{sample}",
"map_genome_star.stderr.log")
shell:
"(STAR \
--runMode alignReads \
...
...
@@ -139,39 +233,61 @@ rule map_genome_star:
--outFilterType BySJout \
--outReadsUnmapped None \
--outSAMattrRGline ID:rcrunch SM:{params.sample_id} \
--alignEndsType {params.soft_clip} > {output.bam};) &> {log}"
--alignEndsType {params.soft_clip} > {output.bam};) \
2> {log.stderr}"
rule index_genomic_alignment_samtools:
'''Index genome bamfile using samtools.'''
'''
Index genome bamfile using samtools
'''
input:
bam = os.path.join(config["output_dir"],
"single_end",
"{sample}",
"map_genome",
bam = os.path.join(
config["output_dir"],
"single_end",
"{sample}",
"map_genome",
"{sample}_Aligned.sortedByCoord.out.bam")
output:
bai = os.path.join(config["output_dir"],
"single_end",
"{sample}",
"map_genome",
bai = os.path.join(
config["output_dir"],
"single_end",
"{sample}",
"map_genome",
"{sample}_Aligned.sortedByCoord.out.bam.bai")
singularity:
"docker://zavolab/samtools:1.10-slim"
threads: 1
log:
os.path.join(config["log_dir"], "single_end", "{sample}", "index_genomic_alignment_samtools.log")
stderr = os.path.join(
config["log_dir"],
"single_end",
"{sample}",
"index_genomic_alignment_samtools.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"single_end",
"{sample}",
"index_genomic_alignment_samtools.stdout.log")
shell:
"(samtools index {input.bam} {output.bai};) &> {log}"
"(samtools index {input.bam} {output.bai};) \
1> {log.stdout} 2> {log.stderr}"
rule quantification_salmon:
''' Quantification at transcript and gene level using Salmon. '''
'''
Quantification at transcript and gene level using Salmon
'''
input:
reads = os.path.join(
config["output_dir"],
"single_end",
"{sample}",
config["output_dir"],
"single_end",
"{sample}",
"{sample}.remove_polya_mate1.fastq.gz"),
index = lambda wildcards:
os.path.join(
...
...
@@ -179,33 +295,49 @@ rule quantification_salmon:
str(samples_table.loc[wildcards.sample, "organism"]),
str(samples_table.loc[wildcards.sample, "kmer"]),
"salmon.idx"),
gtf = lambda wildcards: samples_table.loc[wildcards.sample, "gtf_filtered"]
gtf = lambda wildcards:
samples_table.loc[wildcards.sample, "gtf_filtered"]
output:
gn_estimates = os.path.join(
config["output_dir"],
"single_end",
"{sample}",
config["output_dir"],
"single_end",
"{sample}",
"salmon_quant",
"quant.genes.sf"),
tr_estimates = os.path.join(
config["output_dir"],
"single_end",
"{sample}",
config["output_dir"],
"single_end",
"{sample}",
"salmon_quant",
"quant.sf")
params:
output_dir = os.path.join(
config["output_dir"],
"single_end",
"{sample}",
config["output_dir"],
"single_end",
"{sample}",
"salmon_quant"),
libType = lambda wildcards:
samples_table.loc[wildcards.sample, "libtype"]
log:
os.path.join(config["log_dir"], "single_end", "{sample}", "quantification_salmon.log")
threads: 12
stderr = os.path.join(
config["log_dir"],
"single_end",
"{sample}",
"quantification_salmon.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"single_end",
"{sample}",
"quantification_salmon.stdout.log")
threads: 12
singularity:
"docker://zavolab/salmon:1.1.0-slim"
shell:
"(salmon quant \
--libType {params.libType} \
...
...
@@ -216,43 +348,59 @@ rule quantification_salmon:
--index {input.index} \
--geneMap {input.gtf} \
--unmatedReads {input.reads} \
-o {params.output_dir}) &> {log}"
-o {params.output_dir};) \
1> {log.stdout} 2> {log.stderr}"
rule genome_quantification_kallisto:
''' Quantification at transcript and gene level using Kallisto. '''
'''
Quantification at transcript and gene level using Kallisto
'''
input:
reads = os.path.join(
config["output_dir"],
"single_end",
"{sample}",
config["output_dir"],
"single_end",
"{sample}",
"{sample}.remove_polya_mate1.fastq.gz"),
index = lambda wildcards:
os.path.join(
config["kallisto_indexes"],
samples_table.loc[wildcards.sample, "organism"],
"kallisto.idx")
output:
pseudoalignment = os.path.join(
config["output_dir"],
"single_end",
"{sample}",
config["output_dir"],
"single_end",
"{sample}",
"quant_kallisto",
"{sample}.kallisto.pseudo.sam")
params:
output_dir = os.path.join(
config["output_dir"],
"single_end",
"{sample}",
"quant_kallisto"),
fraglen = lambda wildcards: samples_table.loc[wildcards.sample, 'mean'],
fragsd = lambda wildcards: samples_table.loc[wildcards.sample, 'sd'],
directionality = lambda wildcards: samples_table.loc[wildcards.sample, 'kallisto_directionality']
threads: 8
config["output_dir"],
"single_end",
"{sample}",
"quant_kallisto"),
fraglen = lambda wildcards:
samples_table.loc[wildcards.sample, 'mean'],
fragsd = lambda wildcards:
samples_table.loc[wildcards.sample, 'sd'],
directionality = lambda wildcards:
samples_table.loc[wildcards.sample, 'kallisto_directionality']
threads: 8
log:
os.path.join(config["log_dir"], "single_end", "{sample}", "genome_quantification_kallisto.log.log")
stderr = os.path.join(
config["log_dir"],
"single_end",
"{sample}",
"genome_quantification_kallisto.stderr.log")
singularity:
"docker://zavolab/kallisto:0.46.1-slim"
shell:
"(kallisto quant \
-i {input.index} \
...
...
@@ -262,5 +410,5 @@ rule genome_quantification_kallisto:
-s {params.fragsd} \
--pseudobam \
{params.directionality} \
{input.reads} > {output.pseudoalignment})
&> {log}"
{input.reads} > {output.pseudoalignment}
;
)
\
2> {log.stderr}"
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment