Skip to content
Snippets Groups Projects
Commit c58f28df authored by Alex Kanitz's avatar Alex Kanitz
Browse files

Merge branch 'log_dirs' into 'master'

create log directories in Snakefile

Closes #70

See merge request zavolan_group/pipelines/rnaseqpipeline!28
parents 1f933bcc 5e1ec85e
No related branches found
No related tags found
2 merge requests: !29 Add pipeline updates into MultiQC dev branch, !28 create log directories in Snakefile
Pipeline #10310 passed
......@@ -173,7 +173,6 @@ your run.
```bash
cat << "EOF" > run.sh
#!/bin/bash
mkdir -p logs/local_log
snakemake \
--snakefile="../../snakemake/Snakefile" \
--configfile="config.yaml" \
......
################################################################################
### python modules
################################################################################
"""General purpose RNA-Seq analysis pipeline developed by the Zavolan Lab"""
import os
import sys
import pandas as pd
############################
import pandas as pd
# Get sample table
# Tab-separated file named by the 'samples' config entry: the first row is the
# header, the first column becomes the row index, and '#' starts a comment.
samples_table = pd.read_csv(
    config['samples'],
    header=0,
    index_col=0,
    comment='#',
    engine='python',
    sep="\t",
)
# Global config
localrules: finish
##################################################################################
# Execution dependent on sequencing mode
##################################################################################
include: os.path.join('workflow', 'rules', 'paired_end.snakefile.smk')
include: os.path.join('workflow', 'rules', 'single_end.snakefile.smk')
# Create log directories
# The directory named by the 'log_dir' config entry is created relative to the
# current working directory; an already existing directory is not an error.
os.makedirs(
    os.path.join(
        os.getcwd(),
        config['log_dir'],
    ),
    exist_ok=True,
)
# Only when a cluster configuration is present: also create the directory part
# of its default 'out' path. The lookups use .get() so that a cluster
# configuration without a '__default__' section or without an 'out' entry is
# skipped instead of aborting the workflow with a KeyError.
if cluster_config:
    cluster_out = cluster_config.get('__default__', {}).get('out')
    if cluster_out:
        os.makedirs(
            os.path.join(
                os.getcwd(),
                os.path.dirname(cluster_out),
            ),
            exist_ok=True,
        )
#################################################################################
### Final rule
#################################################################################
# Include subworkflows
include: os.path.join("workflow", "rules", "paired_end.snakefile.smk")
include: os.path.join("workflow", "rules", "single_end.snakefile.smk")
# Final rule
rule finish:
    """Rule for collecting outputs"""
    input:
        # One target per row of the sample table. `zip` pairs the i-th sample
        # name (the table index) with the i-th entry of the 'seqmode' column,
        # so every sample is combined only with its own sequencing mode.
        outdir1 = expand(
            os.path.join(
                config['output_dir'],
                "{seqmode}",
                "{sample}",
                "mate1_fastqc",
            ),
            zip,
            sample=list(samples_table.index),
            seqmode=list(samples_table['seqmode']),
        ),
        salmon_gn_estimates = expand(
            os.path.join(
                config['output_dir'],
                "{seqmode}",
                "{sample}",
                "salmon_quant",
                "quant.genes.sf",
            ),
            zip,
            sample=list(samples_table.index),
            seqmode=list(samples_table['seqmode']),
        ),
        pseudoalignment = expand(
            os.path.join(
                config['output_dir'],
                "{seqmode}",
                "{sample}",
                "quant_kallisto",
                "{sample}.kallisto.pseudo.sam",
            ),
            zip,
            sample=list(samples_table.index),
            seqmode=list(samples_table['seqmode']),
        ),
        TIN_score = expand(
            os.path.join(
                config['output_dir'],
                "{seqmode}",
                "{sample}",
                "TIN",
                "TIN_score.tsv",
            ),
            zip,
            sample=list(samples_table.index),
            seqmode=list(samples_table['seqmode']),
        ),
rule create_index_star:
    """Create index for STAR alignments"""
    input:
        # Genome and annotation of the first sample-table row whose
        # 'organism' equals the requested wildcard. `.iloc[0]` selects by
        # position; a bare `[0]` on a table indexed by sample name depends on
        # pandas' deprecated positional fallback.
        genome = lambda wildcards:
            samples_table['genome'][
                samples_table['organism'] == wildcards.organism
            ].iloc[0],
        gtf = lambda wildcards:
            samples_table['gtf'][
                samples_table['organism'] == wildcards.organism
            ].iloc[0],
    output:
        chromosome_info = os.path.join(
            config['star_indexes'],
            "{organism}",
            "{index_size}",
            "STAR_index",
            "chrNameLength.txt",
        ),
        chromosomes_names = os.path.join(
            config['star_indexes'],
            "{organism}",
            "{index_size}",
            "STAR_index",
            "chrName.txt",
        ),
    params:
        output_dir = os.path.join(
            config['star_indexes'],
            "{organism}",
            "{index_size}",
            "STAR_index",
        ),
        outFileNamePrefix = os.path.join(
            config['star_indexes'],
            "{organism}",
            "{index_size}",
            "STAR_index/STAR_",
        ),
        # The index size wildcard is passed to STAR as --sjdbOverhang
        sjdbOverhang = "{index_size}",
    singularity:
        "docker://zavolab/star:2.6.0a"
    threads: 12
    log:
        os.path.join(
            config['log_dir'],
            "{organism}_{index_size}_create_index_star.log",
        )
    # NOTE(review): `chmod -R 777` makes the index directory world-writable;
    # confirm that this is really required.
    shell:
        "(mkdir -p {params.output_dir}; "
        "chmod -R 777 {params.output_dir}; "
        "STAR "
        "--runMode genomeGenerate "
        "--sjdbOverhang {params.sjdbOverhang} "
        "--genomeDir {params.output_dir} "
        "--genomeFastaFiles {input.genome} "
        "--runThreadN {threads} "
        "--outFileNamePrefix {params.outFileNamePrefix} "
        "--sjdbGTFfile {input.gtf}) &> {log}"
rule create_index_salmon:
    """Create index for Salmon quantification"""
    input:
        # Filtered transcriptome of the first sample-table row whose
        # 'organism' equals the requested wildcard. `.iloc[0]` selects by
        # position; a bare `[0]` on a table indexed by sample name depends on
        # pandas' deprecated positional fallback.
        transcriptome = lambda wildcards:
            samples_table['tr_fasta_filtered'][
                samples_table['organism'] == wildcards.organism
            ].iloc[0],
    output:
        index = directory(
            os.path.join(
                config['salmon_indexes'],
                "{organism}",
                "{kmer}",
                "salmon.idx",
            )
        ),
    params:
        # The k-mer wildcard is passed to Salmon as --kmerLen
        kmerLen = "{kmer}",
    singularity:
        "docker://zavolab/salmon:0.11.0"
    log:
        os.path.join(
            config['log_dir'],
            "{organism}_{kmer}_create_index_salmon.log",
        )
    threads: 8
    shell:
        "(salmon index "
        "--transcripts {input.transcriptome} "
        "--index {output.index} "
        "--kmerLen {params.kmerLen} "
        "--threads {threads}) &> {log}"
rule create_index_kallisto:
    """Create index for Kallisto quantification"""
    input:
        # Filtered transcriptome of the first sample-table row whose
        # 'organism' equals the requested wildcard. `.iloc[0]` selects by
        # position; a bare `[0]` on a table indexed by sample name depends on
        # pandas' deprecated positional fallback.
        transcriptome = lambda wildcards:
            samples_table['tr_fasta_filtered'][
                samples_table['organism'] == wildcards.organism
            ].iloc[0],
    output:
        index = os.path.join(
            config['kallisto_indexes'],
            "{organism}",
            "kallisto.idx",
        ),
    params:
        output_dir = os.path.join(
            config['kallisto_indexes'],
            "{organism}",
        ),
    singularity:
        "docker://zavolab/kallisto:0.46.1"
    log:
        os.path.join(
            config['log_dir'],
            "{organism}_create_index_kallisto.log",
        )
    # NOTE(review): `chmod -R 777` makes the index directory world-writable;
    # confirm that this is really required.
    shell:
        "(mkdir -p {params.output_dir}; "
        "chmod -R 777 {params.output_dir}; "
        "kallisto index -i {output.index} {input.transcriptome}) &> {log}"
rule extract_transcripts_as_bed12:
''' Extract transcripts: from GTF into BED12 format'''
input:
gtf =lambda wildcards: samples_table["gtf"][0]
output:
bed12 = os.path.join(
config["output_dir"],
"full_transcripts_protein_coding.bed")
singularity:
"docker://zavolab/gtf_transcript_type_to_bed12:0.1.0"
threads: 1
log:
os.path.join( config["local_log"], "extract_transcripts_as_bed12.log")
shell:
"gtf_transcript_type_to_bed12.pl \
"""Convert transcripts to BED12 format"""
input:
gtf = lambda wildcards:
samples_table['gtf'][0],
output:
bed12 = os.path.join(
config['output_dir'],
"full_transcripts_protein_coding.bed",
),
singularity:
"docker://zavolab/gtf_transcript_type_to_bed12:0.1.0"
threads: 1
log:
os.path.join(
config['log_dir'],
"extract_transcripts_as_bed12.log",
)
shell:
"gtf_transcript_type_to_bed12.pl \
--anno={input.gtf} \
--type=protein_coding \
1> {output.bed12} \
......@@ -162,39 +257,48 @@ rule extract_transcripts_as_bed12:
rule calculate_TIN_scores:
'''Calculate TIN score'''
input:
bai = os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"map_genome",
"{sample}_Aligned.sortedByCoord.out.bam.bai"),
transcripts_bed12 = os.path.join(
config["output_dir"],
"full_transcripts_protein_coding.bed")
output:
TIN_score = os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"TIN",
"TIN_score.tsv")
params:
bam = os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"map_genome",
"{sample}_Aligned.sortedByCoord.out.bam"),
sample = "{sample}"
log:
os.path.join(config["local_log"], "{seqmode}", "{sample}", "calculate_TIN_scores.log")
threads: 8
singularity:
"docker://zavolab/tin_score_calculation:0.1.0"
shell:
"tin_score_calculation.py \
"""Caluclate transcript integrity (TIN) score"""
input:
bai = os.path.join(
config['output_dir'],
"{seqmode}",
"{sample}",
"map_genome",
"{sample}_Aligned.sortedByCoord.out.bam.bai"
),
transcripts_bed12 = os.path.join(
config['output_dir'],
"full_transcripts_protein_coding.bed"
),
output:
TIN_score = os.path.join(
config['output_dir'],
"{seqmode}",
"{sample}",
"TIN",
"TIN_score.tsv",
),
params:
bam = os.path.join(
config['output_dir'],
"{seqmode}",
"{sample}",
"map_genome",
"{sample}_Aligned.sortedByCoord.out.bam"
),
sample = "{sample}",
log:
os.path.join(
config['log_dir'],
"{seqmode}",
"{sample}",
"calculate_TIN_scores.log",
)
threads: 8
singularity:
"docker://zavolab/tin_score_calculation:0.1.0"
shell:
"tin_score_calculation.py \
-i {params.bam} \
-r {input.transcripts_bed12} \
-c 0 \
......
......@@ -305,12 +305,12 @@ def main():
# Read file and infer read size for sjdbOverhang
with open(options.config_file, 'w') as config_file:
config_file.write('''---
output_dir: "results"
local_log: "local_log"
star_indexes: "results/star_indexes"
kallisto_indexes: "results/kallisto_indexes"
samples: "'''+ options.samples_table + '''"
salmon_indexes: "results/salmon_indexes"
output_dir: "results/"
log_dir: "logs/"
kallisto_indexes: "results/kallisto_indexes/"
salmon_indexes: "results/salmon_indexes/"
star_indexes: "results/star_indexes/"
...''')
sys.stdout.write('Create snakemake table finished successfully...\n')
......
......@@ -6,7 +6,7 @@
"threads": "1",
"mem": "4G",
"name": "{rule}.{wildcards}",
"out": "$PWD/logs/cluster_log/{rule}.{wildcards}-%j-%N.out"
"out": "logs/cluster/{rule}.{wildcards}-%j-%N.out"
},
"generate_segemehl_index_other_RNAs":
{
......
---
output_dir: "results"
local_log: "local_log"
star_indexes: "results/star_indexes"
kallisto_indexes: "results/kallisto_indexes"
samples: "../input_files/samples.tsv"
salmon_indexes: "results/salmon_indexes"
output_dir: "results/"
log_dir: "logs/"
kallisto_indexes: "results/kallisto_indexes/"
salmon_indexes: "results/salmon_indexes/"
star_indexes: "results/star_indexes/"
...
......@@ -4,6 +4,7 @@
cleanup () {
rc=$?
rm -rf .snakemake
rm -rf logs/
cd $user_dir
echo "Exit status: $rc"
}
......
......@@ -4,6 +4,7 @@
cleanup () {
rc=$?
rm -rf .snakemake
rm -rf logs/
cd $user_dir
echo "Exit status: $rc"
}
......
......@@ -5,7 +5,6 @@ cleanup () {
rc=$?
rm -rf .java/
rm -rf .snakemake/
rm -rf local_log/
rm -rf logs/
rm -rf results/
cd $user_dir
......@@ -20,7 +19,6 @@ set -x # facilitates debugging by printing out executed commands
user_dir=$PWD
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
cd $script_dir
mkdir -p logs/local_log
# Run tests
snakemake \
......
......@@ -5,7 +5,6 @@ cleanup () {
rc=$?
rm -rf .java/
rm -rf .snakemake/
rm -rf local_log/
rm -rf logs/
rm -rf results/
cd $user_dir
......@@ -20,8 +19,6 @@ set -x # facilitates debugging by printing out executed commands
user_dir=$PWD
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
cd $script_dir
mkdir -p logs/cluster_log
mkdir -p logs/local_log
# Run tests
snakemake \
......
95fb0448dc6871cb415012d254260c5a config.yaml
ba5ae0649d1fb82d94f8d19481498ffd config.yaml
9aece9e4acb17143b5e8f627968e03a5 samples.tsv
95fb0448dc6871cb415012d254260c5a config.yaml
ba5ae0649d1fb82d94f8d19481498ffd config.yaml
9aece9e4acb17143b5e8f627968e03a5 samples.tsv
......@@ -13,7 +13,7 @@ rule pe_fastqc:
singularity:
"docker://zavolab/fastqc:0.11.8"
log:
os.path.join(config["local_log"],"paired_end", "{sample}", "fastqc.log")
os.path.join(config["log_dir"],"paired_end", "{sample}", "fastqc.log")
shell:
"(mkdir -p {output.outdir1}; \
mkdir -p {output.outdir2}; \
......@@ -51,7 +51,7 @@ rule pe_remove_adapters_cutadapt:
"docker://zavolab/cutadapt:1.16"
threads: 8
log:
os.path.join( config["local_log"], "paired_end", "{sample}", "remove_adapters_cutadapt.log")
os.path.join( config["log_dir"], "paired_end", "{sample}", "remove_adapters_cutadapt.log")
shell:
"(cutadapt \
-e 0.1 \
......@@ -102,7 +102,7 @@ rule pe_remove_polya_cutadapt:
"docker://zavolab/cutadapt:1.16"
threads: 8
log:
os.path.join( config["local_log"], "paired_end", "{sample}", "remove_polya_cutadapt.log")
os.path.join( config["log_dir"], "paired_end", "{sample}", "remove_polya_cutadapt.log")
shell:
'(cutadapt \
--match-read-wildcards \
......@@ -181,7 +181,7 @@ rule pe_map_genome_star:
threads: 12
log:
os.path.join( config["local_log"], "paired_end", "{sample}", "map_genome_star.log")
os.path.join( config["log_dir"], "paired_end", "{sample}", "map_genome_star.log")
shell:
"(STAR \
......@@ -226,7 +226,7 @@ rule pe_index_genomic_alignment_samtools:
singularity:
"docker://zavolab/samtools:1.8"
log:
os.path.join( config["local_log"], "paired_end", "{sample}", "index_genomic_alignment_samtools.log")
os.path.join( config["log_dir"], "paired_end", "{sample}", "index_genomic_alignment_samtools.log")
shell:
"(samtools index {input.bam} {output.bai};) &> {log}"
......@@ -275,7 +275,7 @@ rule pe_quantification_salmon:
libType = lambda wildcards:
samples_table.loc[wildcards.sample, 'libtype']
log:
os.path.join(config["local_log"], "paired_end", "{sample}", "genome_quantification_salmon.log")
os.path.join(config["log_dir"], "paired_end", "{sample}", "genome_quantification_salmon.log")
threads: 6
singularity:
"docker://zavolab/salmon:0.11.0"
......@@ -330,7 +330,7 @@ rule pe_genome_quantification_kallisto:
"docker://zavolab/kallisto:0.46.1"
threads: 8
log:
os.path.join(config["local_log"], "paired_end", "{sample}", "genome_quantification_kallisto.log")
os.path.join(config["log_dir"], "paired_end", "{sample}", "genome_quantification_kallisto.log")
shell:
"(kallisto quant \
-i {input.index} \
......
......@@ -10,7 +10,7 @@ rule fastqc:
singularity:
"docker://zavolab/fastqc:0.11.8"
log:
os.path.join(config["local_log"], "single_end", "{sample}", "fastqc.log")
os.path.join(config["log_dir"], "single_end", "{sample}", "fastqc.log")
shell:
"(mkdir -p {output.outdir}; \
fastqc \
......@@ -34,7 +34,7 @@ rule remove_adapters_cutadapt:
"docker://zavolab/cutadapt:1.16"
threads: 8
log:
os.path.join(config["local_log"], "single_end", "{sample}", "remove_adapters_cutadapt.log")
os.path.join(config["log_dir"], "single_end", "{sample}", "remove_adapters_cutadapt.log")
shell:
"(cutadapt \
-e 0.1 \
......@@ -61,7 +61,7 @@ rule remove_polya_cutadapt:
"docker://zavolab/cutadapt:1.16"
threads: 8
log:
os.path.join(config["local_log"], "single_end", "{sample}", "remove_polya_cutadapt.log")
os.path.join(config["log_dir"], "single_end", "{sample}", "remove_polya_cutadapt.log")
shell:
"(cutadapt \
--match-read-wildcards \
......@@ -117,7 +117,7 @@ rule map_genome_star:
"docker://zavolab/star:2.6.0a"
threads: 12
log:
os.path.join(config["local_log"], "single_end", "{sample}", "map_genome_star.log")
os.path.join(config["log_dir"], "single_end", "{sample}", "map_genome_star.log")
shell:
"(STAR \
--runMode alignReads \
......@@ -160,7 +160,7 @@ rule index_genomic_alignment_samtools:
"docker://zavolab/samtools:1.8"
threads: 1
log:
os.path.join(config["local_log"], "single_end", "{sample}", "index_genomic_alignment_samtools.log")
os.path.join(config["log_dir"], "single_end", "{sample}", "index_genomic_alignment_samtools.log")
shell:
"(samtools index {input.bam} {output.bai};) &> {log}"
......@@ -202,7 +202,7 @@ rule quantification_salmon:
libType = lambda wildcards:
samples_table.loc[wildcards.sample, "libtype"]
log:
os.path.join(config["local_log"], "single_end", "{sample}", "quantification_salmon.log")
os.path.join(config["log_dir"], "single_end", "{sample}", "quantification_salmon.log")
threads: 12
singularity:
"docker://zavolab/salmon:0.11.0"
......@@ -250,7 +250,7 @@ rule genome_quantification_kallisto:
directionality = lambda wildcards: samples_table.loc[wildcards.sample, 'kallisto_directionality']
threads: 8
log:
os.path.join(config["local_log"],"kallisto_align_{sample}.log")
os.path.join(config["log_dir"],"kallisto_align_{sample}.log")
singularity:
"docker://zavolab/kallisto:0.46.1"
shell:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment