Newer
Older
"""General purpose RNA-Seq analysis pipeline developed by the Zavolan Lab"""
BIOPZ-Gypas Foivos
committed
import os
import sys
BIOPZ-Gypas Foivos
committed
# Get sample table
samples_table = pd.read_csv(
config['samples'],
header=0,
index_col=0,
comment='#',
engine='python',
sep="\t",
)
localrules: finish, rename_star_rpm_for_alfa, prepare_files_for_report, \
prepare_MultiQC_config
# Create log directories
os.makedirs(
os.path.join(
os.getcwd(),
config['log_dir'],
),
BIOPZ-Katsantoni Maria
committed
exist_ok=True)
if cluster_config:
os.makedirs(
os.path.join(
os.getcwd(),
os.path.dirname(cluster_config['__default__']['out']),
),
BIOPZ-Katsantoni Maria
committed
exist_ok=True)
BIOPZ-Gypas Foivos
committed
# Include subworkflows
include: os.path.join("workflow", "rules", "paired_end.snakefile.smk")
include: os.path.join("workflow", "rules", "single_end.snakefile.smk")
BIOPZ-Katsantoni Maria
committed
"""
Rule for collecting outputs
"""
os.path.join(
config['output_dir'],
BIOPZ-Iborra de Toledo Paula
committed
BIOPZ-Gypas Foivos
committed
rule create_index_star:
BIOPZ-Katsantoni Maria
committed
"""
Create index for STAR alignments
"""
BIOPZ-Katsantoni Maria
committed
samples_table['genome']
[samples_table['organism'] == wildcards.organism]
[0],
BIOPZ-Katsantoni Maria
committed
samples_table['gtf']
[samples_table['organism'] == wildcards.organism]
[0]
output:
chromosome_info = os.path.join(
config['star_indexes'],
"{organism}",
"{index_size}",
"STAR_index",
BIOPZ-Katsantoni Maria
committed
"chrNameLength.txt"),
chromosomes_names = os.path.join(
config['star_indexes'],
"{organism}",
"{index_size}",
"STAR_index",
BIOPZ-Katsantoni Maria
committed
"chrName.txt")
params:
output_dir = os.path.join(
config['star_indexes'],
"{organism}",
"{index_size}",
BIOPZ-Katsantoni Maria
committed
"STAR_index"),
outFileNamePrefix = os.path.join(
config['star_indexes'],
"{organism}",
"{index_size}",
BIOPZ-Katsantoni Maria
committed
"STAR_index/STAR_"),
sjdbOverhang = "{index_size}"
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
stderr = os.path.join(
BIOPZ-Katsantoni Maria
committed
"{organism}_{index_size}_create_index_star.stderr.log"),
stdout = os.path.join(
config['log_dir'],
"{organism}_{index_size}_create_index_star.stdout.log")
shell:
"(mkdir -p {params.output_dir}; \
chmod -R 777 {params.output_dir}; \
STAR \
--runMode genomeGenerate \
--sjdbOverhang {params.sjdbOverhang} \
--genomeDir {params.output_dir} \
--genomeFastaFiles {input.genome} \
--runThreadN {threads} \
--outFileNamePrefix {params.outFileNamePrefix} \
BIOPZ-Katsantoni Maria
committed
--sjdbGTFfile {input.gtf}) \
1> {log.stdout} 2> {log.stderr}"
BIOPZ-Gypas Foivos
committed
BIOPZ-Iborra de Toledo Paula
committed
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
rule extract_transcriptome:
""" Create transcriptome from genome and gene annotations """
input:
genome = lambda wildcards:
samples_table['genome'][
samples_table['organism'] == wildcards.organism
][0],
gtf = lambda wildcards:
samples_table['gtf'][
samples_table['organism'] == wildcards.organism
][0]
output:
transcriptome = os.path.join(
config['output_dir'],
"transcriptome",
"{organism}",
"transcriptome.fa",
)
log:
stderr = os.path.join(
config['log_dir'],
"{organism}_extract_transcriptome.log"),
stdout = os.path.join(
config['log_dir'],
"{organism}_extract_transcriptome.log")
singularity:
BIOPZ-Iborra de Toledo Paula
committed
shell:
"(gffread \
-w {output.transcriptome} \
-g {input.genome} {input.gtf}) \
1> {log.stdout} 2> {log.stderr}"
BIOPZ-Gypas Foivos
committed
rule create_index_salmon:
BIOPZ-Katsantoni Maria
committed
"""
Create index for Salmon quantification
"""
BIOPZ-Iborra de Toledo Paula
committed
transcriptome = os.path.join(
config['output_dir'],
"transcriptome",
"{organism}",
"transcriptome.fa",
)
output:
index = directory(
os.path.join(
config['salmon_indexes'],
"{organism}",
"{kmer}",
BIOPZ-Katsantoni Maria
committed
"salmon.idx"))
BIOPZ-Katsantoni Maria
committed
kmerLen = "{kmer}"
"docker://zavolab/salmon:1.1.0-slim"
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
stderr = os.path.join(
BIOPZ-Katsantoni Maria
committed
"{organism}_{kmer}_create_index_salmon.stderr.log"),
stdout = os.path.join(
config['log_dir'],
"{organism}_{kmer}_create_index_salmon.stdout.log")
BIOPZ-Katsantoni Maria
committed
shell:
"(salmon index \
--transcripts {input.transcriptome} \
--index {output.index} \
--kmerLen {params.kmerLen} \
BIOPZ-Katsantoni Maria
committed
--threads {threads}) \
1> {log.stdout} 2> {log.stderr}"
Loading
Loading full blame...