Newer
Older
configfile: "config.yaml"
#from snakemake.utils import listfiles
BIOPZ-Gypas Foivos
committed
localrules: finish
BIOPZ-Gypas Foivos
committed
################################################################################
BIOPZ-Gypas Foivos
committed
################################################################################
BIOPZ-Gypas Foivos
committed
pdf = expand(os.path.join(config["output_dir"], "{sample}/read_length/read_length_histogram.pdf"), sample=config["sample"]),
BIOPZ-Gypas Foivos
committed
counts = expand(os.path.join(config["output_dir"], "{sample}/counts.tsv"), sample=config["sample"]),
bai = expand(os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.a_site_profile.sorted.bam.bai"), sample=config["sample"])
BIOPZ-Gypas Foivos
committed
################################################################################
BIOPZ-Gypas Foivos
committed
################################################################################
BIOPZ-Gypas Foivos
committed
reads = os.path.join(config["input_dir"], "{sample}" + config["input_reads_pattern"])
BIOPZ-Gypas Foivos
committed
reads = os.path.join(config["output_dir"], "{sample}", "pro.clipped.fastq.gz")
BIOPZ-Gypas Foivos
committed
adapter = lambda wildcards: config[wildcards.sample]['adapter'],
BIOPZ-Gypas Foivos
committed
z = "-z"
log:
os.path.join(config["local_log"], "clip_reads_{sample}.log")
singularity:
BIOPZ-Gypas Foivos
committed
"docker://zavolab/fastx:0.0.14"
shell:
"(fastx_clipper \
{params.v} \
{params.n} \
-l {params.l} \
-a {params.adapter} \
-i <(zcat {input.reads}) \
-o {output.reads}) &> {log}"
BIOPZ-Gypas Foivos
committed
################################################################################
BIOPZ-Gypas Foivos
committed
################################################################################
reads = os.path.join(config["output_dir"], "{sample}/pro.clipped.fastq.gz")
reads = os.path.join(config["output_dir"], "{sample}/pro.trimmed.fastq.gz"),
t = lambda wildcards: config[wildcards.sample]['minimum_quality'],
Q = lambda wildcards: config[wildcards.sample]['quality_type'],
BIOPZ-Gypas Foivos
committed
z = "-z"
os.path.join(config["local_log"], "trim_reads_{sample}.log")
BIOPZ-Gypas Foivos
committed
"docker://zavolab/fastx:0.0.14"
shell:
"(fastq_quality_trimmer \
{params.v} \
-l {params.l} \
-t {params.t} \
-Q {params.Q} \
{params.z} \
-i <(zcat {input.reads}) \
BIOPZ-Gypas Foivos
committed
################################################################################
BIOPZ-Gypas Foivos
committed
################################################################################
reads = os.path.join(config["output_dir"], "{sample}/pro.trimmed.fastq.gz"),
reads = os.path.join(config["output_dir"], "{sample}/pro.filtered.fastq.gz"),
q = lambda wildcards: config[wildcards.sample]['minimum_quality'],
BIOPZ-Gypas Foivos
committed
Q = lambda wildcards: config[wildcards.sample]['quality_type']
os.path.join(config["local_log"], "filter_reads_{sample}.log")
BIOPZ-Gypas Foivos
committed
"docker://zavolab/fastx:0.0.14"
shell:
"(fastq_quality_filter \
{params.v} \
-q {params.q} \
-p {params.p} \
-Q {params.Q} \
{params.z} \
-i <(zcat {input.reads}) \
BIOPZ-Gypas Foivos
committed
################################################################################
BIOPZ-Gypas Foivos
committed
################################################################################
reads = os.path.join(config["output_dir"], "{sample}/pro.filtered.fastq.gz"),
reads = os.path.join(config["output_dir"], "{sample}/pro.filtered.fasta"),
BIOPZ-Gypas Foivos
committed
r = "-r"
os.path.join(config["local_log"], "fastq_to_fasta_{sample}.log")
BIOPZ-Gypas Foivos
committed
"docker://zavolab/fastx:0.0.14"
shell:
"(fastq_to_fasta \
{params.v} \
{params.n} \
{params.r} \
BIOPZ-Gypas Foivos
committed
################################################################################
### Map reads to other genes (rRNA, tRNA, etc...)
BIOPZ-Gypas Foivos
committed
################################################################################
rule map_to_other_genes:
input:
reads = os.path.join(config["output_dir"], "{sample}/pro.filtered.fasta"),
index = config["other_RNAs_index"],
sequence = config["other_RNAs_sequence"]
output:
sam = os.path.join(config["output_dir"], "{sample}/other_genes.mapped.sam"),
reads = os.path.join(config["output_dir"], "{sample}/other_genes.unmapped.fasta")
params:
silent = "--silent",
BIOPZ-Gypas Foivos
committed
accuracy = "90"
log:
os.path.join(config["local_log"], "map_to_other_genes_{sample}.log")
threads: 8
singularity:
BIOPZ-Gypas Foivos
committed
"docker://zavolab/segemehl:0.2.0"
shell:
"(segemehl.x \
{params.silent} \
-i {input.index} \
-d {input.sequence} \
-q {input.reads} \
--accuracy {params.accuracy} \
--threads {threads} \
-o {output.sam} \
-u {output.reads} ) &> {log}"
BIOPZ-Gypas Foivos
committed
################################################################################
### Map reads to other genes (rRNA, tRNA, etc...)
BIOPZ-Gypas Foivos
committed
################################################################################
rule map_to_transcripts:
input:
reads = os.path.join(config["output_dir"], "{sample}/other_genes.unmapped.fasta"),
index = config["transcripts_index"],
sequence = config["transcripts_sequence"]
output:
sam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.sam"),
reads = os.path.join(config["output_dir"], "{sample}/transcripts.unmapped.fasta")
params:
silent = "--silent",
BIOPZ-Gypas Foivos
committed
accuracy = "90"
log:
os.path.join(config["local_log"], "map_to_transcripts_{sample}.log")
threads: 8
singularity:
BIOPZ-Gypas Foivos
committed
"docker://zavolab/segemehl:0.2.0"
shell:
"(segemehl.x \
{params.silent} \
-i {input.index} \
-d {input.sequence} \
-q {input.reads} \
--accuracy {params.accuracy} \
--threads {threads} \
-o {output.sam} \
-u {output.reads} ) &> {log}"
BIOPZ-Gypas Foivos
committed
################################################################################
### Remove multimappers
################################################################################
rule remove_multimappers:
input:
sam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.sam")
output:
sam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sam")
log:
os.path.join(config["local_log"], "remove_multimappers_{sample}.log")
threads: 1
shell:
"(grep -P \"^@|\tNH:i:1\t\" {input.sam} > {output.sam}) &> {log}"
BIOPZ-Gypas Foivos
committed
BIOPZ-Gypas Foivos
committed
################################################################################
### SAM to BAM sort and index
################################################################################
rule sam2bam_sort_and_index:
input:
sam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sam")
BIOPZ-Gypas Foivos
committed
output:
bam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sorted.bam"),
bai = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sorted.bam.bai")
params:
cluster_log = os.path.join(config["cluster_log"], "sam2bam_sort_and_index_{sample}.log")
log:
os.path.join(config["local_log"], "sam2bam_sort_and_index_{sample}.log")
threads: 1
singularity:
"docker://zavolab/samtools:1.8"
shell:
"(samtools view -bS {input.sam} \
| samtools sort - > {output.bam}; \
samtools index {output.bam};) &> {log}"
BIOPZ-Gypas Foivos
committed
################################################################################
### Read length histogram
################################################################################
rule read_length_histogram:
input:
sam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sam")
output:
read_length_plot = os.path.join(config["output_dir"], "{sample}/read_length/read_length_histogram.pdf")
params:
dir = os.path.join(config["output_dir"], "{sample}/read_length")
log:
os.path.join(config["local_log"], "read_length_histogram_{sample}.log")
threads: 1
singularity:
"docker://zavolab/rcrunch_python:1.0"
shell:
"(python scripts/plot_read_lengths.py \
--sam {input.sam} \
--outdir {params.dir}) & > {log}"
BIOPZ-Gypas Foivos
committed
################################################################################
### Count reads
################################################################################
rule count_reads:
input:
sam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sam"),
transcript_id_gene_id_CDS = config["transcript_id_gene_id_CDS"]
output:
counts = os.path.join(config["output_dir"], "{sample}/counts.tsv")
BIOPZ-Gypas Foivos
committed
log:
os.path.join(config["local_log"], "count_reads_{sample}.log")
BIOPZ-Gypas Foivos
committed
threads: 1
singularity:
"docker://perl:5.24-slim"
shell:
"(perl scripts/xp-count-reads-ribseq.pl \
{input.sam} \
{input.transcript_id_gene_id_CDS} \
> {output.counts})"
BIOPZ-Gypas Foivos
committed
################################################################################
### Determine P-site offset
################################################################################
rule determine_p_site_offset:
input:
bam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sorted.bam"),
transcript_id_gene_id_CDS = config["transcript_id_gene_id_CDS"]
output:
p_site_offsets = os.path.join(
config["output_dir"],
"{sample}/p_site_offsets/alignment_offset.json"
),
p_site_offset = os.path.join(config["output_dir"],
"{sample}/p_site_offsets")
params:
BIOPZ-Gypas Foivos
committed
outdir = os.path.join(config["output_dir"], "{sample}/p_site_offsets")
BIOPZ-Gypas Foivos
committed
log:
os.path.join(config["local_log"], "determine_p_site_offset_{sample}.log")
threads: 1
singularity:
"docker://fgypas/python_pysam:3.6.5_0.15.1"
shell:
"(python scripts/determine_p_site_offsets.py \
--bam {input.bam} \
--cds_coordinates {input.transcript_id_gene_id_CDS} \
--outdir {params.outdir}) &> {log}"
################################################################################
BIOPZ-Gypas Foivos
committed
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
### Filter reads based on selected read lengths and offsets
### (Create a-site profile)
################################################################################
rule filter_reads_based_on_read_lengths_and_offsets:
input:
bam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sorted.bam"),
p_site_offsets = os.path.join(
config["output_dir"],
"{sample}/p_site_offsets/alignment_offset.json"
)
output:
bam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.a_site_profile.bam"),
log:
os.path.join(config["local_log"], "filter_reads_based_on_read_lengths_and_offsets_{sample}.log")
threads: 1
singularity:
"docker://fgypas/python_pysam:3.6.5_0.15.1"
shell:
"(python scripts/filter_reads_based_on_read_lengths_and_offsets.py \
--bam {input.bam} \
--p_site_offsets {input.p_site_offsets} \
--bam_out {output.bam}) &> {log}"
################################################################################
### SAM to BAM sort and index a-site profile
BIOPZ-Gypas Foivos
committed
################################################################################
BIOPZ-Gypas Foivos
committed
rule bam_sort_and_index:
input:
bam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.a_site_profile.bam"),
output:
bam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.a_site_profile.sorted.bam"),
bai = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.a_site_profile.sorted.bam.bai"),
log:
os.path.join(config["local_log"], "bam_sort_and_index_{sample}.log")
threads: 1
singularity:
"docker://zavolab/samtools:1.8"
shell:
"(samtools sort {input.bam} > {output.bam}; \
samtools index {output.bam};) &> {log}"