configfile: "config.yaml" #from snakemake.utils import listfiles localrules: finish ################################################################################ ### Finish rule ################################################################################ rule finish: input: pdf = expand(os.path.join(config["output_dir"], "{sample}/read_length/read_length_histogram.pdf"), sample=config["sample"]), counts = expand(os.path.join(config["output_dir"], "{sample}/counts.tsv"), sample=config["sample"]), bai = expand(os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.a_site_profile.sorted.bam.bai"), sample=config["sample"]) ################################################################################ ### Clipping reads ################################################################################ rule clip_reads: input: reads = os.path.join(config["input_dir"], "{sample}" + config["input_reads_pattern"]) output: reads = os.path.join(config["output_dir"], "{sample}", "pro.clipped.fastq.gz") params: v = "-v", n = "-n", l = "20", adapter = lambda wildcards: config[wildcards.sample]['adapter'], z = "-z" log: os.path.join(config["local_log"], "clip_reads_{sample}.log") singularity: "docker://zavolab/fastx:0.0.14" shell: "(fastx_clipper \ {params.v} \ {params.n} \ -l {params.l} \ {params.z} \ -a {params.adapter} \ -i <(zcat {input.reads}) \ -o {output.reads}) &> {log}" ################################################################################ ### Trimming reads ################################################################################ rule trim_reads: input: reads = os.path.join(config["output_dir"], "{sample}/pro.clipped.fastq.gz") output: reads = os.path.join(config["output_dir"], "{sample}/pro.trimmed.fastq.gz"), params: v = "-v", l = "20", t = lambda wildcards: config[wildcards.sample]['minimum_quality'], Q = lambda wildcards: config[wildcards.sample]['quality_type'], z = "-z" log: os.path.join(config["local_log"], "trim_reads_{sample}.log") singularity: "docker://zavolab/fastx:0.0.14" shell: "(fastq_quality_trimmer \ {params.v} \ -l {params.l} \ -t {params.t} \ -Q {params.Q} \ {params.z} \ -i <(zcat {input.reads}) \ -o {output.reads}) &> {log}" ################################################################################ ### Filtering reads ################################################################################ rule filter_reads: input: reads = os.path.join(config["output_dir"], "{sample}/pro.trimmed.fastq.gz"), output: reads = os.path.join(config["output_dir"], "{sample}/pro.filtered.fastq.gz"), params: v = "-v", q = lambda wildcards: config[wildcards.sample]['minimum_quality'], p = "90", z = "-z", Q = lambda wildcards: config[wildcards.sample]['quality_type'] log: os.path.join(config["local_log"], "filter_reads_{sample}.log") singularity: "docker://zavolab/fastx:0.0.14" shell: "(fastq_quality_filter \ {params.v} \ -q {params.q} \ -p {params.p} \ -Q {params.Q} \ {params.z} \ -i <(zcat {input.reads}) \ -o {output.reads}) &> {log}" ################################################################################ ### Convert fastq to fasta ################################################################################ rule fastq_to_fasta: input: reads = os.path.join(config["output_dir"], "{sample}/pro.filtered.fastq.gz"), output: reads = os.path.join(config["output_dir"], "{sample}/pro.filtered.fasta"), params: v = "-v", n = "-n", r = "-r" log: os.path.join(config["local_log"], "fastq_to_fasta_{sample}.log") singularity: "docker://zavolab/fastx:0.0.14" shell: "(fastq_to_fasta \ {params.v} \ {params.n} \ {params.r} \ -i <(zcat {input.reads}) \ -o {output.reads}) &> {log}" ################################################################################ ### Map reads to other genes (rRNA, tRNA, etc...) ################################################################################ rule map_to_other_genes: input: reads = os.path.join(config["output_dir"], "{sample}/pro.filtered.fasta"), index = config["other_RNAs_index"], sequence = config["other_RNAs_sequence"] output: sam = os.path.join(config["output_dir"], "{sample}/other_genes.mapped.sam"), reads = os.path.join(config["output_dir"], "{sample}/other_genes.unmapped.fasta") params: silent = "--silent", accuracy = "90" log: os.path.join(config["local_log"], "map_to_other_genes_{sample}.log") threads: 8 singularity: "docker://zavolab/segemehl:0.2.0" shell: "(segemehl.x \ {params.silent} \ -i {input.index} \ -d {input.sequence} \ -q {input.reads} \ --accuracy {params.accuracy} \ --threads {threads} \ -o {output.sam} \ -u {output.reads} ) &> {log}" ################################################################################ ### Map reads to other genes (rRNA, tRNA, etc...) ################################################################################ rule map_to_transcripts: input: reads = os.path.join(config["output_dir"], "{sample}/other_genes.unmapped.fasta"), index = config["transcripts_index"], sequence = config["transcripts_sequence"] output: sam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.sam"), reads = os.path.join(config["output_dir"], "{sample}/transcripts.unmapped.fasta") params: silent = "--silent", accuracy = "90" log: os.path.join(config["local_log"], "map_to_transcripts_{sample}.log") threads: 8 singularity: "docker://zavolab/segemehl:0.2.0" shell: "(segemehl.x \ {params.silent} \ -i {input.index} \ -d {input.sequence} \ -q {input.reads} \ --accuracy {params.accuracy} \ --threads {threads} \ -o {output.sam} \ -u {output.reads} ) &> {log}" ################################################################################ ### Remove multimappers ################################################################################ rule remove_multimappers: input: sam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.sam") output: sam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sam") log: os.path.join(config["local_log"], "remove_multimappers_{sample}.log") threads: 1 shell: "(grep -P \"^@|\tNH:i:1\t\" {input.sam} > {output.sam}) &> {log}" ################################################################################ ### SAM to BAM sort and index ################################################################################ rule sam2bam_sort_and_index: input: sam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sam") output: bam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sorted.bam"), bai = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sorted.bam.bai") params: cluster_log = os.path.join(config["cluster_log"], "sam2bam_sort_and_index_{sample}.log") log: os.path.join(config["local_log"], "sam2bam_sort_and_index_{sample}.log") threads: 1 singularity: "docker://zavolab/samtools:1.8" shell: "(samtools view -bS {input.sam} \ | samtools sort - > {output.bam}; \ samtools index {output.bam};) &> {log}" ################################################################################ ### Read length histogram ################################################################################ rule read_length_histogram: input: sam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sam") output: read_length_plot = os.path.join(config["output_dir"], "{sample}/read_length/read_length_histogram.pdf") params: dir = os.path.join(config["output_dir"], "{sample}/read_length") log: os.path.join(config["local_log"], "read_length_histogram_{sample}.log") threads: 1 singularity: "docker://zavolab/rcrunch_python:1.0" shell: "(python scripts/plot_read_lengths.py \ --sam {input.sam} \ --outdir {params.dir}) &> {log}" ################################################################################ ### Count reads ################################################################################ rule count_reads: input: sam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sam"), transcript_id_gene_id_CDS = config["transcript_id_gene_id_CDS"] output: counts = os.path.join(config["output_dir"], "{sample}/counts.tsv") log: os.path.join(config["local_log"], "count_reads_{sample}.log") threads: 1 singularity: "docker://perl:5.24-slim" shell: "(perl scripts/xp-count-reads-ribseq.pl \ {input.sam} \ {input.transcript_id_gene_id_CDS} \ > {output.counts})" ################################################################################ ### Determine P-site offset ################################################################################ rule determine_p_site_offset: input: bam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sorted.bam"), transcript_id_gene_id_CDS = config["transcript_id_gene_id_CDS"] output: p_site_offsets = os.path.join( config["output_dir"], "{sample}/p_site_offsets/alignment_offset.json" ), p_site_offset = os.path.join(config["output_dir"], "{sample}/p_site_offsets") params: outdir = os.path.join(config["output_dir"], "{sample}/p_site_offsets") log: os.path.join(config["local_log"], "determine_p_site_offset_{sample}.log") threads: 1 singularity: "docker://fgypas/python_pysam:3.6.5_0.15.1" shell: "(python scripts/determine_p_site_offsets.py \ --bam {input.bam} \ --cds_coordinates {input.transcript_id_gene_id_CDS} \ --outdir {params.outdir}) &> {log}" ################################################################################ ### Filter reads based on selected read lengths and offsets ### (Create a-site profile) ################################################################################ rule filter_reads_based_on_read_lengths_and_offsets: input: bam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sorted.bam"), p_site_offsets = os.path.join( config["output_dir"], "{sample}/p_site_offsets/alignment_offset.json" ) output: bam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.a_site_profile.bam"), log: os.path.join(config["local_log"], "filter_reads_based_on_read_lengths_and_offsets_{sample}.log") threads: 1 singularity: "docker://fgypas/python_pysam:3.6.5_0.15.1" shell: "(python scripts/filter_reads_based_on_read_lengths_and_offsets.py \ --bam {input.bam} \ --p_site_offsets {input.p_site_offsets} \ --bam_out {output.bam}) &> {log}" ################################################################################ ### SAM to BAM sort and index a-site profile ################################################################################ rule bam_sort_and_index: input: bam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.a_site_profile.bam"), output: bam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.a_site_profile.sorted.bam"), bai = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.a_site_profile.sorted.bam.bai"), log: os.path.join(config["local_log"], "bam_sort_and_index_{sample}.log") threads: 1 singularity: "docker://zavolab/samtools:1.8" shell: "(samtools sort {input.bam} > {output.bam}; \ samtools index {output.bam};) &> {log}"