configfile: "config.yaml"

#from snakemake.utils import listfiles

# Rules named here are executed on the submitting host rather than being
# dispatched as cluster jobs.
# NOTE(review): `concat_samples` is not defined in the visible part of this
# workflow -- confirm it exists elsewhere or drop it from the list.
localrules: create_output_and_log_directories, concat_samples, finish

#################################################################################
### Finish rule
#################################################################################

# Target rule: requests the transcript alignment (SAM) of every sample listed
# under config["sample"].
rule finish:
    input:
        sam = expand(
            os.path.join(config["output_dir"], "{sample}/transcripts.mapped.sam"),
            sample=config["sample"]
        )

#################################################################################
### Create output and log directories
#################################################################################

# Creates the output directory, both log directories and one directory per
# sample, then touches a flag file that downstream rules depend on.
#
# Only the flag file is declared as output. The directories are passed as
# params because they are parents of other rules' outputs: declaring them as
# outputs makes recent Snakemake releases reject the workflow (directory
# outputs must be wrapped in directory(), and an output directory may not
# contain outputs of another job).
rule create_output_and_log_directories:
    output:
        flag = config["dir_created"]
    params:
        output_dir = config["output_dir"],
        cluster_log = config["cluster_log"],
        local_log = config["local_log"],
        # A list of paths; it is rendered space-separated in the shell command.
        sample_dir = expand(
            os.path.join(config["output_dir"], "{sample}"),
            sample=config["sample"]
        )
    threads: 1
    shell:
        "mkdir -p {params.output_dir}; \
        mkdir -p {params.cluster_log}; \
        mkdir -p {params.local_log}; \
        mkdir -p {params.sample_dir}; \
        touch {output.flag};"

#################################################################################
### Clipping reads
#################################################################################

# Clips the sample-specific adapter (config[<sample>]["adapter"]) from the
# gzipped input reads with fastx_clipper and writes gzipped FASTQ.
rule clip_reads:
    input:
        # Ensures the directory-creation rule has run first.
        flag = config["dir_created"],
        reads = os.path.join(config["input_dir"], "{sample}" + config["input_reads_pattern"]),
    output:
        reads = os.path.join(config["output_dir"], "{sample}/pro.clipped.fastq.gz"),
    params:
        v = "-v",
        n = "-n",
        l = "20",
        qual = "-Q33",
        z = "-z",
        adapter = lambda wildcards: config[ wildcards.sample ]['adapter'],
        cluster_log = os.path.join(config["cluster_log"], "clip_reads_{sample}.log")
    log:
        os.path.join(config["local_log"], "clip_reads_{sample}.log")
    singularity:
        "docker://cjh4zavolab/fastx:0.0.14"
    shell:
        # The input is decompressed on the fly via bash process substitution.
        "(fastx_clipper \
        {params.v} \
        {params.n} \
        -l {params.l} \
        {params.qual} \
        -a {params.adapter} \
        {params.z} \
        -i <(zcat {input.reads}) \
        -o {output.reads}) &> {log}"
#################################################################################
### Trimming reads
#################################################################################

# Quality-trims the clipped reads with fastq_quality_trimmer
# (-t quality threshold, -l minimum length kept) and writes gzipped FASTQ.
rule trim_reads:
    input:
        reads = os.path.join(config["output_dir"], "{sample}/pro.clipped.fastq.gz")
    output:
        reads = os.path.join(config["output_dir"], "{sample}/pro.trimmed.fastq.gz"),
    params:
        v = "-v",
        l = "20",
        t = "20",
        qual = "-Q33",
        z = "-z",
        # Cluster log lives under config["cluster_log"], the rule's own log
        # under config["local_log"] -- same layout as clip_reads and the
        # mapping rules.
        cluster_log = os.path.join(config["cluster_log"], "trim_reads_{sample}.log")
    log:
        os.path.join(config["local_log"], "trim_reads_{sample}.log")
    singularity:
        "docker://cjh4zavolab/fastx:0.0.14"
    shell:
        "(fastq_quality_trimmer \
        {params.v} \
        -l {params.l} \
        -t {params.t} \
        {params.qual} \
        {params.z} \
        -i <(zcat {input.reads}) \
        -o {output.reads}) &> {log}"

#################################################################################
### Filtering reads
#################################################################################

# Filters the trimmed reads with fastq_quality_filter
# (-q minimum quality, -p minimum percent of bases reaching it) and writes
# gzipped FASTQ.
rule filter_reads:
    input:
        reads = os.path.join(config["output_dir"], "{sample}/pro.trimmed.fastq.gz"),
    output:
        reads = os.path.join(config["output_dir"], "{sample}/pro.filtered.fastq.gz"),
    params:
        v = "-v",
        q = "20",
        p = "90",
        qual = "-Q33",
        z = "-z",
        cluster_log = os.path.join(config["cluster_log"], "filter_reads_{sample}.log")
    log:
        os.path.join(config["local_log"], "filter_reads_{sample}.log")
    singularity:
        "docker://cjh4zavolab/fastx:0.0.14"
    shell:
        "(fastq_quality_filter \
        {params.v} \
        -q {params.q} \
        -p {params.p} \
        {params.qual} \
        {params.z} \
        -i <(zcat {input.reads}) \
        -o {output.reads}) &> {log}"

#################################################################################
### Convert fastq to fasta
#################################################################################

# Converts the filtered gzipped FASTQ to FASTA with fastq_to_fasta.
# The output is a plain (uncompressed) .fasta file, so no -z flag is passed.
rule fastq_to_fasta:
    input:
        reads = os.path.join(config["output_dir"], "{sample}/pro.filtered.fastq.gz"),
    output:
        reads = os.path.join(config["output_dir"], "{sample}/pro.filtered.fasta"),
    params:
        v = "-v",
        qual = "-Q33",
        n = "-n",
        r = "-r",
        cluster_log = os.path.join(config["cluster_log"], "fastq_to_fasta_{sample}.log")
    log:
        os.path.join(config["local_log"], "fastq_to_fasta_{sample}.log")
    singularity:
        "docker://cjh4zavolab/fastx:0.0.14"
    shell:
        "(fastq_to_fasta \
        {params.v} \
        {params.qual} \
        {params.n} \
        {params.r} \
        -i <(zcat {input.reads}) \
        -o {output.reads}) &> {log}"

#################################################################################
### Map reads to other genes (rRNA, tRNA, etc...)
#################################################################################

# Aligns the filtered reads against the "other RNAs" reference with segemehl.
# Writes the alignments as SAM and the reads that did not align as FASTA.
rule map_to_other_genes:
    input:
        reads = os.path.join(config["output_dir"], "{sample}/pro.filtered.fasta"),
        index = config["other_RNAs_index"],
        sequence = config["other_RNAs_sequence"]
    output:
        sam = os.path.join(config["output_dir"], "{sample}/other_genes.mapped.sam"),
        reads = os.path.join(config["output_dir"], "{sample}/other_genes.unmapped.fasta")
    params:
        silent = "--silent",
        accuracy = "90",
        cluster_log = os.path.join(config["cluster_log"], "map_to_other_genes_{sample}.log")
    log:
        os.path.join(config["local_log"], "map_to_other_genes_{sample}.log")
    threads: 8
    singularity:
        "docker://fgypas/segemehl:0.2.0"
    shell:
        "(segemehl.x \
        {params.silent} \
        -i {input.index} \
        -d {input.sequence} \
        -q {input.reads} \
        --accuracy {params.accuracy} \
        --threads {threads} \
        -o {output.sam} \
        -u {output.reads} ) &> {log}"

#################################################################################
### Map reads to transcripts
#################################################################################

# Aligns reads against the transcript reference with segemehl. Writes the
# alignments as SAM and the reads that did not align as FASTA.
#
# The input is the set of reads left unmapped by map_to_other_genes, so reads
# that already aligned to the "other RNAs" reference are excluded. Reading
# pro.filtered.fasta here instead would leave map_to_other_genes without any
# consumer, and it would never be scheduled from the finish rule.
rule map_to_transcripts:
    input:
        reads = os.path.join(config["output_dir"], "{sample}/other_genes.unmapped.fasta"),
        index = config["transcripts_index"],
        sequence = config["transcripts_sequence"]
    output:
        sam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.sam"),
        reads = os.path.join(config["output_dir"], "{sample}/transcripts.unmapped.fasta")
    params:
        silent = "--silent",
        accuracy = "90",
        cluster_log = os.path.join(config["cluster_log"], "map_to_transcripts_{sample}.log")
    log:
        os.path.join(config["local_log"], "map_to_transcripts_{sample}.log")
    threads: 8
    singularity:
        "docker://fgypas/segemehl:0.2.0"
    shell:
        "(segemehl.x \
        {params.silent} \
        -i {input.index} \
        -d {input.sequence} \
        -q {input.reads} \
        --accuracy {params.accuracy} \
        --threads {threads} \
        -o {output.sam} \
        -u {output.reads} ) &> {log}"