# Snakefile: clip, trim and quality-filter sequencing reads, convert them to
# FASTA and map them with segemehl.
# Workflow settings are read from config.yaml. Keys used in this file:
# output_dir, cluster_log, local_log, dir_created, input_dir,
# input_reads_pattern, sample, other_RNAs_index, other_RNAs_sequence,
# transcripts_index, transcripts_sequence, and one entry per sample name that
# holds an 'adapter' value.
configfile: "config.yaml"
#from snakemake.utils import listfiles

# Rules named here are executed on the host running Snakemake instead of being
# submitted as cluster jobs.
# NOTE(review): concat_samples is not defined in the visible part of this
# file; confirm it exists elsewhere or remove it from the list.
localrules: create_output_and_log_directories, concat_samples, finish

#################################################################################
### Finish rule
#################################################################################

# Target rule: requests <output_dir>/<sample>/transcripts.mapped.sam for each
# value of config["sample"]. It has no outputs or commands of its own.
rule finish:
	input:
		sam = expand(os.path.join(config["output_dir"], "{sample}/transcripts.mapped.sam"), sample=config["sample"])

#################################################################################
### Create output and log directories
#################################################################################

# Creates the output directory, the cluster and local log directories and one
# sub-directory per sample (mkdir -p), then touches the flag file
# config["dir_created"], which clip_reads takes as an input.
# NOTE(review): the directory outputs are not wrapped in directory(); newer
# Snakemake releases may insist on that flag -- confirm against the version
# this workflow is run with.
rule create_output_and_log_directories:
	output:
		output_dir = config["output_dir"],
		cluster_log = config["cluster_log"],
		local_log = config["local_log"],
		# One directory per value of config["sample"].
		sample_dir = expand(os.path.join(config["output_dir"], "{sample}"), sample=config["sample"]),
		flag = config["dir_created"]
	threads:	1
	shell:
		"mkdir -p {output.output_dir}; \
		mkdir -p {output.cluster_log}; \
		mkdir -p {output.local_log}; \
		mkdir -p {output.sample_dir}; \
		touch {output.flag};"

#################################################################################
### Clipping reads
#################################################################################

# Remove the sample-specific adapter from the raw reads with fastx_clipper.
#   input.flag     flag file written by create_output_and_log_directories
#   input.reads    <input_dir>/<sample><input_reads_pattern>; it is piped
#                  through zcat, so it is expected to be gzip-compressed
#   output.reads   clipped reads, written as pro.clipped.fastq.gz
#   log            receives stdout and stderr of the command
rule clip_reads:
	input:
		flag = config["dir_created"],
		reads = os.path.join(config["input_dir"], "{sample}" + config["input_reads_pattern"]),
	output:
		reads = os.path.join(config["output_dir"], "{sample}/pro.clipped.fastq.gz"),
	params:
		# fastx_clipper switches: -v verbose report, -n keep reads containing
		# unknown (N) nucleotides, -l minimum read length to keep, -z gzip the
		# output. -Q33 is passed through as the quality-offset switch.
		v = "-v",
		n = "-n",
		l = "20",
		qual = "-Q33",
		z = "-z",
		# Adapter sequence looked up under the sample's own entry in the config.
		adapter = lambda wildcards: config[ wildcards.sample ]['adapter'],
		# Not used by the shell command below.
		# NOTE(review): presumably consumed by the cluster submission setup -- confirm.
		cluster_log = os.path.join(config["cluster_log"], "clip_reads_{sample}.log")
	log:
		os.path.join(config["local_log"], "clip_reads_{sample}.log")
	singularity:
		"docker://cjh4zavolab/fastx:0.0.14"
	shell:
		"(fastx_clipper \
		{params.v} \
		{params.n} \
		-l {params.l} \
		{params.qual} \
		-a {params.adapter} \
		{params.z} \
		-i <(zcat {input.reads}) \
		-o {output.reads}) &> {log}"

#################################################################################
### Trimming reads
#################################################################################

# Quality-trim the adapter-clipped reads of one sample with
# fastq_quality_trimmer.
#   input.reads    pro.clipped.fastq.gz produced by clip_reads (piped through
#                  zcat)
#   output.reads   trimmed reads, written as pro.trimmed.fastq.gz
#   log            receives stdout and stderr of the command
rule trim_reads:
	input:
		reads = os.path.join(config["output_dir"], "{sample}/pro.clipped.fastq.gz")
	output:
		reads = os.path.join(config["output_dir"], "{sample}/pro.trimmed.fastq.gz"),
	params:
		# fastq_quality_trimmer switches: -v verbose report, -l minimum read
		# length kept after trimming, -t quality threshold below which read
		# ends are trimmed, -z gzip the output. -Q33 is passed through as the
		# quality-offset switch.
		v = "-v",
		l = "20",
		t = "20",
		qual = "-Q33",
		z = "-z",
		# Placed under config["cluster_log"], matching the parameter name and
		# the layout used by clip_reads. Not used by the shell command below.
		cluster_log = os.path.join(config["cluster_log"], "trim_reads_{sample}.log")
	log:
		# Placed under config["local_log"], as in clip_reads.
		os.path.join(config["local_log"], "trim_reads_{sample}.log")
	singularity:
		"docker://cjh4zavolab/fastx:0.0.14"
	shell:
		"(fastq_quality_trimmer \
		{params.v} \
		-l {params.l} \
		-t {params.t} \
		{params.qual} \
		{params.z} \
		-i <(zcat {input.reads}) \
		-o {output.reads}) &> {log}"

#################################################################################
### Filtering reads
#################################################################################

# Discard low-quality reads of one sample with fastq_quality_filter.
#   input.reads    pro.trimmed.fastq.gz produced by trim_reads (piped through
#                  zcat)
#   output.reads   surviving reads, written as pro.filtered.fastq.gz
#   log            receives stdout and stderr of the command
rule filter_reads:
	input:
		reads = os.path.join(config["output_dir"], "{sample}/pro.trimmed.fastq.gz"),
	output:
		reads = os.path.join(config["output_dir"], "{sample}/pro.filtered.fastq.gz"),
	params:
		# fastq_quality_filter switches: -v verbose report, -q minimum quality
		# score, -p minimum percentage of bases that must reach that score,
		# -z gzip the output. -Q33 is passed through as the quality-offset
		# switch.
		v = "-v",
		q = "20",
		p = "90",
		qual = "-Q33",
		z = "-z",
		# Placed under config["cluster_log"], matching the parameter name and
		# the layout used by clip_reads. Not used by the shell command below.
		cluster_log = os.path.join(config["cluster_log"], "filter_reads_{sample}.log")
	log:
		# Placed under config["local_log"], as in clip_reads.
		os.path.join(config["local_log"], "filter_reads_{sample}.log")
	singularity:
		"docker://cjh4zavolab/fastx:0.0.14"
	shell:
		"(fastq_quality_filter \
		{params.v} \
		-q {params.q} \
		-p {params.p} \
		{params.qual} \
		{params.z} \
		-i <(zcat {input.reads}) \
		-o {output.reads}) &> {log}"

#################################################################################
### Convert fastq to fasta
#################################################################################

# Convert the quality-filtered reads of one sample from FASTQ to FASTA with
# fastq_to_fasta.
#   input.reads    pro.filtered.fastq.gz produced by filter_reads (piped
#                  through zcat)
#   output.reads   uncompressed FASTA, written as pro.filtered.fasta
#   log            receives stdout and stderr of the command
rule fastq_to_fasta:
	input:
		reads = os.path.join(config["output_dir"], "{sample}/pro.filtered.fastq.gz"),
	output:
		reads = os.path.join(config["output_dir"], "{sample}/pro.filtered.fasta"),
	params:
		# fastq_to_fasta switches: -v verbose report, -n keep reads containing
		# unknown (N) nucleotides, -r rename sequence identifiers to numbers.
		# -Q33 is passed through as the quality-offset switch. No gzip switch
		# is defined because the output is a plain .fasta file.
		v = "-v",
		qual = "-Q33",
		n = "-n",
		r = "-r",
		# Placed under config["cluster_log"], matching the parameter name and
		# the layout used by clip_reads. Not used by the shell command below.
		cluster_log = os.path.join(config["cluster_log"], "fastq_to_fasta_{sample}.log")
	log:
		# Placed under config["local_log"], as in clip_reads.
		os.path.join(config["local_log"], "fastq_to_fasta_{sample}.log")
	singularity:
		"docker://cjh4zavolab/fastx:0.0.14"
	shell:
		"(fastq_to_fasta \
		{params.v} \
		{params.qual} \
		{params.n} \
		{params.r} \
		-i <(zcat {input.reads}) \
		-o {output.reads}) &> {log}"

#################################################################################
### Map reads to other genes (rRNA, tRNA, etc...)
#################################################################################

# Align the filtered reads of one sample against the "other RNAs" reference
# with segemehl.
#   input.reads     pro.filtered.fasta produced by fastq_to_fasta
#   input.index     segemehl index, config["other_RNAs_index"]
#   input.sequence  reference sequences, config["other_RNAs_sequence"]
#   output.sam      alignments (segemehl -o)
#   output.reads    reads segemehl could not align (segemehl -u)
#   log             receives stdout and stderr of the command
rule map_to_other_genes:
	input:
		reads = os.path.join(config["output_dir"], "{sample}/pro.filtered.fasta"),
		index = config["other_RNAs_index"],
		sequence = config["other_RNAs_sequence"]
	output:
		sam = os.path.join(config["output_dir"], "{sample}/other_genes.mapped.sam"),
		reads = os.path.join(config["output_dir"], "{sample}/other_genes.unmapped.fasta")
	params:
		silent = "--silent",
		# Passed to --accuracy: minimum percentage of matches per read.
		accuracy = "90",
		# Not used by the shell command below.
		# NOTE(review): presumably consumed by the cluster submission setup -- confirm.
		cluster_log = os.path.join(config["cluster_log"], "map_to_other_genes_{sample}.log")
	log:
		os.path.join(config["local_log"], "map_to_other_genes_{sample}.log")
	threads:	8
	singularity:
		"docker://fgypas/segemehl:0.2.0"
	shell:
		"(segemehl.x \
		{params.silent} \
		-i {input.index} \
		-d {input.sequence} \
		-q {input.reads} \
		--accuracy {params.accuracy} \
		--threads {threads} \
		-o {output.sam} \
		-u {output.reads} ) &> {log}"

#################################################################################
### Map reads to transcripts
#################################################################################

# Align the reads of one sample against the transcript reference with
# segemehl.
#   input.reads     other_genes.unmapped.fasta, i.e. the reads that
#                   map_to_other_genes could not align
#   input.index     segemehl index, config["transcripts_index"]
#   input.sequence  reference sequences, config["transcripts_sequence"]
#   output.sam      alignments (segemehl -o); this is the file requested by
#                   rule finish
#   output.reads    reads segemehl could not align (segemehl -u)
#   log             receives stdout and stderr of the command
rule map_to_transcripts:
	input:
		# Consume the reads left over by map_to_other_genes rather than all
		# filtered reads: otherwise that rule's unmapped output is never used
		# and the rule itself is unreachable from finish.
		reads = os.path.join(config["output_dir"], "{sample}/other_genes.unmapped.fasta"),
		index = config["transcripts_index"],
		sequence = config["transcripts_sequence"]
	output:
		sam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.sam"),
		reads = os.path.join(config["output_dir"], "{sample}/transcripts.unmapped.fasta")
	params:
		silent = "--silent",
		# Passed to --accuracy: minimum percentage of matches per read.
		accuracy = "90",
		# Not used by the shell command below.
		# NOTE(review): presumably consumed by the cluster submission setup -- confirm.
		cluster_log = os.path.join(config["cluster_log"], "map_to_transcripts_{sample}.log")
	log:
		os.path.join(config["local_log"], "map_to_transcripts_{sample}.log")
	threads:	8
	singularity:
		"docker://fgypas/segemehl:0.2.0"
	shell:
		"(segemehl.x \
		{params.silent} \
		-i {input.index} \
		-d {input.sequence} \
		-q {input.reads} \
		--accuracy {params.accuracy} \
		--threads {threads} \
		-o {output.sam} \
		-u {output.reads} ) &> {log}"