Skip to content
Snippets Groups Projects
Snakefile 11.3 KiB
Newer Older
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
configfile: "config.yaml"
#from snakemake.utils import listfiles

BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed

################################################################################
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
### Finish rule
################################################################################
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed

rule finish:
	input:
		pdf = expand(os.path.join(config["output_dir"], "{sample}/read_length/read_length_histogram.pdf"), sample=config["sample"]),
		counts = expand(os.path.join(config["output_dir"], "{sample}/counts.tsv"), sample=config["sample"]),
		bai = expand(os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.a_site_profile.sorted.bam.bai"), sample=config["sample"])
################################################################################
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
### Clipping reads
################################################################################
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed

rule clip_reads:
	input:
		reads = os.path.join(config["input_dir"], "{sample}" + config["input_reads_pattern"])
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
	output:
		reads = os.path.join(config["output_dir"], "{sample}", "pro.clipped.fastq.gz")
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
	params:
		v = "-v",
		n = "-n",
		l = "20",
		adapter = lambda wildcards: config[wildcards.sample]['adapter'],
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
	log:
		os.path.join(config["local_log"], "clip_reads_{sample}.log")
	singularity:
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
	shell:
		"(fastx_clipper \
		{params.v} \
		{params.n} \
		-l {params.l} \
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
		{params.z} \
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
		-a {params.adapter} \
		-i <(zcat {input.reads}) \
		-o {output.reads}) &> {log}"

################################################################################
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
### Trimming reads
################################################################################
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed

rule trim_reads:
	input:
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
		reads = os.path.join(config["output_dir"], "{sample}/pro.clipped.fastq.gz")
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
	output:
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
		reads = os.path.join(config["output_dir"], "{sample}/pro.trimmed.fastq.gz"),
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
	params:
		v = "-v",
		l = "20",
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
		t = lambda wildcards: config[wildcards.sample]['minimum_quality'],
		Q = lambda wildcards: config[wildcards.sample]['quality_type'],
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
	log:
		os.path.join(config["local_log"], "trim_reads_{sample}.log")
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
	singularity:
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
	shell:
		"(fastq_quality_trimmer \
		{params.v} \
		-l {params.l} \
		-t {params.t} \
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
		-Q {params.Q} \
		{params.z} \
		-i <(zcat {input.reads}) \
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
		-o {output.reads}) &> {log}"

################################################################################
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
### Filtering reads
################################################################################
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed

rule filter_reads:
	input:
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
		reads = os.path.join(config["output_dir"], "{sample}/pro.trimmed.fastq.gz"),
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
	output:
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
		reads = os.path.join(config["output_dir"], "{sample}/pro.filtered.fastq.gz"),
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
	params:
		v = "-v",
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
		q = lambda wildcards: config[wildcards.sample]['minimum_quality'],
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
		p = "90",
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
		z = "-z",
		Q = lambda wildcards: config[wildcards.sample]['quality_type']
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
	log:
		os.path.join(config["local_log"], "filter_reads_{sample}.log")
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
	singularity:
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
	shell:
		"(fastq_quality_filter \
		{params.v} \
		-q {params.q} \
		-p {params.p} \
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
		-Q {params.Q} \
		{params.z} \
		-i <(zcat {input.reads}) \
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
		-o {output.reads}) &> {log}"

################################################################################
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
### Convert fastq to fasta
################################################################################
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed

rule fastq_to_fasta:
	input:
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
		reads = os.path.join(config["output_dir"], "{sample}/pro.filtered.fastq.gz"),
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
	output:
		reads = os.path.join(config["output_dir"], "{sample}/pro.filtered.fasta"),
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
	params:
		v = "-v",
		n = "-n",
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
	log:
		os.path.join(config["local_log"], "fastq_to_fasta_{sample}.log")
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
	singularity:
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
	shell:
		"(fastq_to_fasta \
		{params.v} \
		{params.n} \
		{params.r} \
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
		-i <(zcat {input.reads}) \
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
		-o {output.reads}) &> {log}"

################################################################################
### Map reads to other genes (rRNA, tRNA, etc...)
################################################################################
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed

rule map_to_other_genes:
	input:
		reads = os.path.join(config["output_dir"], "{sample}/pro.filtered.fasta"),
		index = config["other_RNAs_index"],
		sequence = config["other_RNAs_sequence"]
	output:
		sam = os.path.join(config["output_dir"], "{sample}/other_genes.mapped.sam"),
		reads = os.path.join(config["output_dir"], "{sample}/other_genes.unmapped.fasta")
	params:
		silent = "--silent",
	log:
		os.path.join(config["local_log"], "map_to_other_genes_{sample}.log")
	threads:	8
	singularity:
	shell:
		"(segemehl.x \
		{params.silent} \
		-i {input.index} \
		-d {input.sequence} \
		-q {input.reads} \
		--accuracy {params.accuracy} \
		--threads {threads} \
		-o {output.sam} \
		-u {output.reads} ) &> {log}"
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed

################################################################################
### Map reads to other genes (rRNA, tRNA, etc...)
################################################################################
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed

rule map_to_transcripts:
	input:
BIOPZ-Gypas Foivos's avatar
BIOPZ-Gypas Foivos committed
		reads = os.path.join(config["output_dir"], "{sample}/other_genes.unmapped.fasta"),
		index = config["transcripts_index"],
		sequence = config["transcripts_sequence"]
	output:
		sam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.sam"),
		reads = os.path.join(config["output_dir"], "{sample}/transcripts.unmapped.fasta")
	params:
		silent = "--silent",
	log:
		os.path.join(config["local_log"], "map_to_transcripts_{sample}.log")
	threads:	8
	singularity:
	shell:
		"(segemehl.x \
		{params.silent} \
		-i {input.index} \
		-d {input.sequence} \
		-q {input.reads} \
		--accuracy {params.accuracy} \
		--threads {threads} \
		-o {output.sam} \
		-u {output.reads} ) &> {log}"

################################################################################
### Remove multimappers
################################################################################

rule remove_multimappers:
	input:
		sam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.sam")
	output:
		sam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sam")
	log:
		os.path.join(config["local_log"], "remove_multimappers_{sample}.log")
	threads:	1
	shell:
		"(grep -P \"^@|\tNH:i:1\t\" {input.sam} > {output.sam}) &> {log}"
################################################################################
### SAM to BAM sort and index
################################################################################

rule sam2bam_sort_and_index:
	input:
		sam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sam")
	output:
		bam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sorted.bam"),
		bai = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sorted.bam.bai")
	params:
		cluster_log = os.path.join(config["cluster_log"], "sam2bam_sort_and_index_{sample}.log")
	log:
		os.path.join(config["local_log"], "sam2bam_sort_and_index_{sample}.log")
	threads:	1
	singularity:
		"docker://zavolab/samtools:1.8"
	shell:
		"(samtools view -bS {input.sam} \
		 | samtools sort - > {output.bam}; \
		 samtools index {output.bam};) &> {log}"

################################################################################
### Read length histogram
################################################################################

rule read_length_histogram:
	input:
		sam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sam")
	output:
		read_length_plot = os.path.join(config["output_dir"], "{sample}/read_length/read_length_histogram.pdf")
	params:
		dir = os.path.join(config["output_dir"], "{sample}/read_length")
	log:
		os.path.join(config["local_log"], "read_length_histogram_{sample}.log")
	threads:	1
	singularity:
		"docker://zavolab/rcrunch_python:1.0"
	shell:
		"(python scripts/plot_read_lengths.py \
		--sam {input.sam} \
		--outdir {params.dir}) & > {log}"

################################################################################
### Count reads
################################################################################

rule count_reads:
	input:
		sam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sam"),
		transcript_id_gene_id_CDS = config["transcript_id_gene_id_CDS"]
	output:
		counts = os.path.join(config["output_dir"], "{sample}/counts.tsv")
	log:
		os.path.join(config["local_log"], "count_reads_{sample}.log")
	threads:	1
	singularity:
		"docker://perl:5.24-slim"
	shell:
		"(perl scripts/xp-count-reads-ribseq.pl \
		{input.sam} \
		{input.transcript_id_gene_id_CDS} \
		> {output.counts})"

################################################################################
### Determine P-site offset
################################################################################

rule determine_p_site_offset:
	input:
		bam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sorted.bam"),
		transcript_id_gene_id_CDS = config["transcript_id_gene_id_CDS"]
	output:
		p_site_offsets = os.path.join(
			config["output_dir"],
			"{sample}/p_site_offsets/alignment_offset.json"
		),
		p_site_offset = os.path.join(config["output_dir"],
									 "{sample}/p_site_offsets")
	params:
		outdir = os.path.join(config["output_dir"], "{sample}/p_site_offsets")
	log:
		os.path.join(config["local_log"], "determine_p_site_offset_{sample}.log")
	threads:	1
	singularity:
		"docker://fgypas/python_pysam:3.6.5_0.15.1"
	shell:
		"(python scripts/determine_p_site_offsets.py \
		--bam {input.bam} \
		--cds_coordinates {input.transcript_id_gene_id_CDS} \
		--outdir {params.outdir}) &> {log}"

################################################################################
### Filter reads based on selected read lengths and offsets
### (Create a-site profile)
################################################################################

rule filter_reads_based_on_read_lengths_and_offsets:
	input:
		bam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sorted.bam"),
		p_site_offsets = os.path.join(
			config["output_dir"],
			"{sample}/p_site_offsets/alignment_offset.json"
		)
	output:
		bam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.a_site_profile.bam"),
	log:
		os.path.join(config["local_log"], "filter_reads_based_on_read_lengths_and_offsets_{sample}.log")
	threads:	1
	singularity:
		"docker://fgypas/python_pysam:3.6.5_0.15.1"
	shell:
		"(python scripts/filter_reads_based_on_read_lengths_and_offsets.py \
		--bam {input.bam} \
		--p_site_offsets {input.p_site_offsets} \
		--bam_out {output.bam}) &> {log}"

################################################################################
### SAM to BAM sort and index a-site profile
################################################################################

rule bam_sort_and_index:
	input:
		bam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.a_site_profile.bam"),
	output:
		bam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.a_site_profile.sorted.bam"),
		bai = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.a_site_profile.sorted.bam.bai"),
	log:
		os.path.join(config["local_log"], "bam_sort_and_index_{sample}.log")
	threads:	1
	singularity:
		"docker://zavolab/samtools:1.8"
	shell:
		"(samtools sort {input.bam} > {output.bam}; \
		 samtools index {output.bam};) &> {log}"