Skip to content
Snippets Groups Projects
Snakefile 6.33 KiB
Newer Older
configfile: "config.yaml"

localrules: finish

#################################################################################
### Final rule
#################################################################################

rule finish:
	input:
		fastqc = expand(os.path.join(config["output_dir"], "{sample}", "fastqc"), sample=config["sample"]),
		htseq_qa = expand(os.path.join(config["output_dir"], "{sample}", "htseq_qa", "htseq_quality.pdf"), sample=config["sample"]),
		gn_estimates = expand(os.path.join(config["output_dir"], "{sample}", "salmon_quant", "quant.genes.sf"), sample=config["sample"]),
		bam = expand(os.path.join(config["output_dir"], "{sample}", "STAR_Aligned.out.bam"), sample=config["sample"])

##################################################################################
### Fastqc
##################################################################################

rule fastqc:
	input:
		reads = os.path.join(config["input_dir"], "{sample}.fastq.gz")
	output:
		outdir = os.path.join(config["output_dir"], "{sample}", "fastqc")
	singularity:
		"docker://zavolab/fastqc:0.11.8"
	log:
		os.path.join(config["local_log"], "fastqc_{sample}.log")
	shell:
		"(mkdir -p {output.outdir}; \
		fastqc \
		--outdir {output.outdir} \
		{input.reads}) &> {log}"

##################################################################################
### HTSeq quality assessment of the fastq file
##################################################################################

rule htseq_qa:
	input:
		reads = os.path.join(config["input_dir"], "{sample}.fastq.gz")
	output:
		qual_pdf = os.path.join(config["output_dir"], "{sample}", "htseq_qa", "htseq_quality.pdf")
	singularity:
		"docker://zavolab/python_htseq:3.6.5_0.10.0"
	log:
		os.path.join(config["local_log"], "htseq_qa_{sample}.log")
	shell:
		"(htseq-qa \
		-t fastq \
		-o {output.qual_pdf} \
        {input.reads} ) &> {log}"

##################################################################################
### Map to other RNAs with Segemehl
##################################################################################

rule map_to_other_RNAs:
	input:
		reads = os.path.join(config["input_dir"], "{sample}.fastq.gz"),
		index = config["other_RNAs_index"],
		sequence = config["other_RNAs_sequence"]
	output:
		sam = os.path.join(config["output_dir"], "{sample}", "other_genes.mapped.sam"),
		reads = os.path.join(config["output_dir"], "{sample}", "other_genes.unmapped.fastq.gz")
	params:
		reads = os.path.join(config["output_dir"], "{sample}", "other_genes.unmapped.fastq"),
		silent = "--silent",
		accuracy = "90"
	log:
		os.path.join(config["local_log"], "map_to_other_genes_{sample}.log")
	threads:	8
	singularity:
		"docker://zavolab/segemehl:0.2.0"
	shell:
		"(segemehl.x \
		{params.silent} \
		-i {input.index} \
		-d {input.sequence} \
		-q {input.reads} \
		--accuracy {params.accuracy} \
		--threads {threads} \
		-o {output.sam} \
		-u {params.reads}; \
		gzip -c {params.reads} > {output.reads}; \
		rm {params.reads}) &> {log}"

##################################################################################
### salmon quant
##################################################################################

rule salmon_quant:
	input:
		reads = os.path.join(config["output_dir"], "{sample}", "other_genes.unmapped.fastq.gz"),
		gtf = config["annotation_filtered"],
		index = config["salmon_index"]
	output:
		output_dir = os.path.join(config["output_dir"], "{sample}", "salmon_quant"),
		gn_estimates = os.path.join(config["output_dir"], "{sample}", "salmon_quant", "quant.genes.sf"),
		tr_estimates = os.path.join(config["output_dir"], "{sample}", "salmon_quant", "quant.sf")
	params:
		libType = lambda wildcards: config[wildcards.sample]['libType'],
		fldMean = lambda wildcards: config[wildcards.sample]['fldMean'],
		fldSD = lambda wildcards: config[wildcards.sample]['fldSD'],
	log:
		os.path.join(config["local_log"], "salmon_quant_{sample}.log")
	threads:	6
	singularity:
		"docker://zavolab/salmon:0.11.0"
	shell:
		"(salmon quant \
		--index {input.index} \
		--libType {params.libType} \
		--unmatedReads <(zcat {input.reads}) \
		--seqBias \
		--geneMap {input.gtf} \
		--fldMean {params.fldMean} \
		--fldSD {params.fldSD} \
		--threads {threads} \
		--output {output.output_dir}) &> {log}"

#################################################################################
### Align reads STAR
#################################################################################

rule align_reads_STAR:
	input:
		index = config["STAR_index"],
		reads = os.path.join(config["output_dir"], "{sample}", "other_genes.unmapped.fastq.gz"),
		gtf = config["annotation"]
	output:
		outputfile = os.path.join(config["output_dir"], "{sample}", "STAR_Aligned.out.bam")
	params:
		outFileNamePrefix = os.path.join(config["output_dir"], "{sample}", "STAR_")
	log:
		os.path.join(config["local_log"],"align_reads_STAR_{sample}.log")
	threads:	8
	singularity:
		"docker://zavolab/star:2.6.0a"
	shell:
		"(STAR --runMode alignReads \
		--twopassMode Basic \
		--runThreadN {threads} \
		--genomeDir {input.index} \
		--sjdbGTFfile {input.gtf} \
		--readFilesIn {input.reads} \
		--readFilesCommand zcat \
		--outFileNamePrefix {params.outFileNamePrefix} \
		--outSAMtype BAM Unsorted) &> {log}"

################################################################################
### Sort alignment file
################################################################################

rule sort_bam:
	input:
		bam = os.path.join(config["output_dir"], "{sample}", "STAR_Aligned.out.bam")
	output:
		bam = os.path.join(config["output_dir"], "{sample}", "STAR_Aligned.out.sorted.bam")
	threads:	8
	log:
		os.path.join(config["local_log"],"sort_bam_{sample}.log")
	singularity:
		"docker://zavolab/samtools:1.8"
	shell:
		"(samtools sort -@ {threads} {input.bam} > {output.bam}) &> {log}"

################################################################################
### Index alignment file
################################################################################

rule samtools_index:
	input:
		bam = os.path.join(config["output_dir"], "{sample}", "STAR_Aligned.out.sorted.bam")
	output:
		bai = os.path.join(config["output_dir"], "{sample}", "STAR_Aligned.out.sorted.bam.bai")
	log:
		os.path.join(config["local_log"],"samtools_index_{sample}.log")
	singularity:
		"docker://zavolab/samtools:1.8"
	shell:
		"(samtools index {input.bam} > {output.bai}) &> {log}"