import pandas as pd configfile: "config.yaml" samples = pd.read_table(config['samples_table'], header=0, index_col=0, comment='#', engine='python') samples['out_name'] = samples['Sample_name'] + samples['Library_Type'] localrules: finish ################################################################################# ### Final rule ################################################################################# rule finish: input: final_sample = expand(os.path.join(config["output_dir"], "{sample}", "fastqc"), sample=samples['out_name'].values), #fastqc = expand(os.path.join(config["output_dir"], "{sample}", "fastqc"), sample=config["sample"]), #htseq_qa = expand(os.path.join(config["output_dir"], "{sample}", "htseq_qa", "htseq_quality.pdf"), sample=config["sample"]), #gn_estimates = expand(os.path.join(config["output_dir"], "{sample}", "salmon_quant", "quant.genes.sf"), sample=config["sample"]), #bam = expand(os.path.join(config["output_dir"], "{sample}", "STAR_Aligned.out.bam"), sample=config["sample"]) ################################################################################## # Execution dependend on sequencing mode ################################################################################## include: 'paired_end.snakefile' include: 'single_end.snakefile' ################################################################################## ### Fastqc ################################################################################## rule fastqc: input: reads = os.path.join(config["input_dir"], "{sample}.fastq.gz") output: outdir = os.path.join(config["output_dir"], "{sample}", "fastqc") singularity: "docker://zavolab/fastqc:0.11.8" log: os.path.join(config["local_log"], "fastqc_{sample}.log") shell: "(mkdir -p {output.outdir}; \ fastqc \ --outdir {output.outdir} \ {input.reads}) &> {log}" ################################################################################## ### HTSeq quality assessment of the fastq file ################################################################################## rule htseq_qa: input: reads = os.path.join(config["input_dir"], "{sample}.fastq.gz") output: qual_pdf = os.path.join(config["output_dir"], "{sample}", "htseq_qa", "htseq_quality.pdf") singularity: "docker://zavolab/python_htseq:3.6.5_0.10.0" log: os.path.join(config["local_log"], "htseq_qa_{sample}.log") shell: "(htseq-qa \ -t fastq \ -o {output.qual_pdf} \ {input.reads} ) &> {log}" ################################################################################## ### Map to other RNAs with Segemehl ################################################################################## rule map_to_other_RNAs: input: reads = os.path.join(config["input_dir"], "{sample}.fastq.gz"), index = config["other_RNAs_index"], sequence = config["other_RNAs_sequence"] output: sam = os.path.join(config["output_dir"], "{sample}", "other_genes.mapped.sam"), reads = os.path.join(config["output_dir"], "{sample}", "other_genes.unmapped.fastq.gz") params: reads = os.path.join(config["output_dir"], "{sample}", "other_genes.unmapped.fastq"), silent = "--silent", accuracy = "90" log: os.path.join(config["local_log"], "map_to_other_genes_{sample}.log") threads: 8 singularity: "docker://zavolab/segemehl:0.2.0" shell: "(segemehl.x \ {params.silent} \ -i {input.index} \ -d {input.sequence} \ -q {input.reads} \ --accuracy {params.accuracy} \ --threads {threads} \ -o {output.sam} \ -u {params.reads}; \ gzip -c {params.reads} > {output.reads}; \ rm {params.reads}) &> {log}" ################################################################################## ### salmon quant ################################################################################## rule salmon_quant: input: reads = os.path.join(config["output_dir"], "{sample}", "other_genes.unmapped.fastq.gz"), gtf = config["annotation_filtered"], index = config["salmon_index"] output: output_dir = os.path.join(config["output_dir"], "{sample}", "salmon_quant"), gn_estimates = os.path.join(config["output_dir"], "{sample}", "salmon_quant", "quant.genes.sf"), tr_estimates = os.path.join(config["output_dir"], "{sample}", "salmon_quant", "quant.sf") params: libType = lambda wildcards: config[wildcards.sample]['libType'], fldMean = lambda wildcards: config[wildcards.sample]['fldMean'], fldSD = lambda wildcards: config[wildcards.sample]['fldSD'], log: os.path.join(config["local_log"], "salmon_quant_{sample}.log") threads: 6 singularity: "docker://zavolab/salmon:0.11.0" shell: "(salmon quant \ --index {input.index} \ --libType {params.libType} \ --unmatedReads <(zcat {input.reads}) \ --seqBias \ --geneMap {input.gtf} \ --fldMean {params.fldMean} \ --fldSD {params.fldSD} \ --threads {threads} \ --output {output.output_dir}) &> {log}" ################################################################################# ### Align reads STAR ################################################################################# rule align_reads_STAR: input: index = config["STAR_index"], reads = os.path.join(config["output_dir"], "{sample}", "other_genes.unmapped.fastq.gz"), gtf = config["annotation"] output: outputfile = os.path.join(config["output_dir"], "{sample}", "STAR_Aligned.out.bam") params: outFileNamePrefix = os.path.join(config["output_dir"], "{sample}", "STAR_") log: os.path.join(config["local_log"],"align_reads_STAR_{sample}.log") threads: 8 singularity: "docker://zavolab/star:2.6.0a" shell: "(STAR --runMode alignReads \ --twopassMode Basic \ --runThreadN {threads} \ --genomeDir {input.index} \ --sjdbGTFfile {input.gtf} \ --readFilesIn {input.reads} \ --readFilesCommand zcat \ --outFileNamePrefix {params.outFileNamePrefix} \ --outSAMtype BAM Unsorted) &> {log}" ################################################################################ ### Sort alignment file ################################################################################ rule sort_bam: input: bam = os.path.join(config["output_dir"], "{sample}", "STAR_Aligned.out.bam") output: bam = os.path.join(config["output_dir"], "{sample}", "STAR_Aligned.out.sorted.bam") threads: 8 log: os.path.join(config["local_log"],"sort_bam_{sample}.log") singularity: "docker://zavolab/samtools:1.8" shell: "(samtools sort -@ {threads} {input.bam} > {output.bam}) &> {log}" ################################################################################ ### Index alignment file ################################################################################ rule samtools_index: input: bam = os.path.join(config["output_dir"], "{sample}", "STAR_Aligned.out.sorted.bam") output: bai = os.path.join(config["output_dir"], "{sample}", "STAR_Aligned.out.sorted.bam.bai") log: os.path.join(config["local_log"],"samtools_index_{sample}.log") singularity: "docker://zavolab/samtools:1.8" shell: "(samtools index {input.bam} > {output.bai}) &> {log}"