configfile: "config.yaml"

################################################################################
### python modules
################################################################################

import os
import sys
import pandas as pd

############################

# Sample sheet: tab-separated, header on the first line, first column used as
# the row index (the sample name), lines starting with '#' are skipped.
samples_table = pd.read_csv(
    config["samples"],
    header=0,
    index_col=0,
    comment='#',
    engine='python',
    sep="\t")


def _value_for_organism(column, organism):
    """Return the `column` entry of the first sample row for `organism`.

    The index-building rules below are keyed by the {organism} wildcard and
    have no {sample} wildcard, so per-organism resources (genome, annotation,
    transcriptome, k-mer size) are looked up through the 'organism' column of
    the samples table rather than through the sample index.

    NOTE(review): assumes all samples that share an organism also share the
    requested resource; only the first matching row is used -- confirm.

    Raises:
        KeyError: if no row of the samples table has this organism.
    """
    matches = samples_table.loc[
        samples_table["organism"].astype(str) == str(organism), column]
    if matches.empty:
        raise KeyError(
            "No sample with organism '{}' found in samples table".format(
                organism))
    return matches.iloc[0]


# Rules executed on the submitting host rather than dispatched as cluster jobs.
localrules: finish

################################################################################
# Execution dependent on sequencing mode
################################################################################

include: 'paired_end.snakefile'
include: 'single_end.snakefile'

################################################################################
### Final rule
################################################################################

rule finish:
    ''' Target rule: request the per-sample paired-end outputs'''
    input:
        outdir1 = expand(
            os.path.join(
                config["output_dir"],
                "paired_end",
                "{sample}",
                "mate1_fastqc"),
            sample=samples_table.index.values),
        outdir2 = expand(
            os.path.join(
                config["output_dir"],
                "paired_end",
                "{sample}",
                "mate2_fastqc"),
            sample=samples_table.index.values),
        reads1 = expand(
            os.path.join(
                config["output_dir"],
                "paired_end",
                "{sample}",
                "{sample}.remove_polya_mate1.fastq.gz"),
            sample=samples_table.index.values)


rule create_index_star:
    ''' Create index using STAR'''
    input:
        # Looked up by organism: this rule has no {sample} wildcard.
        genome = lambda wildcards:
            _value_for_organism('genome', wildcards.organism),
        gtf = lambda wildcards:
            _value_for_organism('gtf', wildcards.organism)
    output:
        chromosome_info = os.path.join(
            config["star_indexes"],
            "{organism}",
            "{index_size}",
            "STAR_index",
            "chrNameLength.txt"),
        chromosomes_names = os.path.join(
            config["star_indexes"],
            "{organism}",
            "{index_size}",
            "STAR_index",
            "chrName.txt")
    params:
        output_dir = os.path.join(
            config["star_indexes"],
            "{organism}",
            "{index_size}",
            "STAR_index"),
        outFileNamePrefix = os.path.join(
            config["star_indexes"],
            "{organism}",
            "{index_size}",
            "STAR_index/STAR_"),
        # The overhang is taken straight from the {index_size} wildcard of the
        # requested output path.
        # NOTE(review): presumably downstream rules fill {index_size} from the
        # 'index_size' column of the samples table -- confirm.
        sjdbOverhang = lambda wildcards: wildcards.index_size
    singularity:
        "docker://zavolab/star:2.6.0a"
    threads: 12
    log:
        os.path.join(
            config["local_log"],
            "{organism}_{index_size}_create_index_star.log")
    shell:
        "(mkdir -p {params.output_dir}; \
        chmod -R 777 {params.output_dir}; \
        STAR \
        --runMode genomeGenerate \
        --sjdbOverhang {params.sjdbOverhang} \
        --genomeDir {params.output_dir} \
        --genomeFastaFiles {input.genome} \
        --runThreadN {threads} \
        --outFileNamePrefix {params.outFileNamePrefix} \
        --sjdbGTFfile {input.gtf}) &> {log}"


rule create_index_salmon:
    '''Create index for salmon quantification'''
    input:
        # Looked up by organism: this rule has no {sample} wildcard.
        transcriptome = lambda wildcards:
            _value_for_organism('tr_fasta_filtered', wildcards.organism)
    output:
        index = os.path.join(
            config["salmon_indexes"],
            "{organism}",
            "salmon.idx")
    params:
        # NOTE(review): the index path does not encode the k-mer size, so the
        # value of the first sample of this organism is used -- confirm that
        # all samples of one organism share the same 'kmer'.
        kmerLen = lambda wildcards:
            _value_for_organism('kmer', wildcards.organism)
    singularity:
        "docker://zavolab/salmon:0.11.0"
    log:
        os.path.join(
            config["local_log"],
            "{organism}_create_index_salmon.log")
    threads: 8
    shell:
        "(salmon index \
        --transcripts {input.transcriptome} \
        --index {output.index} \
        --kmerLen {params.kmerLen} \
        --threads {threads}) &> {log}"


rule create_index_kallisto:
    '''Create index for running Kallisto'''
    input:
        # Looked up by organism: this rule has no {sample} wildcard.
        transcriptome = lambda wildcards:
            _value_for_organism('tr_fasta_filtered', wildcards.organism)
    output:
        index = os.path.join(
            config["kallisto_indexes"],
            "{organism}",
            "kallisto.idx")
    params:
        # Directory that holds the index; same {organism} as in the output.
        output_dir = os.path.join(
            config["kallisto_indexes"],
            "{organism}")
    singularity:
        "docker://zavolab/kallisto:0.9"
    log:
        os.path.join(
            config["local_log"],
            "{organism}_create_index_kallisto.log")
    shell:
        "(mkdir -p {params.output_dir}; \
        chmod -R 777 {params.output_dir}; \
        kallisto index -i {output.index} {input.transcriptome}) &> {log}"