From 44d3f3e31b0ba564fda14aaf334abf99ecf3655c Mon Sep 17 00:00:00 2001
From: fgypas <fgypas@gmail.com>
Date: Fri, 20 Dec 2019 18:25:22 +0100
Subject: [PATCH] Rename snakemake/paired_end.snakemake to
 snakemake/paired_end.snakefile. Fix wiring of rules. Add fake tests.

---
 snakemake/Snakefile                           | 126 +++++++++-
 snakemake/config.yaml                         |   8 +-
 ...red_end.snakemake => paired_end.snakefile} | 188 ++++----------
 snakemake/single_end.snakefile                | 238 ++++++------------
 tests/samples.tsv                             |   3 +
 5 files changed, 245 insertions(+), 318 deletions(-)
 rename snakemake/{paired_end.snakemake => paired_end.snakefile} (64%)
 create mode 100644 tests/samples.tsv

diff --git a/snakemake/Snakefile b/snakemake/Snakefile
index e0b8f6f..b541c1e 100644
--- a/snakemake/Snakefile
+++ b/snakemake/Snakefile
@@ -1,20 +1,132 @@
+configfile: "config.yaml"
+
+################################################################################
+### python modules
+################################################################################
+
+import os
+import sys
 import pandas as pd
 
-configfile: "config.yaml"
+############################
+
+samples_table = pd.read_csv(config["samples"], header=0, index_col=0, comment='#', engine='python', sep="\t")
 
 localrules: finish
 
+##################################################################################
+# Execution dependend on sequencing mode
+##################################################################################
+
+include: 'paired_end.snakefile'
+include: 'single_end.snakefile'
+
 #################################################################################
 ### Final rule
 #################################################################################
 
 rule finish:
 	input:
-		final_sample = expand()
+		outdir1 = expand(os.path.join(config["output_dir"], "paired_end", "{sample}", "mate1_fastqc"), sample=samples_table.index.values),
+		outdir2 = expand(os.path.join(config["output_dir"], "paired_end", "{sample}", "mate2_fastqc"), sample=samples_table.index.values)
 
-##################################################################################
-# Execution dependend on sequencing mode
-##################################################################################
 
-include: 'paired_end.snakefile'
-include: 'single_end.snakefile'
+rule create_index_star:
+	''' Create index using STAR'''
+	input:
+		genome = lambda wildcards: samples_table.loc[wildcards.sample, 'genome'],
+		gtf = lambda wildcards: samples_table.loc[wildcards.sample, 'gtf']
+	output:
+		chromosome_info = os.path.join(
+			config["star_indexes"],
+			"{organism}",
+			"{index_size}",
+			"STAR_index",
+			"chrNameLength.txt"),
+		chromosomes_names = os.path.join(
+			config["star_indexes"],
+			"{organism}",
+			"{index_size}",
+			"STAR_index",
+			"chrName.txt")
+	params:
+		output_dir = os.path.join(
+				config["star_indexes"],
+				"{organism}",
+				"{index_size}",
+				"STAR_index"),
+		outFileNamePrefix = os.path.join(
+				config["star_indexes"],
+				"{organism}",
+				"{index_size}",
+				"STAR_index/STAR_"),
+		sjdbOverhang = lambda wildcards:
+			samples_table[wildcards.sample, "index_size"],
+	singularity:
+		"docker://zavolab/star:2.6.0a"
+	threads: 12
+	log:
+		os.path.join( config["local_log"], "{organism}_{index_size}_create_index_star.log")
+	shell:
+		"(mkdir -p {params.output_dir}; \
+		chmod -R 777 {params.output_dir}; \
+		STAR \
+		--runMode genomeGenerate \
+		--sjdbOverhang {params.sjdbOverhang} \
+		--genomeDir {params.output_dir} \
+		--genomeFastaFiles {input.genome} \
+		--runThreadN {threads} \
+		--outFileNamePrefix {params.outFileNamePrefix} \
+		--sjdbGTFfile {input.gtf}) &> {log}"
+
+
+rule create_index_salmon:
+	'''Create index for salmon quantification'''
+	input:
+		transcriptome = lambda wildcards:
+			samples_table.loc[wildcards.sample, 'tr_fasta_filtered']
+	output:
+		index = os.path.join(
+			config["salmon_indexes"],
+			"{organism}",
+			"salmon.idx")
+	params:
+		kmerLen = lambda wildcards:
+			samples_table.loc[wildcards.sample, 'kmer']
+	singularity:
+		"docker://zavolab/salmon:0.11.0"
+	log:
+		os.path.join(config["local_log"], "{organism}_create_index_salmon.log")
+	threads:	8
+	shell:
+		"(salmon index \
+		--t {input.transcriptome} \
+		--i {output.index} \
+		--k {params.kmerLen} \
+		--threads {threads}) &> {log}"
+
+
+rule create_index_kallisto:
+	'''Create index for running Kallisto'''
+	input:
+		transcriptome = lambda wildcards:
+			samples_table.loc[wildcards.sample, 'tr_fasta_filtered']
+	output:
+		index = os.path.join(
+				config["kallisto_indexes"],
+				"{organism}",
+				"kallisto.idx")
+	params:
+		output_dir = lambda wildcards:
+			os.path.join(
+				config["kallisto_indexes"],
+				samples_table[wildcards.sample, 'organism'])
+	singularity:
+		"docker://zavolab/kallisto:0.9"
+	log:
+		os.path.join(config["local_log"], "{organism}_create_index_kallisto.log")
+	shell:
+		"(mkdir -p {params.output_dir}; \
+		chmod -R 777 {params.output_dir}; \
+		kallisto index -i {output.index} {input.transcriptome}) &> {log}"
+
diff --git a/snakemake/config.yaml b/snakemake/config.yaml
index 65bdfa9..80641a9 100644
--- a/snakemake/config.yaml
+++ b/snakemake/config.yaml
@@ -16,12 +16,14 @@
   database_path: "/scicore/home/zavolan/GROUP/Rna_Seq_pipeline/Blabla"
   STAR_idx_folder: "STAR_indices"
   output_dir: "results"
+  star_indexes: "results"
+  salmon_indexes: "results"
+  kallisto_indexes: "results"
   local_log: "logs/local_log"
   cluster_log: "logs/cluster_log"
+  
   ##############################################################################
   ### Sample info
   ##############################################################################
-  input_dir: "samples"
-  sample: ["test"]
-  test: {libType: A, fldMean: 300, fldSD: 100}
+  samples: "../tests/samples.tsv"
 ...
diff --git a/snakemake/paired_end.snakemake b/snakemake/paired_end.snakefile
similarity index 64%
rename from snakemake/paired_end.snakemake
rename to snakemake/paired_end.snakefile
index b9443eb..ebc283a 100644
--- a/snakemake/paired_end.snakemake
+++ b/snakemake/paired_end.snakefile
@@ -1,13 +1,15 @@
 
-rule fastqc:
+rule pe_fastqc:
 	'''A quality control tool for high throughput sequence data'''
 
 	input:
-		reads1 = os.path.join(get_fq_1("{sample}")),
-		reads2 = os.path.join(get_fq_2("{sample}"))
+		reads1 = lambda wildcards: samples_table.loc[wildcards.sample, "fq1"],
+		reads2 = lambda wildcards: samples_table.loc[wildcards.sample, "fq2"]
 	output:
 		outdir1 = directory(os.path.join(config["output_dir"], "paired_end", "{sample}", "mate1_fastqc")),
 		outdir2 = directory(os.path.join(config["output_dir"], "paired_end", "{sample}", "mate2_fastqc"))
+	threads:
+		2
 	singularity:
 		"docker://zavolab/fastqc:0.11.8"
 	log:
@@ -15,15 +17,14 @@ rule fastqc:
 	shell:
 		"(mkdir -p {output.outdir1}; \
 		mkdir -p {output.outdir2}; \
-		fastqc --outdir {output.outdir1} {input.reads1}; \
+		fastqc --outdir {output.outdir1} {input.reads1}; & \
 		fastqc --outdir {output.outdir2} {input.reads2}) &> {log}"
 
-
-rule htseq_qa:
+rule pe_htseq_qa:
 	'''Assess the technical quality of a run'''
 	input:
-		reads1 = os.path.join(get_fq_1("{sample}")),
-		reads2 = os.path.join(get_fq_2("{sample}"))
+		reads1 = lambda wildcards: samples_table.loc[wildcards.sample, "fq1"],
+		reads2 = lambda wildcards: samples_table.loc[wildcards.sample, "fq2"]
 	output:
 		qual_pdf_mate1 = os.path.join(config["output_dir"], "paired_end", "{sample}", "htseq_quality_mate1.pdf"),
 		qual_pdf_mate2 = os.path.join(config["output_dir"], "paired_end", "{sample}", "htseq_quality_mate2.pdf")
@@ -38,11 +39,11 @@ rule htseq_qa:
 		htseq-qa -t fastq -o {output.qual_pdf_mate2} {input.reads2}; ) &> {log}"
 
 
-rule remove_adapters_cutadapt:
+rule pe_remove_adapters_cutadapt:
 	'''Remove adapters'''
 	input:
-		reads1 = os.path.join(get_fq_1("{sample}")),
-		reads2 = os.path.join(get_fq_2("{sample}"))
+		reads1 = lambda wildcards: samples_table.loc[wildcards.sample, "fq1"],
+		reads2 = lambda wildcards: samples_table.loc[wildcards.sample, "fq2"]
 	output:
 		reads1 = os.path.join(
 			config["output_dir"],
@@ -56,13 +57,13 @@ rule remove_adapters_cutadapt:
 			"{sample}.remove_adapters_mate2.fastq.gz")
 	params:
 		adapter_3_mate1 = lambda wildcards:
-			sample_table.loc[wildcards.sample, 'fq1_3p'],
+			samples_table.loc[wildcards.sample, 'fq1_3p'],
 		adapter_5_mate1 = lambda wildcards:
-			sample_table.loc[wildcards.sample, 'fq1_5p'],
+			samples_table.loc[wildcards.sample, 'fq1_5p'],
 		adapter_3_mate2 = lambda wildcards:
-			sample_table.loc[wildcards.sample, 'fq2_3p'],
+			samples_table.loc[wildcards.sample, 'fq2_3p'],
 		adapter_5_mate2 = lambda wildcards:
-			sample_table.loc[wildcards.sample, 'fq2_5p']
+			samples_table.loc[wildcards.sample, 'fq2_5p']
 	singularity:
 		"docker://zavolab/cutadapt:1.16"
 	threads: 8
@@ -85,7 +86,7 @@ rule remove_adapters_cutadapt:
 		{input.reads2}) &> {log}"
 
 
-rule remove_polya_cutadapt:
+rule pe_remove_polya_cutadapt:
 	'''Remove polyA tails'''
 	input:
 		reads1 = os.path.join(
@@ -111,9 +112,9 @@ rule remove_polya_cutadapt:
 			"{sample}.remove_polya_mate2.fastq.gz")
 	params:
 		polya_3_mate1 = lambda wildcards:
-			sample_table.loc[wildcards.sample, 'fq1_polya'],
+			samples_table.loc[wildcards.sample, 'fq1_polya'],
 		polya_3_mate2 = lambda wildcards:
-			sample_table.loc[wildcards.sample, 'fq2_polya'],
+			samples_table.loc[wildcards.sample, 'fq2_polya'],
 	singularity:
 		"docker://zavolab/cutadapt:1.16"
 	threads: 8
@@ -137,65 +138,16 @@ rule remove_polya_cutadapt:
 		{input.reads2}) &> {log}'
 
 
-rule create_index_star:
-	''' Create index using STAR'''
+rule pe_map_genome_star:
+	'''Map to genome using STAR'''
 	input:
-		genome = sample_table.loc["{sample}", 'genome'],
-		gtf = sample_table.loc["{sample}", 'gtf']
-	output:
-		chromosome_info = os.path.join(
-			config["star_indexes"],
-			sample_table.loc[wildcards.sample, "organism"],
-			sample_table.loc["{sample}", "index_size"],
-			"STAR_index",
-			"chrNameLength.txt"),
-		chromosomes_names = os.path.join(
-			config["star_indexes"],
-			sample_table.loc["{sample}", "organism"],
-			sample_table.loc["{sample}", "index_size"],
-			"STAR_index",
-			"chrName.txt")
-	params:
-		output_dir = lambda wildcards:
+		index = lambda wildcards:
 			os.path.join(
 				config["star_indexes"],
-				sample_table.loc[wildcards.sample, "organism"],
-				sample_table.loc[wildcards.sample, "index_size"],
-				"STAR_index"),
-		outFileNamePrefix = os.path.join(
-				config["star_indexes"],
-				sample_table.loc[wildcards.sample, "organism"],
-				sample_table.loc[wildcards.sample, "index_size"],
-				"STAR_index/STAR_"),
-		sjdbOverhang = lambda wildcards:
-			sample_table.loc[wildcards.sample, "index_size"]
-	singularity:
-		"docker://zavolab/star:2.6.0a"
-	threads: 12
-	log:
-		os.path.join( config["local_log"], "paired_end", "{sample}", "create_index_star.log")
-	shell:
-		"(mkdir -p {params.output_dir}; \
-		chmod -R 777 {params.output_dir}; \
-		STAR \
-		--runMode genomeGenerate \
-		--sjdbOverhang {params.sjdbOverhang} \
-		--genomeDir {params.output_dir} \
-		--genomeFastaFiles {input.genome} \
-		--runThreadN {threads} \
-		--outFileNamePrefix {params.outFileNamePrefix} \
-		--sjdbGTFfile {input.gtf}) &> {log}"
-
-
-rule map_genome_star:
-	'''Map to genome using STAR'''
-	input:
-		index = os.path.join(
-			config["star_indexes"],
-			sample_table.loc["{sample}", "organism"],
-			sample_table.loc["{sample}", "index_size"],
-			"STAR_index",
-			"chrNameLength.txt"),
+				samples_table.loc[wildcards.sample, "organism"],
+				samples_table.loc[wildcards.sample, "index_size"],
+				"STAR_index",
+				"chrNameLength.txt"),
 		reads1 = os.path.join(
 			config["output_dir"],
 			"paired_end",
@@ -224,7 +176,7 @@ rule map_genome_star:
 		index = lambda wildcards:
 			os.path.join(
 				config["star_indexes"],
-				sample_table.loc[wildcards.sample, "index_size"],
+				samples_table.loc[wildcards.sample, "index_size"],
 				"STAR_index"),
 		outFileNamePrefix = os.path.join(
 			config["output_dir"],
@@ -233,11 +185,11 @@ rule map_genome_star:
 			"map_genome",
 			"{sample}_"),
 		multimappers = lambda wildcards:
-			sample_table.loc[wildcards.sample, "mulitmappers"],
+			samples_table.loc[wildcards.sample, "mulitmappers"],
 		soft_clip = lambda wildcards:
-			sample_table.loc[wildcards.sample, "soft_clip"],
+			samples_table.loc[wildcards.sample, "soft_clip"],
 		pass_mode = lambda wildcards:
-			sample_table.loc[wildcards.sample, "pass_mode"]
+			samples_table.loc[wildcards.sample, "pass_mode"]
 
 	singularity:
 		"docker://zavolab/star:2.6.0a"
@@ -271,7 +223,7 @@ rule map_genome_star:
 		--alignEndsType {params.soft_clip}} > {output.bam};) &> {log}"
 
 
-rule index_genomic_alignment_samtools:
+rule pe_index_genomic_alignment_samtools:
     '''Index the genomic alignment'''
     input:
         bam = os.path.join(
@@ -296,32 +248,7 @@ rule index_genomic_alignment_samtools:
         "(samtools index {input.bam} {output.bai};) &> {log}"
 
 
-rule create_index_salmon:
-	'''Create index for salmon quantification'''
-	input:
-		transcriptome = sample_table.loc["{sample}", 'tr_fasta_filtered']
-	output:
-		index = os.path.join(
-			config["salmon_indexes"],
-			sample_table["{sample}", 'organism'],
-			"salmon.idx")
-	params:
-		kmerLen = lambda wildcards:
-			sample_table.loc[wildcards.sample, 'kmer']
-	singularity:
-		"docker://zavolab/salmon:0.11.0"
-	log:
-		os.path.join(config["local_log"], "paired_end", "{sample}", "create_index_salmon.log")
-	threads:	8
-	shell:
-		"(salmon index \
-		--t {input.transcriptome} \
-		--i {output.index} \
-		--k {params.kmerLen} \
-		--threads {threads}) &> {log}"
-
-
-rule quantification_salmon:
+rule pe_quantification_salmon:
 	'''Quantification at transcript and gene level using Salmon'''
 	input:
 		reads1 = os.path.join(
@@ -334,11 +261,13 @@ rule quantification_salmon:
 			"paired_end",
 			"{sample}",
 			"{sample}.remove_polya_mate2.fastq.gz"),
-		gtf = sample_table["{sample}", 'gtf_filtered'],
-		index = os.path.join(
-			config["salmon_indexes"],
-			sample_table["{sample}", 'organism'],
-			"salmon.idx")
+		gtf = lambda wildcards:
+			samples_table.loc[wildcards.sample, 'gtf_filtered'],
+		index = lambda wildcards:
+			os.path.join(
+				config["salmon_indexes"],
+				samples_table.loc[wildcards.sample, 'organism'],
+				"salmon.idx")
 	output:		
 		gn_estimates = os.path.join(
 			config["output_dir"],
@@ -359,7 +288,7 @@ rule quantification_salmon:
 			"{sample}",
 			"salmon_quant"),
 		libType = lambda wildcards:
-			sample_table.loc[wildcards.sample, 'libtype']
+			samples_table.loc[wildcards.sample, 'libtype']
 	log:
 		os.path.join(config["local_log"], "paired_end", "{sample}", "genome_quantification_salmon.log")
 	threads:	6
@@ -379,31 +308,7 @@ rule quantification_salmon:
         -o {params.output_dir}) &> {log}"
 
 
-rule create_index_kallisto:
-	'''Create index for running Kallisto'''
-	input:
-		transcriptome = sample_table[,"{sample}", 'tr_fasta_filtered']
-	output:
-		index = os.path.join(
-			config["kallisto_indexes"],
-			sample_table["{sample}", 'organism'],
-			"kallisto.idx")
-	params:
-		output_dir = lambda wildcards:
-			os.path.join(
-				config["kallisto_indexes"],
-				sample_table[wildcards.sample, 'organism'])
-	singularity:
-		"docker://zavolab/kallisto:0.9"
-	log:
-		os.path.join(config["local_log"], "paired_end", "{sample}", "create_index_kallisto.log")
-	shell:
-		"(mkdir -p {params.output_dir}; \
-		chmod -R 777 {params.output_dir}; \
-		kallisto index -i {output.index} {input.transcriptome}) &> {log}"
-
-
-rule genome_quantification_kallisto:
+rule pe_genome_quantification_kallisto:
 	'''Quantification at transcript and gene level using Kallisto'''
 	input:
 		reads1 = os.path.join(
@@ -416,10 +321,11 @@ rule genome_quantification_kallisto:
 			"paired_end",
 			"{sample}",
 			"{sample}.remove_polya_mate2.fastq.gz"),
-		index = os.path.join(
-			config["kallisto_indexes"],
-			sample_table["{sample}", 'organism'],
-			"kallisto.idx")
+		index = lambda wildcards:
+			os.path.join(
+				config["kallisto_indexes"],
+				samples_table.loc[wildcards.sample, 'organism'],
+				"kallisto.idx")
 	output:
 		pseudoalignment = os.path.join(
 			config["output_dir"],
@@ -435,7 +341,7 @@ rule genome_quantification_kallisto:
 				wildcards.sample,
 				"quant_kallisto"),
 		directionality = lambda wildcards:
-			sample_table.loc[wildcards.sample, "kallisto_directionality"]
+			samples_table.loc[wildcards.sample, "kallisto_directionality"]
 	singularity:
 		"docker://zavolab/kallisto:0.9"
 	threads:	8
diff --git a/snakemake/single_end.snakefile b/snakemake/single_end.snakefile
index 72abaec..970dec4 100644
--- a/snakemake/single_end.snakefile
+++ b/snakemake/single_end.snakefile
@@ -1,8 +1,8 @@
-
+import os
 rule fastqc:
 	''' A quality control tool for high throughput sequence data. '''
 	input:
-		reads = os.path.join(get_fq_1("{sample}"))
+		reads = lambda wildcards: samples_table.loc[wildcards.sample, "fq1"],
 	output:
 		outdir = directory(os.path.join(config["output_dir"], "single_end", "{sample}", "fastqc"))
 	singularity:
@@ -19,7 +19,7 @@ rule fastqc:
 rule htseq_qa:
 	''' Assess the technical quality of a run. '''
 	input:
-		reads = os.path.join(get_fq_1("{sample}"))
+		reads = lambda wildcards: samples_table.loc[wildcards.sample, "fq1"]
 	output:
 		qual_pdf = os.path.join(config["output_dir"], "single_end", "{sample}", "htseq_quality.pdf")
 	singularity:
@@ -36,14 +36,14 @@ rule htseq_qa:
 rule remove_adapters_cutadapt:
 	''' Remove adapters '''
 	input:
-		reads = os.path.join(get_fq_1("{sample}"))
+		reads = lambda wildcards: samples_table.loc[wildcards.sample, "fq1"]
 	output:
 		reads = os.path.join(config["output_dir"], "single_end", "{sample}", "{sample}.remove_adapters.fastq.gz")
 	params:
 		adapters_3 = lambda wildcards: 
-			sample_table.loc[wildcards.sample, 'fq1_3p']
+			samples_table.loc[wildcards.sample, 'fq1_3p'],
 		adapters_5 = lambda wildcards: 
-			sample_table.loc[wildcards.sample, 'fq1_5p']
+			samples_table.loc[wildcards.sample, 'fq1_5p']
 
 	singularity:
 		"docker://zavolab/cutadapt:1.16"
@@ -66,13 +66,12 @@ rule remove_adapters_cutadapt:
 rule remove_polya_cutadapt:
 	''' Remove ployA  tails'''
 	input:
-		reads = os.path.join(get_fq_1("{sample}"))
+		reads = lambda wildcards: samples_table[wildcards.sample, "fq1"]
 	output:
 		reads = os.path.join(config["output_dir"], "single_end", "{sample}", "{sample}.remove_polya.fastq.gz")
 	params:
-		polya_3 = 
-		adapters_3 = lambda wildcards: 
-			sample_table.loc[wildcards.sample, "fq1_polya"]
+		polya_3 = lambda wildcards: 
+			samples_table.loc[wildcards.sample, "fq1_polya"]
 	singularity:
 		"docker://zavolab/cutadapt:1.16"
 	threads: 8
@@ -92,63 +91,15 @@ rule remove_polya_cutadapt:
 		{input.reads}) &> {log}"
 
 
-rule create_index_star:
-	''' Create index using STAR. '''
-	input:
-		genome = sample_table.loc["{sample}", "genome"],
-		gtf = sample_table.loc["{sample}", "gtf"]
-	output:
-		chromosome_info = os.path.join(
-					config["star_indexes"],
-					sample_table.loc["{sample}", "organism"],
-					sample_table.loc["{sample}", "index_size"], 
-					"STAR_index","chrNameLength.txt"),
-		chromosome_names = os.path.join(
-					config["star_indexes"],
-					sample_table.loc["{sample}", "organism"], 
-					sample_table.loc["{sample}", "index_size"], 
-					"STAR_index", "chrName.txt")
-	params:
-		output_dir = lambda wildcards:
-				os.path.join(
-					config["star_indexes"],
-					sample_table.loc["{sample}", "organism"],
-					sample_table.loc[wildcards.sample, "index_size"], 
-					"STAR_index"),
-		outFileNamePrefix = lambda wildcards:
-				os.path.join(
-					config["star_indexes"],
-					sample_table.loc["{sample}", "organism"],
-					sample_table.loc[wildcards.sample, "index_size"], 
-					"STAR_index/STAR_"),
-		sjdbOverhang = lambda wildcards:
-			sample_table.loc[wildcards.sample, "index_size"]
-	singularity:
-		"docker://zavolab/star:2.6.0a"
-	threads: 12
-	log:
-		os.path.join(config["local_log"], "single_end", "{sample}", "create_index_star.log")
-	shell:
-		"(mkdir -p {params.output_dir}; \
-		chmod -R 777 {params.output_dir}; \
-		STAR \
-		--runMode genomeGenerate \
-		--sjdbOverhang {params.sjdbOverhang} \
-		--genomeDir {params.output_dir} \
-		--genomeFastaFiles {input.genome} \
-		--runThreadN {threads} \
-		--outFileNamePrefix {params.outFileNamePrefix} \
-		--sjdbGTFfile {input.gtf}) &> {log}"
-
-
 rule map_genome_star:
 	''' Map to genome using STAR. '''
 	input:
-		index = os.path.join(
-					config["star_indexes"],
-					sample_table.loc["{sample}", "organism"],
-					sample_table.loc["{sample}", "index_size"], 
-					"STAR_index","chrNameLength.txt"),
+		index = lambda wildcards:
+			os.path.join(
+				config["star_indexes"],
+				samples_table.loc[wildcards.sample, "organism"],
+				samples_table.loc[wildcards.sample, "index_size"], 
+				"STAR_index","chrNameLength.txt"),
 		reads = os.path.join(config["output_dir"], "single_end", "{sample}", "{sample}.remove_polya.fastq.gz")
 	output:
 		bam = os.path.join(config["output_dir"], "single_end", 
@@ -160,12 +111,12 @@ rule map_genome_star:
 			"map_genome", 
 			"{sample}_Log.final.out")
 	params:
-		sample_id = "{sample}"
+		sample_id = "{sample}",
 		index = lambda wildcards:
 				os.path.join(
 					config["star_indexes"],
-					sample_table.loc["{sample}", "organism"],
-					sample_table.loc[wildcards.sample, "index_size"], 
+					samples_table.loc["{sample}", "organism"],
+					samples_table.loc[wildcards.sample, "index_size"], 
 					"STAR_index"),
 		outFileNamePrefix = lambda wildcards:
 				os.path.join(
@@ -173,11 +124,11 @@ rule map_genome_star:
 					"single_end", 
 					"{sample}", "map_genome", "{sample}_"),
 		multimappers = lambda wildcards:
-				sample_table.loc[wildcards.sample, "multimappers"],
+				samples_table.loc[wildcards.sample, "multimappers"],
 		soft_clip = lambda wildcards:
-				sample_table.loc[wildcards.sample, "soft_clip"],
+				samples_table.loc[wildcards.sample, "soft_clip"],
 		pass_mode = lambda wildcards:
-				sample_table.loc[wildcards.sample, "pass_mode"],		
+				samples_table.loc[wildcards.sample, "pass_mode"],		
 	singularity:
 		"docker://zavolab/star:2.6.0a"
 	threads: 12
@@ -208,128 +159,80 @@ rule map_genome_star:
 
 
 rule index_genomic_alignment_samtools:
-    '''Index genome bamfile using samtools.'''
-    input:
-        bam = os.path.join(config["output_dir"], 
-        	"single_end", 
-        	"{sample}", 
+	'''Index genome bamfile using samtools.'''
+	input:
+		bam = os.path.join(config["output_dir"],
+			"single_end", 
+			"{sample}", 
 			"map_genome", 
 			"{sample}_Aligned.sortedByCoord.out.bam")
-    output:
-        bai = os.path.join(config["output_dir"], 
-        	"single_end", 
+	output:
+		bai = os.path.join(config["output_dir"], 
+			"single_end", 
 			"{sample}", 
 			"map_genome", 
 			"{sample}_Aligned.sortedByCoord.out.bam.bai")
-    singularity:
-        "docker://zavolab/samtools:1.8"
-    threads: 1
-    log:
-		os.path.join(config["local_log"], "single_end", "{sample}", "index_genomic_alignment_samtools.log")
-    shell:
-        "(samtools index {input.bam} {output.bai};) &> {log}"
-
-
-rule create_index_salmon:
-	''' Create index for Salmon quantification.  '''
-	input:
-		transcriptome = sample_table.loc[wildcards.sample, 'tr_fasta_filtered']
-	output:
-		index = os.path.join(
-			config["salmon_indexes"],
-			sample_table["{sample}", 'organism'],
-			"salmon.idx")
-	params:
-		kmerLen = lambda wildcards:
-			sample_table.loc[wildcards.sample, 'kmer']
 	singularity:
-		"docker://zavolab/salmon:0.11.0"
+		"docker://zavolab/samtools:1.8"
+	threads: 1
 	log:
-		os.path.join(config["local_log"], "single_end", "{sample}", "create_index_salmon.log")
-	threads:	8
+		os.path.join(config["local_log"], "single_end", "{sample}", "index_genomic_alignment_samtools.log")
 	shell:
-		"(salmon index \
-		--t {input.transcriptome} \
-		--i {output.index} \
-		--k {params.kmerLen} \
-		--threads {threads}) &> {log}"
+		"(samtools index {input.bam} {output.bai};) &> {log}"
 
 
 rule quantification_salmon:
 	''' Quantification at transcript and gene level using Salmon. '''
-    input:
-    	reads = os.path.join(
+	input:
+		reads = os.path.join(
 			config["output_dir"], 
 			"single_end", 
 			"{sample}", 
 			"{sample}.remove_polya.fastq.gz"),
-        index = os.path.join(
-			config["salmon_indexes"],
-			sample_table["{sample}", 'organism'],
-			"salmon.idx"),
-       	gtf = sample_table.loc["{sample}", "gtf_filtered"]
-    output:
-    	gn_estimates = os.path.join(
+		index = lambda wildcards:
+			os.path.join(
+				config["salmon_indexes"],
+				samples_table[wildcards.sample, 'organism'],
+				"salmon.idx"),
+	   	gtf = lambda wildcards: samples_table.loc[wildcards.sample, "gtf_filtered"]
+	output:
+		gn_estimates = os.path.join(
 			config["output_dir"], 
 			"single_end", 
 			"{sample}", 
 			"salmon_quant",
 			"quant.genes.sf"),
-    	tr_estimates = os.path.join(
+		tr_estimates = os.path.join(
 			config["output_dir"], 
 			"single_end", 
 			"{sample}", 
 			"salmon_quant",
 			"quant.sf")
-    params:
-        output_dir = os.path.join(
+	params:
+		output_dir = os.path.join(
 			config["output_dir"], 
 			"single_end", 
 			"{sample}", 
 			"salmon_quant"),
-        libType = lambda wildcards:
-     			sample_table.loc[wildcards.sample, "libtype"]
-    log:
-		os.path.join(config["local_log"], "single_end", "{sample}", "quantification_salmon.log")
-    threads:    12
-    conda:
-        "envs/salmon.yaml"
-    shell:
-        "(salmon quant \
-        --libType {params.libType} \
-        --seqBias \
-        --validateMappings \
-        --threads {threads} \
-        --writeUnmappedNames \
-        --index {input.index} \
-        --geneMap {input.gtf} \
-        --unmatedReads {input.reads} \
-        -o {params.output_dir}) &> {log}"
-
-
-
-rule create_index_kallisto:
-	''' Create index for running Kallisto. '''
-	input:
-		transcriptome = sample_table.loc["{sample}", 'tr_fasta_filtered']
-	output:
-		index = os.path.join(
-			config["kallisto_indexes"],
-			sample_table.loc["{sample}", "organism"],
-		 	"kallisto.idx")
-	params:
-		output_dir = lambda wildcards:
-			os.path.join(
-				config["kallisto_indexes"], 
-				sample_table.loc["{sample}", "organism"]),
+		libType = lambda wildcards:
+	 			samples_table.loc[wildcards.sample, "libtype"]
 	log:
-		os.path.join(config["local_log"], "single_end", "{sample}", "create_index_kallisto.log")
-	singularity:
-		"docker://zavolab/kallisto:0.9"
+		os.path.join(config["local_log"], "single_end", "{sample}", "quantification_salmon.log")
+	threads:    12
+	conda:
+		"envs/salmon.yaml"
 	shell:
-		"(mkdir -p {params.output_dir}; \
-		chmod -R 777 {params.output_dir}; \
-		kallisto index -i {output.index} {input.transcriptome}) &> {log}"
+		"(salmon quant \
+		--libType {params.libType} \
+		--seqBias \
+		--validateMappings \
+		--threads {threads} \
+		--writeUnmappedNames \
+		--index {input.index} \
+		--geneMap {input.gtf} \
+		--unmatedReads {input.reads} \
+		-o {params.output_dir}) &> {log}"
+
 
 rule genome_quantification_kallisto:
 	''' Quantification at transcript and gene level using Kallisto. '''
@@ -339,10 +242,11 @@ rule genome_quantification_kallisto:
 			"single_end", 
 			"{sample}", 
 			"{sample}.remove_polya.fastq.gz"),
-		index = os.path.join(
-			config["kallisto_indexes"],
-			sample_table.loc["{sample}", "organism"],
-		 	"kallisto.idx")
+		index = lambda wildcards:
+			os.path.join(
+				config["kallisto_indexes"],
+				samples_table.loc[wildcards.sample, "organism"],
+				"kallisto.idx")
 	output:
 		pseudoalignment = os.path.join(
 			config["output_dir"], 
@@ -356,9 +260,9 @@ rule genome_quantification_kallisto:
 				"single_end", 
 				"{sample}", 
 				"quant_kallisto"),
-		fraglen = lambda wildcards: sample_table.loc[wildcards.sample, 'mean'],
-		fragsd = lambda wildcards: sample_table.loc[wildcards.sample, 'sd'],
-		directionality = lambda wildcards: sample_table.loc[wildcards.sample, 'kallisto_directionality']
+		fraglen = lambda wildcards: samples_table.loc[wildcards.sample, 'mean'],
+		fragsd = lambda wildcards: samples_table.loc[wildcards.sample, 'sd'],
+		directionality = lambda wildcards: samples_table.loc[wildcards.sample, 'kallisto_directionality']
 	threads:	    8
 	log:
 		os.path.join(config["local_log"],"kallisto_align_{sample}.log")
diff --git a/tests/samples.tsv b/tests/samples.tsv
new file mode 100644
index 0000000..fd5216c
--- /dev/null
+++ b/tests/samples.tsv
@@ -0,0 +1,3 @@
+sample	fq1	fq2
+LN18C_rep1	/Users/foivosgypas/Desktop/samples/BSSE_QGF_131557_HHK5FDRXX_1_7_1_LN18C_1_GAATGAGA_GAGGCATT_S1_L001_R1_001_MM_1.fastq.gz	/Users/foivosgypas/Desktop/samples/BSSE_QGF_131557_HHK5FDRXX_1_7_1_LN18C_1_GAATGAGA_GAGGCATT_S1_L001_R2_001_MM_1.fastq.gz
+LN18C_rep2	/Users/foivosgypas/Desktop/samples/BSSE_QGF_131558_HHK5FDRXX_1_7_2_LN18C_2_AGGCAGAG_AGAATGCC_S2_L001_R1_001_MM_1.fastq.gz	/Users/foivosgypas/Desktop/samples/BSSE_QGF_131558_HHK5FDRXX_1_7_2_LN18C_2_AGGCAGAG_AGAATGCC_S2_L001_R2_001_MM_1.fastq.gz
-- 
GitLab