From cd2052dd15a3b559618afcaafd15e875a0f4acaf Mon Sep 17 00:00:00 2001
From: BIOPZ-Herrmann Christina <christina.herrmann@unibas.ch>
Date: Fri, 13 Dec 2019 12:04:36 +0100
Subject: [PATCH] Started working on subpipelines

---
 prepare_annotation/Snakefile         | 24 +--------------------
 process_data/Snakefile               | 31 ++++++++++++++++++++++++----
 process_data/config.yaml             |  3 +++
 process_data/preprocessing.snakefile | 28 +++++++++++++++++++++++++
 4 files changed, 59 insertions(+), 27 deletions(-)
 create mode 100644 process_data/preprocessing.snakefile

diff --git a/prepare_annotation/Snakefile b/prepare_annotation/Snakefile
index efd220e..94c9db7 100644
--- a/prepare_annotation/Snakefile
+++ b/prepare_annotation/Snakefile
@@ -110,29 +110,7 @@ rule generate_segemehl_index_other_RNAs:
 ### Index genome STAR
 #################################################################################
 
-rule index_genome_STAR:
-	input:
-		genome = os.path.join(config["output_dir"], "genome.fa"),
-		annotation = os.path.join(config["output_dir"], "annotation.gtf")
-	output:
-		output = os.path.join(config["output_dir"], "STAR_index")
-	params:
-		outputdir = os.path.join(config["output_dir"],"STAR_index"),
-		sjdbOverhang = config["sjdbOverhang"]
-	threads:	8
-	singularity:
-		"docker://zavolab/star:2.6.0a"
-	log:
-		os.path.join(config["local_log"],"index_genome_STAR.log")
-	shell:
-		"mkdir -p {output.output}; \
-		chmod -R 777 {output.output}; \
-		(STAR --runMode genomeGenerate \
-		--sjdbOverhang {params.sjdbOverhang} \
-		--genomeDir {params.outputdir} \
-		--genomeFastaFiles {input.genome} \
-		--runThreadN {threads} \
-		--sjdbGTFfile {input.annotation}) &> {log}"
+
 
 ##################################################################################
 ### Filter protein coding and lncRNA transcripts
diff --git a/process_data/Snakefile b/process_data/Snakefile
index edd9baa..0343922 100644
--- a/process_data/Snakefile
+++ b/process_data/Snakefile
@@ -1,5 +1,16 @@
+import pandas as pd
+
 configfile: "config.yaml"
 
+# Sample table: tab-separated, first row is the header, first column becomes the row index,
+# and anything after a '#' on a line is ignored.
+# NOTE(review): confirm that 'samples_table' is defined in config.yaml.
+samples = pd.read_table(config['samples_table'], header=0, index_col=0, comment='#', engine='python')
+# Per-sample output name: Sample_name directly followed by Library_Type (no separator); both must be regular columns, not the index.
+samples['out_name'] = samples['Sample_name'] + samples['Library_Type']
+
+
+
 localrules: finish
 
 #################################################################################
@@ -8,10 +19,22 @@ localrules: finish
 
 rule finish:
 	input:
-		fastqc = expand(os.path.join(config["output_dir"], "{sample}", "fastqc"), sample=config["sample"]),
-		htseq_qa = expand(os.path.join(config["output_dir"], "{sample}", "htseq_qa", "htseq_quality.pdf"), sample=config["sample"]),
-		gn_estimates = expand(os.path.join(config["output_dir"], "{sample}", "salmon_quant", "quant.genes.sf"), sample=config["sample"]),
-		bam = expand(os.path.join(config["output_dir"], "{sample}", "STAR_Aligned.out.bam"), sample=config["sample"])
+		final_sample = expand(os.path.join(config["output_dir"], "{sample}", "fastqc"), sample=samples['out_name'].values),
+		# Only one fastqc target per samples['out_name'] entry is requested; the config["sample"]-based targets below are disabled.
+		#fastqc = expand(os.path.join(config["output_dir"], "{sample}", "fastqc"), sample=config["sample"]),
+		#htseq_qa = expand(os.path.join(config["output_dir"], "{sample}", "htseq_qa", "htseq_quality.pdf"), sample=config["sample"]),
+		#gn_estimates = expand(os.path.join(config["output_dir"], "{sample}", "salmon_quant", "quant.genes.sf"), sample=config["sample"]),
+		#bam = expand(os.path.join(config["output_dir"], "{sample}", "STAR_Aligned.out.bam"), sample=config["sample"])
+
+
+##################################################################################
+# Execution dependent on sequencing mode
+##################################################################################
+
+# NOTE(review): confirm both included files exist in process_data/; preprocessing.snakefile does not appear to be included - verify.
+include: 'paired_end.snakefile'
+include: 'single_end.snakefile'
+
 
 ##################################################################################
 ### Fastqc
diff --git a/process_data/config.yaml b/process_data/config.yaml
index d2c34ba..65bdfa9 100644
--- a/process_data/config.yaml
+++ b/process_data/config.yaml
@@ -2,6 +2,7 @@
   ##############################################################################
   ### Annotation
   ##############################################################################
+  organism: "Homo_sapiens"  # used as a sub-directory of database_path when building the STAR index path
   annotation: "../prepare_annotation/results/annotation.gtf"
   genome: "../prepare_annotation/results/genome.fa"
   annotation_filtered: "../prepare_annotation/results/filtered_transcripts.gtf"
@@ -12,6 +13,8 @@
   ##############################################################################
   ### Output and log directories
   ##############################################################################
+  database_path: "/scicore/home/zavolan/GROUP/Rna_Seq_pipeline/Blabla"  # root directory under which the STAR index is written; NOTE(review): "Blabla" looks like a placeholder - confirm
+  STAR_idx_folder: "STAR_indices"  # folder below <database_path>/<organism> that holds the STAR index directories
   output_dir: "results"
   local_log: "logs/local_log"
   cluster_log: "logs/cluster_log"
diff --git a/process_data/preprocessing.snakefile b/process_data/preprocessing.snakefile
new file mode 100644
index 0000000..54d1d32
--- /dev/null
+++ b/process_data/preprocessing.snakefile
@@ -0,0 +1,28 @@
+
+
+rule index_genome_STAR:
+	'''
+	Create a STAR index for the sjdbOverhang given by the "sjdb" wildcard; genome and annotation paths are read from the config
+	'''
+	input:
+		genome = config["genome"],
+		annotation = config["annotation"]
+	output:
+		output = os.path.join(config["database_path"], config["organism"], config["STAR_idx_folder"], "STAR_index{sjdb}")
+	params:
+		# The overhang comes from the requested index path, so every sjdbOverhang value gets its own index directory
+		sjdbOverhang = lambda wildcards: wildcards.sjdb
+	threads:	8
+	singularity:
+		"docker://zavolab/star:2.6.0a"
+	log:
+		os.path.join(config["local_log"],"index_genome_STAR_{sjdb}.log")
+	shell:
+		"mkdir -p {output.output}; \
+		chmod -R 777 {output.output}; \
+		(STAR --runMode genomeGenerate \
+		--sjdbOverhang {params.sjdbOverhang} \
+		--genomeDir {output.output} \
+		--genomeFastaFiles {input.genome} \
+		--runThreadN {threads} \
+		--sjdbGTFfile {input.annotation}) &> {log}"
\ No newline at end of file
-- 
GitLab