From 100e648eb028cd41589f575da7e1b574eaa644e2 Mon Sep 17 00:00:00 2001
From: BIOPZ-Gypas Foivos <foivos.gypas@unibas.ch>
Date: Thu, 10 Jan 2019 10:59:24 +0100
Subject: [PATCH] Removed cluster_log from rules in prepare_annotation
 Snakefile. Removed create directories rule in prepare_annotation Snakefile.
 Altered cluster.json in prepare_annotation Snakefile. Removed unnecessary info
 from config.yaml in prepare_annotation Snakefile.

---
 snakemake/prepare_annotation/Snakefile        | 33 +--------------
 snakemake/prepare_annotation/cluster.json     | 42 +++++++++----------
 snakemake/prepare_annotation/config.yaml      | 12 +-----
 snakemake/prepare_annotation/run_snakefile.sh |  5 ++-
 4 files changed, 28 insertions(+), 64 deletions(-)

diff --git a/snakemake/prepare_annotation/Snakefile b/snakemake/prepare_annotation/Snakefile
index 5320edf..ff32216 100644
--- a/snakemake/prepare_annotation/Snakefile
+++ b/snakemake/prepare_annotation/Snakefile
@@ -1,6 +1,6 @@
 configfile: "config.yaml"
 
-localrules: create_output_and_log_directories, create_tab_delimited_CDS_file, create_bed_CDS_file, finish
+localrules: create_tab_delimited_CDS_file, create_bed_CDS_file, finish
 
 #################################################################################
 ### Finish rule
@@ -12,36 +12,16 @@ rule finish:
 		idx_transcripts = os.path.join(config["output_dir"], "longest_pc_transcript_per_gene.idx"),
 		bed = os.path.join(config["output_dir"], "transcript_id_gene_id_CDS.bed")
 
-#################################################################################
-### Create output and log directories
-#################################################################################
-
-rule create_output_and_log_directories:
-	output:
-		output_dir = config["output_dir"],
-		cluster_log = config["cluster_log"],
-		local_log = config["local_log"],
-		flag = config["dir_created"]
-	threads:	1
-	shell:
-		"mkdir -p {output.output_dir}; \
-		mkdir -p {output.cluster_log}; \
-		mkdir -p {output.local_log}; \
-		touch {output.flag};"
-
 #################################################################################
 ### Select longest protein coding transcripts
 #################################################################################
 
 rule select_longest_coding_transcripts:
 	input:
-		flag = config["dir_created"],
 		gtf = config["gtf"],
 		script = os.path.join(config["scripts"], "find_longest_coding_transcripts.py")
 	output:
 		gtf = os.path.join(config["output_dir"], "longest_pc_transcript_per_gene.gtf")
-	params:
-		cluster_log =  os.path.join(config["cluster_log"], "select_longest_coding_transcript.log")
 	log:
 		os.path.join(config["local_log"], "select_longest_coding_transcript.log")
 	singularity:
@@ -58,12 +38,9 @@ rule select_longest_coding_transcripts:
 
 rule generate_segemehl_index_other_RNAs:
 	input:
-		flag = config["dir_created"],
 		sequence = config["other_RNAs_sequence"]
 	output:
 		idx = os.path.join(config["output_dir"], "other_RNAs_sequence.idx")
-	params:
-		cluster_log = os.path.join(config["cluster_log"], "generate_segemehl_index_other_RNAs.log")
 	log:
 		os.path.join(config["local_log"], "generate_segemehl_index_other_RNAs.log")
 	singularity:
@@ -81,8 +58,6 @@ rule extract_transcript_sequences:
 		genome = config["genome"]
 	output:
 		transcripts = os.path.join(config["output_dir"], "longest_pc_transcript_per_gene.fa")
-	params:
-		cluster_log = os.path.join(config["cluster_log"], "extract_transcript_sequences.log")
 	log:
 		os.path.join(config["local_log"], "extract_transcript_sequences.log")
 	singularity:
@@ -103,8 +78,6 @@ rule create_tab_delimited_CDS_file:
 		script = os.path.join(config["scripts"], "create_tab_delimited_CDS_file.py")
 	output:
 		tsv = os.path.join(config["output_dir"], "transcript_id_gene_id_CDS.tsv")
-	params:
-		cluster_log = os.path.join(config["cluster_log"], "create_tab_delimited_CDS_file.log")
 	log:
 		os.path.join(config["local_log"], "create_tab_delimited_CDS_file.log")
 	singularity:
@@ -124,8 +97,6 @@ rule create_bed_CDS_file:
 		tsv = os.path.join(config["output_dir"], "transcript_id_gene_id_CDS.tsv")
 	output:
 		bed = os.path.join(config["output_dir"], "transcript_id_gene_id_CDS.bed")
-	params:
-		cluster_log = os.path.join(config["cluster_log"], "create_bed_CDS_file.log")
 	log:
 		os.path.join(config["local_log"], "create_bed_CDS_file.log")
 	# singularity:
@@ -142,8 +113,6 @@ rule generate_segemehl_index_transcripts:
 		sequence = os.path.join(config["output_dir"], "longest_pc_transcript_per_gene.fa")
 	output:
 		idx = os.path.join(config["output_dir"], "longest_pc_transcript_per_gene.idx")
-	params:
-		cluster_log = os.path.join(config["cluster_log"], "generate_segemehl_index_transcripts.log")
 	log:
 		os.path.join(config["local_log"], "generate_segemehl_index_transcripts.log")
 	singularity:
diff --git a/snakemake/prepare_annotation/cluster.json b/snakemake/prepare_annotation/cluster.json
index 7845df8..504c3fd 100644
--- a/snakemake/prepare_annotation/cluster.json
+++ b/snakemake/prepare_annotation/cluster.json
@@ -1,23 +1,23 @@
 {
-"__default__":
-{
-"queue":"6hours",
-"time": "05:00:00",
-"threads":"1",
-"mem":"4G"
-},
-"generate_segemehl_index_other_RNAs":
-{
-"queue":"6hours",
-"time": "06:00:00",
-"threads":"8",
-"mem":"50G"
-},
-"generate_segemehl_index_transcripts":
-{
-"queue":"6hours",
-"time": "06:00:00",
-"threads":"8",
-"mem":"50G"
-}
+  "__default__" :
+  {
+    "queue": "6hours",
+    "time": "05:00:00",
+    "threads": "1",
+    "mem": "4G",
+    "name": "{rule}.{wildcards}",
+    "out": "$PWD/logs/cluster_log/{rule}.{wildcards}-%j-%N.out"
+  },
+  "generate_segemehl_index_other_RNAs":
+  {
+    "time": "06:00:00",
+    "threads":"8",
+    "mem":"50G"
+  },
+  "generate_segemehl_index_transcripts":
+  {
+    "time": "06:00:00",
+    "threads":"8",
+    "mem":"50G"
+  }
 }
diff --git a/snakemake/prepare_annotation/config.yaml b/snakemake/prepare_annotation/config.yaml
index 2f104b8..02a2810 100644
--- a/snakemake/prepare_annotation/config.yaml
+++ b/snakemake/prepare_annotation/config.yaml
@@ -9,15 +9,7 @@
   ### Output and log directory
   ##############################################################################
   output_dir: "results"
-  local_log: "results/local_log"
-  cluster_log: "results/cluster_log"
-  dir_created: "results/dir_created"
+  local_log: "logs/local_log"
+  cluster_log: "logs/cluster_log"
   scripts: "scripts"
-  ##############################################################################
-  ### sample info
-  ##############################################################################
-  input_dir: "samples"
-  input_reads_pattern: ".fastq.gz"
-  sample: ["example"]
-  example: {adapter: GATCGGAAGAGCACA}
 ...
diff --git a/snakemake/prepare_annotation/run_snakefile.sh b/snakemake/prepare_annotation/run_snakefile.sh
index 6791403..7a004f2 100755
--- a/snakemake/prepare_annotation/run_snakefile.sh
+++ b/snakemake/prepare_annotation/run_snakefile.sh
@@ -1,5 +1,8 @@
 # set -e
 
+mkdir -p logs/cluster_log
+mkdir -p logs/local_log
+
 snakemake \
 --cluster-config cluster.json \
 --cluster "sbatch --cpus-per-task={cluster.threads} --mem={cluster.mem} --qos={cluster.queue} --time={cluster.time} --output={params.cluster_log}-%j-%N -p scicore" \
@@ -7,4 +10,4 @@ snakemake \
 -p \
 --rerun-incomplete \
 --use-singularity \
---singularity-args "--bind ${PWD}"
\ No newline at end of file
+--singularity-args "--bind ${PWD}"
-- 
GitLab