Removed cluster_log from rules in prepare_annotation Snakefile. Removed create...

Removed cluster_log from rules in prepare_annotation Snakefile. Removed create directories rule in prepare_annotation Snakefile. Altered cluster.json in prepare_annotation Snakefile. Removed unnecessary info from config.yaml in prepare_annotation Snakefile.

Removed cluster_log from rules in prepare_annotation Snakefile. Removed create...
100e648e · BIOPZ-Gypas Foivos · d18d5994 · 100e648e · 100e648e · 100e648e
Commit 100e648e authored 6 years ago by BIOPZ-Gypas Foivos
--- a/snakemake/prepare_annotation/Snakefile
+++ b/snakemake/prepare_annotation/Snakefile
 configfile: "config.yaml"
-localrules: create_output_and_log_directories, create_tab_delimited_CDS_file, create_bed_CDS_file, finish
+localrules: create_tab_delimited_CDS_file, create_bed_CDS_file, finish
 #################################################################################
 ### Finish rule
@@ -12,36 +12,16 @@ rule finish:
 		idx_transcripts = os.path.join(config["output_dir"], "longest_pc_transcript_per_gene.idx"),
 		bed = os.path.join(config["output_dir"], "transcript_id_gene_id_CDS.bed")
-#################################################################################
-### Create output and log directories
-#################################################################################
-rule create_output_and_log_directories:
-	output:
-		output_dir = config["output_dir"],
-		cluster_log = config["cluster_log"],
-		local_log = config["local_log"],
-		flag = config["dir_created"]
-	threads:	1
-	shell:
-		"mkdir -p {output.output_dir}; \
-		mkdir -p {output.cluster_log}; \
-		mkdir -p {output.local_log}; \
-		touch {output.flag};"
 #################################################################################
 ### Select longest protein coding transcripts
 #################################################################################
 rule select_longest_coding_transcripts:
 	input:
-		flag = config["dir_created"],
 		gtf = config["gtf"],
 		script = os.path.join(config["scripts"], "find_longest_coding_transcripts.py")
 	output:
 		gtf = os.path.join(config["output_dir"], "longest_pc_transcript_per_gene.gtf")
-	params:
-		cluster_log =  os.path.join(config["cluster_log"], "select_longest_coding_transcript.log")
 	log:
 		os.path.join(config["local_log"], "select_longest_coding_transcript.log")
 	singularity:
@@ -58,12 +38,9 @@ rule select_longest_coding_transcripts:
 rule generate_segemehl_index_other_RNAs:
 	input:
-		flag = config["dir_created"],
 		sequence = config["other_RNAs_sequence"]
 	output:
 		idx = os.path.join(config["output_dir"], "other_RNAs_sequence.idx")
-	params:
-		cluster_log = os.path.join(config["cluster_log"], "generate_segemehl_index_other_RNAs.log")
 	log:
 		os.path.join(config["local_log"], "generate_segemehl_index_other_RNAs.log")
 	singularity:
@@ -81,8 +58,6 @@ rule extract_transcript_sequences:
 		genome = config["genome"]
 	output:
 		transcripts = os.path.join(config["output_dir"], "longest_pc_transcript_per_gene.fa")
-	params:
-		cluster_log = os.path.join(config["cluster_log"], "extract_transcript_sequences.log")
 	log:
 		os.path.join(config["local_log"], "extract_transcript_sequences.log")
 	singularity:
@@ -103,8 +78,6 @@ rule create_tab_delimited_CDS_file:
 		script = os.path.join(config["scripts"], "create_tab_delimited_CDS_file.py")
 	output:
 		tsv = os.path.join(config["output_dir"], "transcript_id_gene_id_CDS.tsv")
-	params:
-		cluster_log = os.path.join(config["cluster_log"], "create_tab_delimited_CDS_file.log")
 	log:
 		os.path.join(config["local_log"], "create_tab_delimited_CDS_file.log")
 	singularity:
@@ -124,8 +97,6 @@ rule create_bed_CDS_file:
 		tsv = os.path.join(config["output_dir"], "transcript_id_gene_id_CDS.tsv")
 	output:
 		bed = os.path.join(config["output_dir"], "transcript_id_gene_id_CDS.bed")
-	params:
-		cluster_log = os.path.join(config["cluster_log"], "create_bed_CDS_file.log")
 	log:
 		os.path.join(config["local_log"], "create_bed_CDS_file.log")
 	# singularity:
@@ -142,8 +113,6 @@ rule generate_segemehl_index_transcripts:
 		sequence = os.path.join(config["output_dir"], "longest_pc_transcript_per_gene.fa")
 	output:
 		idx = os.path.join(config["output_dir"], "longest_pc_transcript_per_gene.idx")
-	params:
-		cluster_log = os.path.join(config["cluster_log"], "generate_segemehl_index_transcripts.log")
 	log:
 		os.path.join(config["local_log"], "generate_segemehl_index_transcripts.log")
 	singularity:

--- a/snakemake/prepare_annotation/cluster.json
+++ b/snakemake/prepare_annotation/cluster.json
 {
-"__default__":
+  "__default__" :
-{
+  {
-"queue":"6hours",
+    "queue": "6hours",
-"time": "05:00:00",
+    "time": "05:00:00",
-"threads":"1",
+    "threads": "1",
-"mem":"4G"
+    "mem": "4G",
-},
+    "name": "{rule}.{wildcards}",
-"generate_segemehl_index_other_RNAs":
+    "out": "$PWD/logs/cluster_log/{rule}.{wildcards}-%j-%N.out"
-{
+  },
-"queue":"6hours",
+  "generate_segemehl_index_other_RNAs":
-"time": "06:00:00",
+  {
-"threads":"8",
+    "time": "06:00:00",
-"mem":"50G"
+    "threads":"8",
-},
+    "mem":"50G"
-"generate_segemehl_index_transcripts":
+  },
-{
+  "generate_segemehl_index_transcripts":
-"queue":"6hours",
+  {
-"time": "06:00:00",
+    "time": "06:00:00",
-"threads":"8",
+    "threads":"8",
-"mem":"50G"
+    "mem":"50G"
-}
+  }
 }
--- a/snakemake/prepare_annotation/config.yaml
+++ b/snakemake/prepare_annotation/config.yaml
@@ -9,15 +9,7 @@
  ### Output and log directory
  ##############################################################################
  output_dir: "results"
-  local_log: "results/local_log"
+  local_log: "logs/local_log"
-  cluster_log: "results/cluster_log"
+  cluster_log: "logs/cluster_log"
-  dir_created: "results/dir_created"
  scripts: "scripts"
-  ##############################################################################
-  ### sample info
-  ##############################################################################
-  input_dir: "samples"
-  input_reads_pattern: ".fastq.gz"
-  sample: ["example"]
-  example: {adapter: GATCGGAAGAGCACA}
 ...
--- a/snakemake/prepare_annotation/run_snakefile.sh
+++ b/snakemake/prepare_annotation/run_snakefile.sh
 # set -e
+mkdir -p logs/cluster_log
+mkdir -p logs/local_log
 snakemake \
 --cluster-config cluster.json \
 --cluster "sbatch --cpus-per-task={cluster.threads} --mem={cluster.mem} --qos={cluster.queue} --time={cluster.time} --output={params.cluster_log}-%j-%N -p scicore" \
@@ -7,4 +10,4 @@ snakemake \
 -p \
 --rerun-incomplete \
 --use-singularity \
 --singularity-args "--bind ${PWD}"
\ No newline at end of file