From 100e648eb028cd41589f575da7e1b574eaa644e2 Mon Sep 17 00:00:00 2001 From: BIOPZ-Gypas Foivos <foivos.gypas@unibas.ch> Date: Thu, 10 Jan 2019 10:59:24 +0100 Subject: [PATCH] Removed cluster_log from rules in prepare_annotation Snakefile. Removed create directories rule in prepare_annotation Snakefile. Altered cluster.json in prepare_annotation Snakefile. Removed unnecessary info from config.yaml in prepare_annotation Snakefile. --- snakemake/prepare_annotation/Snakefile | 33 +-------------- snakemake/prepare_annotation/cluster.json | 42 +++++++++---------- snakemake/prepare_annotation/config.yaml | 12 +----- snakemake/prepare_annotation/run_snakefile.sh | 5 ++- 4 files changed, 28 insertions(+), 64 deletions(-) diff --git a/snakemake/prepare_annotation/Snakefile b/snakemake/prepare_annotation/Snakefile index 5320edf..ff32216 100644 --- a/snakemake/prepare_annotation/Snakefile +++ b/snakemake/prepare_annotation/Snakefile @@ -1,6 +1,6 @@ configfile: "config.yaml" -localrules: create_output_and_log_directories, create_tab_delimited_CDS_file, create_bed_CDS_file, finish +localrules: create_tab_delimited_CDS_file, create_bed_CDS_file, finish ################################################################################# ### Finish rule @@ -12,36 +12,16 @@ rule finish: idx_transcripts = os.path.join(config["output_dir"], "longest_pc_transcript_per_gene.idx"), bed = os.path.join(config["output_dir"], "transcript_id_gene_id_CDS.bed") -################################################################################# -### Create output and log directories -################################################################################# - -rule create_output_and_log_directories: - output: - output_dir = config["output_dir"], - cluster_log = config["cluster_log"], - local_log = config["local_log"], - flag = config["dir_created"] - threads: 1 - shell: - "mkdir -p {output.output_dir}; \ - mkdir -p {output.cluster_log}; \ - mkdir -p {output.local_log}; \ - touch 
{output.flag};" - ################################################################################# ### Select longest protein coding transcripts ################################################################################# rule select_longest_coding_transcripts: input: - flag = config["dir_created"], gtf = config["gtf"], script = os.path.join(config["scripts"], "find_longest_coding_transcripts.py") output: gtf = os.path.join(config["output_dir"], "longest_pc_transcript_per_gene.gtf") - params: - cluster_log = os.path.join(config["cluster_log"], "select_longest_coding_transcript.log") log: os.path.join(config["local_log"], "select_longest_coding_transcript.log") singularity: @@ -58,12 +38,9 @@ rule select_longest_coding_transcripts: rule generate_segemehl_index_other_RNAs: input: - flag = config["dir_created"], sequence = config["other_RNAs_sequence"] output: idx = os.path.join(config["output_dir"], "other_RNAs_sequence.idx") - params: - cluster_log = os.path.join(config["cluster_log"], "generate_segemehl_index_other_RNAs.log") log: os.path.join(config["local_log"], "generate_segemehl_index_other_RNAs.log") singularity: @@ -81,8 +58,6 @@ rule extract_transcript_sequences: genome = config["genome"] output: transcripts = os.path.join(config["output_dir"], "longest_pc_transcript_per_gene.fa") - params: - cluster_log = os.path.join(config["cluster_log"], "extract_transcript_sequences.log") log: os.path.join(config["local_log"], "extract_transcript_sequences.log") singularity: @@ -103,8 +78,6 @@ rule create_tab_delimited_CDS_file: script = os.path.join(config["scripts"], "create_tab_delimited_CDS_file.py") output: tsv = os.path.join(config["output_dir"], "transcript_id_gene_id_CDS.tsv") - params: - cluster_log = os.path.join(config["cluster_log"], "create_tab_delimited_CDS_file.log") log: os.path.join(config["local_log"], "create_tab_delimited_CDS_file.log") singularity: @@ -124,8 +97,6 @@ rule create_bed_CDS_file: tsv = os.path.join(config["output_dir"], 
"transcript_id_gene_id_CDS.tsv") output: bed = os.path.join(config["output_dir"], "transcript_id_gene_id_CDS.bed") - params: - cluster_log = os.path.join(config["cluster_log"], "create_bed_CDS_file.log") log: os.path.join(config["local_log"], "create_bed_CDS_file.log") # singularity: @@ -142,8 +113,6 @@ rule generate_segemehl_index_transcripts: sequence = os.path.join(config["output_dir"], "longest_pc_transcript_per_gene.fa") output: idx = os.path.join(config["output_dir"], "longest_pc_transcript_per_gene.idx") - params: - cluster_log = os.path.join(config["cluster_log"], "generate_segemehl_index_transcripts.log") log: os.path.join(config["local_log"], "generate_segemehl_index_transcripts.log") singularity: diff --git a/snakemake/prepare_annotation/cluster.json b/snakemake/prepare_annotation/cluster.json index 7845df8..504c3fd 100644 --- a/snakemake/prepare_annotation/cluster.json +++ b/snakemake/prepare_annotation/cluster.json @@ -1,23 +1,23 @@ { -"__default__": -{ -"queue":"6hours", -"time": "05:00:00", -"threads":"1", -"mem":"4G" -}, -"generate_segemehl_index_other_RNAs": -{ -"queue":"6hours", -"time": "06:00:00", -"threads":"8", -"mem":"50G" -}, -"generate_segemehl_index_transcripts": -{ -"queue":"6hours", -"time": "06:00:00", -"threads":"8", -"mem":"50G" -} + "__default__" : + { + "queue": "6hours", + "time": "05:00:00", + "threads": "1", + "mem": "4G", + "name": "{rule}.{wildcards}", + "out": "$PWD/logs/cluster_log/{rule}.{wildcards}-%j-%N.out" + }, + "generate_segemehl_index_other_RNAs": + { + "time": "06:00:00", + "threads":"8", + "mem":"50G" + }, + "generate_segemehl_index_transcripts": + { + "time": "06:00:00", + "threads":"8", + "mem":"50G" + } } diff --git a/snakemake/prepare_annotation/config.yaml b/snakemake/prepare_annotation/config.yaml index 2f104b8..02a2810 100644 --- a/snakemake/prepare_annotation/config.yaml +++ b/snakemake/prepare_annotation/config.yaml @@ -9,15 +9,7 @@ ### Output and log directory 
############################################################################## output_dir: "results" - local_log: "results/local_log" - cluster_log: "results/cluster_log" - dir_created: "results/dir_created" + local_log: "logs/local_log" + cluster_log: "logs/cluster_log" scripts: "scripts" - ############################################################################## - ### sample info - ############################################################################## - input_dir: "samples" - input_reads_pattern: ".fastq.gz" - sample: ["example"] - example: {adapter: GATCGGAAGAGCACA} ... diff --git a/snakemake/prepare_annotation/run_snakefile.sh b/snakemake/prepare_annotation/run_snakefile.sh index 6791403..7a004f2 100755 --- a/snakemake/prepare_annotation/run_snakefile.sh +++ b/snakemake/prepare_annotation/run_snakefile.sh @@ -1,5 +1,8 @@ # set -e +mkdir -p logs/cluster_log +mkdir -p logs/local_log + snakemake \ --cluster-config cluster.json \ --cluster "sbatch --cpus-per-task={cluster.threads} --mem={cluster.mem} --qos={cluster.queue} --time={cluster.time} --output={params.cluster_log}-%j-%N -p scicore" \ @@ -7,4 +10,4 @@ snakemake \ -p \ --rerun-incomplete \ --use-singularity \ ---singularity-args "--bind ${PWD}" \ No newline at end of file +--singularity-args "--bind ${PWD}" -- GitLab