Skip to content
Snippets Groups Projects
Commit 100e648e authored by BIOPZ-Gypas Foivos's avatar BIOPZ-Gypas Foivos
Browse files

Removed cluster_log from rules in prepare_annotation Snakefile. Removed create...

Removed cluster_log from rules in prepare_annotation Snakefile. Removed the create-directories rule in prepare_annotation Snakefile. Altered cluster.json in prepare_annotation Snakefile. Removed unnecessary info from config.yaml in prepare_annotation Snakefile.
parent d18d5994
Branches
No related tags found
No related merge requests found
configfile: "config.yaml" configfile: "config.yaml"
localrules: create_output_and_log_directories, create_tab_delimited_CDS_file, create_bed_CDS_file, finish localrules: create_tab_delimited_CDS_file, create_bed_CDS_file, finish
################################################################################# #################################################################################
### Finish rule ### Finish rule
...@@ -12,36 +12,16 @@ rule finish: ...@@ -12,36 +12,16 @@ rule finish:
idx_transcripts = os.path.join(config["output_dir"], "longest_pc_transcript_per_gene.idx"), idx_transcripts = os.path.join(config["output_dir"], "longest_pc_transcript_per_gene.idx"),
bed = os.path.join(config["output_dir"], "transcript_id_gene_id_CDS.bed") bed = os.path.join(config["output_dir"], "transcript_id_gene_id_CDS.bed")
#################################################################################
### Create output and log directories
#################################################################################
rule create_output_and_log_directories:
output:
output_dir = config["output_dir"],
cluster_log = config["cluster_log"],
local_log = config["local_log"],
flag = config["dir_created"]
threads: 1
shell:
"mkdir -p {output.output_dir}; \
mkdir -p {output.cluster_log}; \
mkdir -p {output.local_log}; \
touch {output.flag};"
################################################################################# #################################################################################
### Select longest protein coding transcripts ### Select longest protein coding transcripts
################################################################################# #################################################################################
rule select_longest_coding_transcripts: rule select_longest_coding_transcripts:
input: input:
flag = config["dir_created"],
gtf = config["gtf"], gtf = config["gtf"],
script = os.path.join(config["scripts"], "find_longest_coding_transcripts.py") script = os.path.join(config["scripts"], "find_longest_coding_transcripts.py")
output: output:
gtf = os.path.join(config["output_dir"], "longest_pc_transcript_per_gene.gtf") gtf = os.path.join(config["output_dir"], "longest_pc_transcript_per_gene.gtf")
params:
cluster_log = os.path.join(config["cluster_log"], "select_longest_coding_transcript.log")
log: log:
os.path.join(config["local_log"], "select_longest_coding_transcript.log") os.path.join(config["local_log"], "select_longest_coding_transcript.log")
singularity: singularity:
...@@ -58,12 +38,9 @@ rule select_longest_coding_transcripts: ...@@ -58,12 +38,9 @@ rule select_longest_coding_transcripts:
rule generate_segemehl_index_other_RNAs: rule generate_segemehl_index_other_RNAs:
input: input:
flag = config["dir_created"],
sequence = config["other_RNAs_sequence"] sequence = config["other_RNAs_sequence"]
output: output:
idx = os.path.join(config["output_dir"], "other_RNAs_sequence.idx") idx = os.path.join(config["output_dir"], "other_RNAs_sequence.idx")
params:
cluster_log = os.path.join(config["cluster_log"], "generate_segemehl_index_other_RNAs.log")
log: log:
os.path.join(config["local_log"], "generate_segemehl_index_other_RNAs.log") os.path.join(config["local_log"], "generate_segemehl_index_other_RNAs.log")
singularity: singularity:
...@@ -81,8 +58,6 @@ rule extract_transcript_sequences: ...@@ -81,8 +58,6 @@ rule extract_transcript_sequences:
genome = config["genome"] genome = config["genome"]
output: output:
transcripts = os.path.join(config["output_dir"], "longest_pc_transcript_per_gene.fa") transcripts = os.path.join(config["output_dir"], "longest_pc_transcript_per_gene.fa")
params:
cluster_log = os.path.join(config["cluster_log"], "extract_transcript_sequences.log")
log: log:
os.path.join(config["local_log"], "extract_transcript_sequences.log") os.path.join(config["local_log"], "extract_transcript_sequences.log")
singularity: singularity:
...@@ -103,8 +78,6 @@ rule create_tab_delimited_CDS_file: ...@@ -103,8 +78,6 @@ rule create_tab_delimited_CDS_file:
script = os.path.join(config["scripts"], "create_tab_delimited_CDS_file.py") script = os.path.join(config["scripts"], "create_tab_delimited_CDS_file.py")
output: output:
tsv = os.path.join(config["output_dir"], "transcript_id_gene_id_CDS.tsv") tsv = os.path.join(config["output_dir"], "transcript_id_gene_id_CDS.tsv")
params:
cluster_log = os.path.join(config["cluster_log"], "create_tab_delimited_CDS_file.log")
log: log:
os.path.join(config["local_log"], "create_tab_delimited_CDS_file.log") os.path.join(config["local_log"], "create_tab_delimited_CDS_file.log")
singularity: singularity:
...@@ -124,8 +97,6 @@ rule create_bed_CDS_file: ...@@ -124,8 +97,6 @@ rule create_bed_CDS_file:
tsv = os.path.join(config["output_dir"], "transcript_id_gene_id_CDS.tsv") tsv = os.path.join(config["output_dir"], "transcript_id_gene_id_CDS.tsv")
output: output:
bed = os.path.join(config["output_dir"], "transcript_id_gene_id_CDS.bed") bed = os.path.join(config["output_dir"], "transcript_id_gene_id_CDS.bed")
params:
cluster_log = os.path.join(config["cluster_log"], "create_bed_CDS_file.log")
log: log:
os.path.join(config["local_log"], "create_bed_CDS_file.log") os.path.join(config["local_log"], "create_bed_CDS_file.log")
# singularity: # singularity:
...@@ -142,8 +113,6 @@ rule generate_segemehl_index_transcripts: ...@@ -142,8 +113,6 @@ rule generate_segemehl_index_transcripts:
sequence = os.path.join(config["output_dir"], "longest_pc_transcript_per_gene.fa") sequence = os.path.join(config["output_dir"], "longest_pc_transcript_per_gene.fa")
output: output:
idx = os.path.join(config["output_dir"], "longest_pc_transcript_per_gene.idx") idx = os.path.join(config["output_dir"], "longest_pc_transcript_per_gene.idx")
params:
cluster_log = os.path.join(config["cluster_log"], "generate_segemehl_index_transcripts.log")
log: log:
os.path.join(config["local_log"], "generate_segemehl_index_transcripts.log") os.path.join(config["local_log"], "generate_segemehl_index_transcripts.log")
singularity: singularity:
......
{ {
"__default__": "__default__" :
{ {
"queue":"6hours", "queue": "6hours",
"time": "05:00:00", "time": "05:00:00",
"threads":"1", "threads": "1",
"mem":"4G" "mem": "4G",
}, "name": "{rule}.{wildcards}",
"generate_segemehl_index_other_RNAs": "out": "$PWD/logs/cluster_log/{rule}.{wildcards}-%j-%N.out"
{ },
"queue":"6hours", "generate_segemehl_index_other_RNAs":
"time": "06:00:00", {
"threads":"8", "time": "06:00:00",
"mem":"50G" "threads":"8",
}, "mem":"50G"
"generate_segemehl_index_transcripts": },
{ "generate_segemehl_index_transcripts":
"queue":"6hours", {
"time": "06:00:00", "time": "06:00:00",
"threads":"8", "threads":"8",
"mem":"50G" "mem":"50G"
} }
} }
...@@ -9,15 +9,7 @@ ...@@ -9,15 +9,7 @@
### Output and log directory ### Output and log directory
############################################################################## ##############################################################################
output_dir: "results" output_dir: "results"
local_log: "results/local_log" local_log: "logs/local_log"
cluster_log: "results/cluster_log" cluster_log: "logs/cluster_log"
dir_created: "results/dir_created"
scripts: "scripts" scripts: "scripts"
##############################################################################
### sample info
##############################################################################
input_dir: "samples"
input_reads_pattern: ".fastq.gz"
sample: ["example"]
example: {adapter: GATCGGAAGAGCACA}
... ...
# set -e # set -e
mkdir -p logs/cluster_log
mkdir -p logs/local_log
snakemake \ snakemake \
--cluster-config cluster.json \ --cluster-config cluster.json \
--cluster "sbatch --cpus-per-task={cluster.threads} --mem={cluster.mem} --qos={cluster.queue} --time={cluster.time} --output={params.cluster_log}-%j-%N -p scicore" \ --cluster "sbatch --cpus-per-task={cluster.threads} --mem={cluster.mem} --qos={cluster.queue} --time={cluster.time} --output={params.cluster_log}-%j-%N -p scicore" \
...@@ -7,4 +10,4 @@ snakemake \ ...@@ -7,4 +10,4 @@ snakemake \
-p \ -p \
--rerun-incomplete \ --rerun-incomplete \
--use-singularity \ --use-singularity \
--singularity-args "--bind ${PWD}" --singularity-args "--bind ${PWD}"
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment