Skip to content
Snippets Groups Projects
Commit 211ef70e authored by BIOPZ-Gypas Foivos
Browse files

Removed cluster_log from Snakefile. Altered run_snakefile.sh. Removed...

Removed cluster_log from Snakefile. Altered run_snakefile.sh. Removed unnecessary generation of directories in Snakefile of process_data.
parent 53bf4372
No related branches found
No related tags found
No related merge requests found
configfile: "config.yaml" configfile: "config.yaml"
#from snakemake.utils import listfiles #from snakemake.utils import listfiles
localrules: create_output_and_log_directories, remove_multimappers, read_length_histogram, count_reads, determine_p_site_offset, filter_reads_based_on_read_lengths_and_offsets, bam_sort_and_index, finish localrules: finish
################################################################################ ################################################################################
### Finish rule ### Finish rule
...@@ -12,24 +12,7 @@ rule finish: ...@@ -12,24 +12,7 @@ rule finish:
pdf = expand(os.path.join(config["output_dir"], "{sample}/read_length/read_length_histogram.pdf"), sample=config["sample"]), pdf = expand(os.path.join(config["output_dir"], "{sample}/read_length/read_length_histogram.pdf"), sample=config["sample"]),
counts = expand(os.path.join(config["output_dir"], "{sample}/counts.tsv"), sample=config["sample"]), counts = expand(os.path.join(config["output_dir"], "{sample}/counts.tsv"), sample=config["sample"]),
bai = expand(os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.a_site_profile.sorted.bam.bai"), sample=config["sample"]) bai = expand(os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.a_site_profile.sorted.bam.bai"), sample=config["sample"])
################################################################################
### Create output and log directories
################################################################################
rule create_output_and_log_directories:
output:
output_dir = config["output_dir"],
cluster_log = config["cluster_log"],
local_log = config["local_log"],
sample_dir = expand(os.path.join(config["output_dir"], "{sample}"), sample=config["sample"]),
flag = config["dir_created"]
threads: 1
shell:
"mkdir -p {output.output_dir}; \
mkdir -p {output.cluster_log}; \
mkdir -p {output.local_log}; \
mkdir -p {output.sample_dir}; \
touch {output.flag};"
################################################################################ ################################################################################
### Clipping reads ### Clipping reads
...@@ -37,17 +20,15 @@ rule create_output_and_log_directories: ...@@ -37,17 +20,15 @@ rule create_output_and_log_directories:
rule clip_reads: rule clip_reads:
input: input:
flag = config["dir_created"], reads = os.path.join(config["input_dir"], "{sample}" + config["input_reads_pattern"])
reads = os.path.join(config["input_dir"], "{sample}" + config["input_reads_pattern"]),
output: output:
reads = os.path.join(config["output_dir"], "{sample}/pro.clipped.fastq.gz"), reads = os.path.join(config["output_dir"], "{sample}", "pro.clipped.fastq.gz")
params: params:
v = "-v", v = "-v",
n = "-n", n = "-n",
l = "20", l = "20",
adapter = lambda wildcards: config[wildcards.sample]['adapter'], adapter = "GATCGGAAGAGCACA", #lambda wildcards: config[wildcards.sample]['adapter'],
z = "-z", z = "-z"
cluster_log = os.path.join(config["cluster_log"], "clip_reads_{sample}.log")
log: log:
os.path.join(config["local_log"], "clip_reads_{sample}.log") os.path.join(config["local_log"], "clip_reads_{sample}.log")
singularity: singularity:
...@@ -76,8 +57,7 @@ rule trim_reads: ...@@ -76,8 +57,7 @@ rule trim_reads:
l = "20", l = "20",
t = lambda wildcards: config[wildcards.sample]['minimum_quality'], t = lambda wildcards: config[wildcards.sample]['minimum_quality'],
Q = lambda wildcards: config[wildcards.sample]['quality_type'], Q = lambda wildcards: config[wildcards.sample]['quality_type'],
z = "-z", z = "-z"
cluster_log = os.path.join(config["cluster_log"], "trim_reads_{sample}.log")
log: log:
os.path.join(config["local_log"], "trim_reads_{sample}.log") os.path.join(config["local_log"], "trim_reads_{sample}.log")
singularity: singularity:
...@@ -106,8 +86,7 @@ rule filter_reads: ...@@ -106,8 +86,7 @@ rule filter_reads:
q = lambda wildcards: config[wildcards.sample]['minimum_quality'], q = lambda wildcards: config[wildcards.sample]['minimum_quality'],
p = "90", p = "90",
z = "-z", z = "-z",
Q = lambda wildcards: config[wildcards.sample]['quality_type'], Q = lambda wildcards: config[wildcards.sample]['quality_type']
cluster_log = os.path.join(config["cluster_log"], "filter_reads_{sample}.log")
log: log:
os.path.join(config["local_log"], "filter_reads_{sample}.log") os.path.join(config["local_log"], "filter_reads_{sample}.log")
singularity: singularity:
...@@ -134,8 +113,7 @@ rule fastq_to_fasta: ...@@ -134,8 +113,7 @@ rule fastq_to_fasta:
params: params:
v = "-v", v = "-v",
n = "-n", n = "-n",
r = "-r", r = "-r"
cluster_log = os.path.join(config["cluster_log"], "fastq_to_fasta_{sample}.log")
log: log:
os.path.join(config["local_log"], "fastq_to_fasta_{sample}.log") os.path.join(config["local_log"], "fastq_to_fasta_{sample}.log")
singularity: singularity:
...@@ -162,8 +140,7 @@ rule map_to_other_genes: ...@@ -162,8 +140,7 @@ rule map_to_other_genes:
reads = os.path.join(config["output_dir"], "{sample}/other_genes.unmapped.fasta") reads = os.path.join(config["output_dir"], "{sample}/other_genes.unmapped.fasta")
params: params:
silent = "--silent", silent = "--silent",
accuracy = "90", accuracy = "90"
cluster_log = os.path.join(config["cluster_log"], "map_to_other_genes_{sample}.log")
log: log:
os.path.join(config["local_log"], "map_to_other_genes_{sample}.log") os.path.join(config["local_log"], "map_to_other_genes_{sample}.log")
threads: 8 threads: 8
...@@ -194,8 +171,7 @@ rule map_to_transcripts: ...@@ -194,8 +171,7 @@ rule map_to_transcripts:
reads = os.path.join(config["output_dir"], "{sample}/transcripts.unmapped.fasta") reads = os.path.join(config["output_dir"], "{sample}/transcripts.unmapped.fasta")
params: params:
silent = "--silent", silent = "--silent",
accuracy = "90", accuracy = "90"
cluster_log = os.path.join(config["cluster_log"], "map_to_transcripts_{sample}.log")
log: log:
os.path.join(config["local_log"], "map_to_transcripts_{sample}.log") os.path.join(config["local_log"], "map_to_transcripts_{sample}.log")
threads: 8 threads: 8
...@@ -307,8 +283,7 @@ rule determine_p_site_offset: ...@@ -307,8 +283,7 @@ rule determine_p_site_offset:
p_site_offset = os.path.join(config["output_dir"], p_site_offset = os.path.join(config["output_dir"],
"{sample}/p_site_offsets") "{sample}/p_site_offsets")
params: params:
outdir = os.path.join(config["output_dir"], "{sample}/p_site_offsets"), outdir = os.path.join(config["output_dir"], "{sample}/p_site_offsets")
cluster_log = os.path.join(config["cluster_log"], "determine_p_site_offset_{sample}.log")
log: log:
os.path.join(config["local_log"], "determine_p_site_offset_{sample}.log") os.path.join(config["local_log"], "determine_p_site_offset_{sample}.log")
threads: 1 threads: 1
...@@ -334,8 +309,6 @@ rule filter_reads_based_on_read_lengths_and_offsets: ...@@ -334,8 +309,6 @@ rule filter_reads_based_on_read_lengths_and_offsets:
) )
output: output:
bam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.a_site_profile.bam"), bam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.a_site_profile.bam"),
params:
cluster_log = os.path.join(config["cluster_log"], "filter_reads_based_on_read_lengths_and_offsets_{sample}.log")
log: log:
os.path.join(config["local_log"], "filter_reads_based_on_read_lengths_and_offsets_{sample}.log") os.path.join(config["local_log"], "filter_reads_based_on_read_lengths_and_offsets_{sample}.log")
threads: 1 threads: 1
...@@ -357,8 +330,6 @@ rule bam_sort_and_index: ...@@ -357,8 +330,6 @@ rule bam_sort_and_index:
output: output:
bam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.a_site_profile.sorted.bam"), bam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.a_site_profile.sorted.bam"),
bai = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.a_site_profile.sorted.bam.bai"), bai = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.a_site_profile.sorted.bam.bai"),
params:
cluster_log = os.path.join(config["cluster_log"], "bam_sort_and_index_{sample}.log")
log: log:
os.path.join(config["local_log"], "bam_sort_and_index_{sample}.log") os.path.join(config["local_log"], "bam_sort_and_index_{sample}.log")
threads: 1 threads: 1
......
{ {
"__default__": "__default__" :
{ {
"queue":"6hours", "queue": "6hours",
"time": "05:00:00", "time": "05:00:00",
"threads":"1", "threads": "1",
"mem":"8G" "mem": "4G",
}, "name": "{rule}.{wildcards}",
"map_to_other_genes": "out": "$PWD/logs/cluster_log/{rule}.{wildcards}-%j-%N.out"
{ },
"queue":"6hours", "map_to_other_genes":
"time": "06:00:00", {
"threads":"8", "time": "06:00:00",
"mem":"50G" "threads":"8",
}, "mem":"50G"
"map_to_transcripts": },
{ "map_to_transcripts":
"queue":"6hours", {
"time": "06:00:00", "time": "06:00:00",
"threads":"8", "threads":"8",
"mem":"50G" "mem":"50G"
}, },
"sam2bam_sort_and_index": "sam2bam_sort_and_index":
{ {
"queue":"6hours", "time": "06:00:00",
"time": "06:00:00", "threads":"1",
"threads":"1", "mem":"10G"
"mem":"10G" }
}
} }
...@@ -11,17 +11,16 @@ ...@@ -11,17 +11,16 @@
### Output and log directory ### Output and log directory
############################################################################## ##############################################################################
output_dir: "results" output_dir: "results"
local_log: "results/local_log" local_log: "logs/local_log"
cluster_log: "results/cluster_log" cluster_log: "logs/cluster_log"
dir_created: "results/dir_created"
############################################################################## ##############################################################################
### sample info ### sample info
############################################################################## ##############################################################################
input_dir: "samples" input_dir: "samples"
input_reads_pattern: ".fastq.gz" input_reads_pattern: ".fastq.gz"
sample: ["example", "example2", "SRR1536304", "SRR1536305"] sample: ["example"] #, "example2", "SRR1536304", "SRR1536305"]
example: {adapter: GATCGGAAGAGCACA, minimum_quality: 20, quality_type: 33} example: {adapter: "GATCGGAAGAGCACA", minimum_quality: 20, quality_type: 33}
example2: {adapter: CTGTAGGCACCATCA, minimum_quality: 20, quality_type: 64} # example2: {adapter: CTGTAGGCACCATCA, minimum_quality: 20, quality_type: 64}
SRR1536304: {adapter: CTGTAGGCACCATCA, minimum_quality: 20, quality_type: 33} # SRR1536304: {adapter: CTGTAGGCACCATCA, minimum_quality: 20, quality_type: 33}
SRR1536305: {adapter: CTGTAGGCACCATCA, minimum_quality: 20, quality_type: 33} # SRR1536305: {adapter: CTGTAGGCACCATCA, minimum_quality: 20, quality_type: 33}
... ...
# set -e # set -e
mkdir -p logs/cluster_log
mkdir -p logs/local_log
snakemake \ snakemake \
--cluster-config cluster.json \ --cluster-config cluster.json \
--cluster "sbatch --cpus-per-task={cluster.threads} --mem={cluster.mem} --qos={cluster.queue} --time={cluster.time} --output={params.cluster_log}-%j-%N -p scicore" \ --cluster "sbatch --cpus-per-task={cluster.threads} --mem={cluster.mem} --qos={cluster.queue} --time={cluster.time} --job-name={cluster.name} -o {cluster.out} -p scicore" \
--cores 256 \ --cores 256 \
-p \ -p \
--rerun-incomplete \ --rerun-incomplete \
--use-singularity \ --use-singularity \
--singularity-args "--bind ${PWD}" --singularity-args "--bind ${PWD},${PWD}/../"
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment