Skip to content
Snippets Groups Projects
Commit 211ef70e authored by BIOPZ-Gypas Foivos's avatar BIOPZ-Gypas Foivos
Browse files

Removed cluster_log from Snakefile. Altered run_snakefile.sh. Removed...

Removed cluster_log from Snakefile. Altered run_snakefile.sh. Removed unnecessary generation of directories in Snakefile of process_data.
parent 53bf4372
No related branches found
No related tags found
No related merge requests found
configfile: "config.yaml"
#from snakemake.utils import listfiles
localrules: create_output_and_log_directories, remove_multimappers, read_length_histogram, count_reads, determine_p_site_offset, filter_reads_based_on_read_lengths_and_offsets, bam_sort_and_index, finish
localrules: finish
################################################################################
### Finish rule
......@@ -12,24 +12,7 @@ rule finish:
pdf = expand(os.path.join(config["output_dir"], "{sample}/read_length/read_length_histogram.pdf"), sample=config["sample"]),
counts = expand(os.path.join(config["output_dir"], "{sample}/counts.tsv"), sample=config["sample"]),
bai = expand(os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.a_site_profile.sorted.bam.bai"), sample=config["sample"])
################################################################################
### Create output and log directories
################################################################################
rule create_output_and_log_directories:
output:
output_dir = config["output_dir"],
cluster_log = config["cluster_log"],
local_log = config["local_log"],
sample_dir = expand(os.path.join(config["output_dir"], "{sample}"), sample=config["sample"]),
flag = config["dir_created"]
threads: 1
shell:
"mkdir -p {output.output_dir}; \
mkdir -p {output.cluster_log}; \
mkdir -p {output.local_log}; \
mkdir -p {output.sample_dir}; \
touch {output.flag};"
################################################################################
### Clipping reads
......@@ -37,17 +20,15 @@ rule create_output_and_log_directories:
rule clip_reads:
input:
flag = config["dir_created"],
reads = os.path.join(config["input_dir"], "{sample}" + config["input_reads_pattern"]),
reads = os.path.join(config["input_dir"], "{sample}" + config["input_reads_pattern"])
output:
reads = os.path.join(config["output_dir"], "{sample}/pro.clipped.fastq.gz"),
reads = os.path.join(config["output_dir"], "{sample}", "pro.clipped.fastq.gz")
params:
v = "-v",
n = "-n",
l = "20",
adapter = lambda wildcards: config[wildcards.sample]['adapter'],
z = "-z",
cluster_log = os.path.join(config["cluster_log"], "clip_reads_{sample}.log")
adapter = "GATCGGAAGAGCACA", #lambda wildcards: config[wildcards.sample]['adapter'],
z = "-z"
log:
os.path.join(config["local_log"], "clip_reads_{sample}.log")
singularity:
......@@ -76,8 +57,7 @@ rule trim_reads:
l = "20",
t = lambda wildcards: config[wildcards.sample]['minimum_quality'],
Q = lambda wildcards: config[wildcards.sample]['quality_type'],
z = "-z",
cluster_log = os.path.join(config["cluster_log"], "trim_reads_{sample}.log")
z = "-z"
log:
os.path.join(config["local_log"], "trim_reads_{sample}.log")
singularity:
......@@ -106,8 +86,7 @@ rule filter_reads:
q = lambda wildcards: config[wildcards.sample]['minimum_quality'],
p = "90",
z = "-z",
Q = lambda wildcards: config[wildcards.sample]['quality_type'],
cluster_log = os.path.join(config["cluster_log"], "filter_reads_{sample}.log")
Q = lambda wildcards: config[wildcards.sample]['quality_type']
log:
os.path.join(config["local_log"], "filter_reads_{sample}.log")
singularity:
......@@ -134,8 +113,7 @@ rule fastq_to_fasta:
params:
v = "-v",
n = "-n",
r = "-r",
cluster_log = os.path.join(config["cluster_log"], "fastq_to_fasta_{sample}.log")
r = "-r"
log:
os.path.join(config["local_log"], "fastq_to_fasta_{sample}.log")
singularity:
......@@ -162,8 +140,7 @@ rule map_to_other_genes:
reads = os.path.join(config["output_dir"], "{sample}/other_genes.unmapped.fasta")
params:
silent = "--silent",
accuracy = "90",
cluster_log = os.path.join(config["cluster_log"], "map_to_other_genes_{sample}.log")
accuracy = "90"
log:
os.path.join(config["local_log"], "map_to_other_genes_{sample}.log")
threads: 8
......@@ -194,8 +171,7 @@ rule map_to_transcripts:
reads = os.path.join(config["output_dir"], "{sample}/transcripts.unmapped.fasta")
params:
silent = "--silent",
accuracy = "90",
cluster_log = os.path.join(config["cluster_log"], "map_to_transcripts_{sample}.log")
accuracy = "90"
log:
os.path.join(config["local_log"], "map_to_transcripts_{sample}.log")
threads: 8
......@@ -307,8 +283,7 @@ rule determine_p_site_offset:
p_site_offset = os.path.join(config["output_dir"],
"{sample}/p_site_offsets")
params:
outdir = os.path.join(config["output_dir"], "{sample}/p_site_offsets"),
cluster_log = os.path.join(config["cluster_log"], "determine_p_site_offset_{sample}.log")
outdir = os.path.join(config["output_dir"], "{sample}/p_site_offsets")
log:
os.path.join(config["local_log"], "determine_p_site_offset_{sample}.log")
threads: 1
......@@ -334,8 +309,6 @@ rule filter_reads_based_on_read_lengths_and_offsets:
)
output:
bam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.a_site_profile.bam"),
params:
cluster_log = os.path.join(config["cluster_log"], "filter_reads_based_on_read_lengths_and_offsets_{sample}.log")
log:
os.path.join(config["local_log"], "filter_reads_based_on_read_lengths_and_offsets_{sample}.log")
threads: 1
......@@ -357,8 +330,6 @@ rule bam_sort_and_index:
output:
bam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.a_site_profile.sorted.bam"),
bai = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.a_site_profile.sorted.bam.bai"),
params:
cluster_log = os.path.join(config["cluster_log"], "bam_sort_and_index_{sample}.log")
log:
os.path.join(config["local_log"], "bam_sort_and_index_{sample}.log")
threads: 1
......
{
"__default__":
{
"queue":"6hours",
"time": "05:00:00",
"threads":"1",
"mem":"8G"
},
"map_to_other_genes":
{
"queue":"6hours",
"time": "06:00:00",
"threads":"8",
"mem":"50G"
},
"map_to_transcripts":
{
"queue":"6hours",
"time": "06:00:00",
"threads":"8",
"mem":"50G"
},
"sam2bam_sort_and_index":
{
"queue":"6hours",
"time": "06:00:00",
"threads":"1",
"mem":"10G"
}
"__default__" :
{
"queue": "6hours",
"time": "05:00:00",
"threads": "1",
"mem": "4G",
"name": "{rule}.{wildcards}",
"out": "$PWD/logs/cluster_log/{rule}.{wildcards}-%j-%N.out"
},
"map_to_other_genes":
{
"time": "06:00:00",
"threads":"8",
"mem":"50G"
},
"map_to_transcripts":
{
"time": "06:00:00",
"threads":"8",
"mem":"50G"
},
"sam2bam_sort_and_index":
{
"time": "06:00:00",
"threads":"1",
"mem":"10G"
}
}
......@@ -11,17 +11,16 @@
### Output and log directory
##############################################################################
output_dir: "results"
local_log: "results/local_log"
cluster_log: "results/cluster_log"
dir_created: "results/dir_created"
local_log: "logs/local_log"
cluster_log: "logs/cluster_log"
##############################################################################
### sample info
##############################################################################
input_dir: "samples"
input_reads_pattern: ".fastq.gz"
sample: ["example", "example2", "SRR1536304", "SRR1536305"]
example: {adapter: GATCGGAAGAGCACA, minimum_quality: 20, quality_type: 33}
example2: {adapter: CTGTAGGCACCATCA, minimum_quality: 20, quality_type: 64}
SRR1536304: {adapter: CTGTAGGCACCATCA, minimum_quality: 20, quality_type: 33}
SRR1536305: {adapter: CTGTAGGCACCATCA, minimum_quality: 20, quality_type: 33}
sample: ["example"] #, "example2", "SRR1536304", "SRR1536305"]
example: {adapter: "GATCGGAAGAGCACA", minimum_quality: 20, quality_type: 33}
# example2: {adapter: CTGTAGGCACCATCA, minimum_quality: 20, quality_type: 64}
# SRR1536304: {adapter: CTGTAGGCACCATCA, minimum_quality: 20, quality_type: 33}
# SRR1536305: {adapter: CTGTAGGCACCATCA, minimum_quality: 20, quality_type: 33}
...
# Driver script: run the Snakemake workflow, submitting each job to SLURM.
# set -e
# The sbatch -o target ({cluster.out}) lives under these directories, so they
# must exist before the first job is submitted.
mkdir -p logs/cluster_log
mkdir -p logs/local_log
# NOTE(review): the scraped diff showed both the pre- and post-commit variants
# of the --cluster and --singularity-args lines; only the post-commit variants
# (job naming/log routing via cluster.json's {cluster.name}/{cluster.out}) are
# kept here — two --cluster options would be invalid.
snakemake \
    --cluster-config cluster.json \
    --cluster "sbatch --cpus-per-task={cluster.threads} --mem={cluster.mem} --qos={cluster.queue} --time={cluster.time} --job-name={cluster.name} -o {cluster.out} -p scicore" \
    --cores 256 \
    -p \
    --rerun-incomplete \
    --use-singularity \
    --singularity-args "--bind ${PWD},${PWD}/../"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment