diff --git a/snakemake/process_data/Snakefile b/snakemake/process_data/Snakefile index a1db0443509f0d41f70ab65a1750df71cdadec1c..0d869f4f1db37782a1e6725df2f733426c995041 100644 --- a/snakemake/process_data/Snakefile +++ b/snakemake/process_data/Snakefile @@ -1,7 +1,7 @@ configfile: "config.yaml" #from snakemake.utils import listfiles -localrules: create_output_and_log_directories, remove_multimappers, read_length_histogram, count_reads, determine_p_site_offset, filter_reads_based_on_read_lengths_and_offsets, bam_sort_and_index, finish +localrules: finish ################################################################################ ### Finish rule @@ -12,24 +12,7 @@ rule finish: pdf = expand(os.path.join(config["output_dir"], "{sample}/read_length/read_length_histogram.pdf"), sample=config["sample"]), counts = expand(os.path.join(config["output_dir"], "{sample}/counts.tsv"), sample=config["sample"]), bai = expand(os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.a_site_profile.sorted.bam.bai"), sample=config["sample"]) -################################################################################ -### Create output and log directories -################################################################################ -rule create_output_and_log_directories: - output: - output_dir = config["output_dir"], - cluster_log = config["cluster_log"], - local_log = config["local_log"], - sample_dir = expand(os.path.join(config["output_dir"], "{sample}"), sample=config["sample"]), - flag = config["dir_created"] - threads: 1 - shell: - "mkdir -p {output.output_dir}; \ - mkdir -p {output.cluster_log}; \ - mkdir -p {output.local_log}; \ - mkdir -p {output.sample_dir}; \ - touch {output.flag};" ################################################################################ ### Clipping reads @@ -37,17 +20,15 @@ rule create_output_and_log_directories: rule clip_reads: input: - flag = config["dir_created"], - reads = os.path.join(config["input_dir"], "{sample}" + config["input_reads_pattern"]), + reads = os.path.join(config["input_dir"], "{sample}" + config["input_reads_pattern"]) output: - reads = os.path.join(config["output_dir"], "{sample}/pro.clipped.fastq.gz"), + reads = os.path.join(config["output_dir"], "{sample}", "pro.clipped.fastq.gz") params: v = "-v", n = "-n", l = "20", - adapter = lambda wildcards: config[wildcards.sample]['adapter'], - z = "-z", - cluster_log = os.path.join(config["cluster_log"], "clip_reads_{sample}.log") + adapter = "GATCGGAAGAGCACA", #lambda wildcards: config[wildcards.sample]['adapter'], + z = "-z" log: os.path.join(config["local_log"], "clip_reads_{sample}.log") singularity: @@ -76,8 +57,7 @@ rule trim_reads: l = "20", t = lambda wildcards: config[wildcards.sample]['minimum_quality'], Q = lambda wildcards: config[wildcards.sample]['quality_type'], - z = "-z", - cluster_log = os.path.join(config["cluster_log"], "trim_reads_{sample}.log") + z = "-z" log: os.path.join(config["local_log"], "trim_reads_{sample}.log") singularity: @@ -106,8 +86,7 @@ rule filter_reads: q = lambda wildcards: config[wildcards.sample]['minimum_quality'], p = "90", z = "-z", - Q = lambda wildcards: config[wildcards.sample]['quality_type'], - cluster_log = os.path.join(config["cluster_log"], "filter_reads_{sample}.log") + Q = lambda wildcards: config[wildcards.sample]['quality_type'] log: os.path.join(config["local_log"], "filter_reads_{sample}.log") singularity: @@ -134,8 +113,7 @@ rule fastq_to_fasta: params: v = "-v", n = "-n", - r = "-r", - cluster_log = os.path.join(config["cluster_log"], "fastq_to_fasta_{sample}.log") + r = "-r" log: os.path.join(config["local_log"], "fastq_to_fasta_{sample}.log") singularity: @@ -162,8 +140,7 @@ rule map_to_other_genes: reads = os.path.join(config["output_dir"], "{sample}/other_genes.unmapped.fasta") params: silent = "--silent", - accuracy = "90", - cluster_log = os.path.join(config["cluster_log"], "map_to_other_genes_{sample}.log") + accuracy = "90" log: os.path.join(config["local_log"], "map_to_other_genes_{sample}.log") threads: 8 @@ -194,8 +171,7 @@ rule map_to_transcripts: reads = os.path.join(config["output_dir"], "{sample}/transcripts.unmapped.fasta") params: silent = "--silent", - accuracy = "90", - cluster_log = os.path.join(config["cluster_log"], "map_to_transcripts_{sample}.log") + accuracy = "90" log: os.path.join(config["local_log"], "map_to_transcripts_{sample}.log") threads: 8 @@ -307,8 +283,7 @@ rule determine_p_site_offset: p_site_offset = os.path.join(config["output_dir"], "{sample}/p_site_offsets") params: - outdir = os.path.join(config["output_dir"], "{sample}/p_site_offsets"), - cluster_log = os.path.join(config["cluster_log"], "determine_p_site_offset_{sample}.log") + outdir = os.path.join(config["output_dir"], "{sample}/p_site_offsets") log: os.path.join(config["local_log"], "determine_p_site_offset_{sample}.log") threads: 1 @@ -334,8 +309,6 @@ rule filter_reads_based_on_read_lengths_and_offsets: ) output: bam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.a_site_profile.bam"), - params: - cluster_log = os.path.join(config["cluster_log"], "filter_reads_based_on_read_lengths_and_offsets_{sample}.log") log: os.path.join(config["local_log"], "filter_reads_based_on_read_lengths_and_offsets_{sample}.log") threads: 1 @@ -357,8 +330,6 @@ rule bam_sort_and_index: output: bam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.a_site_profile.sorted.bam"), bai = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.a_site_profile.sorted.bam.bai"), - params: - cluster_log = os.path.join(config["cluster_log"], "bam_sort_and_index_{sample}.log") log: os.path.join(config["local_log"], "bam_sort_and_index_{sample}.log") threads: 1 diff --git a/snakemake/process_data/cluster.json b/snakemake/process_data/cluster.json index 79460b66581f15141f995a41de93fd9fa2d9a852..5ab3acc35e7f9fa1bedb4add01859c1d55e50e27 100644 --- a/snakemake/process_data/cluster.json +++ b/snakemake/process_data/cluster.json @@ -1,30 +1,29 @@ { -"__default__": -{ -"queue":"6hours", -"time": "05:00:00", -"threads":"1", -"mem":"8G" -}, -"map_to_other_genes": -{ -"queue":"6hours", -"time": "06:00:00", -"threads":"8", -"mem":"50G" -}, -"map_to_transcripts": -{ -"queue":"6hours", -"time": "06:00:00", -"threads":"8", -"mem":"50G" -}, -"sam2bam_sort_and_index": -{ -"queue":"6hours", -"time": "06:00:00", -"threads":"1", -"mem":"10G" -} + "__default__" : + { + "queue": "6hours", + "time": "05:00:00", + "threads": "1", + "mem": "4G", + "name": "{rule}.{wildcards}", + "out": "$PWD/logs/cluster_log/{rule}.{wildcards}-%j-%N.out" + }, + "map_to_other_genes": + { + "time": "06:00:00", + "threads":"8", + "mem":"50G" + }, + "map_to_transcripts": + { + "time": "06:00:00", + "threads":"8", + "mem":"50G" + }, + "sam2bam_sort_and_index": + { + "time": "06:00:00", + "threads":"1", + "mem":"10G" + } } diff --git a/snakemake/process_data/config.yaml b/snakemake/process_data/config.yaml index c2b5488d5f1a79ada0e7b9d41ce32dca7571e0d5..f3b40e9ab620f617b5e8ec6d585c84767ef498ec 100644 --- a/snakemake/process_data/config.yaml +++ b/snakemake/process_data/config.yaml @@ -11,17 +11,16 @@ ### Output and log directory ############################################################################## output_dir: "results" - local_log: "results/local_log" - cluster_log: "results/cluster_log" - dir_created: "results/dir_created" + local_log: "logs/local_log" + cluster_log: "logs/cluster_log" ############################################################################## ### sample info ############################################################################## input_dir: "samples" input_reads_pattern: ".fastq.gz" - sample: ["example", "example2", "SRR1536304", "SRR1536305"] - example: {adapter: GATCGGAAGAGCACA, minimum_quality: 20, quality_type: 33} - example2: {adapter: CTGTAGGCACCATCA, minimum_quality: 20, quality_type: 64} - SRR1536304: {adapter: CTGTAGGCACCATCA, minimum_quality: 20, quality_type: 33} - SRR1536305: {adapter: CTGTAGGCACCATCA, minimum_quality: 20, quality_type: 33} + sample: ["example"] #, "example2", "SRR1536304", "SRR1536305"] + example: {adapter: "GATCGGAAGAGCACA", minimum_quality: 20, quality_type: 33} +# example2: {adapter: CTGTAGGCACCATCA, minimum_quality: 20, quality_type: 64} +# SRR1536304: {adapter: CTGTAGGCACCATCA, minimum_quality: 20, quality_type: 33} +# SRR1536305: {adapter: CTGTAGGCACCATCA, minimum_quality: 20, quality_type: 33} ... diff --git a/snakemake/process_data/run_snakefile.sh b/snakemake/process_data/run_snakefile.sh index 6791403020a28256d19de15e0efa43447fe8ee58..536a8d02fea353f99177d5aeddeeea00b2a3766b 100755 --- a/snakemake/process_data/run_snakefile.sh +++ b/snakemake/process_data/run_snakefile.sh @@ -1,10 +1,13 @@ # set -e +mkdir -p logs/cluster_log +mkdir -p logs/local_log + snakemake \ --cluster-config cluster.json \ ---cluster "sbatch --cpus-per-task={cluster.threads} --mem={cluster.mem} --qos={cluster.queue} --time={cluster.time} --output={params.cluster_log}-%j-%N -p scicore" \ +--cluster "sbatch --cpus-per-task={cluster.threads} --mem={cluster.mem} --qos={cluster.queue} --time={cluster.time} --job-name={cluster.name} -o {cluster.out} -p scicore" \ --cores 256 \ -p \ --rerun-incomplete \ --use-singularity \ ---singularity-args "--bind ${PWD}" \ No newline at end of file +--singularity-args "--bind ${PWD},${PWD}/../"