From ca55b93c0940a8498e27c79274451932ff5d5e86 Mon Sep 17 00:00:00 2001 From: BIOPZ-Bak Maciej <maciej.bak@unibas.ch> Date: Thu, 19 Aug 2021 22:01:22 +0000 Subject: [PATCH] feat: add Snakemake profiles --- .gitlab-ci.yml | 4 +- README.md | 53 ++- profiles/CookieCutterSlurm.py | 31 ++ profiles/graphs/config.yaml | 6 + profiles/local-conda/config.yaml | 8 + profiles/local-singularity/config.yaml | 9 + profiles/slurm-conda/config.yaml | 12 + .../slurm-config.json | 101 +++-- profiles/slurm-jobscript.sh | 3 + profiles/slurm-settings.json | 6 + profiles/slurm-singularity/config.yaml | 13 + profiles/slurm-status.py | 72 ++++ profiles/slurm-submit.py | 60 +++ profiles/slurm_utils.py | 345 ++++++++++++++++++ resources/config_schema.json | 4 + tests/input_files/config.mutliple_lanes.yml | 1 + tests/input_files/config.yaml | 1 + tests/test_create_dag_image/test.sh | 8 +- tests/test_create_rule_graph/test.sh | 8 +- tests/test_integration_workflow/test.local.sh | 12 +- tests/test_integration_workflow/test.slurm.sh | 15 +- .../test.local.sh | 12 +- .../test.slurm.sh | 15 +- .../test.local.sh | 11 +- .../test.slurm.sh | 14 +- workflow/Snakefile | 39 +- workflow/rules/paired_end.snakefile.smk | 5 + workflow/rules/single_end.snakefile.smk | 5 + 28 files changed, 714 insertions(+), 159 deletions(-) create mode 100644 profiles/CookieCutterSlurm.py create mode 100644 profiles/graphs/config.yaml create mode 100644 profiles/local-conda/config.yaml create mode 100644 profiles/local-singularity/config.yaml create mode 100644 profiles/slurm-conda/config.yaml rename tests/input_files/cluster.json => profiles/slurm-config.json (60%) create mode 100755 profiles/slurm-jobscript.sh create mode 100644 profiles/slurm-settings.json create mode 100644 profiles/slurm-singularity/config.yaml create mode 100755 profiles/slurm-status.py create mode 100755 profiles/slurm-submit.py create mode 100644 profiles/slurm_utils.py diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index bd07e11..7e5b49c 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -12,8 +12,8 @@ test: # add code quality tests here # add unit tests here # add script tests here - - bash tests/test_scripts_prepare_inputs_table/test.sh - # - bash tests/test_scripts_prepare_inputs_labkey/test.sh + #- bash tests/test_scripts_prepare_inputs_table/test.sh + #- bash tests/test_scripts_prepare_inputs_labkey/test.sh #- bash tests/test_alfa/test.sh # add integration tests here - bash tests/test_integration_workflow_with_conda/test.local.sh diff --git a/README.md b/README.md index 8567fff..deeeda1 100644 --- a/README.md +++ b/README.md @@ -137,12 +137,12 @@ or bash tests/test_integration_workflow_with_conda/test.slurm.sh ``` -> **NOTE:** Depending on the configuration of your Slurm installation or if -> using a different workload manager, you may need to adapt file `cluster.json` -> and the arguments to options `--config`, `--cores` and `--jobs` in the file -> `test.slurm.sh`, both located in directory `tests/test_integration_workflow`. +> **NOTE:** Depending on the configuration of your Slurm installation you may +> need to adapt file `slurm-config.json` (located directly under `profiles` +> directory) and the arguments to options `--cores` and `--jobs` +> in the file `config.yaml` of a respective profile. > Consult the manual of your workload manager as well as the section of the -> Snakemake manual dealing with [cluster execution]. +> Snakemake manual dealing with [profiles]. ## Running the workflow on your own samples @@ -154,13 +154,11 @@ create a directory for your workflow run and traverse inside it with: cd config/my_run ``` -2. Create empty sample table, workflow configuration and, if necessary, cluster -configuration files: +2. Create an empty sample table and a workflow configuration file: ```bash touch samples.tsv touch config.yaml - touch cluster.json ``` 3. Use your editor of choice to populate these files with appropriate @@ -169,12 +167,12 @@ files should look like, specifically: - [samples.tsv](tests/input_files/samples.tsv) - [config.yaml](tests/input_files/config.yaml) - - [cluster.json](tests/input_files/cluster.json) 4. Create a runner script. Pick one of the following choices for either local -or cluster execution. Before execution of the respective command, you must -replace the data directory placeholders in the argument of the -`--singularity-args` option with a comma-separated list of _all_ directories +or cluster execution. Before execution of the respective command, you need to +remember to update the argument of the `--singularity-args` option of a +respective profile (file: `profiles/{profile}/config.yaml`) so that +it contains a comma-separated list of _all_ directories containing input data files (samples and any annoation files etc) required for your run. @@ -183,21 +181,19 @@ your run. ```bash cat << "EOF" > run.sh #!/bin/bash + snakemake \ - --snakefile="/path/to/Snakefile" \ - --configfile="config.yaml" \ - --cores=4 \ - --printshellcmds \ - --rerun-incomplete \ - --use-singularity \ - --singularity-args="--bind <data_dir_1>,<data_dir_2>,<data_dir_n>" + --profile="../profiles/local-singularity" \ + --configfile="config.yaml" + EOF ``` **OR** Runner script for _Slurm cluster exection_ (note that you may need - to modify the arguments to `--cluster` and `--cores` depending on your HPC + to modify the arguments to `--jobs` and `--cores` in the file: + `profiles/slurm-singularity/config.yaml` depending on your HPC and workload manager configuration): ```bash @@ -205,20 +201,13 @@ your run. #!/bin/bash mkdir -p logs/cluster_log snakemake \ - --snakefile="/path/to/Snakefile" \ - --configfile="config.yaml" \ - --cluster-config="cluster.json" \ - --cluster="sbatch --cpus-per-task={cluster.threads} --mem={cluster.mem} --qos={cluster.queue} --time={cluster.time} --job-name={cluster.name} -o {cluster.out} -p scicore" \ - --cores=256 \ - --jobs=256 \ - --printshellcmds \ - --rerun-incomplete \ - --use-singularity \ - --singularity-args="--bind <data_dir_1>,<data_dir_2>,<data_dir_n>" + --profile="../profiles/slurm-singularity" \ + --configfile="config.yaml" EOF ``` - When running the pipeline with conda you should use the `--use-conda` flag instead of `--use-singularity` and `--singularity-args`. + When running the pipeline with *conda* you should use `local-conda` and + `slurm-conda` profiles instead. 5. Start your workflow run: @@ -335,7 +324,7 @@ Molecule | molecule Contaminant sequences | contaminant_seqs [conda]: <https://docs.conda.io/projects/conda/en/latest/index.html> -[cluster execution]: <https://snakemake.readthedocs.io/en/stable/executing/cluster-cloud.html#cluster-execution> +[profiles]: <https://snakemake.readthedocs.io/en/stable/executing/cli.html#profiles> [labkey]: <https://www.labkey.com/> [miniconda-installation]: <https://docs.conda.io/en/latest/miniconda.html> [rule-graph]: images/rule_graph.svg diff --git a/profiles/CookieCutterSlurm.py b/profiles/CookieCutterSlurm.py new file mode 100644 index 0000000..6ad06cc --- /dev/null +++ b/profiles/CookieCutterSlurm.py @@ -0,0 +1,31 @@ +# +# Based on lsf CookieCutter.py +# +import os +import json + +d = os.path.dirname(__file__) +with open(os.path.join(d, "slurm-settings.json")) as fh: + settings = json.load(fh) + + +class CookieCutter: + + SBATCH_DEFAULTS = settings['SBATCH_DEFAULTS'] + CLUSTER_NAME = settings['CLUSTER_NAME'] + CLUSTER_CONFIG = settings['CLUSTER_CONFIG'] + ADVANCED_ARGUMENT_CONVERSION = settings['ADVANCED_ARGUMENT_CONVERSION'] + + @staticmethod + def get_cluster_option() -> str: + cluster = CookieCutter.CLUSTER_NAME + if cluster != "": + return f"--cluster={cluster}" + return "" + + @staticmethod + def get_advanced_argument_conversion() -> bool: + val = {"yes": True, "no": False}[ + CookieCutter.ADVANCED_ARGUMENT_CONVERSION + ] + return val diff --git a/profiles/graphs/config.yaml b/profiles/graphs/config.yaml new file mode 100644 index 0000000..37ac287 --- /dev/null +++ b/profiles/graphs/config.yaml @@ -0,0 +1,6 @@ +snakefile: "../../workflow/Snakefile" +printshellcmds: true +dryrun: true +verbose: true +notemp: true +no-hooks: true diff --git a/profiles/local-conda/config.yaml b/profiles/local-conda/config.yaml new file mode 100644 index 0000000..af75ebe --- /dev/null +++ b/profiles/local-conda/config.yaml @@ -0,0 +1,8 @@ +snakefile: "../../workflow/Snakefile" +cores: 4 +printshellcmds: true +rerun-incomplete: true +use-conda: true +notemp: true +no-hooks: true +verbose: true diff --git a/profiles/local-singularity/config.yaml b/profiles/local-singularity/config.yaml new file mode 100644 index 0000000..5bf5007 --- /dev/null +++ b/profiles/local-singularity/config.yaml @@ -0,0 +1,9 @@ +snakefile: "../../workflow/Snakefile" +cores: 4 +printshellcmds: true +rerun-incomplete: true +use-singularity: true +singularity-args: "--bind ./../input_files,./../../images" +notemp: true +no-hooks: true +verbose: true diff --git a/profiles/slurm-conda/config.yaml b/profiles/slurm-conda/config.yaml new file mode 100644 index 0000000..2fb0969 --- /dev/null +++ b/profiles/slurm-conda/config.yaml @@ -0,0 +1,12 @@ +jobscript: "../slurm-jobscript.sh" +cluster: "../slurm-submit.py" +cluster-status: "../slurm-status.py" +snakefile: "../../workflow/Snakefile" +cores: 256 +jobs: 256 +printshellcmds: true +rerun-incomplete: true +use-conda: true +notemp: true +no-hooks: true +verbose: true diff --git a/tests/input_files/cluster.json b/profiles/slurm-config.json similarity index 60% rename from tests/input_files/cluster.json rename to profiles/slurm-config.json index 12e9b99..c64e5af 100644 --- a/tests/input_files/cluster.json +++ b/profiles/slurm-config.json @@ -1,197 +1,228 @@ { "__default__" : { - "queue": "6hours", + "qos": "6hours", "time": "01:00:00", - "threads": "1", + "cpus-per-task": "1", "mem": "4G", - "name": "{rule}.{wildcards}", - "out": "logs/cluster/{rule}.{wildcards}-%j-%N.out" + "job-name": "{rule}.{wildcards}", + "output": "{params.cluster_log_path}/{rule}.{wildcards}-%j-%N.out" }, "create_index_star": { + "qos": "6hours", "time": "06:00:00", - "threads":"12", + "cpus-per-task":"12", "mem":"45G" }, "extract_transcripts_as_bed12": { + "qos": "30min", "time": "00:30:00", - "threads":"1", + "cpus-per-task":"1", "mem":"1G" }, "extract_transcriptome": { + "qos": "30min", "time": "00:30:00", - "threads":"1", + "cpus-per-task":"1", "mem":"1G" }, "extract_decoys_salmon": { + "qos": "30min", "time": "00:30:00", - "threads":"1", + "cpus-per-task":"1", "mem":"10G" }, "concatenate_transcriptome_and_genome": { + "qos": "30min", "time": "00:30:00", - "threads":"1", + "cpus-per-task":"1", "mem":"10G" }, "create_index_salmon": { + "qos": "6hours", "time": "03:00:00", - "threads":"8", + "cpus-per-task":"8", "mem":"40G" }, "sort_bed_4_big": { + "qos": "6hours", "time": "03:00:00", - "threads":"8", + "cpus-per-task":"8", "mem":"20G" }, "create_index_kallisto": { + "qos": "30min", "time": "00:30:00", - "threads":"1", + "cpus-per-task":"1", "mem":"10G" }, "index_genomic_alignment_samtools": { + "qos": "30min", "time": "00:30:00", - "threads":"1", + "cpus-per-task":"1", "mem":"500M" }, "star_rpm": { + "qos": "30min", "time": "00:30:00", - "threads":"4", + "cpus-per-task":"4", "mem":"15G" }, "rename_star_rpm_for_alfa": { + "qos": "6hours", "time": "03:00:00", - "threads":"1", + "cpus-per-task":"1", "mem":"32G" }, "calculate_TIN_scores": { + "qos": "6hours", "time": "06:00:00", - "threads":"8", + "cpus-per-task":"8", "mem":"15G" }, "merge_TIN_scores": { + "qos": "30min", "time": "00:05:00", - "threads":"1", + "cpus-per-task":"1", "mem":"500M" }, "plot_TIN_scores": { + "qos": "30min", "time": "00:05:00", - "threads":"1", + "cpus-per-task":"1", "mem":"500M" }, "salmon_quantmerge_genes": { + "qos": "30min", "time": "00:05:00", - "threads":"1", + "cpus-per-task":"1", "mem":"500M" }, "salmon_quantmerge_transcripts": { + "qos": "30min", "time": "00:05:00", - "threads":"1", + "cpus-per-task":"1", "mem":"500M" }, "generate_alfa_index": { + "qos": "6hours", "time": "02:00:00", - "threads":"4", + "cpus-per-task":"4", "mem":"1G" }, "alfa_qc": { + "qos": "30min", "time": "00:30:00", - "threads":"1", + "cpus-per-task":"1", "mem":"500M" }, "alfa_qc_all_samples": { + "qos": "6hours", "time": "01:00:00", - "threads":"1", + "cpus-per-task":"1", "mem":"500M" }, "pe_fastqc": { + "qos": "6hours", "time": "01:00:00", - "threads":"2", + "cpus-per-task":"2", "mem":"1G" }, "fastqc": { + "qos": "6hours", "time": "01:00:00", - "threads":"2", + "cpus-per-task":"2", "mem":"1G" }, "pe_remove_adapters_cutadapt": { + "qos": "6hours", "time": "06:00:00", - "threads":"8", + "cpus-per-task":"8", "mem":"1G" }, "remove_adapters_cutadapt": { + "qos": "6hours", "time": "06:00:00", - "threads":"8", + "cpus-per-task":"8", "mem":"1G" }, "pe_remove_polya_cutadapt": { + "qos": "6hours", "time": "06:00:00", - "threads":"8", + "cpus-per-task":"8", "mem":"1G" }, "remove_polya_cutadapt": { + "qos": "6hours", "time": "06:00:00", - "threads":"8", + "cpus-per-task":"8", "mem":"1G" }, "pe_map_genome_star": { + "qos": "6hours", "time": "06:00:00", - "threads":"12", + "cpus-per-task":"12", "mem":"50G" }, "map_genome_star": { + "qos": "6hours", "time": "06:00:00", - "threads":"12", + "cpus-per-task":"12", "mem":"50G" }, "pe_quantification_salmon": { + "qos": "6hours", "time": "03:00:00", - "threads":"6", + "cpus-per-task":"6", "mem":"20G" }, "quantification_salmon": { + "qos": "6hours", "time": "03:00:00", - "threads":"6", + "cpus-per-task":"6", "mem":"20G" }, "pe_genome_quantification_kallisto": { + "qos": "6hours", "time": "03:00:00", - "threads":"8", + "cpus-per-task":"8", "mem":"15G" }, "genome_quantification_kallisto": { + "qos": "6hours", "time": "03:00:00", - "threads":"8", + "cpus-per-task":"8", "mem":"15G" } } diff --git a/profiles/slurm-jobscript.sh b/profiles/slurm-jobscript.sh new file mode 100755 index 0000000..391741e --- /dev/null +++ b/profiles/slurm-jobscript.sh @@ -0,0 +1,3 @@ +#!/bin/bash +# properties = {properties} +{exec_job} diff --git a/profiles/slurm-settings.json b/profiles/slurm-settings.json new file mode 100644 index 0000000..37e94d7 --- /dev/null +++ b/profiles/slurm-settings.json @@ -0,0 +1,6 @@ +{ + "SBATCH_DEFAULTS": "", + "CLUSTER_NAME": "", + "CLUSTER_CONFIG": "slurm-config.json", + "ADVANCED_ARGUMENT_CONVERSION": "no" +} diff --git a/profiles/slurm-singularity/config.yaml b/profiles/slurm-singularity/config.yaml new file mode 100644 index 0000000..4fc0a6b --- /dev/null +++ b/profiles/slurm-singularity/config.yaml @@ -0,0 +1,13 @@ +jobscript: "../slurm-jobscript.sh" +cluster: "../slurm-submit.py" +cluster-status: "../slurm-status.py" +snakefile: "../../workflow/Snakefile" +cores: 256 +jobs: 256 +printshellcmds: true +rerun-incomplete: true +use-singularity: true +singularity-args: "--bind ./../input_files,./../../images" +notemp: true +no-hooks: true +verbose: true diff --git a/profiles/slurm-status.py b/profiles/slurm-status.py new file mode 100755 index 0000000..9e51349 --- /dev/null +++ b/profiles/slurm-status.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python3 +import re +import subprocess as sp +import shlex +import sys +import time +import logging +from CookieCutterSlurm import CookieCutter + +logger = logging.getLogger("__name__") + +STATUS_ATTEMPTS = 20 + +jobid = sys.argv[1] + +cluster = CookieCutter.get_cluster_option() + +for i in range(STATUS_ATTEMPTS): + try: + sacct_res = sp.check_output(shlex.split(f"sacct {cluster} -P -b -j {jobid} -n")) + res = { + x.split("|")[0]: x.split("|")[1] + for x in sacct_res.decode().strip().split("\n") + } + break + except sp.CalledProcessError as e: + logger.error("sacct process error") + logger.error(e) + except IndexError as e: + logger.error(e) + pass + # Try getting job with scontrol instead in case sacct is misconfigured + try: + sctrl_res = sp.check_output( + shlex.split(f"scontrol {cluster} -o show job {jobid}") + ) + m = re.search(r"JobState=(\w+)", sctrl_res.decode()) + res = {jobid: m.group(1)} + break + except sp.CalledProcessError as e: + logger.error("scontrol process error") + logger.error(e) + if i >= STATUS_ATTEMPTS - 1: + print("failed") + exit(0) + else: + time.sleep(1) + +status = res[jobid] + +if status == "BOOT_FAIL": + print("failed") +elif status == "OUT_OF_MEMORY": + print("failed") +elif status.startswith("CANCELLED"): + print("failed") +elif status == "COMPLETED": + print("success") +elif status == "DEADLINE": + print("failed") +elif status == "FAILED": + print("failed") +elif status == "NODE_FAIL": + print("failed") +elif status == "PREEMPTED": + print("failed") +elif status == "TIMEOUT": + print("failed") +elif status == "SUSPENDED": + print("running") +else: + print("running") diff --git a/profiles/slurm-submit.py b/profiles/slurm-submit.py new file mode 100755 index 0000000..4b7b79e --- /dev/null +++ b/profiles/slurm-submit.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python3 +""" +Snakemake SLURM submit script. +""" +from snakemake.utils import read_job_properties + +import slurm_utils +from CookieCutterSlurm import CookieCutter + +# cookiecutter arguments +SBATCH_DEFAULTS = CookieCutter.SBATCH_DEFAULTS +CLUSTER = CookieCutter.get_cluster_option() +CLUSTER_CONFIG = CookieCutter.CLUSTER_CONFIG +ADVANCED_ARGUMENT_CONVERSION = CookieCutter.get_advanced_argument_conversion() + +RESOURCE_MAPPING = { + "time": ("time", "runtime", "walltime"), + "mem": ("mem", "mem_mb", "ram", "memory"), + "mem-per-cpu": ("mem-per-cpu", "mem_per_cpu", "mem_per_thread"), + "nodes": ("nodes", "nnodes"), +} + +# parse job +jobscript = slurm_utils.parse_jobscript() +job_properties = read_job_properties(jobscript) + +sbatch_options = {} +cluster_config = slurm_utils.load_cluster_config(CLUSTER_CONFIG) + +# 1) sbatch default arguments and cluster +sbatch_options.update(slurm_utils.parse_sbatch_defaults(SBATCH_DEFAULTS)) +sbatch_options.update(slurm_utils.parse_sbatch_defaults(CLUSTER)) + +# 2) cluster_config defaults +sbatch_options.update(cluster_config["__default__"]) + +# 3) Convert resources (no unit conversion!) and threads +sbatch_options.update( + slurm_utils.convert_job_properties(job_properties, RESOURCE_MAPPING) +) + +# 4) cluster_config for particular rule +sbatch_options.update(cluster_config.get(job_properties.get("rule"), {})) + +# 5) cluster_config options +sbatch_options.update(job_properties.get("cluster", {})) + +# 6) Advanced conversion of parameters +if ADVANCED_ARGUMENT_CONVERSION: + sbatch_options = slurm_utils.advanced_argument_conversion(sbatch_options) + +# 7) Format pattern in snakemake style +sbatch_options = slurm_utils.format_values(sbatch_options, job_properties) + +# ensure sbatch output dirs exist +for o in ("output", "error"): + slurm_utils.ensure_dirs_exist(sbatch_options[o]) if o in sbatch_options else None + +# submit job and echo id back to Snakemake (must be the only stdout) +print(slurm_utils.submit_job(jobscript, **sbatch_options)) diff --git a/profiles/slurm_utils.py b/profiles/slurm_utils.py new file mode 100644 index 0000000..9e266fc --- /dev/null +++ b/profiles/slurm_utils.py @@ -0,0 +1,345 @@ +#!/usr/bin/env python3 +import os +import sys +from os.path import dirname +import re +import math +import argparse +import subprocess as sp +from io import StringIO + +from snakemake import io +from snakemake.io import Wildcards +from snakemake.utils import SequenceFormatter +from snakemake.utils import AlwaysQuotedFormatter +from snakemake.utils import QuotedFormatter +from snakemake.exceptions import WorkflowError +from snakemake.logging import logger + +from CookieCutterSlurm import CookieCutter + + +def _convert_units_to_mb(memory): + """If memory is specified with SI unit, convert to MB""" + if isinstance(memory, int) or isinstance(memory, float): + return int(memory) + siunits = {"K": 1e-3, "M": 1, "G": 1e3, "T": 1e6} + regex = re.compile(r"(\d+)({})$".format("|".join(siunits.keys()))) + m = regex.match(memory) + if m is None: + logger.error( + ( + f"unsupported memory specification '{memory}';" + " allowed suffixes: [K|M|G|T]" + ) + ) + sys.exit(1) + factor = siunits[m.group(2)] + return int(int(m.group(1)) * factor) + + +def parse_jobscript(): + """Minimal CLI to require/only accept single positional argument.""" + p = argparse.ArgumentParser(description="SLURM snakemake submit script") + p.add_argument("jobscript", help="Snakemake jobscript with job properties.") + return p.parse_args().jobscript + + +def parse_sbatch_defaults(parsed): + """Unpack SBATCH_DEFAULTS.""" + d = parsed.split() if type(parsed) == str else parsed + args = {} + for keyval in [a.split("=") for a in d]: + k = keyval[0].strip().strip("-") + v = keyval[1].strip() if len(keyval) == 2 else None + args[k] = v + return args + + +def load_cluster_config(path): + """Load config to dict + + Load configuration to dict either from absolute path or relative + to profile dir. + """ + if path: + path = os.path.join(dirname(__file__), os.path.expandvars(path)) + dcc = io.load_configfile(path) + else: + dcc = {} + if "__default__" not in dcc: + dcc["__default__"] = {} + return dcc + + +# adapted from format function in snakemake.utils +def format(_pattern, _quote_all=False, **kwargs): # noqa: A001 + """Format a pattern in Snakemake style. + This means that keywords embedded in braces are replaced by any variable + values that are available in the current namespace. + """ + fmt = SequenceFormatter(separator=" ") + if _quote_all: + fmt.element_formatter = AlwaysQuotedFormatter() + else: + fmt.element_formatter = QuotedFormatter() + try: + return fmt.format(_pattern, **kwargs) + except KeyError as ex: + raise NameError( + f"The name {ex} is unknown in this context. Please " + "make sure that you defined that variable. " + "Also note that braces not used for variable access " + "have to be escaped by repeating them " + ) + + +# adapted from Job.format_wildcards in snakemake.jobs +def format_wildcards(string, job_properties): + """ Format a string with variables from the job. """ + + class Job(object): + def __init__(self, job_properties): + for key in job_properties: + setattr(self, key, job_properties[key]) + + job = Job(job_properties) + if "params" in job_properties: + job._format_params = Wildcards(fromdict=job_properties["params"]) + else: + job._format_params = None + if "wildcards" in job_properties: + job._format_wildcards = Wildcards(fromdict=job_properties["wildcards"]) + else: + job._format_wildcards = None + _variables = dict() + _variables.update( + dict(params=job._format_params, wildcards=job._format_wildcards) + ) + if hasattr(job, "rule"): + _variables.update(dict(rule=job.rule)) + try: + return format(string, **_variables) + except NameError as ex: + raise WorkflowError( + "NameError with group job {}: {}".format(job.jobid, str(ex)) + ) + except IndexError as ex: + raise WorkflowError( + "IndexError with group job {}: {}".format(job.jobid, str(ex)) + ) + + +# adapted from ClusterExecutor.cluster_params function in snakemake.executor +def format_values(dictionary, job_properties): + formatted = dictionary.copy() + for key, value in list(formatted.items()): + if key == "mem": + value = str(_convert_units_to_mb(value)) + if isinstance(value, str): + try: + formatted[key] = format_wildcards(value, job_properties) + except NameError as e: + msg = "Failed to format cluster config " "entry for job {}.".format( + job_properties["rule"] + ) + raise WorkflowError(msg, e) + return formatted + + +def convert_job_properties(job_properties, resource_mapping=None): + options = {} + if resource_mapping is None: + resource_mapping = {} + resources = job_properties.get("resources", {}) + for k, v in resource_mapping.items(): + options.update({k: resources[i] for i in v if i in resources}) + + if "threads" in job_properties: + options["cpus-per-task"] = job_properties["threads"] + return options + + +def ensure_dirs_exist(path): + """Ensure output folder for Slurm log files exist.""" + di = dirname(path) + if di == "": + return + if not os.path.exists(di): + os.makedirs(di, exist_ok=True) + return + + +def format_sbatch_options(**sbatch_options): + """Format sbatch options""" + options = [] + for k, v in sbatch_options.items(): + val = "" + if v is not None: + val = f"={v}" + options.append(f"--{k}{val}") + return options + + +def submit_job(jobscript, **sbatch_options): + """Submit jobscript and return jobid.""" + options = format_sbatch_options(**sbatch_options) + try: + cmd = ["sbatch"] + ["--parsable"] + options + [jobscript] + res = sp.check_output(cmd) + except sp.CalledProcessError as e: + raise e + # Get jobid + res = res.decode() + try: + jobid = re.search(r"(\d+)", res).group(1) + except Exception as e: + raise e + return jobid + + +def advanced_argument_conversion(arg_dict): + """Experimental adjustment of sbatch arguments to the given or default partition.""" + # Currently not adjusting for multiple node jobs + nodes = int(arg_dict.get("nodes", 1)) + if nodes > 1: + return arg_dict + partition = arg_dict.get("partition", None) or _get_default_partition() + constraint = arg_dict.get("constraint", None) + ncpus = int(arg_dict.get("cpus-per-task", 1)) + runtime = arg_dict.get("time", None) + memory = _convert_units_to_mb(arg_dict.get("mem", 0)) + config = _get_cluster_configuration(partition, constraint, memory) + mem = arg_dict.get("mem", ncpus * min(config["MEMORY_PER_CPU"])) + mem = _convert_units_to_mb(mem) + if mem > max(config["MEMORY"]): + logger.info( + f"requested memory ({mem}) > max memory ({max(config['MEMORY'])}); " + "adjusting memory settings" + ) + mem = max(config["MEMORY"]) + + # Calculate available memory as defined by the number of requested + # cpus times memory per cpu + AVAILABLE_MEM = ncpus * min(config["MEMORY_PER_CPU"]) + # Add additional cpus if memory is larger than AVAILABLE_MEM + if mem > AVAILABLE_MEM: + logger.info( + f"requested memory ({mem}) > " + f"ncpus x MEMORY_PER_CPU ({AVAILABLE_MEM}); " + "trying to adjust number of cpus up" + ) + ncpus = int(math.ceil(mem / min(config["MEMORY_PER_CPU"]))) + if ncpus > max(config["CPUS"]): + logger.info( + f"ncpus ({ncpus}) > available cpus ({max(config['CPUS'])}); " + "adjusting number of cpus down" + ) + ncpus = min(int(max(config["CPUS"])), ncpus) + adjusted_args = {"mem": int(mem), "cpus-per-task": ncpus} + + # Update time. If requested time is larger than maximum allowed time, reset + if runtime: + runtime = time_to_minutes(runtime) + time_limit = max(config["TIMELIMIT_MINUTES"]) + if runtime > time_limit: + logger.info( + f"time (runtime) > time limit {time_limit}; " "adjusting time down" + ) + adjusted_args["time"] = time_limit + + # update and return + arg_dict.update(adjusted_args) + return arg_dict + + +timeformats = [ + re.compile(r"^(?P<days>\d+)-(?P<hours>\d+):(?P<minutes>\d+):(?P<seconds>\d+)$"), + re.compile(r"^(?P<days>\d+)-(?P<hours>\d+):(?P<minutes>\d+)$"), + re.compile(r"^(?P<days>\d+)-(?P<hours>\d+)$"), + re.compile(r"^(?P<hours>\d+):(?P<minutes>\d+):(?P<seconds>\d+)$"), + re.compile(r"^(?P<minutes>\d+):(?P<seconds>\d+)$"), + re.compile(r"^(?P<minutes>\d+)$"), +] + + +def time_to_minutes(time): + """Convert time string to minutes. + + According to slurm: + + Acceptable time formats include "minutes", "minutes:seconds", + "hours:minutes:seconds", "days-hours", "days-hours:minutes" + and "days-hours:minutes:seconds". + + """ + if not isinstance(time, str): + time = str(time) + d = {"days": 0, "hours": 0, "minutes": 0, "seconds": 0} + regex = list(filter(lambda regex: regex.match(time) is not None, timeformats)) + if len(regex) == 0: + return + assert len(regex) == 1, "multiple time formats match" + m = regex[0].match(time) + d.update(m.groupdict()) + minutes = ( + int(d["days"]) * 24 * 60 + + int(d["hours"]) * 60 + + int(d["minutes"]) + + math.ceil(int(d["seconds"]) / 60) + ) + assert minutes > 0, "minutes has to be greater than 0" + return minutes + + +def _get_default_partition(): + """Retrieve default partition for cluster""" + cluster = CookieCutter.get_cluster_option() + cmd = f"sinfo -O partition {cluster}" + res = sp.check_output(cmd.split()) + m = re.search(r"(?P<partition>\S+)\*", res.decode(), re.M) + partition = m.group("partition") + return partition + + +def _get_cluster_configuration(partition, constraints=None, memory=0): + """Retrieve cluster configuration. + + Retrieve cluster configuration for a partition filtered by + constraints, memory and cpus + + """ + try: + import pandas as pd + except ImportError: + print( + "Error: currently advanced argument conversion " + "depends on 'pandas'.", file=sys.stderr + ) + sys.exit(1) + + if constraints: + constraint_set = set(constraints.split(",")) + cluster = CookieCutter.get_cluster_option() + cmd = f"sinfo -e -o %all -p {partition} {cluster}".split() + try: + output = sp.Popen(" ".join(cmd), shell=True, stdout=sp.PIPE).communicate() + except Exception as e: + print(e) + raise + data = re.sub("^CLUSTER:.+\n", "", re.sub(" \\|", "|", output[0].decode())) + df = pd.read_csv(StringIO(data), sep="|") + try: + df["TIMELIMIT_MINUTES"] = df["TIMELIMIT"].apply(time_to_minutes) + df["MEMORY_PER_CPU"] = df["MEMORY"] / df["CPUS"] + df["FEATURE_SET"] = df["AVAIL_FEATURES"].str.split(",").apply(set) + except Exception as e: + print(e) + raise + if constraints: + constraint_set = set(constraints.split(",")) + i = df["FEATURE_SET"].apply(lambda x: len(x.intersection(constraint_set)) > 0) + df = df.loc[i] + memory = min(_convert_units_to_mb(memory), max(df["MEMORY"])) + df = df.loc[df["MEMORY"] >= memory] + return df diff --git a/resources/config_schema.json b/resources/config_schema.json index 1ad7846..d5b6ab9 100644 --- a/resources/config_schema.json +++ b/resources/config_schema.json @@ -17,6 +17,10 @@ "type": "string", "description": "Path to log directory." }, + "cluster_log_dir": { + "type": "string", + "description": "Path to cluster log directory." + }, "kallisto_indexes": { "type": "string", "description": "Path to kallisto indexes directory." diff --git a/tests/input_files/config.mutliple_lanes.yml b/tests/input_files/config.mutliple_lanes.yml index 8fb3406..b3e64ea 100644 --- a/tests/input_files/config.mutliple_lanes.yml +++ b/tests/input_files/config.mutliple_lanes.yml @@ -2,6 +2,7 @@ samples: "../input_files/samples.multiple_lanes.tsv" output_dir: "results" log_dir: "logs" + cluster_log_dir: "logs/cluster" kallisto_indexes: "results/kallisto_indexes" salmon_indexes: "results/salmon_indexes" star_indexes: "results/star_indexes" diff --git a/tests/input_files/config.yaml b/tests/input_files/config.yaml index 1fc480c..06a9704 100644 --- a/tests/input_files/config.yaml +++ b/tests/input_files/config.yaml @@ -3,6 +3,7 @@ samples: "../input_files/samples.tsv" output_dir: "results" log_dir: "logs" + cluster_log_dir: "logs/cluster" kallisto_indexes: "results/kallisto_indexes" salmon_indexes: "results/salmon_indexes" star_indexes: "results/star_indexes" diff --git a/tests/test_create_dag_image/test.sh b/tests/test_create_dag_image/test.sh index 87b5dad..68dc100 100755 --- a/tests/test_create_dag_image/test.sh +++ b/tests/test_create_dag_image/test.sh @@ -20,13 +20,7 @@ cd $script_dir # Run tests snakemake \ - --snakefile="../../workflow/Snakefile" \ + --profile="../../profiles/graphs" \ --configfile="../input_files/config.yaml" \ --dag \ - --printshellcmds \ - --dryrun \ - --verbose \ - --notemp \ - --no-hooks \ | dot -Tsvg > "../../images/dag_test_workflow.svg" - diff --git a/tests/test_create_rule_graph/test.sh b/tests/test_create_rule_graph/test.sh index 4e53450..f0bfc36 100755 --- a/tests/test_create_rule_graph/test.sh +++ b/tests/test_create_rule_graph/test.sh @@ -20,13 +20,7 @@ cd $script_dir # Run tests snakemake \ - --snakefile="../../workflow/Snakefile" \ + --profile="../../profiles/graphs" \ --configfile="../input_files/config.yaml" \ --rulegraph \ - --printshellcmds \ - --dryrun \ - --verbose \ - --notemp \ - --no-hooks \ | dot -Tsvg > "../../images/rule_graph.svg" - diff --git a/tests/test_integration_workflow/test.local.sh b/tests/test_integration_workflow/test.local.sh index 201c4ca..68aa229 100755 --- a/tests/test_integration_workflow/test.local.sh +++ b/tests/test_integration_workflow/test.local.sh @@ -26,16 +26,8 @@ cd $script_dir # Run tests snakemake \ - --snakefile="../../workflow/Snakefile" \ - --configfile="../input_files/config.yaml" \ - --cores=4 \ - --printshellcmds \ - --rerun-incomplete \ - --use-singularity \ - --singularity-args="--bind ${PWD}/../input_files,${PWD}/../../images" \ - --notemp \ - --no-hooks \ - --verbose + --profile="../../profiles/local-singularity" \ + --configfile="../input_files/config.yaml" # Create a Snakemake report after the workflow execution snakemake \ diff --git a/tests/test_integration_workflow/test.slurm.sh b/tests/test_integration_workflow/test.slurm.sh index 22f2f52..f847ead 100755 --- a/tests/test_integration_workflow/test.slurm.sh +++ b/tests/test_integration_workflow/test.slurm.sh @@ -26,19 +26,8 @@ cd $script_dir # Run tests snakemake \ - --snakefile="../../workflow/Snakefile" \ - --configfile="../input_files/config.yaml" \ - --cluster-config="../input_files/cluster.json" \ - --cluster="sbatch --cpus-per-task={cluster.threads} --mem={cluster.mem} --qos={cluster.queue} --time={cluster.time} --job-name={cluster.name} -o {cluster.out} -p scicore" \ - --cores=256 \ - --jobs=256 \ - --printshellcmds \ - --rerun-incomplete \ - --use-singularity \ - --singularity-args="--bind ${PWD}/../input_files,${PWD}/../../images" \ - --notemp \ - --no-hooks \ - --verbose + --profile="../../profiles/slurm-singularity" \ + --configfile="../input_files/config.yaml" # Create a Snakemake report after the workflow execution snakemake \ diff --git a/tests/test_integration_workflow_multiple_lanes/test.local.sh b/tests/test_integration_workflow_multiple_lanes/test.local.sh index 48b95e8..01a75ef 100755 --- a/tests/test_integration_workflow_multiple_lanes/test.local.sh +++ b/tests/test_integration_workflow_multiple_lanes/test.local.sh @@ -26,16 +26,8 @@ cd $script_dir # Run tests snakemake \ - --snakefile="../../workflow/Snakefile" \ - --configfile="../input_files/config.mutliple_lanes.yml" \ - --cores=4 \ - --printshellcmds \ - --rerun-incomplete \ - --use-singularity \ - --singularity-args="--bind ${PWD}/../input_files,${PWD}/../../images" \ - --notemp \ - --no-hooks \ - --verbose + --profile="../../profiles/local-singularity" \ + --configfile="../input_files/config.mutliple_lanes.yml" # Create a Snakemake report after the workflow execution snakemake \ diff --git a/tests/test_integration_workflow_multiple_lanes/test.slurm.sh b/tests/test_integration_workflow_multiple_lanes/test.slurm.sh index c16a757..39ff724 100755 --- a/tests/test_integration_workflow_multiple_lanes/test.slurm.sh +++ b/tests/test_integration_workflow_multiple_lanes/test.slurm.sh @@ -26,19 +26,8 @@ cd $script_dir # Run tests snakemake \ - --snakefile="../../workflow/Snakefile" \ - --configfile="../input_files/config.mutliple_lanes.yml" \ - --cluster-config="../input_files/cluster.json" \ - --cluster="sbatch --cpus-per-task={cluster.threads} --mem={cluster.mem} --qos={cluster.queue} --time={cluster.time} --job-name={cluster.name} -o {cluster.out} -p scicore" \ - --cores=256 \ - --jobs=256 \ - --printshellcmds \ - --rerun-incomplete \ - --use-singularity \ - --singularity-args="--bind ${PWD}/../input_files,${PWD}/../../images" \ - --notemp \ - --no-hooks \ - --verbose + --profile="../../profiles/slurm-singularity" \ + --configfile="../input_files/config.mutliple_lanes.yml" # Create a Snakemake report after the workflow execution snakemake \ diff --git a/tests/test_integration_workflow_with_conda/test.local.sh b/tests/test_integration_workflow_with_conda/test.local.sh index 8a29108..bc6263d 100755 --- a/tests/test_integration_workflow_with_conda/test.local.sh +++ b/tests/test_integration_workflow_with_conda/test.local.sh @@ -26,15 +26,8 @@ cd $script_dir # Run tests snakemake \ - --snakefile="../../workflow/Snakefile" \ - --configfile="../input_files/config.yaml" \ - --cores=4 \ - --printshellcmds \ - --rerun-incomplete \ - --use-conda \ - --notemp \ - --no-hooks \ - --verbose + --profile="../../profiles/local-conda" \ + --configfile="../input_files/config.yaml" # Create a Snakemake report after the workflow execution snakemake \ diff --git a/tests/test_integration_workflow_with_conda/test.slurm.sh b/tests/test_integration_workflow_with_conda/test.slurm.sh index 3fe2491..890fd3c 100755 --- a/tests/test_integration_workflow_with_conda/test.slurm.sh +++ b/tests/test_integration_workflow_with_conda/test.slurm.sh @@ -26,18 +26,8 @@ cd $script_dir # Run tests snakemake \ - --snakefile="../../workflow/Snakefile" \ - --configfile="../input_files/config.yaml" \ - --cluster-config="../input_files/cluster.json" \ - --cluster="sbatch --cpus-per-task={cluster.threads} --mem={cluster.mem} --qos={cluster.queue} --time={cluster.time} --job-name={cluster.name} -o {cluster.out} -p scicore" \ - --cores=256 \ - --jobs=256 \ - --printshellcmds \ - --rerun-incomplete \ - --use-conda \ - --notemp \ - --no-hooks \ - --verbose + --profile="../../profiles/slurm-conda" \ + --configfile="../input_files/config.yaml" # Create a Snakemake report after the workflow execution snakemake \ diff --git a/workflow/Snakefile b/workflow/Snakefile index ee50d8b..52317cb 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -37,15 +37,6 @@ except KeyError: rule_config = {} logger.warning(f"No rule config specified: using default values for all tools.") -# Create dir for cluster logs, if applicable -if cluster_config: - os.makedirs( - os.path.join( - os.getcwd(), - os.path.dirname(cluster_config['__default__']['out']), - ), - exist_ok=True) - ## Function definitions @@ -173,6 +164,9 @@ rule start: "{sample}", "start", "{sample}.{mate}.fastq.gz") + + params: + cluster_log_path = config["cluster_log_dir"] log: stderr = os.path.join( @@ -217,6 +211,7 @@ rule fastqc: "{mate}")) params: + cluster_log_path = config["cluster_log_dir"], additional_params = parse_rule_config( rule_config, current_rule=current_rule, @@ -287,6 +282,7 @@ rule create_index_star: "chrName.txt") params: + cluster_log_path = config["cluster_log_dir"], output_dir = os.path.join( config['star_indexes'], "{organism}", @@ -366,6 +362,7 @@ rule extract_transcriptome: "transcriptome.fa")) params: + cluster_log_path = config["cluster_log_dir"], additional_params = parse_rule_config( rule_config, current_rule=current_rule, @@ -422,6 +419,9 @@ rule concatenate_transcriptome_and_genome: "transcriptome", "{organism}", "genome_transcriptome.fa")) + + params: + cluster_log_path = config["cluster_log_dir"] singularity: "docker://ubuntu:focal-20210416" @@ -465,6 +465,7 @@ rule create_index_salmon: "salmon.idx")) params: + cluster_log_path = config["cluster_log_dir"], kmerLen = "{kmer}", additional_params = parse_rule_config( rule_config, @@ -523,6 +524,7 @@ rule create_index_kallisto: "kallisto.idx") params: + cluster_log_path = config["cluster_log_dir"], output_dir = os.path.join( config['kallisto_indexes'], "{organism}"), @@ -573,6 +575,7 @@ rule extract_transcripts_as_bed12: "full_transcripts_protein_coding.bed")) params: + cluster_log_path = config["cluster_log_dir"], additional_params = parse_rule_config( rule_config, current_rule=current_rule, @@ -627,6 +630,7 @@ rule index_genomic_alignment_samtools: "{sample}.{seqmode}.Aligned.sortedByCoord.out.bam.bai") params: + cluster_log_path = config["cluster_log_dir"], additional_params = parse_rule_config( rule_config, current_rule=current_rule, @@ -705,6 +709,7 @@ rule calculate_TIN_scores: "TIN_score.tsv")) params: + cluster_log_path = config["cluster_log_dir"], sample = "{sample}", additional_params = parse_rule_config( rule_config, @@ -769,6 +774,7 @@ rule salmon_quantmerge_genes: "genes_{salmon_merge_on}.tsv") params: + cluster_log_path = config["cluster_log_dir"], salmon_in = expand( os.path.join( config["output_dir"], @@ -855,6 +861,7 @@ rule salmon_quantmerge_transcripts: "transcripts_{salmon_merge_on}.tsv") params: + cluster_log_path = config["cluster_log_dir"], salmon_in = expand( os.path.join( config["output_dir"], @@ -944,6 +951,7 @@ rule kallisto_merge_genes: "genes_counts.tsv") params: + cluster_log_path = config["cluster_log_dir"], dir_out = os.path.join( config["output_dir"], "summary_kallisto"), @@ -1029,6 +1037,7 @@ rule kallisto_merge_transcripts: "transcripts_counts.tsv") params: + cluster_log_path = config["cluster_log_dir"], dir_out = os.path.join( config["output_dir"], "summary_kallisto"), @@ -1095,6 +1104,7 @@ rule pca_salmon: "pca_salmon_{molecule}")) params: + cluster_log_path = config["cluster_log_dir"], additional_params = parse_rule_config( rule_config, current_rule=current_rule, @@ -1144,6 +1154,7 @@ rule pca_kallisto: "pca_kallisto_{molecule}")) params: + cluster_log_path = config["cluster_log_dir"], additional_params = parse_rule_config( rule_config, current_rule=current_rule, @@ -1239,6 +1250,7 @@ rule star_rpm: shadow: "full" params: + cluster_log_path = config["cluster_log_dir"], out_dir = lambda wildcards, output: os.path.dirname(output.str1), prefix = lambda wildcards, output: @@ -1336,6 +1348,9 @@ rule rename_star_rpm_for_alfa: "ALFA", "{unique}", "{sample}.{unique}.minus.bg")) + + params: + cluster_log_path = config["cluster_log_dir"] log: stderr = os.path.join( @@ -1389,6 +1404,7 @@ rule generate_alfa_index: "sorted_genes.unstranded.ALFA_index") params: + cluster_log_path = config["cluster_log_dir"], genome_index = "sorted_genes", out_dir = lambda wildcards, output: os.path.dirname(output.index_stranded), @@ -1484,6 +1500,7 @@ rule alfa_qc: "{sample}.ALFA_feature_counts.tsv") params: + cluster_log_path = config["cluster_log_dir"], out_dir = lambda wildcards, output: os.path.dirname(output.biotypes), genome_index = lambda wildcards, input: @@ -1551,6 +1568,7 @@ rule prepare_multiqc_config: "multiqc_config.yaml") params: + cluster_log_path = config["cluster_log_dir"], logo_path = config['report_logo'], multiqc_intro_text = config['report_description'], url = config['report_url'], @@ -1670,6 +1688,7 @@ rule multiqc_report: "multiqc_summary")) params: + cluster_log_path = config["cluster_log_dir"], results_dir = os.path.join( config["output_dir"]), log_dir = config["log_dir"], @@ -1729,6 +1748,7 @@ rule sort_bed_4_big: "{sample}_{unique}_{strand}.sorted.bg")) params: + cluster_log_path = config["cluster_log_dir"], additional_params = parse_rule_config( rule_config, current_rule=current_rule, @@ -1794,6 +1814,7 @@ rule prepare_bigWig: "{sample}_{unique}_{strand}.bw") params: + cluster_log_path = config["cluster_log_dir"], additional_params = parse_rule_config( rule_config, current_rule=current_rule, diff --git a/workflow/rules/paired_end.snakefile.smk b/workflow/rules/paired_end.snakefile.smk index 76a88f1..d0a9bc2 100644 --- a/workflow/rules/paired_end.snakefile.smk +++ b/workflow/rules/paired_end.snakefile.smk @@ -31,6 +31,7 @@ rule pe_remove_adapters_cutadapt: "{sample}.pe.remove_adapters_mate2.fastq.gz")) params: + cluster_log_path = config["cluster_log_dir"], adapter_3_mate1 = lambda wildcards: get_sample('fq1_3p', search_id='index', search_value=wildcards.sample), adapter_5_mate1 = lambda wildcards: @@ -118,6 +119,7 @@ rule pe_remove_polya_cutadapt: "{sample}.pe.remove_polya_mate2.fastq.gz")) params: + cluster_log_path = config["cluster_log_dir"], polya_3_mate1 = lambda wildcards: get_sample( 'fq1_polya_3p', @@ -234,6 +236,7 @@ rule pe_map_genome_star: shadow: "minimal" params: + cluster_log_path = config["cluster_log_dir"], sample_id = "{sample}", index = lambda wildcards: os.path.abspath(os.path.join( @@ -385,6 +388,7 @@ rule pe_quantification_salmon: shadow: "minimal" params: + cluster_log_path = config["cluster_log_dir"], output_dir = os.path.join( config["output_dir"], "samples", @@ -485,6 +489,7 @@ rule pe_genome_quantification_kallisto: shadow: "minimal" params: + cluster_log_path = config["cluster_log_dir"], output_dir = os.path.join( config["output_dir"], "samples", diff --git a/workflow/rules/single_end.snakefile.smk b/workflow/rules/single_end.snakefile.smk index cd7bb1d..7dfb2c2 100644 --- a/workflow/rules/single_end.snakefile.smk +++ b/workflow/rules/single_end.snakefile.smk @@ -19,6 +19,7 @@ rule remove_adapters_cutadapt: "{sample}.se.remove_adapters_mate1.fastq.gz")) params: + cluster_log_path = config["cluster_log_dir"], adapters_3 = lambda wildcards: get_sample( 'fq1_3p', @@ -93,6 +94,7 @@ rule remove_polya_cutadapt: "{sample}.se.remove_polya_mate1.fastq.gz")) params: + cluster_log_path = config["cluster_log_dir"], polya_3 = lambda wildcards: get_sample( 'fq1_polya_3p', @@ -184,6 +186,7 @@ rule map_genome_star: shadow: "minimal" params: + cluster_log_path = config["cluster_log_dir"], sample_id = "{sample}", index = lambda wildcards: os.path.abspath(os.path.join( @@ -324,6 +327,7 @@ rule quantification_salmon: shadow: "minimal" params: + cluster_log_path = config["cluster_log_dir"], output_dir = os.path.join( config["output_dir"], "samples", @@ -428,6 +432,7 @@ rule genome_quantification_kallisto: shadow: "minimal" params: + cluster_log_path = config["cluster_log_dir"], output_dir = os.path.join( config["output_dir"], "samples", -- GitLab