From 1d3ace6ec5dc7639479f20f33e067f0730becf2d Mon Sep 17 00:00:00 2001 From: Dominik Burri <dominik.burri@unibas.ch> Date: Thu, 15 Jul 2021 13:40:02 +0000 Subject: [PATCH] Validation of configuration file with a json schema `resources/config_schema.json` using the snakemake utility **validate**. The schema includes all the fields from `tests/input_files/config.yaml`, assigns certain fields as required and forbids the use of additional fields. All of the optional fields include default values, taken from the config above, except `rule_config` which has no default. Added author name and author email as optional fields in `tests/input_files/config.yaml`, and added it into the multiqc report. --- resources/config_schema.json | 66 +++++++++++++++++++++ tests/input_files/config.mutliple_lanes.yml | 2 +- tests/input_files/config.yaml | 6 +- tests/input_files/config_alfa.yaml | 13 ++-- workflow/Snakefile | 29 ++++++--- 5 files changed, 99 insertions(+), 17 deletions(-) create mode 100644 resources/config_schema.json diff --git a/resources/config_schema.json b/resources/config_schema.json new file mode 100644 index 0000000..1ad7846 --- /dev/null +++ b/resources/config_schema.json @@ -0,0 +1,66 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Configuration schema", + "required": ["samples", "output_dir", "log_dir", "kallisto_indexes", "salmon_indexes", "star_indexes", "alfa_indexes"], + "type": "object", + "additionalProperties": false, + "properties": { + "samples": { + "type": "string", + "description": "Path to samples table." + }, + "output_dir": { + "type": "string", + "description": "Path to output directory." + }, + "log_dir": { + "type": "string", + "description": "Path to log directory." + }, + "kallisto_indexes": { + "type": "string", + "description": "Path to kallisto indexes directory." + }, + "salmon_indexes": { + "type": "string", + "description": "Path to salmon indexes directory." 
+ }, + "star_indexes": { + "type": "string", + "description": "Path to star indexes directory." + }, + "alfa_indexes": { + "type": "string", + "description": "Path to alfa indexes directory." + }, + "rule_config": { + "type": "string", + "description": "Path to rule configuration file." + }, + "report_description": { + "type": "string", + "description": "Description of the run to appear in the multiqc report.", + "default": "No description provided by user." + }, + "report_logo": { + "type": "string", + "description": "Relative path to image to display as logo in multiqc report.", + "default": "../../images/logo.128px.png" + }, + "report_url": { + "type": "string", + "description": "URL to appear in multiqc report.", + "default": "https://zavolan.biozentrum.unibas.ch/" + }, + "author_name": { + "type": "string", + "description": "Full author name to display in multiqc report.", + "default": "NA" + }, + "author_email": { + "type": "string", + "description": "Author e-mail address to display in multiqc report.", + "default": "NA" + } + } +} diff --git a/tests/input_files/config.mutliple_lanes.yml b/tests/input_files/config.mutliple_lanes.yml index 3d04a01..8fb3406 100644 --- a/tests/input_files/config.mutliple_lanes.yml +++ b/tests/input_files/config.mutliple_lanes.yml @@ -1,12 +1,12 @@ --- samples: "../input_files/samples.multiple_lanes.tsv" - rule_config: "../input_files/rule_config.yaml" output_dir: "results" log_dir: "logs" kallisto_indexes: "results/kallisto_indexes" salmon_indexes: "results/salmon_indexes" star_indexes: "results/star_indexes" alfa_indexes: "results/alfa_indexes" + rule_config: "../input_files/rule_config.yaml" report_description: "No description provided by user" report_logo: "../../images/logo.128px.png" report_url: "https://zavolan.biozentrum.unibas.ch/" diff --git a/tests/input_files/config.yaml b/tests/input_files/config.yaml index 853a8a7..1fc480c 100644 --- a/tests/input_files/config.yaml +++ b/tests/input_files/config.yaml @@ -1,13 +1,17 
@@ --- + # Required fields samples: "../input_files/samples.tsv" - rule_config: "../input_files/rule_config.yaml" output_dir: "results" log_dir: "logs" kallisto_indexes: "results/kallisto_indexes" salmon_indexes: "results/salmon_indexes" star_indexes: "results/star_indexes" alfa_indexes: "results/alfa_indexes" + # Optional fields + rule_config: "../input_files/rule_config.yaml" report_description: "No description provided by user" report_logo: "../../images/logo.128px.png" report_url: "https://zavolan.biozentrum.unibas.ch/" + author_name: "NA" + author_email: "NA" ... diff --git a/tests/input_files/config_alfa.yaml b/tests/input_files/config_alfa.yaml index cdcfb8b..5edcd7e 100644 --- a/tests/input_files/config_alfa.yaml +++ b/tests/input_files/config_alfa.yaml @@ -1,11 +1,12 @@ --- samples: "../input_files/samples_alfa.tsv" - output_dir: "results/" - log_dir: "logs/" - kallisto_indexes: "results/kallisto_indexes/" - salmon_indexes: "results/salmon_indexes/" - star_indexes: "results/star_indexes/" - alfa_indexes: "results/alfa_indexes/" + output_dir: "results" + log_dir: "logs" + kallisto_indexes: "results/kallisto_indexes" + salmon_indexes: "results/salmon_indexes" + star_indexes: "results/star_indexes" + alfa_indexes: "results/alfa_indexes" + rule_config: "../input_files/rule_config.yaml" report_description: "No description provided by user" report_logo: "../../images/logo.128px.png" report_url: "https://zavolan.biozentrum.unibas.ch/" diff --git a/workflow/Snakefile b/workflow/Snakefile index d46c511..fc268e4 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -5,6 +5,7 @@ import shutil import yaml from shlex import quote from typing import Tuple +from snakemake.utils import validate ## Preparations # Get sample table @@ -17,16 +18,22 @@ samples_table = pd.read_csv( sep="\t", ) +# Validate config +validate(config, os.path.join("..", "resources", "config_schema.json")) +logger.info(f'Config file after validation: {config}') + # Parse YAML rule config file
-if 'rule_config' in config and config['rule_config']: - try: - with open(config['rule_config']) as _file: - rule_config = yaml.safe_load(_file) - logger.info(f"Loaded rule_config from {config['rule_config']}.") - except FileNotFoundError: - logger.error(f"No rule config file found at {config['rule_config']}. Either provide file or remove rule_config parameter from config.yaml! ") - raise -else: +try: + with open(config['rule_config']) as _file: + rule_config = yaml.safe_load(_file) + logger.info(f"Loaded rule_config from {config['rule_config']}.") +except TypeError: + logger.error(f'No string supplied at field "rule_config", but: {type(config["rule_config"])} with content: {config["rule_config"]}') + raise +except FileNotFoundError: + logger.error(f"No rule config file found at {config['rule_config']}. Either provide file or remove rule_config parameter from config.yaml! ") + raise +except KeyError: rule_config = {} logger.warning(f"No rule config specified: using default values for all tools.") @@ -1547,6 +1554,8 @@ rule prepare_multiqc_config: logo_path = config['report_logo'], multiqc_intro_text = config['report_description'], url = config['report_url'], + author_name = config['author_name'], + author_email = config['author_email'], additional_params = parse_rule_config( rule_config, current_rule=current_rule, @@ -1572,6 +1581,8 @@ rule prepare_multiqc_config: --intro-text '{params.multiqc_intro_text}' \ --custom-logo {params.logo_path} \ --url '{params.url}' \ + --author-name {params.author_name} \ + --author-email {params.author_email} \ {params.additional_params}) \ 1> {log.stdout} 2> {log.stderr}" -- GitLab