diff --git a/validation/validate-mmcif-file.py b/validation/validate-mmcif-file.py
index eaf28ddd0cf53a6e5b019d0fd6ecaad9c500d82e..4c79c3e0106c70d74919df6850fafaebe94bfc83 100755
--- a/validation/validate-mmcif-file.py
+++ b/validation/validate-mmcif-file.py
@@ -20,6 +20,7 @@ import argparse
 import atexit
 import copy
 import os
+import re
 import subprocess
 import sys
 import tempfile
@@ -85,6 +86,14 @@ def _parse_command_line():
         + "issues.",
         default=None,
     )
+    parser.add_argument(
+        "--report",
+        "-r",
+        action="store_true",
+        help="Print a concise report. Skips redundancies, may lose some "
+        + "information. Usable to verify your own writer on single files. "
+        + "Resolving problems of the report very often fixes the whole file.",
+    )
     parser.add_argument(
         "--verbose",
         "-v",
@@ -691,6 +700,135 @@ class _CifCheck:
             msgs["cifcheck-errors"]
         )
 
+    def make_report(self):
+        """Print a concise, de-duplicated report of the check to stdout.
+
+        Condenses the "diagnosis" messages into sets of missing categories,
+        missing items and mismatching parent/ child relationships. Be aware
+        that this cuts away the majority of the messages, but solving the
+        reported issues first may already repair a mmCIF file.
+
+        Raises NotImplementedError if a result category the report does not
+        support yet ("errors") is not empty, and RuntimeError for diagnosis
+        lines of unknown format or for conflicting datablock names.
+        """
+        not_implemented = ["errors"]
+        for category in not_implemented:
+            if len(self.check_results[category]) > 0:
+                raise NotImplementedError(
+                    f"Results for category '{category}' not yet supported in "
+                    + "report."
+                )
+        print("Report")
+        print("======")
+        print(f"Status of check: {self.check_results['status']}")
+        print("CIF dictionaries used:")
+        for dct in self.check_results["versions"]:
+            print(f" {dct['title']}/ {dct['version']}")
+            print(f" {dct['location']}")
+
+        # condense diagnosis data
+        rprt = {
+            "missing_cats": set(),
+            "missing_itms": set(),
+            "parchild_mm": set(),
+        }
+        for line in self.check_results["diagnosis"]:
+            # missing categories
+            for pttrn in [
+                r"^ERROR - category \"(?P<cat>.*)\" is mandatory, but it is "
+                + r"not present in datablock \"(?P<dblock>.*)\"$",
+                r"^\+\+ERROR - In block \"(?P<dblock>.*)\", parent category "
+                + r"\"(?P<cat>.*)\", of category \".*\", is missing\.$",
+            ]:
+                match = re.match(pttrn, line)
+                if match is not None:
+                    rprt["missing_cats"].add(match.group("cat"))
+                    _check_dblock_name(match.group("dblock"), rprt)
+                    break
+            if match is not None:
+                continue
+            # missing items
+            for pttrn in [
+                r"^ERROR - In block \"(?P<dblock>.*)\", mandatory "
+                + r"item \"(?P<itm>.*)\" is not in category \"(?P<cat>.*)\"$"
+            ]:
+                match = re.match(pttrn, line)
+                if match is not None:
+                    rprt["missing_itms"].add(
+                        f"{match.group('cat')}.{match.group('itm')}"
+                    )
+                    _check_dblock_name(match.group("dblock"), rprt)
+                    break
+            if match is not None:
+                continue
+            # parent-child issues
+            match = re.match(
+                r"^ERROR PARCHILD \".*\" - In block \"(?P<dblock>.*)\", in "
+                + r"category \"(?P<chld>.*)\", in row\# \d+, unmatched value "
+                + r"in the parent \"(?P<prnt>.*)\"$",
+                line,
+            )
+            if match is not None:
+                rprt["parchild_mm"].add(
+                    f"{match.group('chld')}->{match.group('prnt')}"
+                )
+                _check_dblock_name(match.group("dblock"), rprt)
+                continue
+            match = re.match(
+                r"^\"(?P<chld>.*)\" -> \"(?P<prnt>.*)\" value =(?P<vle>.*)$",
+                line,
+            )
+            if match is not None:
+                # prepare a string to be removed from parchild_mm
+                chld = match.group("chld").split(".")[0][1:]
+                prnt = match.group("prnt").split(".")[0][1:]
+                try:
+                    rprt["parchild_mm"].remove(f"{chld}->{prnt}")
+                except KeyError:
+                    pass
+                # add a more verbose line instead
+                rprt["parchild_mm"].add(
+                    f"{match.group('chld')}->{match.group('prnt')}, "
+                    + f"value={match.group('vle')}"
+                )
+                continue
+            # Unmatched lines need to be added to above evaluation
+            raise RuntimeError(
+                f'Unmatched diagnosis line found:\n"""{line}"""'
+            )
+
+        # print above evaluation in the report
+        print("Diagnosis:")
+        # The datablock name is only set once a diagnosis line mentions it,
+        # so fall back to a placeholder if no line did (e.g. no diagnosis).
+        print(" Datablock/ entry name:", rprt.get("datablock", "unknown"))
+        # Like the other sections, only print the header if there is content.
+        if len(rprt["missing_cats"]) > 0:
+            print(" Missing categories:")
+            for line in sorted(rprt["missing_cats"]):
+                print(f" {line}")
+        if len(rprt["missing_itms"]) > 0:
+            print(" Missing items:")
+            for line in sorted(rprt["missing_itms"]):
+                print(f" {line}")
+        if len(rprt["parchild_mm"]) > 0:
+            print(" Mismatching parent/ child relationships:")
+            for line in sorted(rprt["parchild_mm"]):
+                print(f" {line}")
+
+
+def _check_dblock_name(name, report):
+    """Remember the datablock name or check it against the remembered one.
+
+    The first name seen is stored as report["datablock"]. A RuntimeError is
+    raised if a later name differs from the stored one."""
+    known = report.setdefault("datablock", name)
+    if known != name:
+        raise RuntimeError(
+            f"Two different datablock (names) found: {known} vs {name}"
+        )
+
 
 def _find_utf(line):
     """Try to find a word with an UTF character in a string."""
@@ -773,6 +911,10 @@ def _main():
             print(f"Writing results of CifCheck to '{opts.out_file}'")
         cifcheck.make_json_output()
 
+    # print a report to stdout
+    if opts.report:
+        cifcheck.make_report()
+
     if cifcheck.got_issues():
         # If CifCheck found issues with the mmCIF file, exit with code 2. Exit
         # code 1 is reserved for general issues running the command, like "file