From 8a3c8bb02f5552d77fba4b46881938cef77e37e4 Mon Sep 17 00:00:00 2001
From: B13nch3n <b13nch3n_01@theb-si.de>
Date: Tue, 6 Feb 2024 11:01:42 +0100
Subject: [PATCH] PEP8 for validator code, improved dev-mode in validator
 container

---
 pyproject.toml                    | 20 ++++++++++++++++++++
 validation/Dockerfile             | 20 ++++++++++++++++----
 validation/validate-mmcif-file.py | 20 ++++++++++++++++----
 3 files changed, 52 insertions(+), 8 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 4805e0a..c0103a1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,3 +12,23 @@ extension-pkg-allow-list='rapidjson'
 
 [tool.pylint.FORMAT]
 max-line-length=80
+
+[tool.pylint.deprecated_builtins]
+# We want to use proper logging, so we can control *ALL* output by the Abseil
+# logger, hence: deprecate 'print'
+bad-functions = ["map", "filter", "print"]
+
+# Run the spell check every once in a while; having it always enabled is too
+# annoying.
+[tool.pylint.spelling]
+max-spelling-suggestions = 4
+
+spelling-dict = "en_GB"
+
+spelling-ignore-comment-directives = "fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy:"
+
+spelling-ignore-words = ""
+
+spelling-private-dict-file = ".spelling"
+
+spelling-store-unknown-words = false
diff --git a/validation/Dockerfile b/validation/Dockerfile
index 01d3c8d..a75f3a9 100644
--- a/validation/Dockerfile
+++ b/validation/Dockerfile
@@ -1,5 +1,5 @@
 ARG VERSION_PYTHON="3.9"
-ARG VERSION_BASE_IMAGE="python:${VERSION_PYTHON}-alpine3.17"
+ARG VERSION_BASE_IMAGE="python:${VERSION_PYTHON}-alpine3.19"
 FROM ${VERSION_BASE_IMAGE}
 # We need to declare ARGs again which were declared before the build stage
 # (FROM directive), otherwise they won't be available in this stage.
@@ -141,10 +141,22 @@ RUN set -e pipefail; \
 ARG ADD_DEV
 RUN set -e pipefail; \
     if test xYES = x`echo ${ADD_DEV} | tr '[:lower:]' '[:upper:]'`; then \
-      apk add bash emacs gcc build-base; \
-      /usr/local/bin/python -m pip install pylint black; \
+      apk add bash \
+              binutils \
+              build-base \
+              emacs \
+              enchant2-dev \
+              gcc \
+              hunspell-en-gb \
+              py3-enchant; \
+      /usr/local/bin/python -m pip install pylint[spelling] black; \
       apk del gcc build-base; \
-    fi
+    fi; \
+    # the alias assumes you are in the directory containing the code \
+    echo "alias black_n_pylint=\"black --config ../pyproject.toml " \
+         "test-suite.py validate-mmcif-file.py && pylint " \
+         "--rc-file=../pyproject.toml test-suite.py validate-mmcif-file.py\"" \
+         >> /etc/bash/bashrc
 
 ## Add a dedicated user for mmCIF file validation
 ## MMCIF_USER_ID can be used to avoid file permission issues in development.
diff --git a/validation/validate-mmcif-file.py b/validation/validate-mmcif-file.py
index e3e4d0a..b76bea0 100755
--- a/validation/validate-mmcif-file.py
+++ b/validation/validate-mmcif-file.py
@@ -490,7 +490,11 @@ class _CifCheck:
 
     def __init__(self, dict_sdb, json_out_file=None, verbose=False):
         self._version = None
-        self.check_results = {"errors": [], "diagnosis": [], "cifcheck-errors": []}
+        self.check_results = {
+            "errors": [],
+            "diagnosis": [],
+            "cifcheck-errors": [],
+        }
         self.dict_sdb = os.path.abspath(dict_sdb)
         self.json_out_file = json_out_file
         self.verbose = verbose
@@ -812,12 +816,20 @@ class _CifCheck:
             "missing_files": {},
         }
         for line in self.check_results["errors"]:
-            match = re.match(r"ma_entry_associated_files.file_url '(?P<arc>.*)' is missing ma_associated_archive_file_details.file_path '(?P<fle>.*)'", line)
+            match = re.match(
+                r"ma_entry_associated_files.file_url '(?P<arc>.*)' is missing "
+                + r"ma_associated_archive_file_details.file_path '(?P<fle>.*)'",
+                line,
+            )
             if match is not None:
                 try:
-                    rprt["missing_files"][match.group('arc')].append(match.group('fle'))
+                    rprt["missing_files"][match.group("arc")].append(
+                        match.group("fle")
+                    )
                 except KeyError:
-                    rprt["missing_files"][match.group('arc')] = [match.group('fle')]
+                    rprt["missing_files"][match.group("arc")] = [
+                        match.group("fle")
+                    ]
                 continue
             # Unmatched lines need to be added to above evaluation
             raise RuntimeError(f'Unmatched error line found:\n"""{line}"""')
-- 
GitLab