From 00d3f7418cfb5d9b2de97da6edb3e22815aeadf1 Mon Sep 17 00:00:00 2001
From: Stefan Bienert <stefan.bienert@unibas.ch>
Date: Thu, 9 Nov 2023 14:31:49 +0100
Subject: [PATCH] Use reference DB info from JSON input

---
 convert_to_modelcif.py | 58 +++++++++++++++++++++++++-----------------
 1 file changed, 34 insertions(+), 24 deletions(-)

diff --git a/convert_to_modelcif.py b/convert_to_modelcif.py
index a2c3747..c6dcd81 100755
--- a/convert_to_modelcif.py
+++ b/convert_to_modelcif.py
@@ -364,38 +364,48 @@ def _cast_release_date(release_date):
         raise
 
 
+def _cmp_ref_dbs(db_dct, db_objs):
+    """Return True if any ReferenceDatabase in db_objs has db_dct's release
+    date and version plus one of its URLs. Note: does not check the DB name!"""
+    for obj in db_objs:
+        if db_dct["release_date"] != obj.release_date:
+            continue  # release date differs, try the next object
+        if db_dct["version"] != obj.version:
+            continue  # version differs, try the next object
+        for url in db_dct["location_url"]:
+            if url == obj.url:
+                return True  # a single matching URL is enough
+
+    return False
+
+
 def _get_modelcif_ref_dbs(meta_json):
     """Get sequence databases used for monomer features."""
     # vendor formatting for DB names/ URLs, extend on KeyError
     # ToDo: adapt to new JSON input
-    sdb_dct = {}  # 'sequence database list', starts as dict
+    sdb_lst = {}  # 'sequence database list': maps DB name to a list of
+    # ReferenceDatabase objects, needed to compare DBs between monomers.
+    i = 0  # NOTE(review): incremented below but never read in this function
     for data in meta_json.values():
+        i += 1
         for db_name, vdct in data["databases"].items():
-            if vdct["version"] == "NA":
-                vdct["version"] = None
             vdct["release_date"] = _cast_release_date(vdct["release_date"])
-            # if DB already exists, check URL and version
-            if db_name in sdb_dct:
-                # ToDo: switch URL to the actual URL read from JSON
-                if (
-                    sdb_dct[db_name].version != vdct["version"]
-                    or sdb_dct[db_name].url != vdct["location_url"][0]
-                ):
-                    raise RuntimeError(
-                        "Database versions or URLs differ for "
-                        + f"'{db_name}': '{sdb_dct[db_name].version}/ "
-                        + f"{sdb_dct[db_name].url}' vs. '{vdct['version']}/ "
-                        + f"{vdct['location_url'][0]}'"
+            if db_name in sdb_lst:
+                if _cmp_ref_dbs(vdct, sdb_lst[db_name]):
+                    continue  # _cmp_ref_dbs() found an equal entry, skip
+            else:
+                sdb_lst[db_name] = []  # first time this DB name is seen
+            for url in vdct["location_url"]:  # one object per URL
+                sdb_lst[db_name].append(
+                    modelcif.ReferenceDatabase(
+                        db_name,
+                        url,
+                        version=vdct["version"],
+                        release_date=vdct["release_date"],
                     )
-            # ToDo: deal with DBs with multiple URLs
-            sdb_dct[db_name] = modelcif.ReferenceDatabase(
-                db_name,
-                vdct["location_url"][0],
-                version=vdct["version"],
-                release_date=vdct["release_date"],
-            )
+                )
 
-    return sdb_dct.values()
+    return [x for sublist in sdb_lst.values() for x in sublist]  # flatten
 
 
 def _store_as_modelcif(
@@ -472,7 +482,7 @@ def _store_as_modelcif(
             system.target_entities,
             model,
             sw_dct,
-            # ToDo: _storte_as_modelcif should not use __meta__, __meta__ is
+            # ToDo: _store_as_modelcif should not use __meta__, __meta__ is
             #       tool specific
             _get_modelcif_ref_dbs(data_json["__meta__"]),
         )
-- 
GitLab