From 00d3f7418cfb5d9b2de97da6edb3e22815aeadf1 Mon Sep 17 00:00:00 2001
From: Stefan Bienert <stefan.bienert@unibas.ch>
Date: Thu, 9 Nov 2023 14:31:49 +0100
Subject: [PATCH] Use reference DB info from JSON input

---
Note (review): this copy of the patch had its line breaks and indentation
collapsed; both were restored so that the hunks match their headers and the
diffstat. Indentation of context lines belonging to code outside the hunks
(the leading `raise`, the call arguments in the second hunk) is reconstructed
and should be confirmed against the target file; use
`git apply --ignore-whitespace` if a hunk is rejected.

 convert_to_modelcif.py | 58 +++++++++++++++++++++++++-----------------
 1 file changed, 34 insertions(+), 24 deletions(-)

diff --git a/convert_to_modelcif.py b/convert_to_modelcif.py
index a2c3747..c6dcd81 100755
--- a/convert_to_modelcif.py
+++ b/convert_to_modelcif.py
@@ -364,38 +364,48 @@ def _cast_release_date(release_date):
         raise
 
 
+def _cmp_ref_dbs(db_dct, db_objs):
+    """Compare a reference DB dict to a list of ReferenceDatabase objects.
+    Note: does not check the DB name!"""
+    for obj in db_objs:
+        if db_dct["release_date"] != obj.release_date:
+            continue
+        if db_dct["version"] != obj.version:
+            continue
+        for url in db_dct["location_url"]:
+            if url == obj.url:
+                return True
+
+    return False
+
+
 def _get_modelcif_ref_dbs(meta_json):
     """Get sequence databases used for monomer features."""
     # vendor formatting for DB names/ URLs, extend on KeyError
     # ToDo: adapt to new JSON input
-    sdb_dct = {}  # 'sequence database list', starts as dict
+    sdb_lst = {}  # 'sequence database list' starts as dict since we need to
+    # compare DBs between the different monomers.
+    i = 0
     for data in meta_json.values():
+        i += 1
         for db_name, vdct in data["databases"].items():
-            if vdct["version"] == "NA":
-                vdct["version"] = None
             vdct["release_date"] = _cast_release_date(vdct["release_date"])
-            # if DB already exists, check URL and version
-            if db_name in sdb_dct:
-                # ToDo: switch URL to the actual URL read from JSON
-                if (
-                    sdb_dct[db_name].version != vdct["version"]
-                    or sdb_dct[db_name].url != vdct["location_url"][0]
-                ):
-                    raise RuntimeError(
-                        "Database versions or URLs differ for "
-                        + f"'{db_name}': '{sdb_dct[db_name].version}/ "
-                        + f"{sdb_dct[db_name].url}' vs. '{vdct['version']}/ "
-                        + f"{vdct['location_url'][0]}'"
+            if db_name in sdb_lst:
+                if _cmp_ref_dbs(vdct, sdb_lst[db_name]):
+                    continue
+            else:
+                sdb_lst[db_name] = []
+            for url in vdct["location_url"]:
+                sdb_lst[db_name].append(
+                    modelcif.ReferenceDatabase(
+                        db_name,
+                        url,
+                        version=vdct["version"],
+                        release_date=vdct["release_date"],
                     )
-            # ToDo: deal with DBs with multiple URLs
-            sdb_dct[db_name] = modelcif.ReferenceDatabase(
-                db_name,
-                vdct["location_url"][0],
-                version=vdct["version"],
-                release_date=vdct["release_date"],
-            )
+                )
 
-    return sdb_dct.values()
+    return [x for sublist in sdb_lst.values() for x in sublist]
 
 
 def _store_as_modelcif(
@@ -472,7 +482,7 @@ def _store_as_modelcif(
         system.target_entities,
         model,
         sw_dct,
-        # ToDo: _storte_as_modelcif should not use __meta__, __meta__ is
+        # ToDo: _store_as_modelcif should not use __meta__, __meta__ is
         # tool specific
         _get_modelcif_ref_dbs(data_json["__meta__"]),
     )
-- 
GitLab