From 074bbd476c8f7dab9f837c1e934d7952b4d5e714 Mon Sep 17 00:00:00 2001 From: Gabriel Studer <gabriel.studer@unibas.ch> Date: Thu, 22 Dec 2022 08:38:57 +0100 Subject: [PATCH] OMF unit tests Add unit tests to extract several biounits and compare with PDBize. Problem here is that PDBize has some non-standard logics implemented. Maybe needs some thinking on how the biounit business should be solved in OMF. Or drop it alltogether and make it meta information? --- modules/io/tests/test_io_omf.py | 69 ++++++++++++++++++++++++++------- 1 file changed, 54 insertions(+), 15 deletions(-) diff --git a/modules/io/tests/test_io_omf.py b/modules/io/tests/test_io_omf.py index bbb954a61..840d9f42d 100644 --- a/modules/io/tests/test_io_omf.py +++ b/modules/io/tests/test_io_omf.py @@ -20,11 +20,12 @@ def compare_atoms(a1, a2, occupancy_thresh = 0.01, bfactor_thresh = 0.01, def compare_residues(r1, r2, at_occupancy_thresh = 0.01, at_bfactor_thresh = 0.01, at_dist_thresh = 0.001, - skip_ss = False): + skip_ss = False, skip_rnums=False): if r1.GetName() != r2.GetName(): return False - if r1.GetNumber() != r2.GetNumber(): - return False + if skip_rnums is False: + if r1.GetNumber() != r2.GetNumber(): + return False if skip_ss is False: if str(r1.GetSecStructure()) != str(r2.GetSecStructure()): return False @@ -51,7 +52,7 @@ def compare_residues(r1, r2, at_occupancy_thresh = 0.01, def compare_chains(ch1, ch2, at_occupancy_thresh = 0.01, at_bfactor_thresh = 0.01, at_dist_thresh = 0.001, - skip_ss=False): + skip_ss=False, skip_rnums=False): if len(ch1.residues) != len(ch2.residues): return False for r1, r2 in zip(ch1.residues, ch2.residues): @@ -59,7 +60,7 @@ def compare_chains(ch1, ch2, at_occupancy_thresh = 0.01, at_occupancy_thresh = at_occupancy_thresh, at_bfactor_thresh = at_bfactor_thresh, at_dist_thresh = at_dist_thresh, - skip_ss = skip_ss): + skip_ss = skip_ss, skip_rnums=skip_rnums): return False return True @@ -76,23 +77,27 @@ def compare_bonds(ent1, ent2): def compare_ent(ent1, ent2, at_occupancy_thresh = 0.01, at_bfactor_thresh = 0.01, at_dist_thresh = 0.001, - skip_ss=False): + skip_ss=False, skip_cnames = False, skip_bonds = False, + skip_rnums=False): chain_names_one = [ch.GetName() for ch in ent1.chains] chain_names_two = [ch.GetName() for ch in ent2.chains] - if not sorted(chain_names_one) == sorted(chain_names_two): - return False - chain_names = chain_names_one - for chain_name in chain_names: - ch1 = ent1.FindChain(chain_name) - ch2 = ent2.FindChain(chain_name) + if skip_cnames: + # only check whether we have the same number of chains + if len(chain_names_one) != len(chain_names_two): + return False + else: + if chain_names_one != chain_names_two: + return False + for ch1, ch2 in zip(ent1.chains, ent2.chains): if not compare_chains(ch1, ch2, at_occupancy_thresh = at_occupancy_thresh, at_bfactor_thresh = at_bfactor_thresh, at_dist_thresh = at_dist_thresh, - skip_ss=skip_ss): + skip_ss=skip_ss, skip_rnums=skip_rnums): + return False + if not skip_bonds: + if not compare_bonds(ent1, ent2): return False - if not compare_bonds(ent1, ent2): - return False return True class TestOMF(unittest.TestCase): @@ -200,6 +205,40 @@ class TestOMF(unittest.TestCase): self.assertTrue(len(omf_infer_pep_bonds_bytes) < len(omf_bytes)) self.assertTrue(compare_ent(self.ent, loaded_ent)) + def test_multiple_BU(self): + ent, seqres, info = io.LoadMMCIF("testfiles/mmcif/3imj.cif.gz", + seqres=True, + info=True) + + omf = io.OMF.FromMMCIF(ent, info) + omf_bytes = omf.ToBytes() + omf_loaded = io.OMF.FromBytes(omf_bytes) + + # there are quite some discrepancies between PDBize and OMF + # - chain names: PDBize has specific chain names for ligands and + # water etc. OMF just iterates A, B, C, D, ... + # - skip_bonds: Thats qualified atom name based. PDBize used rnums + # and insertion codes for waters... + # - skip_rnums: Again, insertion codes for waters... + self.assertTrue(compare_ent(omf_loaded.GetBU(0), + info.GetBioUnits()[0].PDBize(ent), + skip_cnames=True, skip_bonds=True, + skip_rnums=True)) + + self.assertTrue(compare_ent(omf_loaded.GetBU(1), + info.GetBioUnits()[1].PDBize(ent), + skip_cnames=True, skip_bonds=True, + skip_rnums=True)) + + # no check for the full guy... problem: PDBize throws all water + # molecules in the same chain, whereas OMF keeps them separate + # as in the chains from the assymetric unit... maybe needs some + # thinking on how to resolve discrepancies between PDBize and OMF + #self.assertTrue(compare_ent(omf_loaded.GetBU(2), + # info.GetBioUnits()[2].PDBize(ent), + # skip_cnames=True, skip_bonds=True, + # skip_rnums=True)) + if __name__== '__main__': from ost import testutils if testutils.SetDefaultCompoundLib(): -- GitLab