diff --git a/projects/dark-matter-metagenomics/translate2modelcif.py b/projects/dark-matter-metagenomics/translate2modelcif.py index e7f5102a72a651d1fc5d95f0a5b2991fd54000f1..d3d59886c1f4126cfc7eee104336a85a3e0a2833 100644 --- a/projects/dark-matter-metagenomics/translate2modelcif.py +++ b/projects/dark-matter-metagenomics/translate2modelcif.py @@ -23,7 +23,7 @@ import modelcif.model import modelcif.protocol import modelcif.reference -from ost import io, geom +from ost import io, geom, mol # EXAMPLE for running: @@ -144,7 +144,7 @@ def _compare_ent(ent1, ent2, at_occupancy_thresh=0.01, return False return True -def _compare_pdbs(f1, f2): +def _compare_pdbs(f_name, f1, f2): """Use atom-by-atom comparison on PDB files allowing num. errors.""" # first do simple file diff. if filecmp.cmp(f1, f2): @@ -153,7 +153,22 @@ def _compare_pdbs(f1, f2): ent1 = io.LoadPDB(f1) ent2 = io.LoadPDB(f2) # allow a bit more errors as input files can have rounding errors - return _compare_ent(ent1, ent2, 0.011, 0.011, 0.0011, True, False, True) + if _compare_ent(ent1, ent2, 0.011, 0.011, 0.0011, True, False, True): + return True + else: + # check manually and give warning... + atom_names_1 = [a.qualified_name for a in ent1.atoms] + atom_names_2 = [a.qualified_name for a in ent2.atoms] + assert atom_names_1 == atom_names_2 + b_diffs = [abs(a1.b_factor - a2.b_factor) \ + for a1, a2 in zip(ent1.atoms, ent2.atoms)] + max_b_diff = max(b_diffs) + rmsd = mol.alg.CalculateRMSD(ent1.Select(""), ent2.Select("")) + _warn_msg( + f"PDB file mismatch web vs top-ranked for {f_name}: " + f"RMSD {rmsd:.3f}, max. b_factor diff {max_b_diff:.3f}" + ) + return False ################################################################################ @@ -1057,11 +1072,8 @@ def _translate2modelcif_single( # sanity check (only for top ranked model!) if mdl_rank == 1 and opts.pdb_web_path is not None: pdb_file_web = os.path.join(opts.pdb_web_path, f"{f_name}.pdb") - if not _compare_pdbs(pdb_file, pdb_file_web): - # for now just as warning...(TODO: CHECK) - _warn_msg( - f"PDB file mismatch web vs top-ranked for " f"{f_name}" - ) + # warning handled in compare function... + _compare_pdbs(f_name, pdb_file, pdb_file_web) # get scores for this entry mdlcf_json["plddt_global"] = metadata.pLDDT