diff --git a/modules/conop/doc/cleanup.rst b/modules/conop/doc/cleanup.rst new file mode 100644 index 0000000000000000000000000000000000000000..35b20e5705b248a995ec61c5e06604c53df0767f --- /dev/null +++ b/modules/conop/doc/cleanup.rst @@ -0,0 +1,8 @@ +:mod:`conop.cleanup <ost.conop.cleanup>` -- Sanitize structures +================================================================================ + +.. module:: ost.conop.cleanup + :synopsis: Contains functions to sanitize (cleanup) structures by using + information from the compound library. + +.. autofunction:: ost.conop.cleanup.Cleanup \ No newline at end of file diff --git a/modules/conop/doc/conop.rst b/modules/conop/doc/conop.rst index 8c3f413469872a1d95f9da98e37f4e21ffb24bfc..fb72db2645a92ea70f3db4f686c0f33a35c7e956 100644 --- a/modules/conop/doc/conop.rst +++ b/modules/conop/doc/conop.rst @@ -20,3 +20,4 @@ In this module aminoacid connectivity compoundlib + cleanup \ No newline at end of file diff --git a/modules/conop/pymod/CMakeLists.txt b/modules/conop/pymod/CMakeLists.txt index 5affa4b457d780b7368b34a097bec832d2c3f36c..97de72d7a52f1a74233a9782e746bd596620ffc7 100644 --- a/modules/conop/pymod/CMakeLists.txt +++ b/modules/conop/pymod/CMakeLists.txt @@ -7,4 +7,4 @@ set(OST_CONOP_PYMOD_SOURCES export_ring_finder.cc ) -pymod(NAME conop CPP ${OST_CONOP_PYMOD_SOURCES} PY __init__.py) \ No newline at end of file +pymod(NAME conop CPP ${OST_CONOP_PYMOD_SOURCES} PY __init__.py cleanup.py) diff --git a/modules/conop/pymod/cleanup.py b/modules/conop/pymod/cleanup.py new file mode 100644 index 0000000000000000000000000000000000000000..9ecf9c7ef12b8bb4fce889a5fb8062aab2364eda --- /dev/null +++ b/modules/conop/pymod/cleanup.py @@ -0,0 +1,167 @@ +from ost import conop, mol + +def Cleanup(entity, strip_water=True, canonicalize=True, remove_ligands=True): + """ + This function returns a cleaned-up (simplified) version of the protein + structure. Different parameters affect the behaviour of the function. 
+ + :param strip_water: Whether to remove water from the structure + :param canonicalize: Whether to strip off modifications of amino acids and map + them back to their parent standard amino acid, e.g. selenium methionine to + methionine.For more complex amino acids, where the relation between the + modified and the standard parent amino acid is not known, sidechain atoms + are removed. D-peptide-linking residues are completely removed as well. + :param remove_ligands: Whether to remove ligands from the structure + + :return: a cleaned version of the entity + """ + #setup + builder = conop.GetBuilder() + if not hasattr(builder, "compound_lib") : + raise RuntimeError( "Cannot cleanup structure, since the default builder doesn't use the compound library") + compound_lib = builder.compound_lib + clean_entity = entity.Copy() + ed = clean_entity.EditXCS() + #remove water residues + if strip_water: + _StripWater(clean_entity, ed) + #replace modified residues before removing ligands to avoid removing MSE and others + if canonicalize: + _CanonicalizeResidues(clean_entity, ed, compound_lib) + #remove all hetatoms that are not water + if remove_ligands: + _RemoveLigands(clean_entity, ed) + return clean_entity + + +def _StripWater(clean_entity, ed) : + """ + This function removes water residues from the structure + """ + for res in clean_entity.residues: + if res.IsValid(): + if res.chem_class == mol.WATER: + ed.DeleteResidue(res.handle) + ed.UpdateICS() + return + +def _RemoveLigands(clean_entity, ed) : + """ + This function removes ligands from the structure + """ + for res in clean_entity.residues: + if res.IsValid(): + #WHEN mmCIF WILL BE USED, CHANGE IsPeptideLinking() TO IsProtein() + if not res.IsPeptideLinking() and res.atoms[0].is_hetatom and res.chem_class != mol.WATER: + ed.DeleteResidue(res.handle) + ed.UpdateICS() + return + +def _CanonicalizeResidues(clean_entity, ed, compound_lib) : + """ + This function strips off modifications of amino acids and maps + them 
back to their parent standard amino acid, e.g. selenium methionine to + methionine.For more complex amino acids, where the relation between the + modified and the standard parent amino acid is not known, sidechain atoms + are removed. D-peptide-linking residues are completely removed as well. + """ + + for res in clean_entity.residues: + if res.IsValid() and res.IsPeptideLinking() : + parent_olc = res.one_letter_code + if parent_olc == "X" : + _DeleteSidechain(res, ed) + for atom in res.atoms: + atom.is_hetatom = False + else: + parent_tlc = conop.OneLetterCodeToResidueName(parent_olc) + parent_res = compound_lib.FindCompound(parent_tlc) + if not parent_res: + _DeleteSidechain(res, ed) + for atom in res.atoms: + atom.is_hetatom = False + print "Removing sidechain of %s, beacuse it has not been found in the compound library"% parent_tlc + else: + #collect atom's names + modif_atom_names = set([atom.name for atom in res.atoms + if atom.element != "H" and atom.element != "D" ]) + #if the res is the first or last take all the atoms from the parent res + if res.FindAtom("OXT").IsValid() : + parent_atom_names = set([atom.name for atom in parent_res.atom_specs + if atom.element != "H" and atom.element != "D" ]) + else: + parent_atom_names = set([atom.name for atom in parent_res.atom_specs + if atom.element != "H" and atom.element != "D" and not atom.is_leaving ]) + additional_parent_atoms = parent_atom_names - modif_atom_names + additional_modif_atoms = modif_atom_names - parent_atom_names + #WHEN mmCIF WILL BE USED, CHANGE IsPeptideLinking() TO IsProtein(), TO EXCLUDE LIGANDS FROM CANONICALISATION + if res.atoms[0].is_hetatom : + old_name = res.name + ed.RenameResidue(res, parent_tlc) + if additional_parent_atoms: + if additional_modif_atoms: + #replacement + _Replacement(res, ed, old_name) + else: + #deletion + _Deletion(res, ed) + elif additional_modif_atoms: + #addition + _Addition(res, ed, additional_modif_atoms) + else: + #unchanged, later check stereochemistry or H 
atoms + _Unchanged(res, ed) + #the res is a peptide but not a ligand (is a protein res) + else: + if additional_parent_atoms:# if the sidechain is incomplete + _DeleteSidechain(res, ed) + ed.UpdateICS() + return + +def _Replacement(res, ed, old_name) : + #TEMP ONLY MSE + if old_name == "MSE" : + for atom in res.atoms: + atom.is_hetatom = False + sel = res.FindAtom("SE") + if sel.IsValid() : + ed.InsertAtom( res, "SD", sel.pos, "S", sel.occupancy, sel.b_factor ) #S radius=~1;SE=~1.2 + ed.DeleteAtom( sel ) + else: + _DeleteSidechain(res, ed) + else: + _DeleteSidechain(res, ed) + return + +def _Deletion(res, ed) : + _DeleteSidechain(res, ed) + for atom in res.atoms : + atom.is_hetatom = False + return + +def _Addition(res, ed, additional_modif_atoms) : + for add_atom_name in additional_modif_atoms: + add_atom = res.FindAtom( add_atom_name ) + if add_atom.IsValid() : + ed.DeleteAtom( add_atom ) + for atom in res.atoms: + atom.is_hetatom = False + return + +def _Unchanged(res, ed) : + if res.chem_class == mol.D_PEPTIDE_LINKING: + ed.DeleteResidue(res) + else: + _DeleteSidechain(res, ed) + for atom in res.atoms : + atom.is_hetatom = False + return + +def _DeleteSidechain(res, ed) : + for atom in res.atoms: + if not atom.name in ['CA','CB','C','N','O']: + ed.DeleteAtom(atom) + return + +#visible functions +__all__ = [Cleanup] diff --git a/modules/conop/tests/CMakeLists.txt b/modules/conop/tests/CMakeLists.txt index aa06791bf0be4d64640499010893d9db179f3e33..1af92df10be705d46354506d72df5c0c645be732 100644 --- a/modules/conop/tests/CMakeLists.txt +++ b/modules/conop/tests/CMakeLists.txt @@ -4,6 +4,7 @@ set(OST_CONOP_UNIT_TESTS tests.cc test_builder.cc test_compound.py + test_cleanup.py ) ost_unittest(MODULE conop diff --git a/modules/conop/tests/sample_noligands.pdb b/modules/conop/tests/sample_noligands.pdb new file mode 100644 index 0000000000000000000000000000000000000000..7fd199276d45124bbe2b2bc6ddf7d47b780d9401 --- /dev/null +++ 
b/modules/conop/tests/sample_noligands.pdb @@ -0,0 +1,65 @@ +HETATM 1 N MSE A 1 16.152 35.832 19.337 1.00 68.79 N +ANISOU 1 N MSE A 1 9680 7396 9061 1038 -716 -25 N +HETATM 2 CA MSE A 1 16.961 36.379 20.419 1.00 66.57 C +ANISOU 2 CA MSE A 1 9387 7212 8694 1121 -806 40 C +HETATM 3 C MSE A 1 18.345 36.796 19.931 1.00 62.52 C +ANISOU 3 C MSE A 1 8685 6846 8225 1266 -870 -107 C +HETATM 4 O MSE A 1 19.030 36.049 19.227 1.00 60.36 O +ANISOU 4 O MSE A 1 8358 6511 8064 1396 -922 -219 O +HETATM 5 CB MSE A 1 17.100 35.372 21.563 1.00 69.52 C +ANISOU 5 CB MSE A 1 9961 7405 9049 1204 -927 192 C +HETATM 6 CG MSE A 1 17.608 35.983 22.861 1.00 69.50 C +ANISOU 6 CG MSE A 1 9977 7515 8914 1239 -1006 292 C +HETATM 7 SE MSE A 1 16.174 36.321 24.145 0.70149.62 SE +ANISOU 7 SE MSE A 1 20306 17658 18883 1013 -917 492 SE +HETATM 8 CE MSE A 1 16.439 38.231 24.424 1.00101.32 C +ANISOU 8 CE MSE A 1 13992 11855 12651 950 -853 413 C +ATOM 9 N GLY A 2 21.960 55.913 14.093 1.00 32.26 N +ANISOU 9 N GLY A 2 3786 4717 3755 -417 -110 -697 N +ATOM 10 CA GLY A 2 21.067 57.037 14.316 1.00 33.47 C +ANISOU 10 CA GLY A 2 4071 4728 3919 -478 -95 -611 C +ATOM 11 C GLY A 2 20.632 57.066 15.769 1.00 28.81 C +ANISOU 11 C GLY A 2 3536 4062 3349 -382 -126 -599 C +ATOM 12 O GLY A 2 19.474 57.360 16.089 1.00 26.62 O +ANISOU 12 O GLY A 2 3358 3653 3104 -343 -120 -528 O +HETATM 694 N MLY A 3 26.382 48.690 2.460 1.00 30.43 N +ANISOU 694 N MLY A 3 2388 5919 3254 -646 317 -1852 N +HETATM 695 CA MLY A 3 27.776 48.333 2.142 1.00 32.62 C +ANISOU 695 CA MLY A 3 2438 6444 3514 -645 358 -2093 C +HETATM 696 CB MLY A 3 28.523 49.535 1.556 1.00 34.25 C +ANISOU 696 CB MLY A 3 2578 6874 3563 -918 442 -2107 C +HETATM 697 CG MLY A 3 28.053 50.058 0.208 1.00 51.47 C +ANISOU 697 CG MLY A 3 4819 9152 5586 -1157 527 -2046 C +HETATM 698 CD MLY A 3 29.170 50.911 -0.420 1.00 65.96 C +ANISOU 698 CD MLY A 3 6527 11268 7265 -1416 622 -2140 C +HETATM 699 CE MLY A 3 28.648 51.881 -1.478 1.00 78.91 C +ANISOU 699 CE MLY A 3 8300 12955 8728 
-1697 684 -1985 C +HETATM 700 NZ MLY A 3 27.996 53.093 -0.885 1.00 83.04 N +ANISOU 700 NZ MLY A 3 9040 13267 9244 -1780 633 -1724 N +HETATM 701 CH1 MLY A 3 27.490 53.898 -2.008 1.00 82.17 C +ANISOU 701 CH1 MLY A 3 9059 13197 8964 -2028 679 -1578 C +HETATM 702 CH2 MLY A 3 29.059 53.885 -0.248 1.00 83.95 C +ANISOU 702 CH2 MLY A 3 9084 13473 9342 -1885 652 -1769 C +HETATM 703 C MLY A 3 28.551 47.875 3.386 1.00 35.38 C +ANISOU 703 C MLY A 3 2698 6760 3986 -436 275 -2175 C +HETATM 704 O MLY A 3 29.262 46.867 3.369 1.00 36.09 O +ANISOU 704 O MLY A 3 2632 6928 4151 -274 255 -2373 O +HETATM 24 N DHA A 4 26.289 27.329 2.438 1.00 21.02 N +HETATM 25 CA DHA A 4 26.295 27.688 3.823 1.00 20.17 C +HETATM 26 CB DHA A 4 27.128 28.481 4.578 1.00 25.40 C +HETATM 27 C DHA A 4 25.128 27.215 4.536 1.00 14.98 C +HETATM 28 O DHA A 4 24.918 27.318 5.770 1.00 15.17 O +ATOM 1454 N CYS A 5 35.381 45.298 39.476 1.00 31.23 N +ATOM 1455 CA CYS A 5 35.559 43.873 39.703 1.00 26.90 C +ATOM 1456 C CYS A 5 34.291 43.354 40.319 1.00 28.31 C +ATOM 1457 OXT CYS A 5 33.569 44.119 40.933 1.00 32.71 O +ATOM 1458 CB CYS A 5 36.760 43.592 40.596 1.00 27.44 C +ATOM 1460 H CYS A 5 34.717 45.766 40.024 1.00 0.00 H +HETATM 1345 N DAL A 6 16.130 53.915 24.417 1.00 8.63 N +HETATM 1346 CA DAL A 6 16.958 55.083 24.235 1.00 24.17 C +HETATM 1347 CB DAL A 6 16.321 56.394 24.733 1.00 30.20 C +HETATM 1348 C DAL A 6 17.335 55.218 22.790 1.00 32.54 C +HETATM 1349 O DAL A 6 16.693 54.552 21.946 1.00 27.41 O +HETATM 1350 OXT DAL A 6 18.286 55.960 22.546 1.00 18.81 O +HETATM 36 O HOH A 19 0.180 48.781 4.764 1.00 23.28 O +END diff --git a/modules/conop/tests/sample_nowater.pdb b/modules/conop/tests/sample_nowater.pdb new file mode 100644 index 0000000000000000000000000000000000000000..0f8c440aa83c10dec2cfffe71cbe524a15d0af15 --- /dev/null +++ b/modules/conop/tests/sample_nowater.pdb @@ -0,0 +1,71 @@ +HETATM 1 N MSE A 1 16.152 35.832 19.337 1.00 68.79 N +ANISOU 1 N MSE A 1 9680 7396 9061 1038 -716 -25 N +HETATM 2 CA MSE A 1 
16.961 36.379 20.419 1.00 66.57 C +ANISOU 2 CA MSE A 1 9387 7212 8694 1121 -806 40 C +HETATM 3 C MSE A 1 18.345 36.796 19.931 1.00 62.52 C +ANISOU 3 C MSE A 1 8685 6846 8225 1266 -870 -107 C +HETATM 4 O MSE A 1 19.030 36.049 19.227 1.00 60.36 O +ANISOU 4 O MSE A 1 8358 6511 8064 1396 -922 -219 O +HETATM 5 CB MSE A 1 17.100 35.372 21.563 1.00 69.52 C +ANISOU 5 CB MSE A 1 9961 7405 9049 1204 -927 192 C +HETATM 6 CG MSE A 1 17.608 35.983 22.861 1.00 69.50 C +ANISOU 6 CG MSE A 1 9977 7515 8914 1239 -1006 292 C +HETATM 7 SE MSE A 1 16.174 36.321 24.145 0.70149.62 SE +ANISOU 7 SE MSE A 1 20306 17658 18883 1013 -917 492 SE +HETATM 8 CE MSE A 1 16.439 38.231 24.424 1.00101.32 C +ANISOU 8 CE MSE A 1 13992 11855 12651 950 -853 413 C +ATOM 9 N GLY A 2 21.960 55.913 14.093 1.00 32.26 N +ANISOU 9 N GLY A 2 3786 4717 3755 -417 -110 -697 N +ATOM 10 CA GLY A 2 21.067 57.037 14.316 1.00 33.47 C +ANISOU 10 CA GLY A 2 4071 4728 3919 -478 -95 -611 C +ATOM 11 C GLY A 2 20.632 57.066 15.769 1.00 28.81 C +ANISOU 11 C GLY A 2 3536 4062 3349 -382 -126 -599 C +ATOM 12 O GLY A 2 19.474 57.360 16.089 1.00 26.62 O +ANISOU 12 O GLY A 2 3358 3653 3104 -343 -120 -528 O +HETATM 694 N MLY A 3 26.382 48.690 2.460 1.00 30.43 N +ANISOU 694 N MLY A 3 2388 5919 3254 -646 317 -1852 N +HETATM 695 CA MLY A 3 27.776 48.333 2.142 1.00 32.62 C +ANISOU 695 CA MLY A 3 2438 6444 3514 -645 358 -2093 C +HETATM 696 CB MLY A 3 28.523 49.535 1.556 1.00 34.25 C +ANISOU 696 CB MLY A 3 2578 6874 3563 -918 442 -2107 C +HETATM 697 CG MLY A 3 28.053 50.058 0.208 1.00 51.47 C +ANISOU 697 CG MLY A 3 4819 9152 5586 -1157 527 -2046 C +HETATM 698 CD MLY A 3 29.170 50.911 -0.420 1.00 65.96 C +ANISOU 698 CD MLY A 3 6527 11268 7265 -1416 622 -2140 C +HETATM 699 CE MLY A 3 28.648 51.881 -1.478 1.00 78.91 C +ANISOU 699 CE MLY A 3 8300 12955 8728 -1697 684 -1985 C +HETATM 700 NZ MLY A 3 27.996 53.093 -0.885 1.00 83.04 N +ANISOU 700 NZ MLY A 3 9040 13267 9244 -1780 633 -1724 N +HETATM 701 CH1 MLY A 3 27.490 53.898 -2.008 1.00 82.17 C 
+ANISOU 701 CH1 MLY A 3 9059 13197 8964 -2028 679 -1578 C +HETATM 702 CH2 MLY A 3 29.059 53.885 -0.248 1.00 83.95 C +ANISOU 702 CH2 MLY A 3 9084 13473 9342 -1885 652 -1769 C +HETATM 703 C MLY A 3 28.551 47.875 3.386 1.00 35.38 C +ANISOU 703 C MLY A 3 2698 6760 3986 -436 275 -2175 C +HETATM 704 O MLY A 3 29.262 46.867 3.369 1.00 36.09 O +ANISOU 704 O MLY A 3 2632 6928 4151 -274 255 -2373 O +HETATM 24 N DHA A 4 26.289 27.329 2.438 1.00 21.02 N +HETATM 25 CA DHA A 4 26.295 27.688 3.823 1.00 20.17 C +HETATM 26 CB DHA A 4 27.128 28.481 4.578 1.00 25.40 C +HETATM 27 C DHA A 4 25.128 27.215 4.536 1.00 14.98 C +HETATM 28 O DHA A 4 24.918 27.318 5.770 1.00 15.17 O +ATOM 1454 N CYS A 5 35.381 45.298 39.476 1.00 31.23 N +ATOM 1455 CA CYS A 5 35.559 43.873 39.703 1.00 26.90 C +ATOM 1456 C CYS A 5 34.291 43.354 40.319 1.00 28.31 C +ATOM 1457 OXT CYS A 5 33.569 44.119 40.933 1.00 32.71 O +ATOM 1458 CB CYS A 5 36.760 43.592 40.596 1.00 27.44 C +ATOM 1460 H CYS A 5 34.717 45.766 40.024 1.00 0.00 H +HETATM 1345 N DAL A 6 16.130 53.915 24.417 1.00 8.63 N +HETATM 1346 CA DAL A 6 16.958 55.083 24.235 1.00 24.17 C +HETATM 1347 CB DAL A 6 16.321 56.394 24.733 1.00 30.20 C +HETATM 1348 C DAL A 6 17.335 55.218 22.790 1.00 32.54 C +HETATM 1349 O DAL A 6 16.693 54.552 21.946 1.00 27.41 O +HETATM 1350 OXT DAL A 6 18.286 55.960 22.546 1.00 18.81 O +HETATM 29 C1 GOL A 17 3.793 59.768 8.209 1.00 31.00 C +HETATM 30 O1 GOL A 17 3.244 58.473 8.337 1.00 27.42 O +HETATM 31 C2 GOL A 17 4.701 60.020 9.406 1.00 26.81 C +HETATM 32 O2 GOL A 17 5.573 58.919 9.512 1.00 26.44 O +HETATM 33 C3 GOL A 17 5.505 61.287 9.156 1.00 24.74 C +HETATM 34 O3 GOL A 17 6.429 61.468 10.222 1.00 31.06 O +HETATM 35 CL CL A 18 11.844 59.221 16.755 0.79 32.84 CL +END diff --git a/modules/conop/tests/sample_test_cleanup.pdb b/modules/conop/tests/sample_test_cleanup.pdb new file mode 100644 index 0000000000000000000000000000000000000000..cce7fe04d4fe753d68c9e73620fea8617ce07059 --- /dev/null +++ 
b/modules/conop/tests/sample_test_cleanup.pdb @@ -0,0 +1,72 @@ +HETATM 1 N MSE A 1 16.152 35.832 19.337 1.00 68.79 N +ANISOU 1 N MSE A 1 9680 7396 9061 1038 -716 -25 N +HETATM 2 CA MSE A 1 16.961 36.379 20.419 1.00 66.57 C +ANISOU 2 CA MSE A 1 9387 7212 8694 1121 -806 40 C +HETATM 3 C MSE A 1 18.345 36.796 19.931 1.00 62.52 C +ANISOU 3 C MSE A 1 8685 6846 8225 1266 -870 -107 C +HETATM 4 O MSE A 1 19.030 36.049 19.227 1.00 60.36 O +ANISOU 4 O MSE A 1 8358 6511 8064 1396 -922 -219 O +HETATM 5 CB MSE A 1 17.100 35.372 21.563 1.00 69.52 C +ANISOU 5 CB MSE A 1 9961 7405 9049 1204 -927 192 C +HETATM 6 CG MSE A 1 17.608 35.983 22.861 1.00 69.50 C +ANISOU 6 CG MSE A 1 9977 7515 8914 1239 -1006 292 C +HETATM 7 SE MSE A 1 16.174 36.321 24.145 0.70149.62 SE +ANISOU 7 SE MSE A 1 20306 17658 18883 1013 -917 492 SE +HETATM 8 CE MSE A 1 16.439 38.231 24.424 1.00101.32 C +ANISOU 8 CE MSE A 1 13992 11855 12651 950 -853 413 C +ATOM 9 N GLY A 2 21.960 55.913 14.093 1.00 32.26 N +ANISOU 9 N GLY A 2 3786 4717 3755 -417 -110 -697 N +ATOM 10 CA GLY A 2 21.067 57.037 14.316 1.00 33.47 C +ANISOU 10 CA GLY A 2 4071 4728 3919 -478 -95 -611 C +ATOM 11 C GLY A 2 20.632 57.066 15.769 1.00 28.81 C +ANISOU 11 C GLY A 2 3536 4062 3349 -382 -126 -599 C +ATOM 12 O GLY A 2 19.474 57.360 16.089 1.00 26.62 O +ANISOU 12 O GLY A 2 3358 3653 3104 -343 -120 -528 O +HETATM 694 N MLY A 3 26.382 48.690 2.460 1.00 30.43 N +ANISOU 694 N MLY A 3 2388 5919 3254 -646 317 -1852 N +HETATM 695 CA MLY A 3 27.776 48.333 2.142 1.00 32.62 C +ANISOU 695 CA MLY A 3 2438 6444 3514 -645 358 -2093 C +HETATM 696 CB MLY A 3 28.523 49.535 1.556 1.00 34.25 C +ANISOU 696 CB MLY A 3 2578 6874 3563 -918 442 -2107 C +HETATM 697 CG MLY A 3 28.053 50.058 0.208 1.00 51.47 C +ANISOU 697 CG MLY A 3 4819 9152 5586 -1157 527 -2046 C +HETATM 698 CD MLY A 3 29.170 50.911 -0.420 1.00 65.96 C +ANISOU 698 CD MLY A 3 6527 11268 7265 -1416 622 -2140 C +HETATM 699 CE MLY A 3 28.648 51.881 -1.478 1.00 78.91 C +ANISOU 699 CE MLY A 3 8300 12955 8728 
-1697 684 -1985 C +HETATM 700 NZ MLY A 3 27.996 53.093 -0.885 1.00 83.04 N +ANISOU 700 NZ MLY A 3 9040 13267 9244 -1780 633 -1724 N +HETATM 701 CH1 MLY A 3 27.490 53.898 -2.008 1.00 82.17 C +ANISOU 701 CH1 MLY A 3 9059 13197 8964 -2028 679 -1578 C +HETATM 702 CH2 MLY A 3 29.059 53.885 -0.248 1.00 83.95 C +ANISOU 702 CH2 MLY A 3 9084 13473 9342 -1885 652 -1769 C +HETATM 703 C MLY A 3 28.551 47.875 3.386 1.00 35.38 C +ANISOU 703 C MLY A 3 2698 6760 3986 -436 275 -2175 C +HETATM 704 O MLY A 3 29.262 46.867 3.369 1.00 36.09 O +ANISOU 704 O MLY A 3 2632 6928 4151 -274 255 -2373 O +HETATM 24 N DHA A 4 26.289 27.329 2.438 1.00 21.02 N +HETATM 25 CA DHA A 4 26.295 27.688 3.823 1.00 20.17 C +HETATM 26 CB DHA A 4 27.128 28.481 4.578 1.00 25.40 C +HETATM 27 C DHA A 4 25.128 27.215 4.536 1.00 14.98 C +HETATM 28 O DHA A 4 24.918 27.318 5.770 1.00 15.17 O +ATOM 1454 N CYS A 5 35.381 45.298 39.476 1.00 31.23 N +ATOM 1455 CA CYS A 5 35.559 43.873 39.703 1.00 26.90 C +ATOM 1456 C CYS A 5 34.291 43.354 40.319 1.00 28.31 C +ATOM 1457 OXT CYS A 5 33.569 44.119 40.933 1.00 32.71 O +ATOM 1458 CB CYS A 5 36.760 43.592 40.596 1.00 27.44 C +ATOM 1460 H CYS A 5 34.717 45.766 40.024 1.00 0.00 H +HETATM 1345 N DAL A 6 16.130 53.915 24.417 1.00 8.63 N +HETATM 1346 CA DAL A 6 16.958 55.083 24.235 1.00 24.17 C +HETATM 1347 CB DAL A 6 16.321 56.394 24.733 1.00 30.20 C +HETATM 1348 C DAL A 6 17.335 55.218 22.790 1.00 32.54 C +HETATM 1349 O DAL A 6 16.693 54.552 21.946 1.00 27.41 O +HETATM 1350 OXT DAL A 6 18.286 55.960 22.546 1.00 18.81 O +HETATM 29 C1 GOL A 17 3.793 59.768 8.209 1.00 31.00 C +HETATM 30 O1 GOL A 17 3.244 58.473 8.337 1.00 27.42 O +HETATM 31 C2 GOL A 17 4.701 60.020 9.406 1.00 26.81 C +HETATM 32 O2 GOL A 17 5.573 58.919 9.512 1.00 26.44 O +HETATM 33 C3 GOL A 17 5.505 61.287 9.156 1.00 24.74 C +HETATM 34 O3 GOL A 17 6.429 61.468 10.222 1.00 31.06 O +HETATM 35 CL CL A 18 11.844 59.221 16.755 0.79 32.84 CL +HETATM 36 O HOH A 19 0.180 48.781 4.764 1.00 23.28 O +END diff --git 
a/modules/conop/tests/test_cleanup.py b/modules/conop/tests/test_cleanup.py new file mode 100644 index 0000000000000000000000000000000000000000..ee6ab57cae413a550e1cc6abdbc7875a1d7d3210 --- /dev/null +++ b/modules/conop/tests/test_cleanup.py @@ -0,0 +1,162 @@ +import unittest +from ost import geom, conop +from ost.conop import cleanup + +class TestCleanUp(unittest.TestCase): + + def setUp(self): + self.comp_lib=conop.GetBuilder().compound_lib + self.ent = io.LoadPDB("sample_test_cleanup.pdb") + self.ent_no_wat = io.LoadPDB("sample_nowater.pdb") + self.ent_no_lig = io.LoadPDB("sample_noligands.pdb") + + def testStripWater(self): + self.new_ent = cleanup.Cleanup(self.ent, strip_water=True, canonicalize=False, remove_ligands=False) + self.assertEqual( self.new_ent.residue_count, self.ent_no_wat.residue_count ) + self.assertTrue( self.new_ent.residues[0].IsValid() ) + self.assertEqual( self.new_ent.residues[0].qualified_name, self.ent_no_wat.residues[0].qualified_name) + self.assertTrue( self.new_ent.residues[1].IsValid() ) + self.assertEqual( self.new_ent.residues[1].qualified_name, self.ent_no_wat.residues[1].qualified_name) + self.assertTrue( self.new_ent.residues[2].IsValid() ) + self.assertEqual( self.new_ent.residues[2].qualified_name, self.ent_no_wat.residues[2].qualified_name) + self.assertTrue( self.new_ent.residues[3].IsValid() ) + self.assertEqual( self.new_ent.residues[3].qualified_name, self.ent_no_wat.residues[3].qualified_name) + self.assertTrue( self.new_ent.residues[4].IsValid() ) + self.assertEqual( self.new_ent.residues[4].qualified_name, self.ent_no_wat.residues[4].qualified_name) + self.assertTrue( self.new_ent.residues[5].IsValid() ) + self.assertEqual( self.new_ent.residues[5].qualified_name, self.ent_no_wat.residues[5].qualified_name) + self.assertTrue( self.new_ent.residues[6].IsValid() ) + self.assertEqual( self.new_ent.residues[6].qualified_name, self.ent_no_wat.residues[6].qualified_name) + self.assertTrue( self.new_ent.residues[7].IsValid() 
) + self.assertEqual( self.new_ent.residues[7].qualified_name, self.ent_no_wat.residues[7].qualified_name) + + def testCanonicalize(self): + self.new_ent = cleanup.Cleanup(self.ent, strip_water=False, canonicalize=True, remove_ligands=False) + #standard residue must be the same + self.gly = self.ent.residues[1] + self.new_gly = self.new_ent.residues[1] + self.assertTrue(self.new_gly.IsValid()) + self.assertTrue(self.new_gly.IsPeptideLinking()) + self.assertEqual(self.gly.atom_count, self.new_gly.atom_count) + #TEMP del sidechain of incomplete residue and OXT if present + self.new_cys = self.new_ent.residues[4] + self.new_cys_atoms = set([atm.name for atm in self.new_cys.atoms]) + self.assertEqual( len(self.new_cys_atoms), 4, msg = repr(self.new_cys_atoms)) + self.assertTrue( "CB" in self.new_cys_atoms) + self.assertTrue( "CA" in self.new_cys_atoms) + self.assertTrue( "C" in self.new_cys_atoms) + self.assertFalse( "OXT" in self.new_cys_atoms) + self.assertTrue( "N" in self.new_cys_atoms) + #test replacement of atoms + self.mse = self.ent.residues[0] +# self.assertTrue( self.mse.IsValid()) +# self.assertTrue( self.mse.IsPeptideLinking()) + self.sel = self.mse.FindAtom("SE") +# self.assertTrue( self.sel.IsValid()) + self.met = self.new_ent.residues[0] + self.assertTrue(self.met.IsValid()) + self.assertEqual(self.mse.atom_count, self.met.atom_count) + self.assertEqual(self.met.name, "MET") + self.assertEqual(self.met.one_letter_code, "M") + self.assertTrue(self.met.IsPeptideLinking()) + self.sul = self.met.FindAtom("SD") + self.assertTrue(self.sul.IsValid()) + self.assertTrue(geom.Equal(self.sul.pos,self.sel.pos), msg = "sul:%s sel:%s"%(str(self.sul.pos), str(self.sel.pos)) ) + self.assertEqual(self.sul.element, "S") +# self.AssertTrue( sul.mass == conop.Conopology.Instance().GetDefaultAtomMass("S")) +# self.AssertTrue( sul.radius == conop.Conopology.Instance().GetDefaultAtomRadius("S")) + for atm in self.met.atoms: + self.assertFalse( atm.is_hetatom) + #test addition 
+ self.mly = self.ent.residues[2] +# self.assertTrue( self.mly.IsValid()) +# self.assertTrue( self.mly.IsPeptideLinking()) + self.new_lys = self.new_ent.residues[2] + self.assertTrue(self.new_lys.IsValid()) + self.assertTrue(self.new_lys.IsPeptideLinking()) + self.assertEqual(self.new_lys.name, "LYS") + self.assertEqual(self.new_lys.one_letter_code, "K") + self.new_lys_atoms = set([atm.name for atm in self.new_lys.atoms]) + self.canon_lys = self.comp_lib.FindCompound("LYS") + self.canon_lys_atoms = set([atom.name for atom in self.canon_lys.atom_specs + if atom.element != "H" and atom.element != "D" and not atom.is_leaving ]) + self.assertEqual(self.canon_lys_atoms, self.new_lys_atoms) + self.assertFalse(self.canon_lys_atoms - self.new_lys_atoms) + self.assertFalse(self.new_lys_atoms - self.canon_lys_atoms) #test the reverse + for atm in self.new_lys.atoms: + self.assertFalse( atm.is_hetatom) + #deletions + self.dha = self.ent.residues[3] +# self.assertTrue( self.dha.IsValid()) +# self.assertTrue( self.dha.IsPeptideLinking()) + self.new_ser = self.new_ent.residues[3] + self.assertTrue(self.new_ser.IsValid()) + self.assertTrue(self.new_ser.IsPeptideLinking()) + self.assertEqual(self.new_ser.name, "SER") + self.assertEqual(self.new_ser.one_letter_code, "S") + self.new_ser_atoms = set([atm.name for atm in self.new_ser.atoms]) + self.canon_ser = self.comp_lib.FindCompound("SER") + self.canon_ser_atoms = set([atom.name for atom in self.canon_ser.atom_specs + if atom.element != "H" and atom.element != "D" and not atom.is_leaving ]) + #TEMP + self.assertEqual( len(self.new_ser_atoms), 5) + self.assertTrue( "CB" in self.new_ser_atoms) + self.assertTrue( "CA" in self.new_ser_atoms) + self.assertTrue( "C" in self.new_ser_atoms) + self.assertTrue( "O" in self.new_ser_atoms) + self.assertTrue( "N" in self.new_ser_atoms) + #AFTER TEMP + #self.assertEqual( self.canon_ser_atoms, self.new_ser_atoms) + #self.assertFalse(self.canon_ser_atoms - self.new_ser_atoms) + 
#self.assertFalse(self.new_ser_atoms - self.canon_ser_atoms) #test the reverse + for atm in self.new_ser.atoms: + self.assertFalse( atm.is_hetatom) + #test deletion of whole residue + self.assertEqual(self.ent.residues[5].chem_class, "D_PEPTIDE_LINKING") + self.assertNotEqual(self.new_ent.residues[5].name, "DAL") + self.assertNotEqual(self.ent.residue_count, self.new_ent.residue_count) + + def testRemoveLigands(self): + self.new_ent = cleanup.Cleanup(self.ent, strip_water=False, canonicalize=False, remove_ligands=True) + self.assertEqual(self.new_ent.residue_count, self.ent_no_lig.residue_count ) + #MSE + self.assertTrue(self.new_ent.residues[0].IsValid() ) + self.assertEqual(self.new_ent.residues[0].qualified_name, self.ent_no_lig.residues[0].qualified_name) + self.assertTrue(self.new_ent.residues[0].IsPeptideLinking()) + self.assertTrue(self.new_ent.residues[0].atoms[0].is_hetatom) + #GLY + self.assertTrue(self.new_ent.residues[1].IsValid() ) + self.assertEqual(self.new_ent.residues[1].qualified_name, self.ent_no_lig.residues[1].qualified_name) + self.assertTrue(self.new_ent.residues[1].IsPeptideLinking()) + self.assertFalse(self.new_ent.residues[1].atoms[0].is_hetatom) + #MLY + self.assertTrue(self.new_ent.residues[2].IsValid() ) + self.assertEqual(self.new_ent.residues[2].qualified_name, self.ent_no_lig.residues[2].qualified_name) + self.assertTrue(self.new_ent.residues[2].IsPeptideLinking()) + self.assertTrue(self.new_ent.residues[2].atoms[0].is_hetatom) + #DHA + self.assertTrue(self.new_ent.residues[3].IsValid() ) + self.assertEqual(self.new_ent.residues[3].qualified_name, self.ent_no_lig.residues[3].qualified_name) + self.assertTrue(self.new_ent.residues[3].IsPeptideLinking()) + self.assertTrue(self.new_ent.residues[3].atoms[0].is_hetatom) + #CYS + self.assertTrue(self.new_ent.residues[4].IsValid() ) + self.assertEqual(self.new_ent.residues[4].qualified_name, self.ent_no_lig.residues[4].qualified_name) + 
self.assertTrue(self.new_ent.residues[4].IsPeptideLinking()) + self.assertFalse(self.new_ent.residues[4].atoms[0].is_hetatom) + #DAL + self.assertTrue(self.new_ent.residues[5].IsValid() ) + self.assertEqual(self.new_ent.residues[5].qualified_name, self.ent_no_lig.residues[5].qualified_name) + self.assertTrue(self.new_ent.residues[5].IsPeptideLinking()) + self.assertTrue(self.new_ent.residues[5].atoms[0].is_hetatom) + #HOH + self.assertTrue(self.new_ent.residues[6].IsValid() ) + self.assertEqual(self.new_ent.residues[6].qualified_name, self.ent_no_lig.residues[6].qualified_name) + self.assertFalse(self.new_ent.residues[6].IsPeptideLinking()) # here assertFalse instead of assertTrue + self.assertTrue(self.new_ent.residues[6].atoms[0].is_hetatom) + +if not hasattr(conop.GetBuilder(), 'compound_lib'): + print 'Default builder without compound lib. Ignoring test_cleanup.py tests' + sys.exit() +suite = unittest.TestLoader().loadTestsFromTestCase(TestCleanUp) +unittest.TextTestRunner().run(suite) diff --git a/modules/mol/base/pymod/export_residue.cc b/modules/mol/base/pymod/export_residue.cc index 1e1b9f74a60d94c224b9aad51153a9e873b2a4a7..75d74bdb577c618094ee18223102dfa56f932abe 100644 --- a/modules/mol/base/pymod/export_residue.cc +++ b/modules/mol/base/pymod/export_residue.cc @@ -64,6 +64,13 @@ namespace { void export_Residue() { + class_<ChemClass>("ChemClass", init<char>(args("chem_class"))) + .def(self!=self) + .def(self==self) + .def("IsPeptideLinking", &ChemClass::IsPeptideLinking) + .def("IsNucleotideLinking", &ChemClass::IsNucleotideLinking) + ; + implicitly_convertible<char, ChemClass>(); class_<ResNum>("ResNum", init<int>(args("num"))) .def(init<int,char>(args("num", "ins_code"))) @@ -149,8 +156,8 @@ void export_Residue() .def("GetNumber", &ResidueBase::GetNumber, return_value_policy<copy_const_reference>()) .def("GetChemClass", &ResidueBase::GetChemClass) + .add_property("chem_class", &ResidueBase::GetChemClass, set_chemclass) .def("SetChemClass", 
set_chemclass) - .add_property("chem_class",&ResidueBase::GetChemClass,set_chemclass) .add_property("is_ligand", &ResidueBase::IsLigand, &ResidueBase::SetIsLigand) .def("IsLigand", &ResidueBase::IsLigand) .def("SetIsLigand", &ResidueBase::SetIsLigand)