From d0f67460ae7825bdb830b2d232b4fb6adaeb6c0c Mon Sep 17 00:00:00 2001 From: Gabriel Studer <gabriel.studer@unibas.ch> Date: Fri, 24 Mar 2023 15:40:53 +0100 Subject: [PATCH] Correctly read/write CONECT in PDB files - Thanks Simon Sun Simon found issues when reading and writing CONECT statements in PDB io and provided code to fix it. yay Reading: OpenStructure did not read CONECT statements at all. By default, it still doesnt. Many PDB files out there don't provide these statements and we really don't want to rely on them. We rather want to assign connectivity based on the chemical component dictionary from the PDB. However, a valid use case are novel compounds that are not in the component dictionary. Reading connect statements can now be enabled in the pdb reader through the IOProfile. This may give issues in processing after reading. OpenStructure implements processors that are responsible for connectivity. Now that we build some of the connectivity already at the reading stage, this might cause trouble. To remedy most of the nightmares that can come out of that, the processors can now optionally skip connectivities between Hetatoms. Writing: That was a plain bug when writing CONECT statements for bond orders > 1. --- modules/conop/doc/connectivity.rst | 18 ++++- modules/conop/pymod/export_heuristic.cc | 3 +- modules/conop/pymod/export_processor.cc | 2 + modules/conop/pymod/export_rule_based.cc | 3 +- modules/conop/src/heuristic.hh | 5 +- modules/conop/src/processor.cc | 6 ++ modules/conop/src/processor.hh | 24 ++++--- modules/conop/src/rule_based.hh | 4 +- modules/conop/tests/test_heuristic_conop.cc | 44 ++++++++++++ modules/conop/tests/test_rule_based_conop.cc | 51 +++++++++++++- modules/io/doc/profile.rst | 12 +++- modules/io/pymod/__init__.py | 16 ++++- modules/io/pymod/export_pdb_io.cc | 4 +- modules/io/src/mol/io_profile.hh | 10 +-- modules/io/src/mol/pdb_reader.cc | 74 ++++++++++++++++++-- modules/io/src/mol/pdb_reader.hh | 4 +- modules/io/src/mol/pdb_writer.cc | 18 ++--- modules/io/tests/test_io_pdb.cc | 14 ++-- modules/io/tests/test_io_pdb.py | 31 +++++++- modules/io/tests/testfiles/pdb/conect.pdb | 28 ++++---- modules/mol/mm/src/observer.cc | 2 +- 21 files changed, 312 insertions(+), 61 deletions(-) diff --git a/modules/conop/doc/connectivity.rst b/modules/conop/doc/connectivity.rst index f04e978a5..0dfd00590 100644 --- a/modules/conop/doc/connectivity.rst +++ b/modules/conop/doc/connectivity.rst @@ -94,6 +94,15 @@ be extended with custom connectivity information, if required. :type: :class:`ConopAction` + .. attribute:: connect_hetatm + + :type: :class:`bool` + + Whether to connect atoms that are both hetatms. Enabled by default. + Disabling can be useful if there are compounds which are not covered + by the PDB component dictionary and you prefer to create your own + connectivity for those. + .. method:: Process(ent) Processess the entity *ent* according to the current options. @@ -101,7 +110,9 @@ be extended with custom connectivity information, if required. .. class:: HeuristicProcessor(check_bond_feasibility=False, \ assign_torsions=True, connect=True, \ - peptide_bonds=True, zero_occ_treatment=CONOP_WARN) + peptide_bonds=True, + connect_hetatm=True, + zero_occ_treatment=CONOP_WARN) The :class:`HeuristicProcessor` implements the :class:`Processor` interface. Refer to its documentation for methods and accessors common to all processor. @@ -110,6 +121,7 @@ be extended with custom connectivity information, if required. :param assign_torsions: Sets :attr:`~Processor.assign_torsions` :param connect: Sets :attr:`~Processor.connect` :param peptide_bonds: Sets :attr:`~Processor.peptide_bonds` + :param connect_hetatm: Sets :attr:`~Processor.connect_hetatm` :param zero_occ_treatment: Sets :attr:`~Processor.zero_occ_treatment` @@ -119,7 +131,8 @@ be extended with custom connectivity information, if required. unknown_atom_treatment=CONOP_WARN, \ check_bond_feasibility=False, \ assign_torsions=True, connect=True, \ - peptide_bonds=True, zero_occ_treatment=CONOP_WARN) + peptide_bonds=True, connect_hetatm=True, \ + zero_occ_treatment=CONOP_WARN) The :class:`RuleBasedProcessor` implements the :class:`Processor` interface. Refer to its documentation for methods and accessors common to all processor. @@ -134,6 +147,7 @@ be extended with custom connectivity information, if required. :param assign_torsions: Sets :attr:`~Processor.assign_torsions` :param connect: Sets :attr:`~Processor.connect` :param peptide_bonds: Sets :attr:`~Processor.peptide_bonds` + :param connect_hetatm: Sets :attr:`~Processor.connect_hetatm` :param zero_occ_treatment: Sets :attr:`~Processor.zero_occ_treatment` .. attribute:: fix_elements diff --git a/modules/conop/pymod/export_heuristic.cc b/modules/conop/pymod/export_heuristic.cc index 275245a7e..b83a77ca0 100644 --- a/modules/conop/pymod/export_heuristic.cc +++ b/modules/conop/pymod/export_heuristic.cc @@ -29,11 +29,12 @@ void export_heuristic() { class_<HeuristicProcessor, HeuristicProcessorPtr, boost::noncopyable, bases<Processor> >("HeuristicProcessor", init<>()) - .def(init<bool,bool,bool,bool,ConopAction>( + .def(init<bool,bool,bool,bool,bool,ConopAction>( (arg("check_bond_feasibility")=false, arg("assign_torsions")=true, arg("connect")=true, arg("peptide_bonds")=true, + arg("connect_hetatm")=true, arg("zero_occ_treatment")=CONOP_WARN))) ; } diff --git a/modules/conop/pymod/export_processor.cc b/modules/conop/pymod/export_processor.cc index 262a1dfff..726979edf 100644 --- a/modules/conop/pymod/export_processor.cc +++ b/modules/conop/pymod/export_processor.cc @@ -86,6 +86,8 @@ void export_processor() { &Processor::SetConnect) .add_property("peptide_bonds", &Processor::GetConnectAminoAcids, &Processor::SetConnectAminoAcids) + .add_property("connect_hetatm", &Processor::GetConnectHetatm, + &Processor::SetConnectHetatm) .add_property("zero_occ_treatment", &Processor::GetZeroOccTreatment, &Processor::SetZeroOccTreatment) .def("Process", &Processor::Process, diff --git a/modules/conop/pymod/export_rule_based.cc b/modules/conop/pymod/export_rule_based.cc index 7b22b6d2d..ad0414dd2 100644 --- a/modules/conop/pymod/export_rule_based.cc +++ b/modules/conop/pymod/export_rule_based.cc @@ -28,7 +28,7 @@ void export_rule_based() { class_<RuleBasedProcessor, RuleBasedProcessorPtr, boost::noncopyable, bases<Processor> >("RuleBasedProcessor", init<CompoundLibPtr>()) - .def(init<CompoundLibPtr,bool,bool,ConopAction,ConopAction,bool,bool,bool,bool,ConopAction>( + .def(init<CompoundLibPtr,bool,bool,ConopAction,ConopAction,bool,bool,bool,bool,bool,ConopAction>( (arg("lib"), arg("fix_elements")=true, arg("strict_hydrogens")=false, arg("unknown_res_treatment")=CONOP_WARN, arg("unknown_atom_treatment")=CONOP_WARN, @@ -36,6 +36,7 @@ void export_rule_based() { arg("assign_torsions")=true, arg("connect")=true, arg("peptide_bonds")=true, + arg("connect_hetatm")=true, arg("zero_occ_treatment")=CONOP_WARN))) .add_property("fix_element", &RuleBasedProcessor::GetFixElement, &RuleBasedProcessor::SetFixElement) diff --git a/modules/conop/src/heuristic.hh b/modules/conop/src/heuristic.hh index 2b70864aa..5a72250f1 100644 --- a/modules/conop/src/heuristic.hh +++ b/modules/conop/src/heuristic.hh @@ -41,8 +41,9 @@ public: virtual ProcessorPtr Copy() const { return ProcessorPtr(new HeuristicProcessor(*this)); } - HeuristicProcessor(bool bf, bool at, bool cn, bool aa, ConopAction zo): - Processor(bf, at, cn, aa, zo), + HeuristicProcessor(bool bf, bool at, bool cn, bool aa, bool ch, + ConopAction zo): + Processor(bf, at, cn, aa, ch, zo), lib_() {} diff --git a/modules/conop/src/processor.cc b/modules/conop/src/processor.cc index 844256a3c..840c8b4a7 100644 --- a/modules/conop/src/processor.cc +++ b/modules/conop/src/processor.cc @@ -304,6 +304,9 @@ void Processor::ConnectAtomsOfResidue(mol::ResidueHandle rh, a2.GetElement()=="D")) { continue; } + if (!connect_hetatm_ && a1.IsHetAtom() && a2.IsHetAtom()) { + continue; + } if (!this->GetCheckBondFeasibility()) { e.Connect(a1, a2, bond.order); } else { @@ -391,6 +394,9 @@ void Processor::DistanceBasedConnect(mol::AtomHandle atom) const for (mol::AtomHandleList::const_iterator it=alist.begin(), e=alist.end();it!=e;++it) { if (*it!=atom) { + if (!connect_hetatm_ && it->IsHetAtom() && atom.IsHetAtom()) { + continue; + } if (IsBondFeasible(atom, *it)) { if (it->GetResidue()==res_a || Processor::AreResiduesConsecutive(res_a, it->GetResidue())) { diff --git a/modules/conop/src/processor.hh b/modules/conop/src/processor.hh index 81add923d..2c7bf495a 100644 --- a/modules/conop/src/processor.hh +++ b/modules/conop/src/processor.hh @@ -66,19 +66,20 @@ protected: void DistanceBasedConnect(mol::AtomHandle atom) const; mol::AtomHandle LocateAtom(const mol::AtomHandleList&, int ordinal) const; public: - Processor(bool bf, bool at, bool cn, bool aa, ConopAction zo): check_bond_feasibility_(bf), - assign_torsions_(at), connect_(cn), connect_aa_(aa), - zero_occ_treatment_(zo) {} + Processor(bool bf, bool at, bool cn, bool aa, bool ch, ConopAction zo): + check_bond_feasibility_(bf), assign_torsions_(at), connect_(cn), + connect_aa_(aa), connect_hetatm_(ch), zero_occ_treatment_(zo) {} Processor(): check_bond_feasibility_(false), assign_torsions_(true), connect_(true), connect_aa_(true), - zero_occ_treatment_(CONOP_SILENT) {} + connect_hetatm_(true), zero_occ_treatment_(CONOP_SILENT) {} + void SetConnect(bool connect) { connect_ = connect; } - bool GetConnect() const { return connect_; } + void SetAssignTorsions(bool flag) { assign_torsions_ = flag; } @@ -92,23 +93,29 @@ public: void SetConnectAminoAcids(bool c) { connect_aa_ = c; } + + bool GetConnectHetatm() const { + return connect_hetatm_; + } + void SetConnectHetatm(bool c) { + connect_hetatm_ = c; + } + bool GetCheckBondFeasibility() const { return check_bond_feasibility_; } - void SetCheckBondFeasibility(bool flag) { check_bond_feasibility_ = flag; } - ConopAction GetZeroOccTreatment() const { return zero_occ_treatment_; } - void SetZeroOccTreatment(ConopAction action) { zero_occ_treatment_ = action; } + virtual String ToString() const = 0; protected: String OptionsToString() const; @@ -117,6 +124,7 @@ private: bool assign_torsions_; bool connect_; bool connect_aa_; + bool connect_hetatm_; ConopAction zero_occ_treatment_; }; diff --git a/modules/conop/src/rule_based.hh b/modules/conop/src/rule_based.hh index cea8d5fd3..15815f9fd 100644 --- a/modules/conop/src/rule_based.hh +++ b/modules/conop/src/rule_based.hh @@ -45,8 +45,8 @@ public: RuleBasedProcessor(CompoundLibPtr compound_lib, bool fe, bool sh, ConopAction ur, ConopAction ua, bool bf, bool at, bool cn, - bool aa, ConopAction zo): - Processor(bf, at, cn, aa, zo), lib_(compound_lib), fix_element_(fe), + bool aa, bool ch, ConopAction zo): + Processor(bf, at, cn, aa, ch, zo), lib_(compound_lib), fix_element_(fe), strict_hydrogens_(sh), unk_res_treatment_(ur), unk_atom_treatment_(ua) { diff --git a/modules/conop/tests/test_heuristic_conop.cc b/modules/conop/tests/test_heuristic_conop.cc index d6ba9f058..acb0ed9a0 100644 --- a/modules/conop/tests/test_heuristic_conop.cc +++ b/modules/conop/tests/test_heuristic_conop.cc @@ -179,5 +179,49 @@ BOOST_AUTO_TEST_CASE(quack_types_unknown_residues) { } + +BOOST_AUTO_TEST_CASE(hetatom_connect_heuristic) { + + // STEP 1: Specify two atoms as hetatoms - they should be connected + // by the processor + mol::EntityHandle e = Builder() + .Chain("A") + .Residue("GLY") + .Atom("N", geom::Vec3(-8.22, 35.20, 22.39)) + .Atom("CA", geom::Vec3(-8.28, 36.36, 21.49)) + .Atom("C", geom::Vec3(-8.59, 35.93, 20.06)) + .Atom("O", geom::Vec3(-7.88, 36.30, 19.12)) + .Atom("CB", geom::Vec3(-6.96, 37.11, 21.53)) + ; + + ost::mol::AtomHandleList atoms = e.GetAtomList(); + atoms[0].SetHetAtom(true); // N + atoms[1].SetHetAtom(true); // CA + HeuristicProcessor proc; + proc.Process(e); + BOOST_CHECK(mol::BondExists(e.FindAtom("A", 1, "N"), e.FindAtom("A", 1, "CA"))); + BOOST_CHECK(mol::BondExists(e.FindAtom("A", 1, "CA"), e.FindAtom("A", 1, "C"))); + + // STEP 2: Same thing again but we tell the processor NOT to connect + // hetatoms + e = Builder() + .Chain("A") + .Residue("GLY") + .Atom("N", geom::Vec3(-8.22, 35.20, 22.39)) + .Atom("CA", geom::Vec3(-8.28, 36.36, 21.49)) + .Atom("C", geom::Vec3(-8.59, 35.93, 20.06)) + .Atom("O", geom::Vec3(-7.88, 36.30, 19.12)) + .Atom("CB", geom::Vec3(-6.96, 37.11, 21.53)) + ; + + atoms = e.GetAtomList(); + atoms[0].SetHetAtom(true); // N + atoms[1].SetHetAtom(true); // CA + proc.SetConnectHetatm(false); + proc.Process(e); + BOOST_CHECK(!mol::BondExists(e.FindAtom("A", 1, "N"), e.FindAtom("A", 1, "CA"))); + BOOST_CHECK(mol::BondExists(e.FindAtom("A", 1, "CA"), e.FindAtom("A", 1, "C"))); +} + BOOST_AUTO_TEST_SUITE_END(); diff --git a/modules/conop/tests/test_rule_based_conop.cc b/modules/conop/tests/test_rule_based_conop.cc index 4e5222242..afe749222 100644 --- a/modules/conop/tests/test_rule_based_conop.cc +++ b/modules/conop/tests/test_rule_based_conop.cc @@ -26,6 +26,7 @@ #include <ost/log.hh> #include <ost/conop/rule_based.hh> #include <ost/conop/conop.hh> +#include <ost/mol/builder.hh> #include "helper.hh" using boost::unit_test_framework::test_suite; @@ -55,13 +56,13 @@ BOOST_AUTO_TEST_CASE(rule_based_init_check) BOOST_CHECK_THROW(RuleBasedProcessor rbc1(lib), ost::Error); BOOST_CHECK_THROW(RuleBasedProcessor rbc2(lib, true, false, CONOP_WARN, CONOP_WARN, false, true, true, true, - CONOP_WARN), ost::Error); + true, CONOP_WARN), ost::Error); lib = load_lib(); if (!lib) { return; } BOOST_CHECK_NO_THROW(RuleBasedProcessor rbc3(lib)); BOOST_CHECK_NO_THROW(RuleBasedProcessor rbc4(lib, true, false, CONOP_WARN, CONOP_WARN, false, true, true, - true, CONOP_WARN)); + true, true, CONOP_WARN)); } BOOST_AUTO_TEST_CASE(rule_based_set_get_flags) @@ -211,4 +212,50 @@ BOOST_AUTO_TEST_CASE(rule_based_unk_res) BOOST_CHECK(!ent2.FindResidue("A", 1)); } +BOOST_AUTO_TEST_CASE(hetatom_connect_rule_based) { + + CompoundLibPtr lib = load_lib(); + if (!lib) { return; } + + // STEP 1: Specify two atoms as hetatoms - they should be connected + // by the processor + mol::EntityHandle e = Builder() + .Chain("A") + .Residue("GLY") + .Atom("N", geom::Vec3(-8.22, 35.20, 22.39)) + .Atom("CA", geom::Vec3(-8.28, 36.36, 21.49)) + .Atom("C", geom::Vec3(-8.59, 35.93, 20.06)) + .Atom("O", geom::Vec3(-7.88, 36.30, 19.12)) + .Atom("CB", geom::Vec3(-6.96, 37.11, 21.53)) + ; + + ost::mol::AtomHandleList atoms = e.GetAtomList(); + atoms[0].SetHetAtom(true); // N + atoms[1].SetHetAtom(true); // CA + RuleBasedProcessor proc(lib); + proc.Process(e); + BOOST_CHECK(mol::BondExists(e.FindAtom("A", 1, "N"), e.FindAtom("A", 1, "CA"))); + BOOST_CHECK(mol::BondExists(e.FindAtom("A", 1, "CA"), e.FindAtom("A", 1, "C"))); + + // STEP 2: Same thing again but we tell the processor NOT to connect + // hetatoms + e = Builder() + .Chain("A") + .Residue("GLY") + .Atom("N", geom::Vec3(-8.22, 35.20, 22.39)) + .Atom("CA", geom::Vec3(-8.28, 36.36, 21.49)) + .Atom("C", geom::Vec3(-8.59, 35.93, 20.06)) + .Atom("O", geom::Vec3(-7.88, 36.30, 19.12)) + .Atom("CB", geom::Vec3(-6.96, 37.11, 21.53)) + ; + + atoms = e.GetAtomList(); + atoms[0].SetHetAtom(true); // N + atoms[1].SetHetAtom(true); // CA + proc.SetConnectHetatm(false); + proc.Process(e); + BOOST_CHECK(!mol::BondExists(e.FindAtom("A", 1, "N"), e.FindAtom("A", 1, "CA"))); + BOOST_CHECK(mol::BondExists(e.FindAtom("A", 1, "CA"), e.FindAtom("A", 1, "C"))); +} + BOOST_AUTO_TEST_SUITE_END(); diff --git a/modules/io/doc/profile.rst b/modules/io/doc/profile.rst index ec6d4371c..e150a1346 100644 --- a/modules/io/doc/profile.rst +++ b/modules/io/doc/profile.rst @@ -94,7 +94,7 @@ The IOProfile Class .. class:: IOProfile(dialect='PDB', quack_mode=False, fault_tolerant=False,\ join_spread_atom_records=False, no_hetatms=False,\ - calpha_only=False, processor=None) + calpha_only=False, read_conect=False, processor=None) Aggregates flags that control the import of molecular structures. @@ -147,6 +147,16 @@ The IOProfile Class most useful in combination with protein-only PDB files to speed up subsequent processing and importing. + .. attribute:: read_conect + + :type: bool + + Only relevant when reading PDB format. When set to true, reads CONECT + statements and applies them in the pdb reader. This can result in + hydrogen bonds salt bridges etc. to be connected. Check the PDB format + definition for more info. Disables connectity generation in case of HETATM + in processor except for water. + .. attribute:: processor :type: :class:`ost.conop.HeuristicProcessor`/:class:`ost.conop.RuleBasedProcessor` diff --git a/modules/io/pymod/__init__.py b/modules/io/pymod/__init__.py index c47d044de..965a39ce4 100644 --- a/modules/io/pymod/__init__.py +++ b/modules/io/pymod/__init__.py @@ -83,7 +83,8 @@ def LoadPDB(filename, restrict_chains="", no_hetatms=None, fault_tolerant=None, load_multi=False, quack_mode=None, join_spread_atom_records=None, calpha_only=None, profile='DEFAULT', remote=False, remote_repo='pdb', - dialect=None, seqres=False, bond_feasibility_check=None): + dialect=None, seqres=False, bond_feasibility_check=None, + read_conect=False): """ Load PDB file from disk and return one or more entities. Several options allow to customize the exact behaviour of the PDB import. For more information @@ -169,6 +170,16 @@ def LoadPDB(filename, restrict_chains="", no_hetatms=None, If set, overrides the value of :attr:`ost.conop.Processor.check_bond_feasibility` :type bond_feasibility_check: :class:`bool` + :param read_conect: By default, OpenStructure doesn't read CONECT statements in + a pdb file. Reason is that they're often missing and we prefer + to rely on the chemical component dictionary from the PDB. + However, there may be cases where you really want these CONECT + statements. For example novel compounds with no entry in + the chemical component dictionary. Setting this to True has + two effects: 1) CONECT statements are read and blindly applied + 2) The processor does not connect any pair of HETATOM atoms in + order to not interfer with the CONECT statements. + :type read_conect: :class:`bool` :rtype: :class:`~ost.mol.EntityHandle` or a list thereof if `load_multi` is True. @@ -194,9 +205,12 @@ def LoadPDB(filename, restrict_chains="", no_hetatms=None, prof.no_hetatms=_override(prof.no_hetatms, no_hetatms) prof.dialect=_override(prof.dialect, dialect) prof.quack_mode=_override(prof.quack_mode, quack_mode) + prof.read_conect=_override(prof.read_conect, read_conect) if prof.processor: prof.processor.check_bond_feasibility=_override(prof.processor.check_bond_feasibility, bond_feasibility_check) + prof.processor.connect_hetatm=_override(prof.processor.connect_hetatm, + not read_conect) prof.fault_tolerant=_override(prof.fault_tolerant, fault_tolerant) prof.join_spread_atom_records=_override(prof.join_spread_atom_records, join_spread_atom_records) diff --git a/modules/io/pymod/export_pdb_io.cc b/modules/io/pymod/export_pdb_io.cc index addb94f3b..cad44b855 100644 --- a/modules/io/pymod/export_pdb_io.cc +++ b/modules/io/pymod/export_pdb_io.cc @@ -45,13 +45,14 @@ void remove_profiles() { void export_pdb_io() { class_<IOProfile>("IOProfile", - init<String,bool,bool,bool,bool,bool, + init<String,bool,bool,bool,bool,bool,bool, conop::ProcessorPtr>((arg("dialect")="PDB", arg("quack_mode")=false, arg("fault_tolerant")=false, arg("join_spread_atom_records")=false, arg("no_hetatms")=false, arg("calpha_only")=false, + arg("read_conect")=false, arg("processor")=conop::ProcessorPtr()))) .def(init<const IOProfile&>()) .def_readwrite("dialect", &IOProfile::dialect) @@ -60,6 +61,7 @@ void export_pdb_io() .def_readwrite("no_hetatms", &IOProfile::no_hetatms) .def_readwrite("calpha_only", &IOProfile::calpha_only) .def_readwrite("join_spread_atom_records", &IOProfile::join_spread_atom_records) + .def_readwrite("read_conect", &IOProfile::read_conect) .def_readwrite("processor", &IOProfile::processor) .def("Copy", &IOProfile::Copy) .def(self_ns::str(self)) diff --git a/modules/io/src/mol/io_profile.hh b/modules/io/src/mol/io_profile.hh index 22060e8c6..d8f078e39 100644 --- a/modules/io/src/mol/io_profile.hh +++ b/modules/io/src/mol/io_profile.hh @@ -31,15 +31,15 @@ namespace ost { namespace io { struct DLLEXPORT IOProfile { public: IOProfile(String d, bool qm, bool ft, bool js, bool nh, - bool co, conop::ProcessorPtr proc=conop::ProcessorPtr()): + bool co, bool rc, conop::ProcessorPtr proc=conop::ProcessorPtr()): dialect(d), quack_mode(qm), fault_tolerant(ft), join_spread_atom_records(js), - no_hetatms(nh), calpha_only(co), processor(proc) + no_hetatms(nh), calpha_only(co), read_conect(rc), processor(proc) { } IOProfile(): dialect("PDB"), quack_mode(false), fault_tolerant(false), join_spread_atom_records(false), no_hetatms(false), - calpha_only(false), processor() + calpha_only(false), read_conect(false), processor() { } String dialect; @@ -48,11 +48,12 @@ public: bool join_spread_atom_records; bool no_hetatms; bool calpha_only; + bool read_conect; conop::ProcessorPtr processor; IOProfile Copy() { return IOProfile(dialect, quack_mode, fault_tolerant, join_spread_atom_records, - no_hetatms, calpha_only, + no_hetatms, calpha_only, read_conect, processor ? processor->Copy() : conop::ProcessorPtr()); } }; @@ -66,6 +67,7 @@ inline std::ostream& operator<<(std::ostream& stream, const IOProfile& p) << "calpha_only=" << (p.calpha_only ? "True" : "False") << ", " << "fault_tolerant=" << (p.fault_tolerant ? "True" : "False") << ", " << "no_hetatms=" << (p.no_hetatms ? "True" : "False") << ", " + << "read_conect=" << (p.read_conect ? "True" : "False") << ", " << "processor=" << (p.processor ? p.processor->ToString() : "None") << ")"; return stream; } diff --git a/modules/io/src/mol/pdb_reader.cc b/modules/io/src/mol/pdb_reader.cc index 3aeefcbd7..e4e11d7bc 100644 --- a/modules/io/src/mol/pdb_reader.cc +++ b/modules/io/src/mol/pdb_reader.cc @@ -27,6 +27,7 @@ #include <ost/profile.hh> #include <ost/log.hh> #include <ost/message.hh> +#include <ost/mol/bond_handle.hh> #include <ost/conop/conop.hh> #include <ost/geom/mat3.hh> @@ -356,11 +357,16 @@ void PDBReader::Import(mol::EntityHandle& ent, break; case 'C': case 'c': - if (curr_line.size()<20) { + if (curr_line.size()<6) { LOG_TRACE("skipping entry"); continue; } - if (IEquals(curr_line.substr(0, 6), StringRef("COMPND", 6))) { + else if (IEquals(curr_line.substr(0, 6), StringRef("CONECT", 6)) && + profile_.read_conect) { + LOG_TRACE("processing CONECT entry"); + this->ParseConectEntry(curr_line, line_num_, ent); + } + else if (IEquals(curr_line.substr(0, 6), StringRef("COMPND", 6))) { LOG_TRACE("processing COMPND entry"); this->ParseCompndEntry(curr_line, line_num_); } @@ -566,7 +572,8 @@ bool PDBReader::EnsureLineLength(const StringRef& line, size_t size) bool PDBReader::ParseAtomIdent(const StringRef& line, int line_num, String& chain_name, StringRef& res_name, mol::ResNum& resnum, StringRef& atom_name, - char& alt_loc, const StringRef& record_type) + char& alt_loc, const StringRef& record_type, + int& serial) { if (!this->EnsureLineLength(line, 27)) { return false; @@ -602,6 +609,16 @@ bool PDBReader::ParseAtomIdent(const StringRef& line, int line_num, char ins_c=line[26]; resnum=to_res_num(res_num.second, ins_c); + + std::pair<bool, int> tmp = line.substr(6, 5).trim().to_int(); + if(!tmp.first) { + if (profile_.fault_tolerant) { + return false; + } + throw IOException(str(format("invalid atom serial on line %d") % line_num)); + } + serial = tmp.second; + return true; } @@ -615,8 +632,10 @@ void PDBReader::ParseAnisou(const StringRef& line, int line_num, char alt_loc=0; StringRef res_name, atom_name; mol::ResNum res_num(0); + int serial = -1; if (!this->ParseAtomIdent(line, line_num, chain_name, res_name, res_num, - atom_name, alt_loc, StringRef("ANISOU", 6))) { + atom_name, alt_loc, StringRef("ANISOU", 6), + serial)) { return; } double anisou[6]={0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; @@ -673,8 +692,9 @@ void PDBReader::ParseAndAddAtom(const StringRef& line, int line_num, String chain_name; StringRef res_name, atom_name; mol::ResNum res_num(0); + int serial = -1; if (!this->ParseAtomIdent(line, line_num, chain_name, res_name, res_num, - atom_name, alt_loc, record_type)) { + atom_name, alt_loc, record_type, serial)) { return; } std::pair<bool, Real> charge, radius; @@ -860,6 +880,9 @@ void PDBReader::ParseAndAddAtom(const StringRef& line, int line_num, ah.SetCharge(charge.second); } ah.SetHetAtom(record_type[0]=='H'); + if(profile_.read_conect && serial != -1) { + this->amap_[serial] = ah; + } } void PDBReader::ParseHelixEntry(const StringRef& line) @@ -911,4 +934,45 @@ void PDBReader::ParseStrandEntry(const StringRef& line) } } +void PDBReader::ParseConectEntry (const StringRef& line, int line_num, mol::EntityHandle& ent) +{ + if (line.size()<16) { + if (profile_.fault_tolerant) { + LOG_WARNING("invalid CONECT record on line " << line_num + << ": record is too short"); + return; + } + std::stringstream ss("invalid CONECT record on line "); + ss << line_num <<": record is too short"; + throw IOException(ss.str()); + } + if (line.rtrim().size()>80) { + if (profile_.fault_tolerant) { + LOG_WARNING("invalid CONECT record on line " << line_num + << ": record is too long"); + return; + } + std::stringstream ss("invalid CONECT record on line "); + ss << line_num <<": whole record is too long"; + throw IOException(ss.str()); + } + mol::XCSEditor editor=ent.EditXCS(mol::BUFFERED_EDIT); + const int starting_atom = line.substr(6,5).trim().to_int().second; + // map for bonds, in the form of <serial_number, bond_order> + std::map<int, unsigned char> connected_atoms; + for (int i=0; i<4; i++) { + if (static_cast<int>(line.length()) < 11+i*5+5) break; + const int connected_atom = line.substr(11+i*5, 5).trim().to_int().second; + connected_atoms[connected_atom]+=1; + } + for (auto& pair : connected_atoms) { + auto at_one_it = this->amap_.find(starting_atom); + auto at_two_it = this->amap_.find(pair.first); + if(at_one_it != this->amap_.end() && at_two_it != this->amap_.end()) { + editor.Connect(at_one_it->second, at_two_it->second, pair.second); + } + } +} + + }} diff --git a/modules/io/src/mol/pdb_reader.hh b/modules/io/src/mol/pdb_reader.hh index df98fa2dd..0d75d1612 100644 --- a/modules/io/src/mol/pdb_reader.hh +++ b/modules/io/src/mol/pdb_reader.hh @@ -83,13 +83,15 @@ private: bool ParseAtomIdent(const StringRef& line, int line_num, String& chain_name, StringRef& res, mol::ResNum& resnum, StringRef& atom_name, char& alt_loc, - const StringRef& record_type); + const StringRef& record_type, int& serial); void ParseAnisou(const StringRef& line, int line_num, mol::EntityHandle& h); void ParseHelixEntry(const StringRef& line); void ParseStrandEntry(const StringRef& line); void Init(const boost::filesystem::path& loc); bool EnsureLineLength(const StringRef& line, size_t size); + void ParseConectEntry(const StringRef& line, int line_num, mol::EntityHandle& ent); + std::map<int, mol::AtomHandle> amap_; // <serial_number, AtomHandle> mol::ChainHandle curr_chain_; mol::ResidueHandle curr_residue_; int chain_count_; diff --git a/modules/io/src/mol/pdb_writer.cc b/modules/io/src/mol/pdb_writer.cc index 7b28c8b2a..d4461402c 100644 --- a/modules/io/src/mol/pdb_writer.cc +++ b/modules/io/src/mol/pdb_writer.cc @@ -35,6 +35,7 @@ #include <ost/mol/residue_handle.hh> #include <ost/mol/chain_handle.hh> #include <ost/mol/entity_visitor.hh> +#include <ost/mol/bond_handle.hh> #include "pdb_writer.hh" using boost::format; @@ -337,20 +338,21 @@ public: } private: public: - virtual bool VisitAtom(const mol::AtomHandle& atom) { +virtual bool VisitAtom(const mol::AtomHandle& atom) { if (atom.IsHetAtom()) { bool has_partner=false; int atom_index=atom_indices_[atom.GetHashCode()]; mol::AtomHandleList partners=atom.GetBondPartners(); std::list<int> partner_indices; - for (mol::AtomHandleList::const_iterator i=partners.begin(); - i!=partners.end(); ++i) { - int pind=atom_indices_[i->GetHashCode()]; - if (pind!=0) { - partner_indices.push_back(pind); - has_partner=true; + for (auto partner : partners){ + mol::BondHandle bond = atom.FindBondToAtom(partner); + int pind=atom_indices_[partner.GetHashCode()]; + if (pind!=0) { + for (int i=0; i < int(bond.GetBondOrder()); i++) + {partner_indices.push_back(pind);} + has_partner=true; + } } - } if (has_partner) { write_conect(ostr_, atom_index, partner_indices); } diff --git a/modules/io/tests/test_io_pdb.cc b/modules/io/tests/test_io_pdb.cc index 400034f9b..99071118e 100644 --- a/modules/io/tests/test_io_pdb.cc +++ b/modules/io/tests/test_io_pdb.cc @@ -724,13 +724,15 @@ BOOST_AUTO_TEST_CASE(write_ter6) "testfiles/pdb/ter4-out.pdb")); } -BOOST_AUTO_TEST_CASE(write_conect) +BOOST_AUTO_TEST_CASE(read_write_conect) { // this scope is required to force the writer stream to be closed before // opening the file again in compare_files. Avoids a race condition. { - PDBReader reader(String("testfiles/pdb/conect.pdb"), IOProfile()); - PDBWriter writer(String("testfiles/pdb/conect-out.pdb"), IOProfile()); + IOProfile profile; + profile.read_conect=true; + PDBReader reader(String("testfiles/pdb/conect.pdb"), profile); + PDBWriter writer(String("testfiles/pdb/conect-out.pdb"), profile); mol::EntityHandle ent=mol::CreateEntity(); reader.Import(ent); conop::HeuristicProcessor heu_proc; @@ -973,7 +975,7 @@ BOOST_AUTO_TEST_CASE(charmm_rname) { { PDBWriter writer(String("testfiles/pdb/charmm_rname-out.pdb"), - IOProfile("CHARMM", false, false, false, false, false)); + IOProfile("CHARMM", false, false, false, false, false, false)); mol::EntityHandle ent=mol::CreateEntity(); mol::XCSEditor edi=ent.EditXCS(); @@ -992,7 +994,7 @@ BOOST_AUTO_TEST_CASE(charmm_longcname) { { PDBWriter writer(String("testfiles/pdb/charmm_longcname-out.pdb"), - IOProfile("CHARMM", false, false, false, false, false)); + IOProfile("CHARMM", false, false, false, false, false, false)); mol::EntityHandle ent=mol::CreateEntity(); mol::XCSEditor edi=ent.EditXCS(); @@ -1011,7 +1013,7 @@ BOOST_AUTO_TEST_CASE(write_charmm_ter) { { PDBWriter writer(String("testfiles/pdb/charmm_ter-out.pdb"), - IOProfile("CHARMM", false, false, false, false, false)); + IOProfile("CHARMM", false, false, false, false, false, false)); mol::EntityHandle ent=mol::CreateEntity(); mol::XCSEditor edi=ent.EditXCS(); diff --git a/modules/io/tests/test_io_pdb.py b/modules/io/tests/test_io_pdb.py index 77a504fd8..7a34502ff 100644 --- a/modules/io/tests/test_io_pdb.py +++ b/modules/io/tests/test_io_pdb.py @@ -51,7 +51,36 @@ class TestPDB(unittest.TestCase): crambin_pdb = io.LoadPDB('1crn', remote=True, remote_repo='pdb') self.assertEqual(len(crambin_pdb.residues), 46) self.assertEqual(len(crambin_pdb.atoms), 327) - + + def test_conect(self): + """ See whether read_conect has an effect on reading CONECT + """ + prot = io.LoadPDB("testfiles/pdb/conect.pdb") + res = prot.FindResidue("A", mol.ResNum(3)) + a1 = res.FindAtom("N") + a2 = res.FindAtom("CA") + tmp = sorted([str(a1), str(a2)]) + bond = None + for b in prot.bonds: + if sorted([str(b.GetFirst()), str(b.GetSecond())]) == tmp: + bond = b + break + self.assertTrue(bond is not None) + self.assertEqual(bond.bond_order, 1) + + prot = io.LoadPDB("testfiles/pdb/conect.pdb", read_conect=True) + res = prot.FindResidue("A", mol.ResNum(3)) + a1 = res.FindAtom("N") + a2 = res.FindAtom("CA") + tmp = sorted([str(a1), str(a2)]) + bond = None + for b in prot.bonds: + if sorted([str(b.GetFirst()), str(b.GetSecond())]) == tmp: + bond = b + break + self.assertTrue(bond is not None) + self.assertEqual(bond.bond_order, 2) # now it should be two + if __name__== '__main__': from ost import testutils testutils.RunTests() diff --git a/modules/io/tests/testfiles/pdb/conect.pdb b/modules/io/tests/testfiles/pdb/conect.pdb index b4ce6bd8e..3b29d58d3 100644 --- a/modules/io/tests/testfiles/pdb/conect.pdb +++ b/modules/io/tests/testfiles/pdb/conect.pdb @@ -13,26 +13,26 @@ ATOM 12 CB VAL A 2 -5.589 17.486 20.437 0.50 18.24 C ATOM 13 CG1 VAL A 2 -7.059 17.853 20.200 0.50 19.12 C ATOM 14 CG2 VAL A 2 -4.672 18.535 19.802 0.50 14.29 C TER 15 VAL A 2 -HETATM 16 N PS0 A 3 -11.234 16.802 30.197 0.50 7.63 N -HETATM 17 CA PS0 A 3 -11.284 16.497 28.779 0.50 6.15 C -HETATM 18 C PS0 A 3 -10.818 17.753 28.010 0.50 8.61 C -HETATM 19 OS PS0 A 3 -11.697 18.822 28.249 0.50 4.53 O -HETATM 20 CB PS0 A 3 -12.670 16.079 28.322 0.50 12.53 C -HETATM 21 CG PS0 A 3 -13.432 14.971 29.048 0.50 11.04 C -HETATM 22 CD1 PS0 A 3 -13.076 13.629 28.983 0.50 10.33 C -HETATM 23 CD2 PS0 A 3 -14.560 15.373 29.807 0.50 12.71 C -HETATM 24 CE1 PS0 A 3 -13.818 12.638 29.661 0.50 11.11 C -HETATM 25 CE2 PS0 A 3 -15.300 14.380 30.483 0.50 14.32 C -HETATM 26 CZ PS0 A 3 -14.917 13.043 30.412 0.50 10.61 C -HETATM 27 CM PS0 A 3 -9.405 18.196 28.546 0.50 6.25 C +HETATM 16 N UNL A 3 -11.234 16.802 30.197 0.50 7.63 N +HETATM 17 CA UNL A 3 -11.284 16.497 28.779 0.50 6.15 C +HETATM 18 C UNL A 3 -10.818 17.753 28.010 0.50 8.61 C +HETATM 19 OS UNL A 3 -11.697 18.822 28.249 0.50 4.53 O +HETATM 20 CB UNL A 3 -12.670 16.079 28.322 0.50 12.53 C +HETATM 21 CG UNL A 3 -13.432 14.971 29.048 0.50 11.04 C +HETATM 22 CD1 UNL A 3 -13.076 13.629 28.983 0.50 10.33 C +HETATM 23 CD2 UNL A 3 -14.560 15.373 29.807 0.50 12.71 C +HETATM 24 CE1 UNL A 3 -13.818 12.638 29.661 0.50 11.11 C +HETATM 25 CE2 UNL A 3 -15.300 14.380 30.483 0.50 14.32 C +HETATM 26 CZ UNL A 3 -14.917 13.043 30.412 0.50 10.61 C +HETATM 27 CM UNL A 3 -9.405 18.196 28.546 0.50 6.25 C HETATM 28 O HOH A 4 -3.126 40.621 48.726 1.00 47.60 O HETATM 29 O HOH A 5 -2.279 35.565 45.117 1.00 43.93 O HETATM 30 O HOH A 6 5.765 35.848 41.846 1.00 36.24 O HETATM 31 O HOH A 7 -12.666 40.044 22.441 1.00 41.24 O HETATM 32 O HOH A 8 -4.462 37.411 18.124 1.00 30.94 O HETATM 33 O HOH A 9 -1.109 26.454 19.470 1.00 39.06 O -CONECT 16 17 -CONECT 17 16 18 20 +CONECT 16 17 17 +CONECT 17 16 16 18 20 CONECT 18 17 19 27 CONECT 19 18 CONECT 20 17 21 diff --git a/modules/mol/mm/src/observer.cc b/modules/mol/mm/src/observer.cc index 90a862083..0abc555da 100644 --- a/modules/mol/mm/src/observer.cc +++ b/modules/mol/mm/src/observer.cc @@ -55,7 +55,7 @@ void TrajWriter::Init(boost::shared_ptr<OpenMM::Context> c, registered_ = true; context_ = c; - ost::io::IOProfile profile("CHARMM",false,false,false,false,false); + ost::io::IOProfile profile("CHARMM",false,false,false,false,false,false); ost::io::PDBWriter writer(pdb_filename_, profile); writer.Write(ent.GetAtomList()); -- GitLab