diff --git a/modules/conop/doc/connectivity.rst b/modules/conop/doc/connectivity.rst index f04e978a577e72c6546c654f281abc6aaaadb46d..0dfd005908a8ee624c98bb4ea070da9b9f40b5b9 100644 --- a/modules/conop/doc/connectivity.rst +++ b/modules/conop/doc/connectivity.rst @@ -94,6 +94,15 @@ be extended with custom connectivity information, if required. :type: :class:`ConopAction` + .. attribute:: connect_hetatm + + :type: :class:`bool` + + Whether to connect atoms that are both hetatms. Enabled by default. + Disabling can be useful if there are compounds which are not covered + by the PDB component dictionary and you prefer to create your own + connectivity for those. + .. method:: Process(ent) Processess the entity *ent* according to the current options. @@ -101,7 +110,9 @@ be extended with custom connectivity information, if required. .. class:: HeuristicProcessor(check_bond_feasibility=False, \ assign_torsions=True, connect=True, \ - peptide_bonds=True, zero_occ_treatment=CONOP_WARN) + peptide_bonds=True, + connect_hetatm=True, + zero_occ_treatment=CONOP_WARN) The :class:`HeuristicProcessor` implements the :class:`Processor` interface. Refer to its documentation for methods and accessors common to all processor. @@ -110,6 +121,7 @@ be extended with custom connectivity information, if required. :param assign_torsions: Sets :attr:`~Processor.assign_torsions` :param connect: Sets :attr:`~Processor.connect` :param peptide_bonds: Sets :attr:`~Processor.peptide_bonds` + :param connect_hetatm: Sets :attr:`~Processor.connect_hetatm` :param zero_occ_treatment: Sets :attr:`~Processor.zero_occ_treatment` @@ -119,7 +131,8 @@ be extended with custom connectivity information, if required. unknown_atom_treatment=CONOP_WARN, \ check_bond_feasibility=False, \ assign_torsions=True, connect=True, \ - peptide_bonds=True, zero_occ_treatment=CONOP_WARN) + peptide_bonds=True, connect_hetatm=True, \ + zero_occ_treatment=CONOP_WARN) The :class:`RuleBasedProcessor` implements the :class:`Processor` interface. Refer to its documentation for methods and accessors common to all processor. @@ -134,6 +147,7 @@ be extended with custom connectivity information, if required. :param assign_torsions: Sets :attr:`~Processor.assign_torsions` :param connect: Sets :attr:`~Processor.connect` :param peptide_bonds: Sets :attr:`~Processor.peptide_bonds` + :param connect_hetatm: Sets :attr:`~Processor.connect_hetatm` :param zero_occ_treatment: Sets :attr:`~Processor.zero_occ_treatment` .. attribute:: fix_elements diff --git a/modules/conop/pymod/export_heuristic.cc b/modules/conop/pymod/export_heuristic.cc index 275245a7eaeeac0ef66b571556a9d64161ceb36a..b83a77ca031b0dfe829ce9781cf9a24c8d4fdb44 100644 --- a/modules/conop/pymod/export_heuristic.cc +++ b/modules/conop/pymod/export_heuristic.cc @@ -29,11 +29,12 @@ void export_heuristic() { class_<HeuristicProcessor, HeuristicProcessorPtr, boost::noncopyable, bases<Processor> >("HeuristicProcessor", init<>()) - .def(init<bool,bool,bool,bool,ConopAction>( + .def(init<bool,bool,bool,bool,bool,ConopAction>( (arg("check_bond_feasibility")=false, arg("assign_torsions")=true, arg("connect")=true, arg("peptide_bonds")=true, + arg("connect_hetatm")=true, arg("zero_occ_treatment")=CONOP_WARN))) ; } diff --git a/modules/conop/pymod/export_processor.cc b/modules/conop/pymod/export_processor.cc index 262a1dffffbc80e4343693d493f2098f6d12a623..726979edfcbb984a8d626cea247b0cdb862d653d 100644 --- a/modules/conop/pymod/export_processor.cc +++ b/modules/conop/pymod/export_processor.cc @@ -86,6 +86,8 @@ void export_processor() { &Processor::SetConnect) .add_property("peptide_bonds", &Processor::GetConnectAminoAcids, &Processor::SetConnectAminoAcids) + .add_property("connect_hetatm", &Processor::GetConnectHetatm, + &Processor::SetConnectHetatm) .add_property("zero_occ_treatment", &Processor::GetZeroOccTreatment, &Processor::SetZeroOccTreatment) .def("Process", &Processor::Process, diff --git a/modules/conop/pymod/export_rule_based.cc b/modules/conop/pymod/export_rule_based.cc index 7b22b6d2d8f29cf474ae5fd367786835a5edb34c..ad0414dd23e7b9b5cb265b26e35dbfba5e5f6ac8 100644 --- a/modules/conop/pymod/export_rule_based.cc +++ b/modules/conop/pymod/export_rule_based.cc @@ -28,7 +28,7 @@ void export_rule_based() { class_<RuleBasedProcessor, RuleBasedProcessorPtr, boost::noncopyable, bases<Processor> >("RuleBasedProcessor", init<CompoundLibPtr>()) - .def(init<CompoundLibPtr,bool,bool,ConopAction,ConopAction,bool,bool,bool,bool,ConopAction>( + .def(init<CompoundLibPtr,bool,bool,ConopAction,ConopAction,bool,bool,bool,bool,bool,ConopAction>( (arg("lib"), arg("fix_elements")=true, arg("strict_hydrogens")=false, arg("unknown_res_treatment")=CONOP_WARN, arg("unknown_atom_treatment")=CONOP_WARN, @@ -36,6 +36,7 @@ void export_rule_based() { arg("assign_torsions")=true, arg("connect")=true, arg("peptide_bonds")=true, + arg("connect_hetatm")=true, arg("zero_occ_treatment")=CONOP_WARN))) .add_property("fix_element", &RuleBasedProcessor::GetFixElement, &RuleBasedProcessor::SetFixElement) diff --git a/modules/conop/src/heuristic.hh b/modules/conop/src/heuristic.hh index 2b70864aa12e5111b98923accb57cddcc4643495..5a72250f146e17861dfe807b4c746573f2086880 100644 --- a/modules/conop/src/heuristic.hh +++ b/modules/conop/src/heuristic.hh @@ -41,8 +41,9 @@ public: virtual ProcessorPtr Copy() const { return ProcessorPtr(new HeuristicProcessor(*this)); } - HeuristicProcessor(bool bf, bool at, bool cn, bool aa, ConopAction zo): - Processor(bf, at, cn, aa, zo), + HeuristicProcessor(bool bf, bool at, bool cn, bool aa, bool ch, + ConopAction zo): + Processor(bf, at, cn, aa, ch, zo), lib_() {} diff --git a/modules/conop/src/processor.cc b/modules/conop/src/processor.cc index 844256a3c00ff4c5eb1f8a0a23d0e15d19478909..840c8b4a798e031e92e2bc94e2574e23aded46d4 100644 --- a/modules/conop/src/processor.cc +++ b/modules/conop/src/processor.cc @@ -304,6 +304,9 @@ void Processor::ConnectAtomsOfResidue(mol::ResidueHandle rh, a2.GetElement()=="D")) { continue; } + if (!connect_hetatm_ && a1.IsHetAtom() && a2.IsHetAtom()) { + continue; + } if (!this->GetCheckBondFeasibility()) { e.Connect(a1, a2, bond.order); } else { @@ -391,6 +394,9 @@ void Processor::DistanceBasedConnect(mol::AtomHandle atom) const for (mol::AtomHandleList::const_iterator it=alist.begin(), e=alist.end();it!=e;++it) { if (*it!=atom) { + if (!connect_hetatm_ && it->IsHetAtom() && atom.IsHetAtom()) { + continue; + } if (IsBondFeasible(atom, *it)) { if (it->GetResidue()==res_a || Processor::AreResiduesConsecutive(res_a, it->GetResidue())) { diff --git a/modules/conop/src/processor.hh b/modules/conop/src/processor.hh index 81add923d0886fd88aa75f47d2093d5b312e5eaf..2c7bf495a6b216ca4b13f111d68ac1dce66b8301 100644 --- a/modules/conop/src/processor.hh +++ b/modules/conop/src/processor.hh @@ -66,19 +66,20 @@ protected: void DistanceBasedConnect(mol::AtomHandle atom) const; mol::AtomHandle LocateAtom(const mol::AtomHandleList&, int ordinal) const; public: - Processor(bool bf, bool at, bool cn, bool aa, ConopAction zo): check_bond_feasibility_(bf), - assign_torsions_(at), connect_(cn), connect_aa_(aa), - zero_occ_treatment_(zo) {} + Processor(bool bf, bool at, bool cn, bool aa, bool ch, ConopAction zo): + check_bond_feasibility_(bf), assign_torsions_(at), connect_(cn), + connect_aa_(aa), connect_hetatm_(ch), zero_occ_treatment_(zo) {} Processor(): check_bond_feasibility_(false), assign_torsions_(true), connect_(true), connect_aa_(true), - zero_occ_treatment_(CONOP_SILENT) {} + connect_hetatm_(true), zero_occ_treatment_(CONOP_SILENT) {} + void SetConnect(bool connect) { connect_ = connect; } - bool GetConnect() const { return connect_; } + void SetAssignTorsions(bool flag) { assign_torsions_ = flag; } @@ -92,23 +93,29 @@ public: void SetConnectAminoAcids(bool c) { connect_aa_ = c; } + + bool GetConnectHetatm() const { + return connect_hetatm_; + } + void SetConnectHetatm(bool c) { + connect_hetatm_ = c; + } + bool GetCheckBondFeasibility() const { return check_bond_feasibility_; } - void SetCheckBondFeasibility(bool flag) { check_bond_feasibility_ = flag; } - ConopAction GetZeroOccTreatment() const { return zero_occ_treatment_; } - void SetZeroOccTreatment(ConopAction action) { zero_occ_treatment_ = action; } + virtual String ToString() const = 0; protected: String OptionsToString() const; @@ -117,6 +124,7 @@ private: bool assign_torsions_; bool connect_; bool connect_aa_; + bool connect_hetatm_; ConopAction zero_occ_treatment_; }; diff --git a/modules/conop/src/rule_based.hh b/modules/conop/src/rule_based.hh index cea8d5fd3c087c2b1efe0c8429bc8b97f5def4a7..15815f9fd2cec732f9b2afc9bcba9d48a681c9ba 100644 --- a/modules/conop/src/rule_based.hh +++ b/modules/conop/src/rule_based.hh @@ -45,8 +45,8 @@ public: RuleBasedProcessor(CompoundLibPtr compound_lib, bool fe, bool sh, ConopAction ur, ConopAction ua, bool bf, bool at, bool cn, - bool aa, ConopAction zo): - Processor(bf, at, cn, aa, zo), lib_(compound_lib), fix_element_(fe), + bool aa, bool ch, ConopAction zo): + Processor(bf, at, cn, aa, ch, zo), lib_(compound_lib), fix_element_(fe), strict_hydrogens_(sh), unk_res_treatment_(ur), unk_atom_treatment_(ua) { diff --git a/modules/conop/tests/test_heuristic_conop.cc b/modules/conop/tests/test_heuristic_conop.cc index d6ba9f058b1add565219187ee55c402623fc0402..acb0ed9a0cc4050d79c6706753eaa5fed7e82127 100644 --- a/modules/conop/tests/test_heuristic_conop.cc +++ b/modules/conop/tests/test_heuristic_conop.cc @@ -179,5 +179,49 @@ BOOST_AUTO_TEST_CASE(quack_types_unknown_residues) { } + +BOOST_AUTO_TEST_CASE(hetatom_connect_heuristic) { + + // STEP 1: Specify two atoms as hetatoms - they should be connected + // by the processor + mol::EntityHandle e = Builder() + .Chain("A") + .Residue("GLY") + .Atom("N", geom::Vec3(-8.22, 35.20, 22.39)) + .Atom("CA", geom::Vec3(-8.28, 36.36, 21.49)) + .Atom("C", geom::Vec3(-8.59, 35.93, 20.06)) + .Atom("O", geom::Vec3(-7.88, 36.30, 19.12)) + .Atom("CB", geom::Vec3(-6.96, 37.11, 21.53)) + ; + + ost::mol::AtomHandleList atoms = e.GetAtomList(); + atoms[0].SetHetAtom(true); // N + atoms[1].SetHetAtom(true); // CA + HeuristicProcessor proc; + proc.Process(e); + BOOST_CHECK(mol::BondExists(e.FindAtom("A", 1, "N"), e.FindAtom("A", 1, "CA"))); + BOOST_CHECK(mol::BondExists(e.FindAtom("A", 1, "CA"), e.FindAtom("A", 1, "C"))); + + // STEP 2: Same thing again but we tell the processor NOT to connect + // hetatoms + e = Builder() + .Chain("A") + .Residue("GLY") + .Atom("N", geom::Vec3(-8.22, 35.20, 22.39)) + .Atom("CA", geom::Vec3(-8.28, 36.36, 21.49)) + .Atom("C", geom::Vec3(-8.59, 35.93, 20.06)) + .Atom("O", geom::Vec3(-7.88, 36.30, 19.12)) + .Atom("CB", geom::Vec3(-6.96, 37.11, 21.53)) + ; + + atoms = e.GetAtomList(); + atoms[0].SetHetAtom(true); // N + atoms[1].SetHetAtom(true); // CA + proc.SetConnectHetatm(false); + proc.Process(e); + BOOST_CHECK(!mol::BondExists(e.FindAtom("A", 1, "N"), e.FindAtom("A", 1, "CA"))); + BOOST_CHECK(mol::BondExists(e.FindAtom("A", 1, "CA"), e.FindAtom("A", 1, "C"))); +} + BOOST_AUTO_TEST_SUITE_END(); diff --git a/modules/conop/tests/test_rule_based_conop.cc b/modules/conop/tests/test_rule_based_conop.cc index 4e52222423b61268614b9d11fb28007daa4b5848..afe74922246ae71b049627e1c197169ff28f7b69 100644 --- a/modules/conop/tests/test_rule_based_conop.cc +++ b/modules/conop/tests/test_rule_based_conop.cc @@ -26,6 +26,7 @@ #include <ost/log.hh> #include <ost/conop/rule_based.hh> #include <ost/conop/conop.hh> +#include <ost/mol/builder.hh> #include "helper.hh" using boost::unit_test_framework::test_suite; @@ -55,13 +56,13 @@ BOOST_AUTO_TEST_CASE(rule_based_init_check) BOOST_CHECK_THROW(RuleBasedProcessor rbc1(lib), ost::Error); BOOST_CHECK_THROW(RuleBasedProcessor rbc2(lib, true, false, CONOP_WARN, CONOP_WARN, false, true, true, true, - CONOP_WARN), ost::Error); + true, CONOP_WARN), ost::Error); lib = load_lib(); if (!lib) { return; } BOOST_CHECK_NO_THROW(RuleBasedProcessor rbc3(lib)); BOOST_CHECK_NO_THROW(RuleBasedProcessor rbc4(lib, true, false, CONOP_WARN, CONOP_WARN, false, true, true, - true, CONOP_WARN)); + true, true, CONOP_WARN)); } BOOST_AUTO_TEST_CASE(rule_based_set_get_flags) @@ -211,4 +212,50 @@ BOOST_AUTO_TEST_CASE(rule_based_unk_res) BOOST_CHECK(!ent2.FindResidue("A", 1)); } +BOOST_AUTO_TEST_CASE(hetatom_connect_rule_based) { + + CompoundLibPtr lib = load_lib(); + if (!lib) { return; } + + // STEP 1: Specify two atoms as hetatoms - they should be connected + // by the processor + mol::EntityHandle e = Builder() + .Chain("A") + .Residue("GLY") + .Atom("N", geom::Vec3(-8.22, 35.20, 22.39)) + .Atom("CA", geom::Vec3(-8.28, 36.36, 21.49)) + .Atom("C", geom::Vec3(-8.59, 35.93, 20.06)) + .Atom("O", geom::Vec3(-7.88, 36.30, 19.12)) + .Atom("CB", geom::Vec3(-6.96, 37.11, 21.53)) + ; + + ost::mol::AtomHandleList atoms = e.GetAtomList(); + atoms[0].SetHetAtom(true); // N + atoms[1].SetHetAtom(true); // CA + RuleBasedProcessor proc(lib); + proc.Process(e); + BOOST_CHECK(mol::BondExists(e.FindAtom("A", 1, "N"), e.FindAtom("A", 1, "CA"))); + BOOST_CHECK(mol::BondExists(e.FindAtom("A", 1, "CA"), e.FindAtom("A", 1, "C"))); + + // STEP 2: Same thing again but we tell the processor NOT to connect + // hetatoms + e = Builder() + .Chain("A") + .Residue("GLY") + .Atom("N", geom::Vec3(-8.22, 35.20, 22.39)) + .Atom("CA", geom::Vec3(-8.28, 36.36, 21.49)) + .Atom("C", geom::Vec3(-8.59, 35.93, 20.06)) + .Atom("O", geom::Vec3(-7.88, 36.30, 19.12)) + .Atom("CB", geom::Vec3(-6.96, 37.11, 21.53)) + ; + + atoms = e.GetAtomList(); + atoms[0].SetHetAtom(true); // N + atoms[1].SetHetAtom(true); // CA + proc.SetConnectHetatm(false); + proc.Process(e); + BOOST_CHECK(!mol::BondExists(e.FindAtom("A", 1, "N"), e.FindAtom("A", 1, "CA"))); + BOOST_CHECK(mol::BondExists(e.FindAtom("A", 1, "CA"), e.FindAtom("A", 1, "C"))); +} + BOOST_AUTO_TEST_SUITE_END(); diff --git a/modules/io/doc/profile.rst b/modules/io/doc/profile.rst index ec6d4371c1bcfac46790710a4bf6f6a61a1df252..e150a1346c64ada32a9fa53611d3b08ef00ae8ff 100644 --- a/modules/io/doc/profile.rst +++ b/modules/io/doc/profile.rst @@ -94,7 +94,7 @@ The IOProfile Class .. class:: IOProfile(dialect='PDB', quack_mode=False, fault_tolerant=False,\ join_spread_atom_records=False, no_hetatms=False,\ - calpha_only=False, processor=None) + calpha_only=False, read_conect=False, processor=None) Aggregates flags that control the import of molecular structures. @@ -147,6 +147,16 @@ The IOProfile Class most useful in combination with protein-only PDB files to speed up subsequent processing and importing. + .. attribute:: read_conect + + :type: bool + + Only relevant when reading PDB format. When set to true, reads CONECT + statements and applies them in the pdb reader. This can result in + hydrogen bonds salt bridges etc. to be connected. Check the PDB format + definition for more info. Disables connectity generation in case of HETATM + in processor except for water. + .. attribute:: processor :type: :class:`ost.conop.HeuristicProcessor`/:class:`ost.conop.RuleBasedProcessor` diff --git a/modules/io/pymod/__init__.py b/modules/io/pymod/__init__.py index c47d044def4ace83b537c957e4111a4c9ae9681a..965a39ce4b95c15fcf4c4703a910b1b7145233f8 100644 --- a/modules/io/pymod/__init__.py +++ b/modules/io/pymod/__init__.py @@ -83,7 +83,8 @@ def LoadPDB(filename, restrict_chains="", no_hetatms=None, fault_tolerant=None, load_multi=False, quack_mode=None, join_spread_atom_records=None, calpha_only=None, profile='DEFAULT', remote=False, remote_repo='pdb', - dialect=None, seqres=False, bond_feasibility_check=None): + dialect=None, seqres=False, bond_feasibility_check=None, + read_conect=False): """ Load PDB file from disk and return one or more entities. Several options allow to customize the exact behaviour of the PDB import. For more information @@ -169,6 +170,16 @@ def LoadPDB(filename, restrict_chains="", no_hetatms=None, If set, overrides the value of :attr:`ost.conop.Processor.check_bond_feasibility` :type bond_feasibility_check: :class:`bool` + :param read_conect: By default, OpenStructure doesn't read CONECT statements in + a pdb file. Reason is that they're often missing and we prefer + to rely on the chemical component dictionary from the PDB. + However, there may be cases where you really want these CONECT + statements. For example novel compounds with no entry in + the chemical component dictionary. Setting this to True has + two effects: 1) CONECT statements are read and blindly applied + 2) The processor does not connect any pair of HETATOM atoms in + order to not interfer with the CONECT statements. + :type read_conect: :class:`bool` :rtype: :class:`~ost.mol.EntityHandle` or a list thereof if `load_multi` is True. @@ -194,9 +205,12 @@ def LoadPDB(filename, restrict_chains="", no_hetatms=None, prof.no_hetatms=_override(prof.no_hetatms, no_hetatms) prof.dialect=_override(prof.dialect, dialect) prof.quack_mode=_override(prof.quack_mode, quack_mode) + prof.read_conect=_override(prof.read_conect, read_conect) if prof.processor: prof.processor.check_bond_feasibility=_override(prof.processor.check_bond_feasibility, bond_feasibility_check) + prof.processor.connect_hetatm=_override(prof.processor.connect_hetatm, + not read_conect) prof.fault_tolerant=_override(prof.fault_tolerant, fault_tolerant) prof.join_spread_atom_records=_override(prof.join_spread_atom_records, join_spread_atom_records) diff --git a/modules/io/pymod/export_pdb_io.cc b/modules/io/pymod/export_pdb_io.cc index addb94f3bbd0d8f101da1b846af2a5b90c042592..cad44b855f2869f3234ad22bbc3c5a66ede2da8b 100644 --- a/modules/io/pymod/export_pdb_io.cc +++ b/modules/io/pymod/export_pdb_io.cc @@ -45,13 +45,14 @@ void remove_profiles() { void export_pdb_io() { class_<IOProfile>("IOProfile", - init<String,bool,bool,bool,bool,bool, + init<String,bool,bool,bool,bool,bool,bool, conop::ProcessorPtr>((arg("dialect")="PDB", arg("quack_mode")=false, arg("fault_tolerant")=false, arg("join_spread_atom_records")=false, arg("no_hetatms")=false, arg("calpha_only")=false, + arg("read_conect")=false, arg("processor")=conop::ProcessorPtr()))) .def(init<const IOProfile&>()) .def_readwrite("dialect", &IOProfile::dialect) @@ -60,6 +61,7 @@ void export_pdb_io() .def_readwrite("no_hetatms", &IOProfile::no_hetatms) .def_readwrite("calpha_only", &IOProfile::calpha_only) .def_readwrite("join_spread_atom_records", &IOProfile::join_spread_atom_records) + .def_readwrite("read_conect", &IOProfile::read_conect) .def_readwrite("processor", &IOProfile::processor) .def("Copy", &IOProfile::Copy) .def(self_ns::str(self)) diff --git a/modules/io/src/mol/io_profile.hh b/modules/io/src/mol/io_profile.hh index 22060e8c6514d0597d0e3d17113f6941e67c6d93..d8f078e391fd867e847c71ecd821eafba12ad78f 100644 --- a/modules/io/src/mol/io_profile.hh +++ b/modules/io/src/mol/io_profile.hh @@ -31,15 +31,15 @@ namespace ost { namespace io { struct DLLEXPORT IOProfile { public: IOProfile(String d, bool qm, bool ft, bool js, bool nh, - bool co, conop::ProcessorPtr proc=conop::ProcessorPtr()): + bool co, bool rc, conop::ProcessorPtr proc=conop::ProcessorPtr()): dialect(d), quack_mode(qm), fault_tolerant(ft), join_spread_atom_records(js), - no_hetatms(nh), calpha_only(co), processor(proc) + no_hetatms(nh), calpha_only(co), read_conect(rc), processor(proc) { } IOProfile(): dialect("PDB"), quack_mode(false), fault_tolerant(false), join_spread_atom_records(false), no_hetatms(false), - calpha_only(false), processor() + calpha_only(false), read_conect(false), processor() { } String dialect; @@ -48,11 +48,12 @@ public: bool join_spread_atom_records; bool no_hetatms; bool calpha_only; + bool read_conect; conop::ProcessorPtr processor; IOProfile Copy() { return IOProfile(dialect, quack_mode, fault_tolerant, join_spread_atom_records, - no_hetatms, calpha_only, + no_hetatms, calpha_only, read_conect, processor ? processor->Copy() : conop::ProcessorPtr()); } }; @@ -66,6 +67,7 @@ inline std::ostream& operator<<(std::ostream& stream, const IOProfile& p) << "calpha_only=" << (p.calpha_only ? "True" : "False") << ", " << "fault_tolerant=" << (p.fault_tolerant ? "True" : "False") << ", " << "no_hetatms=" << (p.no_hetatms ? "True" : "False") << ", " + << "read_conect=" << (p.read_conect ? "True" : "False") << ", " << "processor=" << (p.processor ? p.processor->ToString() : "None") << ")"; return stream; } diff --git a/modules/io/src/mol/pdb_reader.cc b/modules/io/src/mol/pdb_reader.cc index 3aeefcbd75fde457b96f5a89ba5364fc501a7475..e4e11d7bcb3041588b789381239ed830b8a01878 100644 --- a/modules/io/src/mol/pdb_reader.cc +++ b/modules/io/src/mol/pdb_reader.cc @@ -27,6 +27,7 @@ #include <ost/profile.hh> #include <ost/log.hh> #include <ost/message.hh> +#include <ost/mol/bond_handle.hh> #include <ost/conop/conop.hh> #include <ost/geom/mat3.hh> @@ -356,11 +357,16 @@ void PDBReader::Import(mol::EntityHandle& ent, break; case 'C': case 'c': - if (curr_line.size()<20) { + if (curr_line.size()<6) { LOG_TRACE("skipping entry"); continue; } - if (IEquals(curr_line.substr(0, 6), StringRef("COMPND", 6))) { + else if (IEquals(curr_line.substr(0, 6), StringRef("CONECT", 6)) && + profile_.read_conect) { + LOG_TRACE("processing CONECT entry"); + this->ParseConectEntry(curr_line, line_num_, ent); + } + else if (IEquals(curr_line.substr(0, 6), StringRef("COMPND", 6))) { LOG_TRACE("processing COMPND entry"); this->ParseCompndEntry(curr_line, line_num_); } @@ -566,7 +572,8 @@ bool PDBReader::EnsureLineLength(const StringRef& line, size_t size) bool PDBReader::ParseAtomIdent(const StringRef& line, int line_num, String& chain_name, StringRef& res_name, mol::ResNum& resnum, StringRef& atom_name, - char& alt_loc, const StringRef& record_type) + char& alt_loc, const StringRef& record_type, + int& serial) { if (!this->EnsureLineLength(line, 27)) { return false; @@ -602,6 +609,16 @@ bool PDBReader::ParseAtomIdent(const StringRef& line, int line_num, char ins_c=line[26]; resnum=to_res_num(res_num.second, ins_c); + + std::pair<bool, int> tmp = line.substr(6, 5).trim().to_int(); + if(!tmp.first) { + if (profile_.fault_tolerant) { + return false; + } + throw IOException(str(format("invalid atom serial on line %d") % line_num)); + } + serial = tmp.second; + return true; } @@ -615,8 +632,10 @@ void PDBReader::ParseAnisou(const StringRef& line, int line_num, char alt_loc=0; StringRef res_name, atom_name; mol::ResNum res_num(0); + int serial = -1; if (!this->ParseAtomIdent(line, line_num, chain_name, res_name, res_num, - atom_name, alt_loc, StringRef("ANISOU", 6))) { + atom_name, alt_loc, StringRef("ANISOU", 6), + serial)) { return; } double anisou[6]={0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; @@ -673,8 +692,9 @@ void PDBReader::ParseAndAddAtom(const StringRef& line, int line_num, String chain_name; StringRef res_name, atom_name; mol::ResNum res_num(0); + int serial = -1; if (!this->ParseAtomIdent(line, line_num, chain_name, res_name, res_num, - atom_name, alt_loc, record_type)) { + atom_name, alt_loc, record_type, serial)) { return; } std::pair<bool, Real> charge, radius; @@ -860,6 +880,9 @@ void PDBReader::ParseAndAddAtom(const StringRef& line, int line_num, ah.SetCharge(charge.second); } ah.SetHetAtom(record_type[0]=='H'); + if(profile_.read_conect && serial != -1) { + this->amap_[serial] = ah; + } } void PDBReader::ParseHelixEntry(const StringRef& line) @@ -911,4 +934,45 @@ void PDBReader::ParseStrandEntry(const StringRef& line) } } +void PDBReader::ParseConectEntry (const StringRef& line, int line_num, mol::EntityHandle& ent) +{ + if (line.size()<16) { + if (profile_.fault_tolerant) { + LOG_WARNING("invalid CONECT record on line " << line_num + << ": record is too short"); + return; + } + std::stringstream ss("invalid CONECT record on line "); + ss << line_num <<": record is too short"; + throw IOException(ss.str()); + } + if (line.rtrim().size()>80) { + if (profile_.fault_tolerant) { + LOG_WARNING("invalid CONECT record on line " << line_num + << ": record is too long"); + return; + } + std::stringstream ss("invalid CONECT record on line "); + ss << line_num <<": whole record is too long"; + throw IOException(ss.str()); + } + mol::XCSEditor editor=ent.EditXCS(mol::BUFFERED_EDIT); + const int starting_atom = line.substr(6,5).trim().to_int().second; + // map for bonds, in the form of <serial_number, bond_order> + std::map<int, unsigned char> connected_atoms; + for (int i=0; i<4; i++) { + if (static_cast<int>(line.length()) < 11+i*5+5) break; + const int connected_atom = line.substr(11+i*5, 5).trim().to_int().second; + connected_atoms[connected_atom]+=1; + } + for (auto& pair : connected_atoms) { + auto at_one_it = this->amap_.find(starting_atom); + auto at_two_it = this->amap_.find(pair.first); + if(at_one_it != this->amap_.end() && at_two_it != this->amap_.end()) { + editor.Connect(at_one_it->second, at_two_it->second, pair.second); + } + } +} + + }} diff --git a/modules/io/src/mol/pdb_reader.hh b/modules/io/src/mol/pdb_reader.hh index df98fa2dd6bdc7a39e646c88b18bda7ab14e3b1c..0d75d161261e8ee9fefc488a79ebc30bec983f31 100644 --- a/modules/io/src/mol/pdb_reader.hh +++ b/modules/io/src/mol/pdb_reader.hh @@ -83,13 +83,15 @@ private: bool ParseAtomIdent(const StringRef& line, int line_num, String& chain_name, StringRef& res, mol::ResNum& resnum, StringRef& atom_name, char& alt_loc, - const StringRef& record_type); + const StringRef& record_type, int& serial); void ParseAnisou(const StringRef& line, int line_num, mol::EntityHandle& h); void ParseHelixEntry(const StringRef& line); void ParseStrandEntry(const StringRef& line); void Init(const boost::filesystem::path& loc); bool EnsureLineLength(const StringRef& line, size_t size); + void ParseConectEntry(const StringRef& line, int line_num, mol::EntityHandle& ent); + std::map<int, mol::AtomHandle> amap_; // <serial_number, AtomHandle> mol::ChainHandle curr_chain_; mol::ResidueHandle curr_residue_; int chain_count_; diff --git a/modules/io/src/mol/pdb_writer.cc b/modules/io/src/mol/pdb_writer.cc index 7b28c8b2a5da8b5a840ea437fffc55ab8a92cd26..d4461402c725d3c9db93f1f2a1e85fc3521486e7 100644 --- a/modules/io/src/mol/pdb_writer.cc +++ b/modules/io/src/mol/pdb_writer.cc @@ -35,6 +35,7 @@ #include <ost/mol/residue_handle.hh> #include <ost/mol/chain_handle.hh> #include <ost/mol/entity_visitor.hh> +#include <ost/mol/bond_handle.hh> #include "pdb_writer.hh" using boost::format; @@ -337,20 +338,21 @@ public: } private: public: - virtual bool VisitAtom(const mol::AtomHandle& atom) { +virtual bool VisitAtom(const mol::AtomHandle& atom) { if (atom.IsHetAtom()) { bool has_partner=false; int atom_index=atom_indices_[atom.GetHashCode()]; mol::AtomHandleList partners=atom.GetBondPartners(); std::list<int> partner_indices; - for (mol::AtomHandleList::const_iterator i=partners.begin(); - i!=partners.end(); ++i) { - int pind=atom_indices_[i->GetHashCode()]; - if (pind!=0) { - partner_indices.push_back(pind); - has_partner=true; + for (auto partner : partners){ + mol::BondHandle bond = atom.FindBondToAtom(partner); + int pind=atom_indices_[partner.GetHashCode()]; + if (pind!=0) { + for (int i=0; i < int(bond.GetBondOrder()); i++) + {partner_indices.push_back(pind);} + has_partner=true; + } } - } if (has_partner) { write_conect(ostr_, atom_index, partner_indices); } diff --git a/modules/io/tests/test_io_pdb.cc b/modules/io/tests/test_io_pdb.cc index 400034f9b8cf25a93d9953607e058881b7e4ee37..99071118e987899c00a88dab6ae9e2d010d59e97 100644 --- a/modules/io/tests/test_io_pdb.cc +++ b/modules/io/tests/test_io_pdb.cc @@ -724,13 +724,15 @@ BOOST_AUTO_TEST_CASE(write_ter6) "testfiles/pdb/ter4-out.pdb")); } -BOOST_AUTO_TEST_CASE(write_conect) +BOOST_AUTO_TEST_CASE(read_write_conect) { // this scope is required to force the writer stream to be closed before // opening the file again in compare_files. Avoids a race condition. { - PDBReader reader(String("testfiles/pdb/conect.pdb"), IOProfile()); - PDBWriter writer(String("testfiles/pdb/conect-out.pdb"), IOProfile()); + IOProfile profile; + profile.read_conect=true; + PDBReader reader(String("testfiles/pdb/conect.pdb"), profile); + PDBWriter writer(String("testfiles/pdb/conect-out.pdb"), profile); mol::EntityHandle ent=mol::CreateEntity(); reader.Import(ent); conop::HeuristicProcessor heu_proc; @@ -973,7 +975,7 @@ BOOST_AUTO_TEST_CASE(charmm_rname) { { PDBWriter writer(String("testfiles/pdb/charmm_rname-out.pdb"), - IOProfile("CHARMM", false, false, false, false, false)); + IOProfile("CHARMM", false, false, false, false, false, false)); mol::EntityHandle ent=mol::CreateEntity(); mol::XCSEditor edi=ent.EditXCS(); @@ -992,7 +994,7 @@ BOOST_AUTO_TEST_CASE(charmm_longcname) { { PDBWriter writer(String("testfiles/pdb/charmm_longcname-out.pdb"), - IOProfile("CHARMM", false, false, false, false, false)); + IOProfile("CHARMM", false, false, false, false, false, false)); mol::EntityHandle ent=mol::CreateEntity(); mol::XCSEditor edi=ent.EditXCS(); @@ -1011,7 +1013,7 @@ BOOST_AUTO_TEST_CASE(write_charmm_ter) { { PDBWriter writer(String("testfiles/pdb/charmm_ter-out.pdb"), - IOProfile("CHARMM", false, false, false, false, false)); + IOProfile("CHARMM", false, false, false, false, false, false)); mol::EntityHandle ent=mol::CreateEntity(); mol::XCSEditor edi=ent.EditXCS(); diff --git a/modules/io/tests/test_io_pdb.py b/modules/io/tests/test_io_pdb.py index 77a504fd8ff650f4ef26ca3a3477b38328c9f7aa..7a34502ff592ae13544d7d4e32b803c8df15f51d 100644 --- a/modules/io/tests/test_io_pdb.py +++ b/modules/io/tests/test_io_pdb.py @@ -51,7 +51,36 @@ class TestPDB(unittest.TestCase): crambin_pdb = io.LoadPDB('1crn', remote=True, remote_repo='pdb') self.assertEqual(len(crambin_pdb.residues), 46) self.assertEqual(len(crambin_pdb.atoms), 327) - + + def test_conect(self): + """ See whether read_conect has an effect on reading CONECT + """ + prot = io.LoadPDB("testfiles/pdb/conect.pdb") + res = prot.FindResidue("A", mol.ResNum(3)) + a1 = res.FindAtom("N") + a2 = res.FindAtom("CA") + tmp = sorted([str(a1), str(a2)]) + bond = None + for b in prot.bonds: + if sorted([str(b.GetFirst()), str(b.GetSecond())]) == tmp: + bond = b + break + self.assertTrue(bond is not None) + self.assertEqual(bond.bond_order, 1) + + prot = io.LoadPDB("testfiles/pdb/conect.pdb", read_conect=True) + res = prot.FindResidue("A", mol.ResNum(3)) + a1 = res.FindAtom("N") + a2 = res.FindAtom("CA") + tmp = sorted([str(a1), str(a2)]) + bond = None + for b in prot.bonds: + if sorted([str(b.GetFirst()), str(b.GetSecond())]) == tmp: + bond = b + break + self.assertTrue(bond is not None) + self.assertEqual(bond.bond_order, 2) # now it should be two + if __name__== '__main__': from ost import testutils testutils.RunTests() diff --git a/modules/io/tests/testfiles/pdb/conect.pdb b/modules/io/tests/testfiles/pdb/conect.pdb index b4ce6bd8e3855a2a79ccdcfa6424bf6ad796525c..3b29d58d3a6bdc6d03b655a2761deae141aba39f 100644 --- a/modules/io/tests/testfiles/pdb/conect.pdb +++ b/modules/io/tests/testfiles/pdb/conect.pdb @@ -13,26 +13,26 @@ ATOM 12 CB VAL A 2 -5.589 17.486 20.437 0.50 18.24 C ATOM 13 CG1 VAL A 2 -7.059 17.853 20.200 0.50 19.12 C ATOM 14 CG2 VAL A 2 -4.672 18.535 19.802 0.50 14.29 C TER 15 VAL A 2 -HETATM 16 N PS0 A 3 -11.234 16.802 30.197 0.50 7.63 N -HETATM 17 CA PS0 A 3 -11.284 16.497 28.779 0.50 6.15 C -HETATM 18 C PS0 A 3 -10.818 17.753 28.010 0.50 8.61 C -HETATM 19 OS PS0 A 3 -11.697 18.822 28.249 0.50 4.53 O -HETATM 20 CB PS0 A 3 -12.670 16.079 28.322 0.50 12.53 C -HETATM 21 CG PS0 A 3 -13.432 14.971 29.048 0.50 11.04 C -HETATM 22 CD1 PS0 A 3 -13.076 13.629 28.983 0.50 10.33 C -HETATM 23 CD2 PS0 A 3 -14.560 15.373 29.807 0.50 12.71 C -HETATM 24 CE1 PS0 A 3 -13.818 12.638 29.661 0.50 11.11 C -HETATM 25 CE2 PS0 A 3 -15.300 14.380 30.483 0.50 14.32 C -HETATM 26 CZ PS0 A 3 -14.917 13.043 30.412 0.50 10.61 C -HETATM 27 CM PS0 A 3 -9.405 18.196 28.546 0.50 6.25 C +HETATM 16 N UNL A 3 -11.234 16.802 30.197 0.50 7.63 N +HETATM 17 CA UNL A 3 -11.284 16.497 28.779 0.50 6.15 C +HETATM 18 C UNL A 3 -10.818 17.753 28.010 0.50 8.61 C +HETATM 19 OS UNL A 3 -11.697 18.822 28.249 0.50 4.53 O +HETATM 20 CB UNL A 3 -12.670 16.079 28.322 0.50 12.53 C +HETATM 21 CG UNL A 3 -13.432 14.971 29.048 0.50 11.04 C +HETATM 22 CD1 UNL A 3 -13.076 13.629 28.983 0.50 10.33 C +HETATM 23 CD2 UNL A 3 -14.560 15.373 29.807 0.50 12.71 C +HETATM 24 CE1 UNL A 3 -13.818 12.638 29.661 0.50 11.11 C +HETATM 25 CE2 UNL A 3 -15.300 14.380 30.483 0.50 14.32 C +HETATM 26 CZ UNL A 3 -14.917 13.043 30.412 0.50 10.61 C +HETATM 27 CM UNL A 3 -9.405 18.196 28.546 0.50 6.25 C HETATM 28 O HOH A 4 -3.126 40.621 48.726 1.00 47.60 O HETATM 29 O HOH A 5 -2.279 35.565 45.117 1.00 43.93 O HETATM 30 O HOH A 6 5.765 35.848 41.846 1.00 36.24 O HETATM 31 O HOH A 7 -12.666 40.044 22.441 1.00 41.24 O HETATM 32 O HOH A 8 -4.462 37.411 18.124 1.00 30.94 O HETATM 33 O HOH A 9 -1.109 26.454 19.470 1.00 39.06 O -CONECT 16 17 -CONECT 17 16 18 20 +CONECT 16 17 17 +CONECT 17 16 16 18 20 CONECT 18 17 19 27 CONECT 19 18 CONECT 20 17 21 diff --git a/modules/mol/mm/src/observer.cc b/modules/mol/mm/src/observer.cc index 90a862083f9199b7bbc4f7041d75e4a935a96f82..0abc555daf188c65504a8779824d76a8693e3dc8 100644 --- a/modules/mol/mm/src/observer.cc +++ b/modules/mol/mm/src/observer.cc @@ -55,7 +55,7 @@ void TrajWriter::Init(boost::shared_ptr<OpenMM::Context> c, registered_ = true; context_ = c; - ost::io::IOProfile profile("CHARMM",false,false,false,false,false); + ost::io::IOProfile profile("CHARMM",false,false,false,false,false,false); ost::io::PDBWriter writer(pdb_filename_, profile); writer.Write(ent.GetAtomList());