From d0f67460ae7825bdb830b2d232b4fb6adaeb6c0c Mon Sep 17 00:00:00 2001
From: Gabriel Studer <gabriel.studer@unibas.ch>
Date: Fri, 24 Mar 2023 15:40:53 +0100
Subject: [PATCH] Correctly read/write CONECT in PDB files - Thanks Simon Sun

Simon found issues when reading and writing CONECT statements
in PDB io and provided code to fix it. yay

Reading: OpenStructure did not read CONECT statements at all. By default,
it still doesnt. Many PDB files out there don't provide these
statements and we really don't want to rely on them. We rather want
to assign connectivity based on the chemical component dictionary from
the PDB. However, a valid use case are novel compounds that are not
in the component dictionary. Reading connect statements can now be
enabled in the pdb reader through the IOProfile. This may give issues
in processing after reading. OpenStructure implements processors that
are responsible for connectivity. Now that we build some of the
connectivity already at the reading stage, this might cause trouble.
To remedy most of the nightmares that can come out of that, the
processors can now optionally skip connectivities between Hetatoms.

Writing: That was a plain bug when writing CONECT statements for
bond orders > 1.
---
 modules/conop/doc/connectivity.rst           | 18 ++++-
 modules/conop/pymod/export_heuristic.cc      |  3 +-
 modules/conop/pymod/export_processor.cc      |  2 +
 modules/conop/pymod/export_rule_based.cc     |  3 +-
 modules/conop/src/heuristic.hh               |  5 +-
 modules/conop/src/processor.cc               |  6 ++
 modules/conop/src/processor.hh               | 24 ++++---
 modules/conop/src/rule_based.hh              |  4 +-
 modules/conop/tests/test_heuristic_conop.cc  | 44 ++++++++++++
 modules/conop/tests/test_rule_based_conop.cc | 51 +++++++++++++-
 modules/io/doc/profile.rst                   | 12 +++-
 modules/io/pymod/__init__.py                 | 16 ++++-
 modules/io/pymod/export_pdb_io.cc            |  4 +-
 modules/io/src/mol/io_profile.hh             | 10 +--
 modules/io/src/mol/pdb_reader.cc             | 74 ++++++++++++++++++--
 modules/io/src/mol/pdb_reader.hh             |  4 +-
 modules/io/src/mol/pdb_writer.cc             | 18 ++---
 modules/io/tests/test_io_pdb.cc              | 14 ++--
 modules/io/tests/test_io_pdb.py              | 31 +++++++-
 modules/io/tests/testfiles/pdb/conect.pdb    | 28 ++++----
 modules/mol/mm/src/observer.cc               |  2 +-
 21 files changed, 312 insertions(+), 61 deletions(-)

diff --git a/modules/conop/doc/connectivity.rst b/modules/conop/doc/connectivity.rst
index f04e978a5..0dfd00590 100644
--- a/modules/conop/doc/connectivity.rst
+++ b/modules/conop/doc/connectivity.rst
@@ -94,6 +94,15 @@ be extended with custom  connectivity information, if required.
 
     :type: :class:`ConopAction`
 
+  .. attribute:: connect_hetatm
+
+    :type: :class:`bool`
+
+    Whether to connect atoms that are both hetatms. Enabled by default.
+    Disabling can be useful if there are compounds which are not covered
+    by the PDB component dictionary and you prefer to create your own
+    connectivity for those.
+
   .. method:: Process(ent)
   
     Processess the entity *ent* according to the current options.
@@ -101,7 +110,9 @@ be extended with custom  connectivity information, if required.
 
 .. class:: HeuristicProcessor(check_bond_feasibility=False, \
                               assign_torsions=True, connect=True, \
-                              peptide_bonds=True, zero_occ_treatment=CONOP_WARN)
+                              peptide_bonds=True,
+                              connect_hetatm=True,
+                              zero_occ_treatment=CONOP_WARN)
    
   The :class:`HeuristicProcessor` implements the :class:`Processor` interface.
   Refer to its documentation for methods and accessors common to all processor.
@@ -110,6 +121,7 @@ be extended with custom  connectivity information, if required.
   :param assign_torsions: Sets :attr:`~Processor.assign_torsions`
   :param connect: Sets :attr:`~Processor.connect`
   :param peptide_bonds: Sets :attr:`~Processor.peptide_bonds`
+  :param connect_hetatm: Sets :attr:`~Processor.connect_hetatm`
   :param zero_occ_treatment: Sets :attr:`~Processor.zero_occ_treatment`
 
 
@@ -119,7 +131,8 @@ be extended with custom  connectivity information, if required.
                               unknown_atom_treatment=CONOP_WARN, \
                               check_bond_feasibility=False, \
                               assign_torsions=True, connect=True, \
-                              peptide_bonds=True, zero_occ_treatment=CONOP_WARN)
+                              peptide_bonds=True, connect_hetatm=True, \
+                              zero_occ_treatment=CONOP_WARN)
    
   The :class:`RuleBasedProcessor` implements the :class:`Processor` interface.
   Refer to its documentation for methods and accessors common to all processor.
@@ -134,6 +147,7 @@ be extended with custom  connectivity information, if required.
   :param assign_torsions: Sets :attr:`~Processor.assign_torsions`
   :param connect: Sets :attr:`~Processor.connect`
   :param peptide_bonds: Sets :attr:`~Processor.peptide_bonds`
+  :param connect_hetatm: Sets :attr:`~Processor.connect_hetatm`
   :param zero_occ_treatment: Sets :attr:`~Processor.zero_occ_treatment`
 
   .. attribute:: fix_elements
diff --git a/modules/conop/pymod/export_heuristic.cc b/modules/conop/pymod/export_heuristic.cc
index 275245a7e..b83a77ca0 100644
--- a/modules/conop/pymod/export_heuristic.cc
+++ b/modules/conop/pymod/export_heuristic.cc
@@ -29,11 +29,12 @@ void export_heuristic() {
   class_<HeuristicProcessor, HeuristicProcessorPtr, 
          boost::noncopyable, bases<Processor> >("HeuristicProcessor", 
          init<>())
-    .def(init<bool,bool,bool,bool,ConopAction>(
+    .def(init<bool,bool,bool,bool,bool,ConopAction>(
          (arg("check_bond_feasibility")=false,
          arg("assign_torsions")=true,
          arg("connect")=true,
          arg("peptide_bonds")=true,
+         arg("connect_hetatm")=true,
          arg("zero_occ_treatment")=CONOP_WARN)))
   ;
 }
diff --git a/modules/conop/pymod/export_processor.cc b/modules/conop/pymod/export_processor.cc
index 262a1dfff..726979edf 100644
--- a/modules/conop/pymod/export_processor.cc
+++ b/modules/conop/pymod/export_processor.cc
@@ -86,6 +86,8 @@ void export_processor() {
                   &Processor::SetConnect)
     .add_property("peptide_bonds", &Processor::GetConnectAminoAcids,
                   &Processor::SetConnectAminoAcids)
+    .add_property("connect_hetatm", &Processor::GetConnectHetatm,
+                  &Processor::SetConnectHetatm)
     .add_property("zero_occ_treatment", &Processor::GetZeroOccTreatment,
                   &Processor::SetZeroOccTreatment)
     .def("Process", &Processor::Process, 
diff --git a/modules/conop/pymod/export_rule_based.cc b/modules/conop/pymod/export_rule_based.cc
index 7b22b6d2d..ad0414dd2 100644
--- a/modules/conop/pymod/export_rule_based.cc
+++ b/modules/conop/pymod/export_rule_based.cc
@@ -28,7 +28,7 @@ void export_rule_based() {
   class_<RuleBasedProcessor, RuleBasedProcessorPtr, 
          boost::noncopyable, bases<Processor> >("RuleBasedProcessor", 
          init<CompoundLibPtr>())
-    .def(init<CompoundLibPtr,bool,bool,ConopAction,ConopAction,bool,bool,bool,bool,ConopAction>(
+    .def(init<CompoundLibPtr,bool,bool,ConopAction,ConopAction,bool,bool,bool,bool,bool,ConopAction>(
          (arg("lib"), arg("fix_elements")=true, arg("strict_hydrogens")=false,
          arg("unknown_res_treatment")=CONOP_WARN,
          arg("unknown_atom_treatment")=CONOP_WARN,
@@ -36,6 +36,7 @@ void export_rule_based() {
          arg("assign_torsions")=true,
          arg("connect")=true,
          arg("peptide_bonds")=true,
+         arg("connect_hetatm")=true,
          arg("zero_occ_treatment")=CONOP_WARN)))
     .add_property("fix_element", &RuleBasedProcessor::GetFixElement,
                  &RuleBasedProcessor::SetFixElement)
diff --git a/modules/conop/src/heuristic.hh b/modules/conop/src/heuristic.hh
index 2b70864aa..5a72250f1 100644
--- a/modules/conop/src/heuristic.hh
+++ b/modules/conop/src/heuristic.hh
@@ -41,8 +41,9 @@ public:
   virtual ProcessorPtr Copy() const {
     return ProcessorPtr(new HeuristicProcessor(*this));
   }
-  HeuristicProcessor(bool bf, bool at, bool cn, bool aa, ConopAction zo): 
-    Processor(bf, at, cn, aa, zo),
+  HeuristicProcessor(bool bf, bool at, bool cn, bool aa, bool ch,
+                     ConopAction zo): 
+    Processor(bf, at, cn, aa, ch, zo),
     lib_()
   {}
 
diff --git a/modules/conop/src/processor.cc b/modules/conop/src/processor.cc
index 844256a3c..840c8b4a7 100644
--- a/modules/conop/src/processor.cc
+++ b/modules/conop/src/processor.cc
@@ -304,6 +304,9 @@ void Processor::ConnectAtomsOfResidue(mol::ResidueHandle rh,
                                   a2.GetElement()=="D")) {
           continue;
         }
+        if (!connect_hetatm_ && a1.IsHetAtom() && a2.IsHetAtom()) {
+          continue;
+        }
         if (!this->GetCheckBondFeasibility()) {
           e.Connect(a1, a2, bond.order);
         } else { 
@@ -391,6 +394,9 @@ void Processor::DistanceBasedConnect(mol::AtomHandle atom) const
   for (mol::AtomHandleList::const_iterator it=alist.begin(),
        e=alist.end();it!=e;++it) {
     if (*it!=atom) {
+      if (!connect_hetatm_ && it->IsHetAtom() && atom.IsHetAtom()) {
+        continue;
+      }
       if (IsBondFeasible(atom, *it)) {
         if (it->GetResidue()==res_a ||
             Processor::AreResiduesConsecutive(res_a, it->GetResidue())) {
diff --git a/modules/conop/src/processor.hh b/modules/conop/src/processor.hh
index 81add923d..2c7bf495a 100644
--- a/modules/conop/src/processor.hh
+++ b/modules/conop/src/processor.hh
@@ -66,19 +66,20 @@ protected:
   void DistanceBasedConnect(mol::AtomHandle atom) const;
   mol::AtomHandle LocateAtom(const mol::AtomHandleList&, int ordinal) const;
 public:
-  Processor(bool bf, bool at, bool cn, bool aa, ConopAction zo): check_bond_feasibility_(bf),
-    assign_torsions_(at), connect_(cn), connect_aa_(aa),
-    zero_occ_treatment_(zo) {}
+  Processor(bool bf, bool at, bool cn, bool aa, bool ch, ConopAction zo):
+    check_bond_feasibility_(bf), assign_torsions_(at), connect_(cn),
+    connect_aa_(aa), connect_hetatm_(ch), zero_occ_treatment_(zo) {}
   Processor(): check_bond_feasibility_(false),
     assign_torsions_(true), connect_(true), connect_aa_(true),
-    zero_occ_treatment_(CONOP_SILENT) {}
+    connect_hetatm_(true), zero_occ_treatment_(CONOP_SILENT) {}
+
   void SetConnect(bool connect) {
     connect_ = connect;
   }
-
   bool GetConnect() const {
     return connect_;
   }
+
   void SetAssignTorsions(bool flag) {
     assign_torsions_ = flag;
   }
@@ -92,23 +93,29 @@ public:
   void SetConnectAminoAcids(bool c) {
     connect_aa_ = c;
   }
+
+  bool GetConnectHetatm() const {
+    return connect_hetatm_;
+  }
+  void SetConnectHetatm(bool c) {
+    connect_hetatm_ = c;
+  }
+
   bool GetCheckBondFeasibility() const {
     return check_bond_feasibility_;
   }
 
-
   void SetCheckBondFeasibility(bool flag) {
     check_bond_feasibility_ = flag;
   }
 
-
   ConopAction GetZeroOccTreatment() const {
     return zero_occ_treatment_;
   }
-
   void SetZeroOccTreatment(ConopAction action) {
     zero_occ_treatment_ = action;
   }
+
   virtual String ToString() const = 0;
 protected:
   String OptionsToString() const; 
@@ -117,6 +124,7 @@ private:
   bool assign_torsions_;
   bool connect_;
   bool connect_aa_;
+  bool connect_hetatm_;
   ConopAction zero_occ_treatment_;
 };
 
diff --git a/modules/conop/src/rule_based.hh b/modules/conop/src/rule_based.hh
index cea8d5fd3..15815f9fd 100644
--- a/modules/conop/src/rule_based.hh
+++ b/modules/conop/src/rule_based.hh
@@ -45,8 +45,8 @@ public:
 
   RuleBasedProcessor(CompoundLibPtr compound_lib, bool fe, bool sh,
                      ConopAction ur, ConopAction ua, bool bf, bool at, bool cn,
-                     bool aa, ConopAction zo): 
-    Processor(bf, at, cn, aa, zo), lib_(compound_lib), fix_element_(fe), 
+                     bool aa, bool ch, ConopAction zo): 
+    Processor(bf, at, cn, aa, ch, zo), lib_(compound_lib), fix_element_(fe), 
     strict_hydrogens_(sh), unk_res_treatment_(ur), 
     unk_atom_treatment_(ua)
   {
diff --git a/modules/conop/tests/test_heuristic_conop.cc b/modules/conop/tests/test_heuristic_conop.cc
index d6ba9f058..acb0ed9a0 100644
--- a/modules/conop/tests/test_heuristic_conop.cc
+++ b/modules/conop/tests/test_heuristic_conop.cc
@@ -179,5 +179,49 @@ BOOST_AUTO_TEST_CASE(quack_types_unknown_residues) {
 
 }
 
+
+BOOST_AUTO_TEST_CASE(hetatom_connect_heuristic) {
+
+  // STEP 1: Specify two atoms as hetatoms - they should be connected
+  // by the processor
+  mol::EntityHandle e = Builder()
+    .Chain("A")
+      .Residue("GLY")
+        .Atom("N", geom::Vec3(-8.22, 35.20, 22.39))
+        .Atom("CA", geom::Vec3(-8.28, 36.36, 21.49))
+        .Atom("C", geom::Vec3(-8.59, 35.93, 20.06))
+        .Atom("O", geom::Vec3(-7.88, 36.30, 19.12))
+        .Atom("CB", geom::Vec3(-6.96, 37.11, 21.53))
+  ;
+
+  ost::mol::AtomHandleList atoms = e.GetAtomList();
+  atoms[0].SetHetAtom(true); // N
+  atoms[1].SetHetAtom(true); // CA
+  HeuristicProcessor proc;
+  proc.Process(e);
+  BOOST_CHECK(mol::BondExists(e.FindAtom("A", 1, "N"), e.FindAtom("A", 1, "CA")));
+  BOOST_CHECK(mol::BondExists(e.FindAtom("A", 1, "CA"), e.FindAtom("A", 1, "C")));
+  
+  // STEP 2: Same thing again but we tell the processor NOT to connect
+  // hetatoms
+  e = Builder()
+    .Chain("A")
+      .Residue("GLY")
+        .Atom("N", geom::Vec3(-8.22, 35.20, 22.39))
+        .Atom("CA", geom::Vec3(-8.28, 36.36, 21.49))
+        .Atom("C", geom::Vec3(-8.59, 35.93, 20.06))
+        .Atom("O", geom::Vec3(-7.88, 36.30, 19.12))
+        .Atom("CB", geom::Vec3(-6.96, 37.11, 21.53))
+  ;
+
+  atoms = e.GetAtomList();
+  atoms[0].SetHetAtom(true); // N
+  atoms[1].SetHetAtom(true); // CA
+  proc.SetConnectHetatm(false);
+  proc.Process(e);
+  BOOST_CHECK(!mol::BondExists(e.FindAtom("A", 1, "N"), e.FindAtom("A", 1, "CA")));
+  BOOST_CHECK(mol::BondExists(e.FindAtom("A", 1, "CA"), e.FindAtom("A", 1, "C")));
+}
+
 BOOST_AUTO_TEST_SUITE_END();
 
diff --git a/modules/conop/tests/test_rule_based_conop.cc b/modules/conop/tests/test_rule_based_conop.cc
index 4e5222242..afe749222 100644
--- a/modules/conop/tests/test_rule_based_conop.cc
+++ b/modules/conop/tests/test_rule_based_conop.cc
@@ -26,6 +26,7 @@
 #include <ost/log.hh>
 #include <ost/conop/rule_based.hh>
 #include <ost/conop/conop.hh>
+#include <ost/mol/builder.hh>
 #include "helper.hh"
 
 using boost::unit_test_framework::test_suite;
@@ -55,13 +56,13 @@ BOOST_AUTO_TEST_CASE(rule_based_init_check)
   BOOST_CHECK_THROW(RuleBasedProcessor rbc1(lib), ost::Error);
   BOOST_CHECK_THROW(RuleBasedProcessor rbc2(lib, true, false, CONOP_WARN,
                                             CONOP_WARN, false, true, true, true,
-                                            CONOP_WARN), ost::Error);
+                                            true, CONOP_WARN), ost::Error);
   lib = load_lib();
   if (!lib) { return; }
   BOOST_CHECK_NO_THROW(RuleBasedProcessor rbc3(lib));
   BOOST_CHECK_NO_THROW(RuleBasedProcessor rbc4(lib, true, false, CONOP_WARN,
                                                CONOP_WARN, false, true, true,
-                                               true, CONOP_WARN));
+                                               true, true, CONOP_WARN));
 }
 
 BOOST_AUTO_TEST_CASE(rule_based_set_get_flags)
@@ -211,4 +212,50 @@ BOOST_AUTO_TEST_CASE(rule_based_unk_res)
   BOOST_CHECK(!ent2.FindResidue("A", 1));
 }
 
+BOOST_AUTO_TEST_CASE(hetatom_connect_rule_based) {
+
+  CompoundLibPtr lib = load_lib();
+  if (!lib) { return; }
+
+  // STEP 1: Specify two atoms as hetatoms - they should be connected
+  // by the processor
+  mol::EntityHandle e = Builder()
+    .Chain("A")
+      .Residue("GLY")
+        .Atom("N", geom::Vec3(-8.22, 35.20, 22.39))
+        .Atom("CA", geom::Vec3(-8.28, 36.36, 21.49))
+        .Atom("C", geom::Vec3(-8.59, 35.93, 20.06))
+        .Atom("O", geom::Vec3(-7.88, 36.30, 19.12))
+        .Atom("CB", geom::Vec3(-6.96, 37.11, 21.53))
+  ;
+
+  ost::mol::AtomHandleList atoms = e.GetAtomList();
+  atoms[0].SetHetAtom(true); // N
+  atoms[1].SetHetAtom(true); // CA
+  RuleBasedProcessor proc(lib);
+  proc.Process(e);
+  BOOST_CHECK(mol::BondExists(e.FindAtom("A", 1, "N"), e.FindAtom("A", 1, "CA")));
+  BOOST_CHECK(mol::BondExists(e.FindAtom("A", 1, "CA"), e.FindAtom("A", 1, "C")));
+  
+  // STEP 2: Same thing again but we tell the processor NOT to connect
+  // hetatoms
+  e = Builder()
+    .Chain("A")
+      .Residue("GLY")
+        .Atom("N", geom::Vec3(-8.22, 35.20, 22.39))
+        .Atom("CA", geom::Vec3(-8.28, 36.36, 21.49))
+        .Atom("C", geom::Vec3(-8.59, 35.93, 20.06))
+        .Atom("O", geom::Vec3(-7.88, 36.30, 19.12))
+        .Atom("CB", geom::Vec3(-6.96, 37.11, 21.53))
+  ;
+
+  atoms = e.GetAtomList();
+  atoms[0].SetHetAtom(true); // N
+  atoms[1].SetHetAtom(true); // CA
+  proc.SetConnectHetatm(false);
+  proc.Process(e);
+  BOOST_CHECK(!mol::BondExists(e.FindAtom("A", 1, "N"), e.FindAtom("A", 1, "CA")));
+  BOOST_CHECK(mol::BondExists(e.FindAtom("A", 1, "CA"), e.FindAtom("A", 1, "C")));
+}
+
 BOOST_AUTO_TEST_SUITE_END();
diff --git a/modules/io/doc/profile.rst b/modules/io/doc/profile.rst
index ec6d4371c..e150a1346 100644
--- a/modules/io/doc/profile.rst
+++ b/modules/io/doc/profile.rst
@@ -94,7 +94,7 @@ The IOProfile Class
 
 .. class:: IOProfile(dialect='PDB', quack_mode=False, fault_tolerant=False,\
                      join_spread_atom_records=False, no_hetatms=False,\
-                     calpha_only=False, processor=None)
+                     calpha_only=False, read_conect=False, processor=None)
 
   Aggregates flags that control the import of molecular structures.
 
@@ -147,6 +147,16 @@ The IOProfile Class
     most useful in combination with protein-only PDB files to speed up
     subsequent processing and importing.
 
+  .. attribute:: read_conect
+
+    :type: bool
+
+    Only relevant when reading PDB format. When set to true, reads CONECT
+    statements and applies them in the pdb reader. This can result in
+    hydrogen bonds salt bridges etc. to be connected. Check the PDB format
+    definition for more info. Disables connectity generation in case of HETATM
+    in processor except for water.
+
   .. attribute:: processor
 
     :type: :class:`ost.conop.HeuristicProcessor`/:class:`ost.conop.RuleBasedProcessor`
diff --git a/modules/io/pymod/__init__.py b/modules/io/pymod/__init__.py
index c47d044de..965a39ce4 100644
--- a/modules/io/pymod/__init__.py
+++ b/modules/io/pymod/__init__.py
@@ -83,7 +83,8 @@ def LoadPDB(filename, restrict_chains="", no_hetatms=None,
             fault_tolerant=None, load_multi=False, quack_mode=None,
             join_spread_atom_records=None, calpha_only=None,
             profile='DEFAULT', remote=False, remote_repo='pdb',
-            dialect=None, seqres=False, bond_feasibility_check=None):
+            dialect=None, seqres=False, bond_feasibility_check=None,
+            read_conect=False):
   """
   Load PDB file from disk and return one or more entities. Several options 
   allow to customize the exact behaviour of the PDB import. For more information 
@@ -169,6 +170,16 @@ def LoadPDB(filename, restrict_chains="", no_hetatms=None,
                                  If set, overrides the value of
                                  :attr:`ost.conop.Processor.check_bond_feasibility`
   :type bond_feasibility_check: :class:`bool`
+  :param read_conect: By default, OpenStructure doesn't read CONECT statements in
+                      a pdb file. Reason is that they're often missing and we prefer
+                      to rely on the chemical component dictionary from the PDB.
+                      However, there may be cases where you really want these CONECT
+                      statements. For example novel compounds with no entry in
+                      the chemical component dictionary. Setting this to True has
+                      two effects: 1) CONECT statements are read and blindly applied
+                      2) The processor does not connect any pair of HETATOM atoms in
+                      order to not interfer with the CONECT statements.
+  :type read_conect: :class:`bool`
 
   :rtype: :class:`~ost.mol.EntityHandle` or a list thereof if `load_multi` is 
       True.  
@@ -194,9 +205,12 @@ def LoadPDB(filename, restrict_chains="", no_hetatms=None,
   prof.no_hetatms=_override(prof.no_hetatms, no_hetatms)
   prof.dialect=_override(prof.dialect, dialect)
   prof.quack_mode=_override(prof.quack_mode, quack_mode)
+  prof.read_conect=_override(prof.read_conect, read_conect)
   if prof.processor:
     prof.processor.check_bond_feasibility=_override(prof.processor.check_bond_feasibility, 
                                                     bond_feasibility_check)
+    prof.processor.connect_hetatm=_override(prof.processor.connect_hetatm,
+                                            not read_conect)
   prof.fault_tolerant=_override(prof.fault_tolerant, fault_tolerant)
   prof.join_spread_atom_records=_override(prof.join_spread_atom_records,
                                           join_spread_atom_records)
diff --git a/modules/io/pymod/export_pdb_io.cc b/modules/io/pymod/export_pdb_io.cc
index addb94f3b..cad44b855 100644
--- a/modules/io/pymod/export_pdb_io.cc
+++ b/modules/io/pymod/export_pdb_io.cc
@@ -45,13 +45,14 @@ void remove_profiles() {
 void export_pdb_io()
 {
   class_<IOProfile>("IOProfile",
-         init<String,bool,bool,bool,bool,bool,
+         init<String,bool,bool,bool,bool,bool,bool,
               conop::ProcessorPtr>((arg("dialect")="PDB",
                                     arg("quack_mode")=false,
                                     arg("fault_tolerant")=false,
                                     arg("join_spread_atom_records")=false,
                                     arg("no_hetatms")=false,
                                     arg("calpha_only")=false,
+                                    arg("read_conect")=false,
                                     arg("processor")=conop::ProcessorPtr())))
     .def(init<const IOProfile&>())
     .def_readwrite("dialect", &IOProfile::dialect)
@@ -60,6 +61,7 @@ void export_pdb_io()
     .def_readwrite("no_hetatms", &IOProfile::no_hetatms)
     .def_readwrite("calpha_only", &IOProfile::calpha_only)
     .def_readwrite("join_spread_atom_records", &IOProfile::join_spread_atom_records)
+    .def_readwrite("read_conect", &IOProfile::read_conect)
     .def_readwrite("processor", &IOProfile::processor)
     .def("Copy", &IOProfile::Copy)
     .def(self_ns::str(self))
diff --git a/modules/io/src/mol/io_profile.hh b/modules/io/src/mol/io_profile.hh
index 22060e8c6..d8f078e39 100644
--- a/modules/io/src/mol/io_profile.hh
+++ b/modules/io/src/mol/io_profile.hh
@@ -31,15 +31,15 @@ namespace ost { namespace io {
 struct DLLEXPORT IOProfile {
 public:
   IOProfile(String d, bool qm, bool ft, bool js, bool nh, 
-            bool co, conop::ProcessorPtr proc=conop::ProcessorPtr()):
+            bool co, bool rc, conop::ProcessorPtr proc=conop::ProcessorPtr()):
     dialect(d), quack_mode(qm), fault_tolerant(ft), join_spread_atom_records(js), 
-    no_hetatms(nh), calpha_only(co), processor(proc)
+    no_hetatms(nh), calpha_only(co), read_conect(rc),  processor(proc)
   {
   }
 
   IOProfile(): dialect("PDB"), quack_mode(false), fault_tolerant(false), 
     join_spread_atom_records(false), no_hetatms(false),
-    calpha_only(false), processor()
+    calpha_only(false), read_conect(false), processor()
   { }
 
   String              dialect;
@@ -48,11 +48,12 @@ public:
   bool                join_spread_atom_records;
   bool                no_hetatms;
   bool                calpha_only;
+  bool                read_conect;
   conop::ProcessorPtr processor;
   IOProfile Copy()
   {
     return IOProfile(dialect, quack_mode, fault_tolerant, join_spread_atom_records, 
-                     no_hetatms, calpha_only,  
+                     no_hetatms, calpha_only, read_conect,  
                      processor ? processor->Copy() : conop::ProcessorPtr());
   }
 };
@@ -66,6 +67,7 @@ inline  std::ostream& operator<<(std::ostream& stream, const IOProfile& p)
          << "calpha_only=" << (p.calpha_only ? "True" : "False") << ", "
          << "fault_tolerant=" << (p.fault_tolerant ? "True" : "False") << ", "
          << "no_hetatms=" << (p.no_hetatms ? "True" : "False") << ", "
+         << "read_conect=" << (p.read_conect ? "True" : "False") << ", "
          << "processor=" << (p.processor ? p.processor->ToString() : "None") << ")";
   return stream;
 }
diff --git a/modules/io/src/mol/pdb_reader.cc b/modules/io/src/mol/pdb_reader.cc
index 3aeefcbd7..e4e11d7bc 100644
--- a/modules/io/src/mol/pdb_reader.cc
+++ b/modules/io/src/mol/pdb_reader.cc
@@ -27,6 +27,7 @@
 #include <ost/profile.hh>
 #include <ost/log.hh>
 #include <ost/message.hh>
+#include <ost/mol/bond_handle.hh>
 
 #include <ost/conop/conop.hh>
 #include <ost/geom/mat3.hh>
@@ -356,11 +357,16 @@ void PDBReader::Import(mol::EntityHandle& ent,
         break;
          case 'C':
          case 'c':
-           if (curr_line.size()<20) {
+           if (curr_line.size()<6) {
              LOG_TRACE("skipping entry");
              continue;
            }
-           if (IEquals(curr_line.substr(0, 6), StringRef("COMPND", 6))) {
+           else if (IEquals(curr_line.substr(0, 6), StringRef("CONECT", 6)) &&
+                    profile_.read_conect) {
+             LOG_TRACE("processing CONECT entry");
+             this->ParseConectEntry(curr_line, line_num_, ent);
+           }
+           else if (IEquals(curr_line.substr(0, 6), StringRef("COMPND", 6))) {
              LOG_TRACE("processing COMPND entry");
              this->ParseCompndEntry(curr_line, line_num_);
            }
@@ -566,7 +572,8 @@ bool PDBReader::EnsureLineLength(const StringRef& line, size_t size)
 bool PDBReader::ParseAtomIdent(const StringRef& line, int line_num, 
                                String& chain_name,  StringRef& res_name,
                                mol::ResNum& resnum, StringRef& atom_name, 
-                               char& alt_loc, const StringRef& record_type)
+                               char& alt_loc, const StringRef& record_type,
+                               int& serial)
 {
   if (!this->EnsureLineLength(line, 27)) {
     return false;
@@ -602,6 +609,16 @@ bool PDBReader::ParseAtomIdent(const StringRef& line, int line_num,
 
   char  ins_c=line[26];  
   resnum=to_res_num(res_num.second, ins_c);
+
+  std::pair<bool, int> tmp = line.substr(6, 5).trim().to_int();
+  if(!tmp.first) {
+    if (profile_.fault_tolerant) {
+      return false;
+    }
+    throw IOException(str(format("invalid atom serial on line %d") % line_num));
+  }
+  serial = tmp.second;
+
   return true;
 }
 
@@ -615,8 +632,10 @@ void PDBReader::ParseAnisou(const StringRef& line, int line_num,
   char alt_loc=0;
   StringRef res_name, atom_name;
   mol::ResNum res_num(0);
+  int serial = -1;
   if (!this->ParseAtomIdent(line, line_num, chain_name, res_name, res_num, 
-                            atom_name, alt_loc, StringRef("ANISOU", 6))) {
+                            atom_name, alt_loc, StringRef("ANISOU", 6),
+                            serial)) {
     return;                            
   }
   double anisou[6]={0.0, 0.0, 0.0, 0.0, 0.0, 0.0};  
@@ -673,8 +692,9 @@ void PDBReader::ParseAndAddAtom(const StringRef& line, int line_num,
   String chain_name;
   StringRef res_name, atom_name;
   mol::ResNum res_num(0);
+  int serial = -1;
   if (!this->ParseAtomIdent(line, line_num, chain_name, res_name, res_num, 
-                            atom_name, alt_loc, record_type)) {
+                            atom_name, alt_loc, record_type, serial)) {
     return;                            
   }
   std::pair<bool, Real> charge, radius;
@@ -860,6 +880,9 @@ void PDBReader::ParseAndAddAtom(const StringRef& line, int line_num,
     ah.SetCharge(charge.second);
   }
   ah.SetHetAtom(record_type[0]=='H');
+  if(profile_.read_conect && serial != -1) {
+    this->amap_[serial] = ah;
+  }
 }
 
 void PDBReader::ParseHelixEntry(const StringRef& line)
@@ -911,4 +934,45 @@ void PDBReader::ParseStrandEntry(const StringRef& line)
   }
 }
 
+void PDBReader::ParseConectEntry (const StringRef& line, int line_num, mol::EntityHandle& ent)
+{
+  if (line.size()<16) {
+    if (profile_.fault_tolerant) {
+      LOG_WARNING("invalid CONECT record on line " << line_num 
+                  << ": record is too short");
+      return;
+    }
+    std::stringstream ss("invalid CONECT record on line ");
+    ss << line_num <<": record is too short";
+    throw IOException(ss.str());
+  }
+  if (line.rtrim().size()>80) {
+    if (profile_.fault_tolerant) {
+      LOG_WARNING("invalid CONECT record on line " << line_num 
+                  << ": record is too long");
+      return;
+    }
+    std::stringstream ss("invalid CONECT record on line ");
+    ss << line_num <<": whole record is too long";
+    throw IOException(ss.str());
+  }
+  mol::XCSEditor editor=ent.EditXCS(mol::BUFFERED_EDIT);
+  const int starting_atom = line.substr(6,5).trim().to_int().second;
+  // map for bonds, in the form of <serial_number, bond_order>
+  std::map<int, unsigned char> connected_atoms;
+  for (int i=0; i<4; i++) {
+    if (static_cast<int>(line.length()) < 11+i*5+5) break;
+    const int connected_atom = line.substr(11+i*5, 5).trim().to_int().second;
+    connected_atoms[connected_atom]+=1;
+  }
+  for (auto& pair : connected_atoms) {
+    auto at_one_it = this->amap_.find(starting_atom);
+    auto at_two_it = this->amap_.find(pair.first);
+    if(at_one_it != this->amap_.end() && at_two_it != this->amap_.end()) {
+      editor.Connect(at_one_it->second, at_two_it->second, pair.second);
+    }
+  }
+}
+
+
 }}
diff --git a/modules/io/src/mol/pdb_reader.hh b/modules/io/src/mol/pdb_reader.hh
index df98fa2dd..0d75d1612 100644
--- a/modules/io/src/mol/pdb_reader.hh
+++ b/modules/io/src/mol/pdb_reader.hh
@@ -83,13 +83,15 @@ private:
   bool ParseAtomIdent(const StringRef& line, int line_num, 
                       String& chain_name, StringRef& res, 
                       mol::ResNum& resnum, StringRef& atom_name, char& alt_loc,
-                      const StringRef& record_type);
+                      const StringRef& record_type, int& serial);
   void ParseAnisou(const StringRef& line, int line_num,
                    mol::EntityHandle& h);
   void ParseHelixEntry(const StringRef& line);
   void ParseStrandEntry(const StringRef& line);
   void Init(const boost::filesystem::path& loc);
   bool EnsureLineLength(const StringRef& line, size_t size);
+  void ParseConectEntry(const StringRef& line, int line_num, mol::EntityHandle& ent);
+  std::map<int, mol::AtomHandle> amap_; // <serial_number, AtomHandle>
   mol::ChainHandle curr_chain_;
   mol::ResidueHandle curr_residue_;
   int chain_count_;
diff --git a/modules/io/src/mol/pdb_writer.cc b/modules/io/src/mol/pdb_writer.cc
index 7b28c8b2a..d4461402c 100644
--- a/modules/io/src/mol/pdb_writer.cc
+++ b/modules/io/src/mol/pdb_writer.cc
@@ -35,6 +35,7 @@
 #include <ost/mol/residue_handle.hh>
 #include <ost/mol/chain_handle.hh>
 #include <ost/mol/entity_visitor.hh>
+#include <ost/mol/bond_handle.hh>
 #include "pdb_writer.hh"
 
 using boost::format;
@@ -337,20 +338,21 @@ public:
   }
 private:
 public:
-  virtual bool VisitAtom(const mol::AtomHandle& atom) {
+virtual bool VisitAtom(const mol::AtomHandle& atom) {
     if (atom.IsHetAtom()) {
       bool has_partner=false;
       int atom_index=atom_indices_[atom.GetHashCode()];
       mol::AtomHandleList partners=atom.GetBondPartners();
       std::list<int> partner_indices;
-      for (mol::AtomHandleList::const_iterator i=partners.begin();
-           i!=partners.end(); ++i) {
-        int pind=atom_indices_[i->GetHashCode()];
-        if (pind!=0) {
-          partner_indices.push_back(pind);
-          has_partner=true;
+        for (auto partner : partners){
+          mol::BondHandle bond = atom.FindBondToAtom(partner);
+          int pind=atom_indices_[partner.GetHashCode()];
+          if (pind!=0) {   
+            for (int i=0; i < int(bond.GetBondOrder()); i++)
+              {partner_indices.push_back(pind);}
+            has_partner=true;
+          }
         }
-      }
       if (has_partner) {
         write_conect(ostr_, atom_index, partner_indices);
       }
diff --git a/modules/io/tests/test_io_pdb.cc b/modules/io/tests/test_io_pdb.cc
index 400034f9b..99071118e 100644
--- a/modules/io/tests/test_io_pdb.cc
+++ b/modules/io/tests/test_io_pdb.cc
@@ -724,13 +724,15 @@ BOOST_AUTO_TEST_CASE(write_ter6)
                             "testfiles/pdb/ter4-out.pdb"));
 }
 
-BOOST_AUTO_TEST_CASE(write_conect)
+BOOST_AUTO_TEST_CASE(read_write_conect)
 {
   // this scope is required to force the writer stream to be closed before
   // opening the file again in compare_files. Avoids a race condition.
   {
-    PDBReader reader(String("testfiles/pdb/conect.pdb"), IOProfile());
-    PDBWriter writer(String("testfiles/pdb/conect-out.pdb"), IOProfile());
+    IOProfile profile;
+    profile.read_conect=true;
+    PDBReader reader(String("testfiles/pdb/conect.pdb"), profile);
+    PDBWriter writer(String("testfiles/pdb/conect-out.pdb"), profile);
     mol::EntityHandle ent=mol::CreateEntity();
     reader.Import(ent);
     conop::HeuristicProcessor heu_proc;
@@ -973,7 +975,7 @@ BOOST_AUTO_TEST_CASE(charmm_rname)
 {
   {
     PDBWriter writer(String("testfiles/pdb/charmm_rname-out.pdb"),
-                     IOProfile("CHARMM", false, false, false, false, false));
+                     IOProfile("CHARMM", false, false, false, false, false, false));
 
     mol::EntityHandle ent=mol::CreateEntity();
     mol::XCSEditor edi=ent.EditXCS();
@@ -992,7 +994,7 @@ BOOST_AUTO_TEST_CASE(charmm_longcname)
 {
   {
     PDBWriter writer(String("testfiles/pdb/charmm_longcname-out.pdb"),
-                     IOProfile("CHARMM", false, false, false, false, false));
+                     IOProfile("CHARMM", false, false, false, false, false, false));
 
     mol::EntityHandle ent=mol::CreateEntity();
     mol::XCSEditor edi=ent.EditXCS();
@@ -1011,7 +1013,7 @@ BOOST_AUTO_TEST_CASE(write_charmm_ter)
 {
   {
     PDBWriter writer(String("testfiles/pdb/charmm_ter-out.pdb"),
-                     IOProfile("CHARMM", false, false, false, false, false));
+                     IOProfile("CHARMM", false, false, false, false, false, false));
 
     mol::EntityHandle ent=mol::CreateEntity();
     mol::XCSEditor edi=ent.EditXCS();
diff --git a/modules/io/tests/test_io_pdb.py b/modules/io/tests/test_io_pdb.py
index 77a504fd8..7a34502ff 100644
--- a/modules/io/tests/test_io_pdb.py
+++ b/modules/io/tests/test_io_pdb.py
@@ -51,7 +51,36 @@ class TestPDB(unittest.TestCase):
     crambin_pdb = io.LoadPDB('1crn', remote=True, remote_repo='pdb')
     self.assertEqual(len(crambin_pdb.residues), 46)
     self.assertEqual(len(crambin_pdb.atoms), 327)
-    
+
+  def test_conect(self):
+    """ See whether read_conect has an effect on reading CONECT
+    """
+    prot = io.LoadPDB("testfiles/pdb/conect.pdb")
+    res = prot.FindResidue("A", mol.ResNum(3))
+    a1 = res.FindAtom("N")
+    a2 = res.FindAtom("CA")
+    tmp = sorted([str(a1), str(a2)])
+    bond = None
+    for b in prot.bonds:
+      if sorted([str(b.GetFirst()), str(b.GetSecond())]) == tmp:
+        bond = b
+        break
+    self.assertTrue(bond is not None)
+    self.assertEqual(bond.bond_order, 1)
+
+    prot = io.LoadPDB("testfiles/pdb/conect.pdb", read_conect=True)
+    res = prot.FindResidue("A", mol.ResNum(3))
+    a1 = res.FindAtom("N")
+    a2 = res.FindAtom("CA")
+    tmp = sorted([str(a1), str(a2)])
+    bond = None
+    for b in prot.bonds:
+      if sorted([str(b.GetFirst()), str(b.GetSecond())]) == tmp:
+        bond = b
+        break
+    self.assertTrue(bond is not None)
+    self.assertEqual(bond.bond_order, 2) # now it should be two
+
 if __name__== '__main__':
   from ost import testutils
   testutils.RunTests()
diff --git a/modules/io/tests/testfiles/pdb/conect.pdb b/modules/io/tests/testfiles/pdb/conect.pdb
index b4ce6bd8e..3b29d58d3 100644
--- a/modules/io/tests/testfiles/pdb/conect.pdb
+++ b/modules/io/tests/testfiles/pdb/conect.pdb
@@ -13,26 +13,26 @@ ATOM     12  CB  VAL A   2      -5.589  17.486  20.437  0.50 18.24           C
 ATOM     13  CG1 VAL A   2      -7.059  17.853  20.200  0.50 19.12           C  
 ATOM     14  CG2 VAL A   2      -4.672  18.535  19.802  0.50 14.29           C  
 TER      15      VAL A   2                                                      
-HETATM   16  N   PS0 A   3     -11.234  16.802  30.197  0.50  7.63           N  
-HETATM   17  CA  PS0 A   3     -11.284  16.497  28.779  0.50  6.15           C  
-HETATM   18  C   PS0 A   3     -10.818  17.753  28.010  0.50  8.61           C  
-HETATM   19  OS  PS0 A   3     -11.697  18.822  28.249  0.50  4.53           O  
-HETATM   20  CB  PS0 A   3     -12.670  16.079  28.322  0.50 12.53           C  
-HETATM   21  CG  PS0 A   3     -13.432  14.971  29.048  0.50 11.04           C  
-HETATM   22  CD1 PS0 A   3     -13.076  13.629  28.983  0.50 10.33           C  
-HETATM   23  CD2 PS0 A   3     -14.560  15.373  29.807  0.50 12.71           C  
-HETATM   24  CE1 PS0 A   3     -13.818  12.638  29.661  0.50 11.11           C  
-HETATM   25  CE2 PS0 A   3     -15.300  14.380  30.483  0.50 14.32           C  
-HETATM   26  CZ  PS0 A   3     -14.917  13.043  30.412  0.50 10.61           C  
-HETATM   27  CM  PS0 A   3      -9.405  18.196  28.546  0.50  6.25           C  
+HETATM   16  N   UNL A   3     -11.234  16.802  30.197  0.50  7.63           N  
+HETATM   17  CA  UNL A   3     -11.284  16.497  28.779  0.50  6.15           C  
+HETATM   18  C   UNL A   3     -10.818  17.753  28.010  0.50  8.61           C  
+HETATM   19  OS  UNL A   3     -11.697  18.822  28.249  0.50  4.53           O  
+HETATM   20  CB  UNL A   3     -12.670  16.079  28.322  0.50 12.53           C  
+HETATM   21  CG  UNL A   3     -13.432  14.971  29.048  0.50 11.04           C  
+HETATM   22  CD1 UNL A   3     -13.076  13.629  28.983  0.50 10.33           C  
+HETATM   23  CD2 UNL A   3     -14.560  15.373  29.807  0.50 12.71           C  
+HETATM   24  CE1 UNL A   3     -13.818  12.638  29.661  0.50 11.11           C  
+HETATM   25  CE2 UNL A   3     -15.300  14.380  30.483  0.50 14.32           C  
+HETATM   26  CZ  UNL A   3     -14.917  13.043  30.412  0.50 10.61           C  
+HETATM   27  CM  UNL A   3      -9.405  18.196  28.546  0.50  6.25           C  
 HETATM   28  O   HOH A   4      -3.126  40.621  48.726  1.00 47.60           O  
 HETATM   29  O   HOH A   5      -2.279  35.565  45.117  1.00 43.93           O  
 HETATM   30  O   HOH A   6       5.765  35.848  41.846  1.00 36.24           O  
 HETATM   31  O   HOH A   7     -12.666  40.044  22.441  1.00 41.24           O  
 HETATM   32  O   HOH A   8      -4.462  37.411  18.124  1.00 30.94           O  
 HETATM   33  O   HOH A   9      -1.109  26.454  19.470  1.00 39.06           O  
-CONECT   16   17
-CONECT   17   16   18   20
+CONECT   16   17   17
+CONECT   17   16   16   18   20
 CONECT   18   17   19   27
 CONECT   19   18
 CONECT   20   17   21
diff --git a/modules/mol/mm/src/observer.cc b/modules/mol/mm/src/observer.cc
index 90a862083..0abc555da 100644
--- a/modules/mol/mm/src/observer.cc
+++ b/modules/mol/mm/src/observer.cc
@@ -55,7 +55,7 @@ void TrajWriter::Init(boost::shared_ptr<OpenMM::Context> c,
   registered_ = true;
 
   context_ = c;
-  ost::io::IOProfile profile("CHARMM",false,false,false,false,false);
+  ost::io::IOProfile profile("CHARMM",false,false,false,false,false,false);
   ost::io::PDBWriter writer(pdb_filename_, profile);
   writer.Write(ent.GetAtomList());
 
-- 
GitLab