From af6f07b7feba19e9910a48725b529b1144e4a988 Mon Sep 17 00:00:00 2001
From: Xavier Robin <xavalias-github@xavier.robin.name>
Date: Thu, 15 Jun 2023 11:08:40 +0200
Subject: [PATCH] feat: support for V2000 M  CHG lines

---
 modules/io/src/mol/sdf_reader.cc         | 85 ++++++++++++++++++++++++
 modules/io/src/mol/sdf_reader.hh         |  7 ++
 modules/io/tests/test_io_sdf.py          |  7 ++
 modules/io/tests/testfiles/sdf/m_chg.sdf | 19 ++++++
 4 files changed, 118 insertions(+)
 create mode 100644 modules/io/tests/testfiles/sdf/m_chg.sdf

diff --git a/modules/io/src/mol/sdf_reader.cc b/modules/io/src/mol/sdf_reader.cc
index ed53e5a0f..a64a46ec9 100644
--- a/modules/io/src/mol/sdf_reader.cc
+++ b/modules/io/src/mol/sdf_reader.cc
@@ -84,6 +84,8 @@ void SDFReader::Import(mol::EntityHandle& ent)
       AddAtom(ParseAtom(line, line_num), line_num, ent, true, editor);
     } else if (version_ == "V2000" && line_num<=bond_count_+atom_count_+4) {
       AddBond(ParseBond(line, line_num), line_num, ent, editor);
+    } else if (version_ == "V2000" &&  boost::iequals(line.substr(0,6), "M  CHG")) {
+      AddCharge(ParseMCharge(line, line_num), line_num, ent, editor);
     } else if (boost::iequals(line.substr(0,2), "> ")) {
       // parse data items
       int data_header_start = line.find('<');
@@ -130,6 +132,7 @@ void SDFReader::ClearState(const boost::filesystem::path& loc)
   version_="";
   v3000_bond_block_=false;
   v3000_atom_block_=false;
+  charges_reset_=false;
 }
 
 void SDFReader::NextMolecule()
@@ -141,6 +144,7 @@ void SDFReader::NextMolecule()
   version_="";
   v3000_bond_block_=false;
   v3000_atom_block_=false;
+  charges_reset_=false;
   curr_residue_ = ost::mol::ResidueHandle();
   curr_chain_ = ost::mol::ChainHandle();
 }
@@ -365,6 +369,87 @@ void SDFReader::AddBond(const bond_data& bond_tuple, int line_num, mol::EntityHa
             << s_type << ") ");
 }
 
+
+void SDFReader::ResetCharges()
+// from doc of V2000 Atom Block:
+// > Retained for compatibility with older Ctabs, M CHG and M RAD lines take
+// > precedence.
+// Therefore we must reset all charges of the residue if we encounter an
+// M  CHG line.
+{
+  LOG_DEBUG("Resetting all charges to 0.");
+  for (mol::AtomHandle & atom : curr_residue_.GetAtomList()) {
+    atom.SetCharge(0.0);
+  }
+  charges_reset_=true;
+}
+
+
+SDFReader::charge_data SDFReader::ParseMCharge(const String& line, int line_num)
+{
+
+  LOG_TRACE( "line: [" << line << "]" );
+
+  if (!charges_reset_) {
+    ResetCharges();
+  }
+
+  if(line.length()<15 || line.length()>17) {
+    // Handle the case where we have trailing space characters
+    if (line.length()>17 && boost::trim_copy(line.substr(17)) == "") {
+      LOG_DEBUG( "Ignoring trailing space" );
+    }
+    else {
+      String msg="Bad Charge line %d: Not correct number of characters on the"
+                 " line: %i (should be between 15 and 17)";
+      throw IOException(str(format(msg) % line_num % line.length()));
+    }
+  }
+
+  String atom_index=line.substr(10,3);
+  String charge=line.substr(14,3);
+
+  return std::make_tuple(atom_index, charge);
+}
+
+  void SDFReader::AddCharge(const charge_data& charge_tuple, int line_num, mol::EntityHandle& ent,
+                        mol::XCSEditor& editor)
+{
+  String s_atom_index, s_charge;
+  tie(s_atom_index, s_charge) = charge_tuple;
+
+  int atom_index;
+  Real charge;
+
+  try {
+    atom_index=boost::lexical_cast<int>(boost::trim_copy(s_atom_index));
+    if (atom_index > atom_count_) {
+      String msg="Bad charge line %d: Atom index"
+                      " '%d' greater than number of atoms in the molecule (%d).";
+      throw IOException(str(format(msg) % line_num % atom_index % atom_count_));
+    } else if (atom_index < 1) {
+      String msg="Bad charge line %d: Atom index %d < 1.";
+      throw IOException(str(format(msg) % line_num % atom_index));
+    }
+  } catch(boost::bad_lexical_cast&) {
+    String msg="Bad charge line %d: Can't convert atom index"
+                " '%s' to integral constant.";
+    throw IOException(str(format(msg) % line_num % s_atom_index));
+  }
+
+  try {
+    charge=boost::lexical_cast<Real>(boost::trim_copy(s_charge));
+  } catch(boost::bad_lexical_cast&) {
+    String msg="Bad charge line %d: Can't convert charge"
+                " '%s' to real number.";
+    throw IOException(str(format(msg) % line_num % s_charge));
+  }
+
+  curr_residue_.GetAtomList()[atom_index - 1].SetCharge(charge);
+
+  LOG_DEBUG("Setting charge of atom " << atom_index - 1 << " to " << charge);
+}
+
 SDFReader::v3000_line_tokens SDFReader::TokenizeV3000Line(const String& line,
                                                           int line_num,
                                                           int num_posval)
diff --git a/modules/io/src/mol/sdf_reader.hh b/modules/io/src/mol/sdf_reader.hh
index f684a5325..f704462c2 100644
--- a/modules/io/src/mol/sdf_reader.hh
+++ b/modules/io/src/mol/sdf_reader.hh
@@ -47,6 +47,7 @@ public:
 private:
   typedef std::tuple<int, String, String, String, String, String> atom_data;
   typedef std::tuple<String, String, String> bond_data;
+  typedef std::tuple<String, String> charge_data;
   typedef std::tuple<std::vector<String>, std::map<String, String>> v3000_line_tokens;
 
   boost::iostreams::filtering_stream<boost::iostreams::input>& GetLine(
@@ -68,6 +69,11 @@ private:
   void AddBond(const bond_data& bond_tuple, int line_num, mol::EntityHandle& ent,
                        mol::XCSEditor& editor);
 
+  charge_data ParseMCharge(const String& line, int line_num);
+  void AddCharge(const charge_data& charge_tuple, int line_num, mol::EntityHandle& ent,
+                       mol::XCSEditor& editor);
+  void ResetCharges();
+
   // V3000 methods
   v3000_line_tokens TokenizeV3000Line(const String& line, int line_num,
                                       int num_posval);
@@ -94,6 +100,7 @@ private:
   String version_;
   bool v3000_atom_block_;
   bool v3000_bond_block_;
+  bool charges_reset_;
 };
 
 }}
diff --git a/modules/io/tests/test_io_sdf.py b/modules/io/tests/test_io_sdf.py
index 735158fc3..5ca35b7fe 100644
--- a/modules/io/tests/test_io_sdf.py
+++ b/modules/io/tests/test_io_sdf.py
@@ -38,6 +38,13 @@ class TestSDF(unittest.TestCase):
       ent.FindAtom("00001_Simple Ligand", 1, "6").charge = -4
       io.EntityToSDFStr(ent)
 
+  def test_MChg(self):
+    ent = io.LoadSDF('testfiles/sdf/m_chg.sdf')
+    cl_at = ent.FindAtom("00001_Simple Ligand", 1, "6")
+    self.assertEqual(cl_at.charge, -1)
+    # Charge from atom line is ignored
+    n_at = ent.FindAtom("00001_Simple Ligand", 1, "1")
+    self.assertEqual(n_at.charge, 0)
     
 if __name__== '__main__':
   from ost import testutils
diff --git a/modules/io/tests/testfiles/sdf/m_chg.sdf b/modules/io/tests/testfiles/sdf/m_chg.sdf
new file mode 100644
index 000000000..e13bae5a7
--- /dev/null
+++ b/modules/io/tests/testfiles/sdf/m_chg.sdf
@@ -0,0 +1,19 @@
+Simple Ligand
+
+ Teststructure
+  6  6  0  0  1  0            999 V2000
+    0.0000    0.0000    0.0000 N   0  3  0  0  0  0
+    1.0000    0.0000    0.0000 C   0  0  0  0  0  0
+    0.0000    1.0000    0.0000 O   0  0  0  0  0  0
+    1.0000    1.0000    0.0000 S   0  0  0  0  0  0
+    2.0000    2.0000    0.0000 C   0  0  0  0  0  0
+   -1.0000   -1.0000    0.0000 Cl  0  0  0  0  0  0
+  1  2  2  0  0  0
+  1  3  1  0  0  0
+  1  6  1  0  0  0
+  2  4  1  0  0  0
+  3  4  1  0  0  0
+  4  5  3  0  0  0
+M  CHG  1   6  -1
+M  END
+$$$$
-- 
GitLab