diff --git a/modules/io/src/mol/sdf_reader.cc b/modules/io/src/mol/sdf_reader.cc index ed53e5a0f872b979947ef226a09d7fab4bf019ec..a64a46ec918332e4bdb27af3e6fe0c657ea80352 100644 --- a/modules/io/src/mol/sdf_reader.cc +++ b/modules/io/src/mol/sdf_reader.cc @@ -84,6 +84,8 @@ void SDFReader::Import(mol::EntityHandle& ent) AddAtom(ParseAtom(line, line_num), line_num, ent, true, editor); } else if (version_ == "V2000" && line_num<=bond_count_+atom_count_+4) { AddBond(ParseBond(line, line_num), line_num, ent, editor); + } else if (version_ == "V2000" && boost::iequals(line.substr(0,6), "M CHG")) { + AddCharge(ParseMCharge(line, line_num), line_num, ent, editor); } else if (boost::iequals(line.substr(0,2), "> ")) { // parse data items int data_header_start = line.find('<'); @@ -130,6 +132,7 @@ void SDFReader::ClearState(const boost::filesystem::path& loc) version_=""; v3000_bond_block_=false; v3000_atom_block_=false; + charges_reset_=false; } void SDFReader::NextMolecule() @@ -141,6 +144,7 @@ void SDFReader::NextMolecule() version_=""; v3000_bond_block_=false; v3000_atom_block_=false; + charges_reset_=false; curr_residue_ = ost::mol::ResidueHandle(); curr_chain_ = ost::mol::ChainHandle(); } @@ -365,6 +369,87 @@ void SDFReader::AddBond(const bond_data& bond_tuple, int line_num, mol::EntityHa << s_type << ") "); } + +void SDFReader::ResetCharges() +// from doc of V2000 Atom Block: +// > Retained for compatibility with older Ctabs, M CHG and M RAD lines take +// > precedence. +// Therefore we must reset all charges of the residue if we encounter an +// M CHG line. +{ + LOG_DEBUG("Resetting all charges to 0."); + for (mol::AtomHandle & atom : curr_residue_.GetAtomList()) { + atom.SetCharge(0.0); + } + charges_reset_=true; +} + + +SDFReader::charge_data SDFReader::ParseMCharge(const String& line, int line_num) +{ + + LOG_TRACE( "line: [" << line << "]" ); + + if (!charges_reset_) { + ResetCharges(); + } + + if(line.length()<15 || line.length()>17) { + // Handle the case where we have trailing space characters + if (line.length()>17 && boost::trim_copy(line.substr(17)) == "") { + LOG_DEBUG( "Ignoring trailing space" ); + } + else { + String msg="Bad Charge line %d: Not correct number of characters on the" + " line: %i (should be between 15 and 17)"; + throw IOException(str(format(msg) % line_num % line.length())); + } + } + + String atom_index=line.substr(10,3); + String charge=line.substr(14,3); + + return std::make_tuple(atom_index, charge); +} + + void SDFReader::AddCharge(const charge_data& charge_tuple, int line_num, mol::EntityHandle& ent, + mol::XCSEditor& editor) +{ + String s_atom_index, s_charge; + tie(s_atom_index, s_charge) = charge_tuple; + + int atom_index; + Real charge; + + try { + atom_index=boost::lexical_cast<int>(boost::trim_copy(s_atom_index)); + if (atom_index > atom_count_) { + String msg="Bad charge line %d: Atom index" + " '%d' greater than number of atoms in the molecule (%d)."; + throw IOException(str(format(msg) % line_num % atom_index % atom_count_)); + } else if (atom_index < 1) { + String msg="Bad charge line %d: Atom index %d < 1."; + throw IOException(str(format(msg) % line_num % atom_index)); + } + } catch(boost::bad_lexical_cast&) { + String msg="Bad charge line %d: Can't convert atom index" + " '%s' to integral constant."; + throw IOException(str(format(msg) % line_num % s_atom_index)); + } + + try { + charge=boost::lexical_cast<Real>(boost::trim_copy(s_charge)); + } catch(boost::bad_lexical_cast&) { + String msg="Bad charge line %d: Can't convert charge" + " '%s' to real number."; + throw IOException(str(format(msg) % line_num % s_charge)); + } + + curr_residue_.GetAtomList()[atom_index - 1].SetCharge(charge); + + LOG_DEBUG("Setting charge of atom " << atom_index - 1 << " to " << charge); +} + SDFReader::v3000_line_tokens SDFReader::TokenizeV3000Line(const String& line, int line_num, int num_posval) diff --git a/modules/io/src/mol/sdf_reader.hh b/modules/io/src/mol/sdf_reader.hh index f684a532549b83c0a614bc3da9dd1b54ba310274..f704462c2c93d9467b1bab3ba933b28ab7e50f25 100644 --- a/modules/io/src/mol/sdf_reader.hh +++ b/modules/io/src/mol/sdf_reader.hh @@ -47,6 +47,7 @@ public: private: typedef std::tuple<int, String, String, String, String, String> atom_data; typedef std::tuple<String, String, String> bond_data; + typedef std::tuple<String, String> charge_data; typedef std::tuple<std::vector<String>, std::map<String, String>> v3000_line_tokens; boost::iostreams::filtering_stream<boost::iostreams::input>& GetLine( @@ -68,6 +69,11 @@ private: void AddBond(const bond_data& bond_tuple, int line_num, mol::EntityHandle& ent, mol::XCSEditor& editor); + charge_data ParseMCharge(const String& line, int line_num); + void AddCharge(const charge_data& charge_tuple, int line_num, mol::EntityHandle& ent, + mol::XCSEditor& editor); + void ResetCharges(); + // V3000 methods v3000_line_tokens TokenizeV3000Line(const String& line, int line_num, int num_posval); @@ -94,6 +100,7 @@ private: String version_; bool v3000_atom_block_; bool v3000_bond_block_; + bool charges_reset_; }; }} diff --git a/modules/io/tests/test_io_sdf.py b/modules/io/tests/test_io_sdf.py index 735158fc3c38f8ed2345415ae913143f8b7d1bf1..5ca35b7fe4f576f7a16830fceb9c9f71b815b425 100644 --- a/modules/io/tests/test_io_sdf.py +++ b/modules/io/tests/test_io_sdf.py @@ -38,6 +38,13 @@ class TestSDF(unittest.TestCase): ent.FindAtom("00001_Simple Ligand", 1, "6").charge = -4 io.EntityToSDFStr(ent) + def test_MChg(self): + ent = io.LoadSDF('testfiles/sdf/m_chg.sdf') + cl_at = ent.FindAtom("00001_Simple Ligand", 1, "6") + self.assertEqual(cl_at.charge, -1) + # Charge from atom line is ignored + n_at = ent.FindAtom("00001_Simple Ligand", 1, "1") + self.assertEqual(n_at.charge, 0) if __name__== '__main__': from ost import testutils diff --git a/modules/io/tests/testfiles/sdf/m_chg.sdf b/modules/io/tests/testfiles/sdf/m_chg.sdf new file mode 100644 index 0000000000000000000000000000000000000000..e13bae5a74e887fb15e6bd876a38ee642ec09977 --- /dev/null +++ b/modules/io/tests/testfiles/sdf/m_chg.sdf @@ -0,0 +1,19 @@ +Simple Ligand + + Teststructure + 6 6 0 0 1 0 999 V2000 + 0.0000 0.0000 0.0000 N 0 3 0 0 0 0 + 1.0000 0.0000 0.0000 C 0 0 0 0 0 0 + 0.0000 1.0000 0.0000 O 0 0 0 0 0 0 + 1.0000 1.0000 0.0000 S 0 0 0 0 0 0 + 2.0000 2.0000 0.0000 C 0 0 0 0 0 0 + -1.0000 -1.0000 0.0000 Cl 0 0 0 0 0 0 + 1 2 2 0 0 0 + 1 3 1 0 0 0 + 1 6 1 0 0 0 + 2 4 1 0 0 0 + 3 4 1 0 0 0 + 4 5 3 0 0 0 +M CHG 1 6 -1 +M END +$$$$