diff --git a/modules/conop/pymod/export_compound.cc b/modules/conop/pymod/export_compound.cc index 400fad7738b0d3acad1df591aeb380e91146a2f8..98acbb126527f4c274cefb5b6e70377861f8dd77 100644 --- a/modules/conop/pymod/export_compound.cc +++ b/modules/conop/pymod/export_compound.cc @@ -126,6 +126,9 @@ void export_Compound() { .add_property("inchi_key", make_function(&Compound::GetInchiKey, return_value_policy<copy_const_reference>())) + .add_property("smiles", + make_function(&Compound::GetSMILES, + return_value_policy<copy_const_reference>())) ; class_<AtomSpec>("AtomSpec", no_init) diff --git a/modules/conop/src/compound.hh b/modules/conop/src/compound.hh index 948cecee588ed3a594ea8775c4524f1d5ff87c4e..8d1d7be1d5f569819ec9d919ca6533d1ba304167 100644 --- a/modules/conop/src/compound.hh +++ b/modules/conop/src/compound.hh @@ -149,6 +149,7 @@ public: name_(), inchi_(), inchi_key_(), + smiles_(), atom_specs_(), bond_specs_(), chem_class_(), @@ -252,6 +253,10 @@ public: const String& GetInchiKey() { return inchi_key_; } + void SetSMILES(const String& smiles) { smiles_=smiles; } + + const String& GetSMILES() { return smiles_; } + const BondSpecList& GetBondSpecs() const { return bond_specs_; } @@ -281,6 +286,7 @@ private: String name_; String inchi_; String inchi_key_; + String smiles_; AtomSpecList atom_specs_; BondSpecList bond_specs_; mol::ChemClass chem_class_; diff --git a/modules/conop/src/compound_lib.cc b/modules/conop/src/compound_lib.cc index 3c434b86b306c807770a4179186574002f64cf48..9529157ef463eaf054611853ca256293a5eae462 100644 --- a/modules/conop/src/compound_lib.cc +++ b/modules/conop/src/compound_lib.cc @@ -62,7 +62,8 @@ const char* CREATE_CMD[]={ " pdb_modified TIMESTAMP, " " name VARCHAR(256), " " inchi_code TEXT, " -" inchi_key TEXT " +" inchi_key TEXT, " +" smiles TEXT " ");", " CREATE UNIQUE INDEX IF NOT EXISTS commpound_tlc_index ON chem_compounds " " (tlc, dialect)", @@ -98,8 +99,9 @@ const char* CREATE_CMD[]={ const char* INSERT_COMPOUND_STATEMENT="INSERT INTO chem_compounds " -" (tlc, olc, dialect, chem_class, chem_type, formula, pdb_initial, pdb_modified, name, inchi_code, inchi_key) " -" VALUES (?, ?, ?, ?, ?, ?, DATE(?), DATE(?), ?, ?, ?)"; +" (tlc, olc, dialect, chem_class, chem_type, formula, pdb_initial, " +" pdb_modified, name, inchi_code, inchi_key, smiles) " +" VALUES (?, ?, ?, ?, ?, ?, DATE(?), DATE(?), ?, ?, ?, ?)"; const char* INSERT_ATOM_STATEMENT="INSERT INTO atoms " " (compound_id, name, alt_name, element, is_aromatic, stereo_conf, " @@ -262,6 +264,8 @@ void CompoundLib::AddCompound(const CompoundPtr& compound) compound->GetInchi().length(), NULL); sqlite3_bind_text(stmt, 11, compound->GetInchiKey().c_str(), compound->GetInchiKey().length(), NULL); + sqlite3_bind_text(stmt, 12, compound->GetSMILES().c_str(), + compound->GetSMILES().length(), NULL); } else { LOG_ERROR(sqlite3_errmsg(db_->ptr)); sqlite3_finalize(stmt); @@ -405,6 +409,13 @@ CompoundLibPtr CompoundLib::Load(const String& database, bool readonly) &stmt, NULL); lib->inchi_available_ = retval==SQLITE_OK; sqlite3_finalize(stmt); + // check if SMILES are available + aq="SELECT smiles FROM chem_compounds LIMIT 1"; + retval=sqlite3_prepare_v2(lib->db_->ptr, aq.c_str(), + static_cast<int>(aq.length()), + &stmt, NULL); + lib->smiles_available_ = retval==SQLITE_OK; + sqlite3_finalize(stmt); lib->creation_date_ = lib->GetCreationDate(); lib->ost_version_used_ = lib->GetOSTVersionUsed(); @@ -471,17 +482,24 @@ CompoundPtr CompoundLib::FindCompound(const String& id, return i->second; } String query="SELECT id, tlc, olc, chem_class, dialect, formula"; - int col_offset = 0; + int col_offset_inchi = 0; + int col_offset_smiles = 0; if(chem_type_available_) { query+=", chem_type"; - col_offset+=1; + col_offset_inchi+=1; + col_offset_smiles+=1; if(name_available_) { query+=", name"; - col_offset+=1; + col_offset_inchi+=1; + col_offset_smiles+=1; } } if(inchi_available_) { query+=", inchi_code, inchi_key"; + col_offset_smiles+=2; + } + if(smiles_available_) { + query+=", smiles"; } query+=" FROM chem_compounds" @@ -515,15 +533,22 @@ CompoundPtr CompoundLib::FindCompound(const String& id, } } if (inchi_available_) { - const char* inchi_code=reinterpret_cast<const char*>(sqlite3_column_text(stmt, 6+col_offset)); + const char* inchi_code=reinterpret_cast<const char*>(sqlite3_column_text(stmt, 6+col_offset_inchi)); if (inchi_code) { compound->SetInchi(inchi_code); } - const char* inchi_key=reinterpret_cast<const char*>(sqlite3_column_text(stmt, 6+col_offset+1)); + const char* inchi_key=reinterpret_cast<const char*>(sqlite3_column_text(stmt, 6+col_offset_inchi+1)); if (inchi_key) { compound->SetInchiKey(inchi_key); } } + if (smiles_available_) { + const char* smiles=reinterpret_cast<const char*>(sqlite3_column_text(stmt, 6+col_offset_smiles)); + if (smiles) { + compound->SetSMILES(smiles); + } + } + // Load atoms and bonds this->LoadAtomsFromDB(compound, pk); this->LoadBondsFromDB(compound, pk); @@ -548,6 +573,7 @@ CompoundLib::CompoundLib(): chem_type_available_(false), name_available_(), inchi_available_(), + smiles_available_(), creation_date_(), ost_version_used_() { } diff --git a/modules/conop/src/compound_lib.hh b/modules/conop/src/compound_lib.hh index 2199c1c60511fb46eb6b8ed6c086332751fc7fae..b61355b2021cefd514edfe74d5fe61158373bcf4 100644 --- a/modules/conop/src/compound_lib.hh +++ b/modules/conop/src/compound_lib.hh @@ -58,6 +58,7 @@ private: bool chem_type_available_; // wether pdbx_type is available in db bool name_available_; // wether name is available in db bool inchi_available_; //whether inchi is available in db + bool smiles_available_; //whether smiles are available in db Date creation_date_; String ost_version_used_; }; diff --git a/modules/conop/tests/test_complib.py b/modules/conop/tests/test_complib.py index b094625cb166e26d68192a233e5e5e1469e40146..2419d57eb1517cf0ac94bbdb9a0595bc94ed572a 100644 --- a/modules/conop/tests/test_complib.py +++ b/modules/conop/tests/test_complib.py @@ -7,8 +7,8 @@ import tempfile class TestCompLib(unittest.TestCase): - def test_three_vs_five_letter_code(self): - + @classmethod + def setUpClass(cls): prefix_path = ost.GetPrefixPath() chemdict_tool_path = os.path.join(prefix_path, "bin", "chemdict_tool") if not os.path.exists(chemdict_tool_path): @@ -18,8 +18,11 @@ class TestCompLib(unittest.TestCase): complib_path = os.path.join(tmp_dir.name, "test_complib.dat") cmd = [chemdict_tool_path, "create", compounds_path, complib_path] subprocess.run(cmd) + cls.complib = conop.CompoundLib.Load(complib_path) + tmp_dir.cleanup() - complib = conop.CompoundLib.Load(complib_path) + def test_three_vs_five_letter_code(self): + complib = self.complib comp_001 = complib.FindCompound("001") comp_hello = complib.FindCompound("hello") @@ -29,6 +32,12 @@ class TestCompLib(unittest.TestCase): self.assertFalse(comp_hello is None) self.assertTrue(comp_yolo is None) + def test_smiles(self): + complib = self.complib + comp_001 = complib.FindCompound("001") + self.assertTrue(comp_001.smiles == "COc1cc(cc(c1OC)OC)C(C(=O)N2CCCC[C@H]2C(=O)O[C@@H](CCCc3ccccc3)CCCc4cccnc4)(F)F") + + if __name__ == "__main__": from ost import testutils testutils.RunTests() diff --git a/modules/conop/tests/test_compound.py b/modules/conop/tests/test_compound.py index a6c704d408d1c3926a786e3e4f0988b6ddfbfcf2..75e4f4523e6fe3694e3dd0b473a81533c15c416b 100644 --- a/modules/conop/tests/test_compound.py +++ b/modules/conop/tests/test_compound.py @@ -21,6 +21,7 @@ class TestCompound(unittest.TestCase): self.assertEqual(compound.inchi, "1S/C3H7NO2/c1-2(4)3(5)6/h2H,4H2,1H3,(H,5,6)/t2-/m0/s1") self.assertEqual(compound.inchi_key, "QNAYBMKLOCPYGJ-REOHCLBHSA-N") + self.assertEqual(compound.smiles, "C[C@@H](C(=O)O)N" ) if __name__=='__main__': diff --git a/modules/io/src/mol/chemdict_parser.cc b/modules/io/src/mol/chemdict_parser.cc index d392692c7fbc0cca9114d8051519d0ce51ace003..e344cd2c7d642ae7227b52c7492c7bf848a44e3d 100644 --- a/modules/io/src/mol/chemdict_parser.cc +++ b/modules/io/src/mol/chemdict_parser.cc @@ -85,6 +85,9 @@ void ChemdictParser::OnDataRow(const StarLoopDesc& header, compound_->SetInchi(columns[indices_[DESC]].substr(6).str()); } else if (columns[indices_[DESC_TYPE]] == StringRef("InChIKey", 8)) { compound_->SetInchiKey(columns[indices_[DESC]].str()); + } else if (columns[indices_[DESC_TYPE]] == StringRef("SMILES_CANONICAL", 16) && + columns[indices_[PROGRAM]] == StringRef("OpenEye OEToolkits", 18)) { + compound_->SetSMILES(columns[indices_[DESC]].str()); } } } diff --git a/modules/io/src/mol/chemdict_parser.hh b/modules/io/src/mol/chemdict_parser.hh index d42fe9a223f39b4805991e017ff1e7df34e7809e..5c9281bb7e3f776498730148c712bb3ff1a2f024 100644 --- a/modules/io/src/mol/chemdict_parser.hh +++ b/modules/io/src/mol/chemdict_parser.hh @@ -80,7 +80,8 @@ private: ATOM_ID2=1, BOND_ORDER=2, DESC_TYPE=0, - DESC=1 + DESC=1, + PROGRAM=2, } PropIndex; char last_; int indices_[10];