Skip to content
Snippets Groups Projects
Unverified Commit 486a5b2b authored by Xavier Robin's avatar Xavier Robin
Browse files

feat: SCHWED-6002 add SMILES to compound lib

parent e2f2ee92
No related branches found
No related tags found
No related merge requests found
...@@ -126,6 +126,9 @@ void export_Compound() { ...@@ -126,6 +126,9 @@ void export_Compound() {
.add_property("inchi_key", .add_property("inchi_key",
make_function(&Compound::GetInchiKey, make_function(&Compound::GetInchiKey,
return_value_policy<copy_const_reference>())) return_value_policy<copy_const_reference>()))
.add_property("smiles",
make_function(&Compound::GetSMILES,
return_value_policy<copy_const_reference>()))
; ;
class_<AtomSpec>("AtomSpec", no_init) class_<AtomSpec>("AtomSpec", no_init)
......
...@@ -149,6 +149,7 @@ public: ...@@ -149,6 +149,7 @@ public:
name_(), name_(),
inchi_(), inchi_(),
inchi_key_(), inchi_key_(),
smiles_(),
atom_specs_(), atom_specs_(),
bond_specs_(), bond_specs_(),
chem_class_(), chem_class_(),
...@@ -252,6 +253,10 @@ public: ...@@ -252,6 +253,10 @@ public:
const String& GetInchiKey() { return inchi_key_; } const String& GetInchiKey() { return inchi_key_; }
// Store the SMILES string of this compound; the argument is copied into
// the smiles_ member.
void SetSMILES(const String& smiles)
{
  smiles_ = smiles;
}
// Read access to the SMILES string held in smiles_. Returns a reference to
// the member, not a copy.
// NOTE(review): does not modify the object, so it could presumably be
// declared const like GetBondSpecs() - confirm against callers.
const String& GetSMILES()
{
  return smiles_;
}
const BondSpecList& GetBondSpecs() const { const BondSpecList& GetBondSpecs() const {
return bond_specs_; return bond_specs_;
} }
...@@ -281,6 +286,7 @@ private: ...@@ -281,6 +286,7 @@ private:
String name_; String name_;
String inchi_; String inchi_;
String inchi_key_; String inchi_key_;
String smiles_;
AtomSpecList atom_specs_; AtomSpecList atom_specs_;
BondSpecList bond_specs_; BondSpecList bond_specs_;
mol::ChemClass chem_class_; mol::ChemClass chem_class_;
......
...@@ -62,7 +62,8 @@ const char* CREATE_CMD[]={ ...@@ -62,7 +62,8 @@ const char* CREATE_CMD[]={
" pdb_modified TIMESTAMP, " " pdb_modified TIMESTAMP, "
" name VARCHAR(256), " " name VARCHAR(256), "
" inchi_code TEXT, " " inchi_code TEXT, "
" inchi_key TEXT " " inchi_key TEXT, "
" smiles TEXT "
");", ");",
" CREATE UNIQUE INDEX IF NOT EXISTS commpound_tlc_index ON chem_compounds " " CREATE UNIQUE INDEX IF NOT EXISTS commpound_tlc_index ON chem_compounds "
" (tlc, dialect)", " (tlc, dialect)",
...@@ -98,8 +99,9 @@ const char* CREATE_CMD[]={ ...@@ -98,8 +99,9 @@ const char* CREATE_CMD[]={
const char* INSERT_COMPOUND_STATEMENT="INSERT INTO chem_compounds " const char* INSERT_COMPOUND_STATEMENT="INSERT INTO chem_compounds "
" (tlc, olc, dialect, chem_class, chem_type, formula, pdb_initial, pdb_modified, name, inchi_code, inchi_key) " " (tlc, olc, dialect, chem_class, chem_type, formula, pdb_initial, "
" VALUES (?, ?, ?, ?, ?, ?, DATE(?), DATE(?), ?, ?, ?)"; " pdb_modified, name, inchi_code, inchi_key, smiles) "
" VALUES (?, ?, ?, ?, ?, ?, DATE(?), DATE(?), ?, ?, ?, ?)";
const char* INSERT_ATOM_STATEMENT="INSERT INTO atoms " const char* INSERT_ATOM_STATEMENT="INSERT INTO atoms "
" (compound_id, name, alt_name, element, is_aromatic, stereo_conf, " " (compound_id, name, alt_name, element, is_aromatic, stereo_conf, "
...@@ -262,6 +264,8 @@ void CompoundLib::AddCompound(const CompoundPtr& compound) ...@@ -262,6 +264,8 @@ void CompoundLib::AddCompound(const CompoundPtr& compound)
compound->GetInchi().length(), NULL); compound->GetInchi().length(), NULL);
sqlite3_bind_text(stmt, 11, compound->GetInchiKey().c_str(), sqlite3_bind_text(stmt, 11, compound->GetInchiKey().c_str(),
compound->GetInchiKey().length(), NULL); compound->GetInchiKey().length(), NULL);
sqlite3_bind_text(stmt, 12, compound->GetSMILES().c_str(),
compound->GetSMILES().length(), NULL);
} else { } else {
LOG_ERROR(sqlite3_errmsg(db_->ptr)); LOG_ERROR(sqlite3_errmsg(db_->ptr));
sqlite3_finalize(stmt); sqlite3_finalize(stmt);
...@@ -405,6 +409,13 @@ CompoundLibPtr CompoundLib::Load(const String& database, bool readonly) ...@@ -405,6 +409,13 @@ CompoundLibPtr CompoundLib::Load(const String& database, bool readonly)
&stmt, NULL); &stmt, NULL);
lib->inchi_available_ = retval==SQLITE_OK; lib->inchi_available_ = retval==SQLITE_OK;
sqlite3_finalize(stmt); sqlite3_finalize(stmt);
// check if SMILES are available
aq="SELECT smiles FROM chem_compounds LIMIT 1";
retval=sqlite3_prepare_v2(lib->db_->ptr, aq.c_str(),
static_cast<int>(aq.length()),
&stmt, NULL);
lib->smiles_available_ = retval==SQLITE_OK;
sqlite3_finalize(stmt);
lib->creation_date_ = lib->GetCreationDate(); lib->creation_date_ = lib->GetCreationDate();
lib->ost_version_used_ = lib->GetOSTVersionUsed(); lib->ost_version_used_ = lib->GetOSTVersionUsed();
...@@ -471,17 +482,24 @@ CompoundPtr CompoundLib::FindCompound(const String& id, ...@@ -471,17 +482,24 @@ CompoundPtr CompoundLib::FindCompound(const String& id,
return i->second; return i->second;
} }
String query="SELECT id, tlc, olc, chem_class, dialect, formula"; String query="SELECT id, tlc, olc, chem_class, dialect, formula";
int col_offset = 0; int col_offset_inchi = 0;
int col_offset_smiles = 0;
if(chem_type_available_) { if(chem_type_available_) {
query+=", chem_type"; query+=", chem_type";
col_offset+=1; col_offset_inchi+=1;
col_offset_smiles+=1;
if(name_available_) { if(name_available_) {
query+=", name"; query+=", name";
col_offset+=1; col_offset_inchi+=1;
col_offset_smiles+=1;
} }
} }
if(inchi_available_) { if(inchi_available_) {
query+=", inchi_code, inchi_key"; query+=", inchi_code, inchi_key";
col_offset_smiles+=2;
}
if(smiles_available_) {
query+=", smiles";
} }
query+=" FROM chem_compounds" query+=" FROM chem_compounds"
...@@ -515,15 +533,22 @@ CompoundPtr CompoundLib::FindCompound(const String& id, ...@@ -515,15 +533,22 @@ CompoundPtr CompoundLib::FindCompound(const String& id,
} }
} }
if (inchi_available_) { if (inchi_available_) {
const char* inchi_code=reinterpret_cast<const char*>(sqlite3_column_text(stmt, 6+col_offset)); const char* inchi_code=reinterpret_cast<const char*>(sqlite3_column_text(stmt, 6+col_offset_inchi));
if (inchi_code) { if (inchi_code) {
compound->SetInchi(inchi_code); compound->SetInchi(inchi_code);
} }
const char* inchi_key=reinterpret_cast<const char*>(sqlite3_column_text(stmt, 6+col_offset+1)); const char* inchi_key=reinterpret_cast<const char*>(sqlite3_column_text(stmt, 6+col_offset_inchi+1));
if (inchi_key) { if (inchi_key) {
compound->SetInchiKey(inchi_key); compound->SetInchiKey(inchi_key);
} }
} }
if (smiles_available_) {
const char* smiles=reinterpret_cast<const char*>(sqlite3_column_text(stmt, 6+col_offset_smiles));
if (smiles) {
compound->SetSMILES(smiles);
}
}
// Load atoms and bonds // Load atoms and bonds
this->LoadAtomsFromDB(compound, pk); this->LoadAtomsFromDB(compound, pk);
this->LoadBondsFromDB(compound, pk); this->LoadBondsFromDB(compound, pk);
...@@ -548,6 +573,7 @@ CompoundLib::CompoundLib(): ...@@ -548,6 +573,7 @@ CompoundLib::CompoundLib():
chem_type_available_(false), chem_type_available_(false),
name_available_(), name_available_(),
inchi_available_(), inchi_available_(),
smiles_available_(),
creation_date_(), creation_date_(),
ost_version_used_() { } ost_version_used_() { }
......
...@@ -58,6 +58,7 @@ private: ...@@ -58,6 +58,7 @@ private:
bool chem_type_available_; // whether pdbx_type is available in db bool chem_type_available_; // whether pdbx_type is available in db
bool name_available_; // whether name is available in db bool name_available_; // whether name is available in db
bool inchi_available_; //whether inchi is available in db bool inchi_available_; //whether inchi is available in db
bool smiles_available_; //whether smiles are available in db
Date creation_date_; Date creation_date_;
String ost_version_used_; String ost_version_used_;
}; };
......
...@@ -7,8 +7,8 @@ import tempfile ...@@ -7,8 +7,8 @@ import tempfile
class TestCompLib(unittest.TestCase): class TestCompLib(unittest.TestCase):
def test_three_vs_five_letter_code(self): @classmethod
def setUpClass(cls):
prefix_path = ost.GetPrefixPath() prefix_path = ost.GetPrefixPath()
chemdict_tool_path = os.path.join(prefix_path, "bin", "chemdict_tool") chemdict_tool_path = os.path.join(prefix_path, "bin", "chemdict_tool")
if not os.path.exists(chemdict_tool_path): if not os.path.exists(chemdict_tool_path):
...@@ -18,8 +18,11 @@ class TestCompLib(unittest.TestCase): ...@@ -18,8 +18,11 @@ class TestCompLib(unittest.TestCase):
complib_path = os.path.join(tmp_dir.name, "test_complib.dat") complib_path = os.path.join(tmp_dir.name, "test_complib.dat")
cmd = [chemdict_tool_path, "create", compounds_path, complib_path] cmd = [chemdict_tool_path, "create", compounds_path, complib_path]
subprocess.run(cmd) subprocess.run(cmd)
cls.complib = conop.CompoundLib.Load(complib_path)
tmp_dir.cleanup()
complib = conop.CompoundLib.Load(complib_path) def test_three_vs_five_letter_code(self):
complib = self.complib
comp_001 = complib.FindCompound("001") comp_001 = complib.FindCompound("001")
comp_hello = complib.FindCompound("hello") comp_hello = complib.FindCompound("hello")
...@@ -29,6 +32,12 @@ class TestCompLib(unittest.TestCase): ...@@ -29,6 +32,12 @@ class TestCompLib(unittest.TestCase):
self.assertFalse(comp_hello is None) self.assertFalse(comp_hello is None)
self.assertTrue(comp_yolo is None) self.assertTrue(comp_yolo is None)
def test_smiles(self):
complib = self.complib
comp_001 = complib.FindCompound("001")
self.assertTrue(comp_001.smiles == "COc1cc(cc(c1OC)OC)C(C(=O)N2CCCC[C@H]2C(=O)O[C@@H](CCCc3ccccc3)CCCc4cccnc4)(F)F")
if __name__ == "__main__": if __name__ == "__main__":
from ost import testutils from ost import testutils
testutils.RunTests() testutils.RunTests()
...@@ -21,6 +21,7 @@ class TestCompound(unittest.TestCase): ...@@ -21,6 +21,7 @@ class TestCompound(unittest.TestCase):
self.assertEqual(compound.inchi, self.assertEqual(compound.inchi,
"1S/C3H7NO2/c1-2(4)3(5)6/h2H,4H2,1H3,(H,5,6)/t2-/m0/s1") "1S/C3H7NO2/c1-2(4)3(5)6/h2H,4H2,1H3,(H,5,6)/t2-/m0/s1")
self.assertEqual(compound.inchi_key, "QNAYBMKLOCPYGJ-REOHCLBHSA-N") self.assertEqual(compound.inchi_key, "QNAYBMKLOCPYGJ-REOHCLBHSA-N")
self.assertEqual(compound.smiles, "C[C@@H](C(=O)O)N" )
if __name__=='__main__': if __name__=='__main__':
......
...@@ -85,6 +85,9 @@ void ChemdictParser::OnDataRow(const StarLoopDesc& header, ...@@ -85,6 +85,9 @@ void ChemdictParser::OnDataRow(const StarLoopDesc& header,
compound_->SetInchi(columns[indices_[DESC]].substr(6).str()); compound_->SetInchi(columns[indices_[DESC]].substr(6).str());
} else if (columns[indices_[DESC_TYPE]] == StringRef("InChIKey", 8)) { } else if (columns[indices_[DESC_TYPE]] == StringRef("InChIKey", 8)) {
compound_->SetInchiKey(columns[indices_[DESC]].str()); compound_->SetInchiKey(columns[indices_[DESC]].str());
} else if (columns[indices_[DESC_TYPE]] == StringRef("SMILES_CANONICAL", 16) &&
columns[indices_[PROGRAM]] == StringRef("OpenEye OEToolkits", 18)) {
compound_->SetSMILES(columns[indices_[DESC]].str());
} }
} }
} }
......
...@@ -80,7 +80,8 @@ private: ...@@ -80,7 +80,8 @@ private:
ATOM_ID2=1, ATOM_ID2=1,
BOND_ORDER=2, BOND_ORDER=2,
DESC_TYPE=0, DESC_TYPE=0,
DESC=1 DESC=1,
PROGRAM=2,
} PropIndex; } PropIndex;
char last_; char last_;
int indices_[10]; int indices_[10];
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment