Skip to content
Snippets Groups Projects
Verified Commit d6de1def authored by Xavier Robin's avatar Xavier Robin
Browse files

feat: Find compound by SMILES or InChI code/key

parent c941b153
Branches
Tags
No related merge requests found
...@@ -58,11 +58,17 @@ built with OST 1.5.0 or later can be loaded. ...@@ -58,11 +58,17 @@ built with OST 1.5.0 or later can be loaded.
Create a new compound library Create a new compound library
.. method:: FindCompound(tlc, dialect='PDB') .. method:: FindCompound(id, dialect='PDB', by="tlc")
Lookup compound by its three-letter-code, e.g. ALA. If no compound with that Lookup a compound. By default the compound is searched by its
name exists, the function returns None. Compounds are cached after they have three-letter-code, e.g. ALA. This can be changed with the `by` argument.
been loaded with FindCompound. To delete the compound cache, use The following keys are available: "tlc" (three-letter-code or compound ID),
"inchi_code", "inchi_key" and "smiles".
If no compound with that name exists, the function returns None.
Compounds are cached after they have been loaded with FindCompound.
To delete the compound cache, use
:meth:`ClearCache`. :meth:`ClearCache`.
:returns: The found compound :returns: The found compound
......
...@@ -70,9 +70,10 @@ char get_chemtype(CompoundPtr compound) ...@@ -70,9 +70,10 @@ char get_chemtype(CompoundPtr compound)
} }
CompoundPtr find_compound(CompoundLibPtr comp_lib, CompoundPtr find_compound(CompoundLibPtr comp_lib,
const String& tlc, const String& dialect) const String& id, const String& dialect,
const String& by="tlc")
{ {
return comp_lib->FindCompound(tlc, tr_dialect(dialect)); return comp_lib->FindCompound(id, tr_dialect(dialect), by);
} }
bool is_residue_complete(CompoundLibPtr comp_lib, bool is_residue_complete(CompoundLibPtr comp_lib,
...@@ -151,7 +152,7 @@ void export_Compound() { ...@@ -151,7 +152,7 @@ void export_Compound() {
class_<CompoundLib>("CompoundLib", no_init) class_<CompoundLib>("CompoundLib", no_init)
.def("Load", &CompoundLib::Load, arg("readonly")=true).staticmethod("Load") .def("Load", &CompoundLib::Load, arg("readonly")=true).staticmethod("Load")
.def("FindCompound", &find_compound, .def("FindCompound", &find_compound,
(arg("tlc"), arg("dialect")="PDB")) (arg("id"), arg("dialect")="PDB", arg("by")="tlc"))
.def("IsResidueComplete", &is_residue_complete, (arg("residue"), .def("IsResidueComplete", &is_residue_complete, (arg("residue"),
arg("check_hydrogens")=false, arg("check_hydrogens")=false,
arg("dialect")="PDB")) arg("dialect")="PDB"))
......
...@@ -70,8 +70,14 @@ const char* CREATE_CMD[]={ ...@@ -70,8 +70,14 @@ const char* CREATE_CMD[]={
" inchi_key TEXT, " " inchi_key TEXT, "
" smiles TEXT " " smiles TEXT "
");", ");",
" CREATE UNIQUE INDEX IF NOT EXISTS commpound_tlc_index ON chem_compounds " " CREATE UNIQUE INDEX IF NOT EXISTS compound_tlc_index ON chem_compounds "
" (tlc, dialect)", " (tlc, dialect)",
" CREATE INDEX IF NOT EXISTS compound_smiles_index ON chem_compounds "
" (smiles, dialect)",
" CREATE INDEX IF NOT EXISTS compound_inchi_code_index ON chem_compounds "
" (inchi_code, dialect)",
" CREATE INDEX IF NOT EXISTS compound_inchi_key_index ON chem_compounds "
" (inchi_key, dialect)",
"CREATE TABLE IF NOT EXISTS atoms ( " "CREATE TABLE IF NOT EXISTS atoms ( "
" id INTEGER PRIMARY KEY AUTOINCREMENT, " " id INTEGER PRIMARY KEY AUTOINCREMENT, "
" compound_id INTEGER REFERENCES chem_compounds (id) ON DELETE CASCADE, " " compound_id INTEGER REFERENCES chem_compounds (id) ON DELETE CASCADE, "
...@@ -498,18 +504,37 @@ void CompoundLib::LoadBondsFromDB(CompoundPtr comp, int pk) const { ...@@ -498,18 +504,37 @@ void CompoundLib::LoadBondsFromDB(CompoundPtr comp, int pk) const {
} }
CompoundPtr CompoundLib::FindCompound(const String& id, CompoundPtr CompoundLib::FindCompound(const String& id,
Compound::Dialect dialect) const { Compound::Dialect dialect,
CompoundMap::const_iterator i=compound_cache_.find(id); const String& by) const {
// Validate "by" argument
std::set<std::string> allowed_keys{"tlc", "inchi_code", "inchi_key"};
if(smiles_available_) {
allowed_keys.insert("smiles");
}
if (allowed_keys.find(by) == allowed_keys.end()) {
std::stringstream msg;
msg << "Invalid 'by' key: " << by;
throw ost::Error(msg.str());
}
// Check cache
String cache_key = by + "_" + id;
CompoundMap::const_iterator i=compound_cache_.find(cache_key);
if (i!=compound_cache_.end()) { if (i!=compound_cache_.end()) {
LOG_DEBUG("Retrieved compound " << cache_key << " from cache");
return i->second; return i->second;
} }
// Build the query
String query="SELECT id, tlc, olc, chem_class, dialect, formula, chem_type, name, inchi_code, inchi_key"; String query="SELECT id, tlc, olc, chem_class, dialect, formula, chem_type, name, inchi_code, inchi_key";
if(smiles_available_) { if(smiles_available_) {
query+=", smiles"; query+=", smiles";
} }
query+=" FROM chem_compounds" query+=" FROM chem_compounds"
" WHERE tlc=? AND dialect='"+String(1, char(dialect))+"'"; " WHERE " + by + "=? AND dialect='"+String(1, char(dialect))+"'";
// Run the query
sqlite3_stmt* stmt; sqlite3_stmt* stmt;
int retval=sqlite3_prepare_v2(db_->ptr, query.c_str(), int retval=sqlite3_prepare_v2(db_->ptr, query.c_str(),
static_cast<int>(query.length()), static_cast<int>(query.length()),
...@@ -553,7 +578,7 @@ CompoundPtr CompoundLib::FindCompound(const String& id, ...@@ -553,7 +578,7 @@ CompoundPtr CompoundLib::FindCompound(const String& id,
// Load atoms and bonds // Load atoms and bonds
this->LoadAtomsFromDB(compound, pk); this->LoadAtomsFromDB(compound, pk);
this->LoadBondsFromDB(compound, pk); this->LoadBondsFromDB(compound, pk);
compound_cache_.insert(std::make_pair(compound->GetID(), compound)); compound_cache_.insert(std::make_pair(cache_key, compound));
sqlite3_finalize(stmt); sqlite3_finalize(stmt);
return compound; return compound;
} }
......
...@@ -39,7 +39,8 @@ public: ...@@ -39,7 +39,8 @@ public:
~CompoundLib(); ~CompoundLib();
virtual CompoundPtr FindCompound(const String& id, virtual CompoundPtr FindCompound(const String& id,
Compound::Dialect dialect) const; Compound::Dialect dialect,
const String& by="tlc") const;
void AddCompound(const CompoundPtr& compound); void AddCompound(const CompoundPtr& compound);
CompoundLibPtr Copy(const String& filename) const; CompoundLibPtr Copy(const String& filename) const;
void ClearCache(); void ClearCache();
......
...@@ -13,7 +13,8 @@ class DLLEXPORT_OST_CONOP CompoundLibBase { ...@@ -13,7 +13,8 @@ class DLLEXPORT_OST_CONOP CompoundLibBase {
public: public:
virtual ~CompoundLibBase() {} virtual ~CompoundLibBase() {}
virtual CompoundPtr FindCompound(const String& id, virtual CompoundPtr FindCompound(const String& id,
Compound::Dialect dialect) const = 0; Compound::Dialect dialect,
const String& by="tlc") const = 0;
bool IsResidueComplete(const ost::mol::ResidueHandle& res, bool IsResidueComplete(const ost::mol::ResidueHandle& res,
bool check_hydrogens, bool check_hydrogens,
......
...@@ -42,8 +42,15 @@ CompoundMap MinimalCompoundLib::InitCompounds() { ...@@ -42,8 +42,15 @@ CompoundMap MinimalCompoundLib::InitCompounds() {
CompoundPtr MinimalCompoundLib::FindCompound(const String& id, CompoundPtr MinimalCompoundLib::FindCompound(const String& id,
Compound::Dialect dialect) const Compound::Dialect dialect,
const String& by) const
{ {
if (by != "tlc") {
// Only tlc is supported by the minimal compound lib
std::stringstream msg;
msg << "Invalid 'by' key: " << by;
throw ost::Error(msg.str());
}
CompoundMap::const_iterator i = MinimalCompoundLib::compounds_.find(id); CompoundMap::const_iterator i = MinimalCompoundLib::compounds_.find(id);
if (i != MinimalCompoundLib::compounds_.end()) { if (i != MinimalCompoundLib::compounds_.end()) {
return i->second; return i->second;
......
...@@ -17,7 +17,8 @@ public: ...@@ -17,7 +17,8 @@ public:
CompoundLibBase() CompoundLibBase()
{} {}
virtual CompoundPtr FindCompound(const String& id, virtual CompoundPtr FindCompound(const String& id,
Compound::Dialect dialect) const; Compound::Dialect dialect,
const String& by="tlc") const;
private: private:
static CompoundMap InitCompounds(); static CompoundMap InitCompounds();
// since this information is never going to change, it is shared // since this information is never going to change, it is shared
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment