Skip to content
Snippets Groups Projects
Unverified Commit 486a5b2b authored by Xavier Robin's avatar Xavier Robin
Browse files

feat: SCHWED-6002 add SMILES to compound lib

parent e2f2ee92
No related branches found
No related tags found
No related merge requests found
......@@ -126,6 +126,9 @@ void export_Compound() {
.add_property("inchi_key",
make_function(&Compound::GetInchiKey,
return_value_policy<copy_const_reference>()))
.add_property("smiles",
make_function(&Compound::GetSMILES,
return_value_policy<copy_const_reference>()))
;
class_<AtomSpec>("AtomSpec", no_init)
......
......@@ -149,6 +149,7 @@ public:
name_(),
inchi_(),
inchi_key_(),
smiles_(),
atom_specs_(),
bond_specs_(),
chem_class_(),
......@@ -252,6 +253,10 @@ public:
/// \brief Read-only access to the InChI key stored for this compound.
///
/// Const-qualified so it can be called through a const Compound; the
/// getter only returns a reference to the member and never modifies it.
const String& GetInchiKey() const { return inchi_key_; }
/// \brief Replace the SMILES string stored for this compound.
/// \param smiles new value, copied into the compound
void SetSMILES(const String& smiles)
{
  smiles_ = smiles;
}
/// \brief Read-only access to the SMILES string stored for this compound.
///
/// Const-qualified so it can be called through a const Compound; the
/// getter only returns a reference to the member and never modifies it.
const String& GetSMILES() const { return smiles_; }
/// \brief Read-only access to the list of bond specifications of this
///     compound.
const BondSpecList& GetBondSpecs() const { return bond_specs_; }
......@@ -281,6 +286,7 @@ private:
String name_;
String inchi_;
String inchi_key_;
String smiles_;
AtomSpecList atom_specs_;
BondSpecList bond_specs_;
mol::ChemClass chem_class_;
......
......@@ -62,7 +62,8 @@ const char* CREATE_CMD[]={
" pdb_modified TIMESTAMP, "
" name VARCHAR(256), "
" inchi_code TEXT, "
" inchi_key TEXT "
" inchi_key TEXT, "
" smiles TEXT "
");",
" CREATE UNIQUE INDEX IF NOT EXISTS commpound_tlc_index ON chem_compounds "
" (tlc, dialect)",
......@@ -98,8 +99,9 @@ const char* CREATE_CMD[]={
const char* INSERT_COMPOUND_STATEMENT="INSERT INTO chem_compounds "
" (tlc, olc, dialect, chem_class, chem_type, formula, pdb_initial, pdb_modified, name, inchi_code, inchi_key) "
" VALUES (?, ?, ?, ?, ?, ?, DATE(?), DATE(?), ?, ?, ?)";
" (tlc, olc, dialect, chem_class, chem_type, formula, pdb_initial, "
" pdb_modified, name, inchi_code, inchi_key, smiles) "
" VALUES (?, ?, ?, ?, ?, ?, DATE(?), DATE(?), ?, ?, ?, ?)";
const char* INSERT_ATOM_STATEMENT="INSERT INTO atoms "
" (compound_id, name, alt_name, element, is_aromatic, stereo_conf, "
......@@ -262,6 +264,8 @@ void CompoundLib::AddCompound(const CompoundPtr& compound)
compound->GetInchi().length(), NULL);
sqlite3_bind_text(stmt, 11, compound->GetInchiKey().c_str(),
compound->GetInchiKey().length(), NULL);
sqlite3_bind_text(stmt, 12, compound->GetSMILES().c_str(),
compound->GetSMILES().length(), NULL);
} else {
LOG_ERROR(sqlite3_errmsg(db_->ptr));
sqlite3_finalize(stmt);
......@@ -405,6 +409,13 @@ CompoundLibPtr CompoundLib::Load(const String& database, bool readonly)
&stmt, NULL);
lib->inchi_available_ = retval==SQLITE_OK;
sqlite3_finalize(stmt);
// check if SMILES are available
aq="SELECT smiles FROM chem_compounds LIMIT 1";
retval=sqlite3_prepare_v2(lib->db_->ptr, aq.c_str(),
static_cast<int>(aq.length()),
&stmt, NULL);
lib->smiles_available_ = retval==SQLITE_OK;
sqlite3_finalize(stmt);
lib->creation_date_ = lib->GetCreationDate();
lib->ost_version_used_ = lib->GetOSTVersionUsed();
......@@ -471,17 +482,24 @@ CompoundPtr CompoundLib::FindCompound(const String& id,
return i->second;
}
String query="SELECT id, tlc, olc, chem_class, dialect, formula";
int col_offset = 0;
int col_offset_inchi = 0;
int col_offset_smiles = 0;
if(chem_type_available_) {
query+=", chem_type";
col_offset+=1;
col_offset_inchi+=1;
col_offset_smiles+=1;
if(name_available_) {
query+=", name";
col_offset+=1;
col_offset_inchi+=1;
col_offset_smiles+=1;
}
}
if(inchi_available_) {
query+=", inchi_code, inchi_key";
col_offset_smiles+=2;
}
if(smiles_available_) {
query+=", smiles";
}
query+=" FROM chem_compounds"
......@@ -515,15 +533,22 @@ CompoundPtr CompoundLib::FindCompound(const String& id,
}
}
if (inchi_available_) {
const char* inchi_code=reinterpret_cast<const char*>(sqlite3_column_text(stmt, 6+col_offset));
const char* inchi_code=reinterpret_cast<const char*>(sqlite3_column_text(stmt, 6+col_offset_inchi));
if (inchi_code) {
compound->SetInchi(inchi_code);
}
const char* inchi_key=reinterpret_cast<const char*>(sqlite3_column_text(stmt, 6+col_offset+1));
const char* inchi_key=reinterpret_cast<const char*>(sqlite3_column_text(stmt, 6+col_offset_inchi+1));
if (inchi_key) {
compound->SetInchiKey(inchi_key);
}
}
if (smiles_available_) {
const char* smiles=reinterpret_cast<const char*>(sqlite3_column_text(stmt, 6+col_offset_smiles));
if (smiles) {
compound->SetSMILES(smiles);
}
}
// Load atoms and bonds
this->LoadAtomsFromDB(compound, pk);
this->LoadBondsFromDB(compound, pk);
......@@ -548,6 +573,7 @@ CompoundLib::CompoundLib():
chem_type_available_(false),
name_available_(),
inchi_available_(),
smiles_available_(),
creation_date_(),
ost_version_used_() { }
......
......@@ -58,6 +58,7 @@ private:
bool chem_type_available_; // whether pdbx_type is available in db
bool name_available_; // whether name is available in db
bool inchi_available_; //whether inchi is available in db
bool smiles_available_; //whether smiles are available in db
Date creation_date_;
String ost_version_used_;
};
......
......@@ -7,8 +7,8 @@ import tempfile
class TestCompLib(unittest.TestCase):
def test_three_vs_five_letter_code(self):
@classmethod
def setUpClass(cls):
prefix_path = ost.GetPrefixPath()
chemdict_tool_path = os.path.join(prefix_path, "bin", "chemdict_tool")
if not os.path.exists(chemdict_tool_path):
......@@ -18,8 +18,11 @@ class TestCompLib(unittest.TestCase):
complib_path = os.path.join(tmp_dir.name, "test_complib.dat")
cmd = [chemdict_tool_path, "create", compounds_path, complib_path]
subprocess.run(cmd)
cls.complib = conop.CompoundLib.Load(complib_path)
tmp_dir.cleanup()
complib = conop.CompoundLib.Load(complib_path)
def test_three_vs_five_letter_code(self):
complib = self.complib
comp_001 = complib.FindCompound("001")
comp_hello = complib.FindCompound("hello")
......@@ -29,6 +32,12 @@ class TestCompLib(unittest.TestCase):
self.assertFalse(comp_hello is None)
self.assertTrue(comp_yolo is None)
def test_smiles(self):
complib = self.complib
comp_001 = complib.FindCompound("001")
self.assertTrue(comp_001.smiles == "COc1cc(cc(c1OC)OC)C(C(=O)N2CCCC[C@H]2C(=O)O[C@@H](CCCc3ccccc3)CCCc4cccnc4)(F)F")
if __name__ == "__main__":
from ost import testutils
testutils.RunTests()
......@@ -21,6 +21,7 @@ class TestCompound(unittest.TestCase):
self.assertEqual(compound.inchi,
"1S/C3H7NO2/c1-2(4)3(5)6/h2H,4H2,1H3,(H,5,6)/t2-/m0/s1")
self.assertEqual(compound.inchi_key, "QNAYBMKLOCPYGJ-REOHCLBHSA-N")
self.assertEqual(compound.smiles, "C[C@@H](C(=O)O)N" )
if __name__=='__main__':
......
......@@ -85,6 +85,9 @@ void ChemdictParser::OnDataRow(const StarLoopDesc& header,
compound_->SetInchi(columns[indices_[DESC]].substr(6).str());
} else if (columns[indices_[DESC_TYPE]] == StringRef("InChIKey", 8)) {
compound_->SetInchiKey(columns[indices_[DESC]].str());
} else if (columns[indices_[DESC_TYPE]] == StringRef("SMILES_CANONICAL", 16) &&
columns[indices_[PROGRAM]] == StringRef("OpenEye OEToolkits", 18)) {
compound_->SetSMILES(columns[indices_[DESC]].str());
}
}
}
......
......@@ -80,7 +80,8 @@ private:
ATOM_ID2=1,
BOND_ORDER=2,
DESC_TYPE=0,
DESC=1
DESC=1,
PROGRAM=2,
} PropIndex;
char last_;
int indices_[10];
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment