Skip to content
Snippets Groups Projects
Unverified Commit 454ba5a7 authored by Xavier Robin
Browse files

fix: do not fill compound lib with invalid compounds and atoms

Reading the BIRD data from prd-all.cif.gz, which doesn't contain any
compounds, would result in a compound lib filled with dummy compounds and
atoms containing no useful data. This commit skips, with a warning,
compounds with no _chem_comp.id (the key data item) and non-loop atoms
without an _chem_comp_atom.atom_id.
The result of reading prd-all.cif.gz is now an empty compound lib.
parent 74dea35c
No related branches found
No related tags found
No related merge requests found
......@@ -179,6 +179,12 @@ public:
// Read-only accessor: hands back the stored tlc_ by const reference.
const String& GetID() const { return tlc_; }
/// \brief overwrite the three-letter code that is unique for every compound
/// \param id new identifier; it is copied into tlc_
void SetID(const String& id) { tlc_ = id; }
// Accessor for the stored dialect_ value (returned by value).
Dialect GetDialect() const
{
  return dialect_;
}
String GetDialectAsString() const {
......
......@@ -7,6 +7,8 @@ using namespace ost::conop;
bool ChemdictParser::OnBeginData(const StringRef& data_name)
{
// Create empty compound skeleton
valid_compound_ = false;
compound_.reset(new Compound(data_name.str()));
compound_->SetDialect(dialect_);
if (last_!=data_name[0]) {
......@@ -14,6 +16,7 @@ bool ChemdictParser::OnBeginData(const StringRef& data_name)
std::cout << last_ << std::flush;
}
atom_map_.clear();
valid_atom_ = false;
return true;
}
......@@ -94,6 +97,14 @@ void ChemdictParser::OnDataRow(const StarLoopDesc& header,
void ChemdictParser::OnDataItem(const StarDataItem& item)
{
if (item.GetCategory()==StringRef("chem_comp", 9)) {
// Seeing _chem_comp.id is what marks the current compound as valid.
// If its value differs from the ID the compound currently carries, the
// mismatch is logged and the compound is renamed to the _chem_comp.id value.
if (item.GetName()==StringRef("id", 2)) {
  // Convert once; the value is needed for the comparison, the log and SetID.
  const String chem_comp_id=item.GetValue().str();
  if (compound_->GetID() != chem_comp_id) {
    // The second literal starts with a space: both pieces are streamed
    // back to back and would otherwise read "doesn't matchID".
    LOG_INFO("_chem_comp.id '" << chem_comp_id << "' doesn't match"
             << " ID from data block '" << compound_->GetID() << "'");
    compound_->SetID(chem_comp_id);
  }
  valid_compound_ = true;
}
if (item.GetName()==StringRef("type", 4)) {
// convert type to uppercase
String type=item.GetValue().str();
......@@ -159,6 +170,7 @@ void ChemdictParser::OnDataItem(const StarDataItem& item)
}
} else if (item.GetName()==StringRef("atom_id", 7)) {
atom_.name=item.GetValue().str();
valid_atom_ = true;
} else if (item.GetName()==StringRef("alt_atom_id", 11)) {
if (compound_->GetID()=="ILE" && item.GetValue()==StringRef("CD1", 3)) {
atom_.alt_name="CD";
......@@ -177,12 +189,24 @@ void ChemdictParser::OnEndData()
{
if (compound_)
{
if (compound_->GetID() != "UNL" &&
if (! valid_compound_)
{
LOG_WARNING("Skipping compound without _chem_comp.id: " << compound_->GetID());
}
else if (compound_->GetID() != "UNL" &&
! (ignore_reserved_ && IsNameReserved(compound_->GetID())) &&
! (ignore_obsolete_ && compound_->GetObsolete()))
{
if (compound_->GetAtomSpecs().empty()) {
compound_->AddAtom(atom_);
// This happens if we had a single atom
if (valid_atom_)
{
compound_->AddAtom(atom_);
}
else
{
LOG_WARNING("Adding compound with no atoms: " << compound_->GetID());
}
}
lib_->AddCompound(compound_);
}
......
......@@ -71,6 +71,7 @@ private:
bool IsNameReserved(const String& data_name);
conop::CompoundLibPtr lib_;
conop::CompoundPtr compound_;
bool valid_compound_;
typedef enum {
ATOM_NAME=0,
ALT_ATOM_NAME=1,
......@@ -93,6 +94,7 @@ private:
std::map<String, int> atom_map_;
LoopType loop_type_;
conop::AtomSpec atom_;
bool valid_atom_;
conop::Compound::Dialect dialect_;
bool ignore_reserved_;
bool ignore_obsolete_;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment