diff --git a/modules/conop/src/compound.hh b/modules/conop/src/compound.hh index 69c11d69ea4064f3c78dd7e749b9da44d7a8d532..07916c231b868266dce8642d85d7e94172c36147 100644 --- a/modules/conop/src/compound.hh +++ b/modules/conop/src/compound.hh @@ -179,6 +179,12 @@ public: const String& GetID() const { return tlc_; } + + /// \brief set three-letter code that is unique for every compound + void SetID(const String& id) { + tlc_ = id; + } + Dialect GetDialect() const { return dialect_; } String GetDialectAsString() const { diff --git a/modules/io/src/mol/chemdict_parser.cc b/modules/io/src/mol/chemdict_parser.cc index 2213225d696f832b460799d00cda331f8bd24354..382a983affb24a05ca52ad68c899c558d7fe66a4 100644 --- a/modules/io/src/mol/chemdict_parser.cc +++ b/modules/io/src/mol/chemdict_parser.cc @@ -7,6 +7,8 @@ using namespace ost::conop; bool ChemdictParser::OnBeginData(const StringRef& data_name) { + // Create empty compound skeleton + valid_compound_ = false; compound_.reset(new Compound(data_name.str())); compound_->SetDialect(dialect_); if (last_!=data_name[0]) { @@ -14,6 +16,7 @@ bool ChemdictParser::OnBeginData(const StringRef& data_name) std::cout << last_ << std::flush; } atom_map_.clear(); + valid_atom_ = false; return true; } @@ -94,6 +97,14 @@ void ChemdictParser::OnDataRow(const StarLoopDesc& header, void ChemdictParser::OnDataItem(const StarDataItem& item) { if (item.GetCategory()==StringRef("chem_comp", 9)) { + if (item.GetName()==StringRef("id", 2)) { + if (compound_->GetID() != item.GetValue().str()) { + LOG_INFO("_chem_comp.id '" << item.GetValue() << "' doesn't match " + << "ID from data block '" << compound_->GetID() << "'"); + compound_->SetID(item.GetValue().str()); + } + valid_compound_ = true; + } if (item.GetName()==StringRef("type", 4)) { // convert type to uppercase String type=item.GetValue().str(); @@ -159,6 +170,7 @@ void ChemdictParser::OnDataItem(const StarDataItem& item) } } else if (item.GetName()==StringRef("atom_id", 7)) { 
atom_.name=item.GetValue().str(); + valid_atom_ = true; } else if (item.GetName()==StringRef("alt_atom_id", 11)) { if (compound_->GetID()=="ILE" && item.GetValue()==StringRef("CD1", 3)) { atom_.alt_name="CD"; @@ -177,12 +189,24 @@ void ChemdictParser::OnEndData() { if (compound_) { - if (compound_->GetID() != "UNL" && + if (! valid_compound_) + { + LOG_WARNING("Skipping compound without _chem_comp.id: " << compound_->GetID()); + } + else if (compound_->GetID() != "UNL" && ! (ignore_reserved_ && IsNameReserved(compound_->GetID())) && ! (ignore_obsolete_ && compound_->GetObsolete())) { if (compound_->GetAtomSpecs().empty()) { - compound_->AddAtom(atom_); + // This happens if we had a single atom + if (valid_atom_) + { + compound_->AddAtom(atom_); + } + else + { + LOG_WARNING("Adding compound with no atoms: " << compound_->GetID()); + } } lib_->AddCompound(compound_); } diff --git a/modules/io/src/mol/chemdict_parser.hh b/modules/io/src/mol/chemdict_parser.hh index 7a30cc8fcb434fe825ed887b3159ac5f7ed16180..46a83ec3b36d599b917d08c280119911d63d034c 100644 --- a/modules/io/src/mol/chemdict_parser.hh +++ b/modules/io/src/mol/chemdict_parser.hh @@ -71,6 +71,7 @@ private: bool IsNameReserved(const String& data_name); conop::CompoundLibPtr lib_; conop::CompoundPtr compound_; + bool valid_compound_; typedef enum { ATOM_NAME=0, ALT_ATOM_NAME=1, @@ -93,6 +94,7 @@ private: std::map<String, int> atom_map_; LoopType loop_type_; conop::AtomSpec atom_; + bool valid_atom_; conop::Compound::Dialect dialect_; bool ignore_reserved_; bool ignore_obsolete_;