From f5acb7f9710b308f071bdbce0d50f877481e9013 Mon Sep 17 00:00:00 2001 From: Xavier Robin <xavier.robin@unibas.ch> Date: Fri, 5 Jan 2024 15:49:11 +0100 Subject: [PATCH] feat: optionally ignore reserved compounds --- modules/conop/src/chemdict_tool.cc | 24 +++++++++++++-------- modules/io/src/mol/chemdict_parser.cc | 30 +++++++++++++++++++++++++-- modules/io/src/mol/chemdict_parser.hh | 8 +++++-- 3 files changed, 49 insertions(+), 13 deletions(-) diff --git a/modules/conop/src/chemdict_tool.cc b/modules/conop/src/chemdict_tool.cc index 404f50b8a..eff12c73e 100644 --- a/modules/conop/src/chemdict_tool.cc +++ b/modules/conop/src/chemdict_tool.cc @@ -35,30 +35,36 @@ using namespace ost; void PrintUsage() { - std::cout << "usage: chemdict_tool action <compound-dict> <db> (pdb|charmm|amber|opls)" << std::endl; + std::cout << "usage: chemdict_tool ACTION <compound-dict> <db> [DIALECT] [OPTIONS]" << std::endl; std::cout << "supported actions are:" << std::endl; std::cout << " create - creates a new db " << std::endl; std::cout << " update - update existing db" << std::endl; + std::cout << "supported dialects are: pdb, charmm, amber, opls" << std::endl; + std::cout << "supported options are:" << std::endl; + std::cout << " -i - ignore compounds reserved by the PDB (01-99, DRG, INH, LIG)" << std::endl; } int main(int argc, char const *argv[]) { - if (argc!=4 && argc!=5) { + if (argc!=4 && argc!=5 && argc!=6) { PrintUsage(); return 0; } conop::Compound::Dialect dialect=conop::Compound::PDB; - if (argc==5) { - String format=argv[4]; + bool ignore_reserved=false; + for (int i = 4; i < argc; i++) { + String param=argv[i]; - if (format=="charmm") { + if (param=="charmm") { dialect=conop::Compound::CHARMM; - } else if (format=="pdb") { + } else if (param=="pdb") { dialect=conop::Compound::PDB; - } else if (format=="opls") { + } else if (param=="opls") { dialect=conop::Compound::OPLS; - } else if (format=="amber") { + } else if (param=="amber") { dialect=conop::Compound::AMBER; + } else if (param=="-i") { + ignore_reserved=true; } else { PrintUsage(); return 0; @@ -75,7 +81,7 @@ int main(int argc, char const *argv[]) filtered_istream.push(boost::iostreams::gzip_decompressor()); } filtered_istream.push(istream); - io::ChemdictParser cdp(filtered_istream, dialect); + io::ChemdictParser cdp(filtered_istream, dialect, ignore_reserved); conop::CompoundLibPtr compound_lib; bool in_mem=false; if (!strncmp(argv[1], "create", 6)) { diff --git a/modules/io/src/mol/chemdict_parser.cc b/modules/io/src/mol/chemdict_parser.cc index a6e6b71db..d838d232a 100644 --- a/modules/io/src/mol/chemdict_parser.cc +++ b/modules/io/src/mol/chemdict_parser.cc @@ -17,8 +17,13 @@ bool ChemdictParser::OnBeginData(const StringRef& data_name) std::cout << last_ << std::flush; } atom_map_.clear(); - insert_=true; - return true; + if (ignore_reserved_ && IsNameReserved(data_name)) { + insert_=false; + } + else { + insert_=true; + } + return true; } bool ChemdictParser::OnBeginLoop(const StarLoopDesc& header) @@ -252,4 +257,25 @@ void ChemdictParser::InitPDBXTypeMap() xtm_["?"]=mol::ChemType(mol::ChemType::UNKNOWN); } +bool ChemdictParser::IsNameReserved(const StringRef& data_name) +{ + // This checks if the compound name is one of the reserved name that will + // never be used in the PDB. See + // https://www.rcsb.org/news/feature/630fee4cebdf34532a949c34 + if ( // Check if in range 01-99 + (data_name.length() == 2 && + data_name[0] >= '0' && data_name[0] <= '9' && + data_name[1] >= '0' && data_name[1] <= '9' && + data_name != StringRef("00", 2)) || + data_name == StringRef("DRG", 3) || + data_name == StringRef("INH", 3) || + data_name == StringRef("LIG", 3) + ) + { + std::cout << "Ignoring reserved compound " << data_name << std::endl; + return true; + } + return false; +} + }} diff --git a/modules/io/src/mol/chemdict_parser.hh b/modules/io/src/mol/chemdict_parser.hh index f33ee72fb..00a395b98 100644 --- a/modules/io/src/mol/chemdict_parser.hh +++ b/modules/io/src/mol/chemdict_parser.hh @@ -40,9 +40,11 @@ typedef enum { class DLLEXPORT_OST_IO ChemdictParser : public StarParser { public: - ChemdictParser(std::istream& stream, conop::Compound::Dialect dialect): + ChemdictParser(std::istream& stream, conop::Compound::Dialect dialect, + bool ignore_reserved=false): StarParser(stream), compound_(new conop::Compound("UNK")), - last_(0), loop_type_(DONT_KNOW), dialect_(dialect) + last_(0), loop_type_(DONT_KNOW), dialect_(dialect), + ignore_reserved_(ignore_reserved) { this->InitTypeMap(); this->InitPDBXTypeMap(); @@ -66,6 +68,7 @@ public: private: void InitTypeMap(); void InitPDBXTypeMap(); + bool IsNameReserved(const StringRef& data_name); conop::CompoundLibPtr lib_; conop::CompoundPtr compound_; typedef enum { @@ -92,6 +95,7 @@ private: LoopType loop_type_; conop::AtomSpec atom_; conop::Compound::Dialect dialect_; + bool ignore_reserved_; }; -- GitLab