diff --git a/modules/conop/pymod/export_compound.cc b/modules/conop/pymod/export_compound.cc index a2599acf43b004562cd20efd198456a9a577b10a..0b7e0dde8d24422a0758d91019aebf0d1252701b 100644 --- a/modules/conop/pymod/export_compound.cc +++ b/modules/conop/pymod/export_compound.cc @@ -57,11 +57,17 @@ char get_chemclass(CompoundPtr compound) return char(compound->GetChemClass()); } + void set_chemclass(CompoundPtr compound, char cc) { compound->SetChemClass(ChemClass(cc)); } +char get_chemtype(CompoundPtr compound) +{ + return char(compound->GetChemType()); +} + CompoundPtr find_compound(CompoundLibPtr comp_lib, const String& tlc, const String& dialect) { @@ -92,6 +98,7 @@ void export_Compound() { .def("IsPeptideLinking", &Compound::IsPeptideLinking) .add_property("chem_class", &get_chemclass, &set_chemclass) + .add_property("chem_type", &get_chemtype) .add_property("formula",make_function(&Compound::GetFormula, return_value_policy<copy_const_reference>()), &Compound::SetFormula) diff --git a/modules/conop/src/compound.hh b/modules/conop/src/compound.hh index d3f47bd973166a8bdc169729eef1be8db61a9d50..e33a7e38cfb840ebc5e5e050fbd7da55073e2c68 100644 --- a/modules/conop/src/compound.hh +++ b/modules/conop/src/compound.hh @@ -25,6 +25,7 @@ #include <ost/conop/module_config.hh> #include <ost/mol/chem_class.hh> +#include <ost/mol/chem_type.hh> namespace ost { namespace conop { @@ -120,7 +121,7 @@ public: } Dialect; Compound(const String& id) - : olc_('?'), tlc_(id), chem_class_(), dialect_(Compound::PDB) { + : olc_('?'), tlc_(id), chem_class_(), chem_type_(), dialect_(Compound::PDB){ } /// \brief three-letter code that is unique for every compound @@ -166,6 +167,18 @@ public: return chem_class_; } + void SetChemType(mol::ChemType chem_type) { + chem_type_=chem_type; + } + + /// \brief PDB ligand classification from component dictionary + /// + /// The PDB classifies all compounds into 7 categories. 
This classification + /// is extracted from the PDB component dictionary (field: pdbx_type) + mol::ChemType GetChemType() const { + return chem_type_; + } + bool IsPeptideLinking() const { return chem_class_.IsPeptideLinking(); } @@ -221,6 +234,7 @@ private: AtomSpecList atom_specs_; BondSpecList bond_specs_; mol::ChemClass chem_class_; + mol::ChemType chem_type_; Dialect dialect_; Date creation_date_; Date mod_date_; diff --git a/modules/conop/src/compound_lib.cc b/modules/conop/src/compound_lib.cc index c16c8505a8ca06f703d2d3c4cbf5263d0764c610..c05e60fb6d2ce72343e6ab44d637526c16639707 100644 --- a/modules/conop/src/compound_lib.cc +++ b/modules/conop/src/compound_lib.cc @@ -41,6 +41,7 @@ const char* CREATE_CMD[]={ " olc VARCHAR(1) NOT NULL, " " dialect VARCHAR(1) NOT NULL, " " chem_class VARCHAR(1), " +" chem_type VARCHAR(1), " " formula VARCHAR(64) NOT NULL, " " pdb_initial TIMESTAMP, " " pdb_modified TIMESTAMP " @@ -79,8 +80,8 @@ const char* CREATE_CMD[]={ const char* INSERT_COMPOUND_STATEMENT="INSERT INTO chem_compounds " -" (tlc, olc, dialect, chem_class, formula, pdb_initial, pdb_modified) " -" VALUES (?, ?, ?, ?, ?, DATE(?), DATE(?))"; +" (tlc, olc, dialect, chem_class, chem_type, formula, pdb_initial, pdb_modified) " +" VALUES (?, ?, ?, ?, ?, ?, DATE(?), DATE(?))"; const char* INSERT_ATOM_STATEMENT="INSERT INTO atoms " " (compound_id, name, alt_name, element, is_aromatic, stereo_conf, " @@ -106,11 +107,13 @@ void CompoundLib::AddCompound(const CompoundPtr& compound) compound->GetID().length(), NULL); char olc=compound->GetOneLetterCode(); sqlite3_bind_text(stmt, 2, &olc, 1, NULL); - char chem_type=compound->GetChemClass(); + char chem_class=compound->GetChemClass(); + char chem_type=compound->GetChemType(); char dialect=compound->GetDialect(); sqlite3_bind_text(stmt, 3, &dialect, 1, NULL); - sqlite3_bind_text(stmt, 4, &chem_type, 1, NULL); - sqlite3_bind_text(stmt, 5, compound->GetFormula().c_str(), + sqlite3_bind_text(stmt, 4, &chem_class, 1, NULL); + 
sqlite3_bind_text(stmt, 5, &chem_type, 1, NULL); + sqlite3_bind_text(stmt, 6, compound->GetFormula().c_str(), compound->GetFormula().length(), NULL); std::stringstream ss; ss << compound->GetCreationDate().year << "-" @@ -121,9 +124,9 @@ void CompoundLib::AddCompound(const CompoundPtr& compound) ss << compound->GetModificationDate().year << "-" << compound->GetModificationDate().month << "-" << compound->GetModificationDate().day; - sqlite3_bind_text(stmt, 6, date.c_str(), date.length(), NULL); + sqlite3_bind_text(stmt, 7, date.c_str(), date.length(), NULL); date=ss.str(); - sqlite3_bind_text(stmt, 7, date.c_str(), date.length(), NULL); + sqlite3_bind_text(stmt, 8, date.c_str(), date.length(), NULL); } else { LOG_ERROR(sqlite3_errmsg(conn_)); sqlite3_finalize(stmt); @@ -242,11 +245,18 @@ CompoundLibPtr CompoundLib::Load(const String& database, bool readonly) int flags=readonly ? SQLITE_OPEN_READONLY : SQLITE_OPEN_READWRITE; CompoundLibPtr lib(new CompoundLib); int retval=sqlite3_open_v2(database.c_str(), &lib->conn_, flags, NULL); - if (SQLITE_OK==retval) { - return lib; + if (SQLITE_OK!=retval) { + LOG_ERROR(sqlite3_errmsg(lib->conn_)); + return CompoundLibPtr(); } - LOG_ERROR(sqlite3_errmsg(lib->conn_)); - return CompoundLibPtr(); + // check if column chem_type exists in database + String aq="SELECT chem_type FROM chem_compounds LIMIT 1"; + sqlite3_stmt* stmt; + retval=sqlite3_prepare_v2(lib->conn_, aq.c_str(), + static_cast<int>(aq.length()), + &stmt, NULL); + lib->chem_type_available_ = retval==SQLITE_OK; + return lib; } void CompoundLib::LoadAtomsFromDB(CompoundPtr comp, int pk) { @@ -308,9 +318,12 @@ CompoundPtr CompoundLib::FindCompound(const String& id, if (i!=compound_cache_.end()) { return i->second; } - String query="SELECT id, tlc, olc, chem_class, dialect, formula " - " FROM chem_compounds" - " WHERE tlc='"+id+"' AND dialect='"+String(1, char(dialect))+"'"; + String query="SELECT id, tlc, olc, chem_class, dialect, formula"; + if(chem_type_available_) { 
+ query+=", chem_type"; + } + query+=" FROM chem_compounds" + " WHERE tlc='"+id+"' AND dialect='"+String(1, char(dialect))+"'"; sqlite3_stmt* stmt; int retval=sqlite3_prepare_v2(conn_, query.c_str(), static_cast<int>(query.length()), @@ -330,6 +343,9 @@ CompoundPtr CompoundLib::FindCompound(const String& id, compound->SetDialect(Compound::Dialect(sqlite3_column_text(stmt, 4)[0])); const char* f=reinterpret_cast<const char*>(sqlite3_column_text(stmt, 5)); compound->SetFormula(f); + if(chem_type_available_) { + compound->SetChemType(mol::ChemType(sqlite3_column_text(stmt, 6)[0])); + } // Load atoms and bonds this->LoadAtomsFromDB(compound, pk); this->LoadBondsFromDB(compound, pk); @@ -348,7 +364,7 @@ CompoundPtr CompoundLib::FindCompound(const String& id, } CompoundLib::CompoundLib() - : conn_(NULL) { + : conn_(NULL), chem_type_available_(false) { } CompoundLib::~CompoundLib() { diff --git a/modules/conop/src/compound_lib.hh b/modules/conop/src/compound_lib.hh index 0db0f763768cdc2870a7289c942d5c4233c999df..a552c5aee7754aea6a6759911389a4931c3ec3d0 100644 --- a/modules/conop/src/compound_lib.hh +++ b/modules/conop/src/compound_lib.hh @@ -52,6 +52,7 @@ private: private: CompoundMap compound_cache_; sqlite3* conn_; + bool chem_type_available_; // weather pdbx_type is available in db }; }} diff --git a/modules/conop/src/rule_based_builder.cc b/modules/conop/src/rule_based_builder.cc index 57c8f515407f89bbcebd8255075b919de2695e6f..35709c287d61181fa0beb5c5920a48ce7e8326b3 100644 --- a/modules/conop/src/rule_based_builder.cc +++ b/modules/conop/src/rule_based_builder.cc @@ -121,6 +121,7 @@ void RuleBasedBuilder::FillResidueProps(mol::ResidueHandle residue) if (!last_compound_) return; residue.SetChemClass(last_compound_->GetChemClass()); + residue.SetChemType(last_compound_->GetChemType()); residue.SetOneLetterCode(last_compound_->GetOneLetterCode()); }; @@ -172,6 +173,7 @@ void RuleBasedBuilder::ReorderAtoms(mol::ResidueHandle residue, LOG_WARNING("residue " << residue << 
" doesn't look like a standard " << residue.GetKey() << " (" << compound->GetFormula() << ")"); residue.SetChemClass(mol::ChemClass(mol::ChemClass::UNKNOWN)); + residue.SetChemType(mol::ChemType(mol::ChemType::UNKNOWN)); residue.SetOneLetterCode('?'); } } diff --git a/modules/io/src/mol/chemdict_parser.cc b/modules/io/src/mol/chemdict_parser.cc index 5934b8341cf3220addbca4eaccd313984c7ecb39..a44dc733803bf7b805d52b839c8d756b3941f929 100644 --- a/modules/io/src/mol/chemdict_parser.cc +++ b/modules/io/src/mol/chemdict_parser.cc @@ -93,7 +93,18 @@ void ChemdictParser::OnDataItem(const StarDataItem& item) << compound_->GetID() << std::endl; } } - + } else if (item.GetName()==StringRef("pdbx_type", 9)) { + String type=item.GetValue().str(); + for (String::iterator i=type.begin(), e=type.end(); i!=e; ++i) { + *i=toupper(*i); + } + std::map<String, mol::ChemType>::iterator i=xtm_.find(type); + if (i!=xtm_.end()) { + compound_->SetChemType(i->second); + } else { + std::cout << "unknown pdbx_type '" << type << "' for compound " + << compound_->GetID() << std::endl; + } } else if (item.GetName()==StringRef("formula", 7)) { compound_->SetFormula(item.GetValue().str()); if (compound_->GetFormula()=="H2 O") { @@ -136,6 +147,7 @@ void ChemdictParser::OnEndData() } std::map<String, mol::ChemClass> ChemdictParser::tm_=std::map<String, mol::ChemClass>(); +std::map<String, mol::ChemType> ChemdictParser::xtm_=std::map<String, mol::ChemType>(); void ChemdictParser::InitTypeMap() { @@ -168,4 +180,20 @@ void ChemdictParser::InitTypeMap() tm_["WATER"]=mol::ChemClass(mol::ChemClass::WATER); } +void ChemdictParser::InitPDBXTypeMap() +{ + if (!xtm_.empty()) + return; + xtm_["HETAI"]=mol::ChemType(mol::ChemType::IONS); + xtm_["HETAIN"]=mol::ChemType(mol::ChemType::NONCANONICALMOLS); + xtm_["ATOMS"]=mol::ChemType(mol::ChemType::SACCHARIDES); + xtm_["ATOMN"]=mol::ChemType(mol::ChemType::NUCLEOTIDES); + xtm_["ATOMP"]=mol::ChemType(mol::ChemType::AMINOACIDS); + 
xtm_["HETAC"]=mol::ChemType(mol::ChemType::COENZYMES); + xtm_["HETIC"]=mol::ChemType(mol::ChemType::WATERCOORDIONS); + xtm_["HETAD"]=mol::ChemType(mol::ChemType::DRUGS); + xtm_["HETAS"]=mol::ChemType(mol::ChemType::WATERS); + xtm_["?"]=mol::ChemType(mol::ChemType::UNKNOWN); +} + }} diff --git a/modules/io/src/mol/chemdict_parser.hh b/modules/io/src/mol/chemdict_parser.hh index 4df2f4e2ddc0e7425c7c10704d88d39acb1198c9..51142744c37de1e419f39395db04edc926801982 100644 --- a/modules/io/src/mol/chemdict_parser.hh +++ b/modules/io/src/mol/chemdict_parser.hh @@ -25,6 +25,7 @@ #include <ost/mol/chem_class.hh> +#include <ost/mol/chem_type.hh> #include <ost/io/mol/star_parser.hh> #include <ost/conop/compound_lib.hh> @@ -43,6 +44,7 @@ public: last_(0), loop_type_(DONT_KNOW), dialect_(dialect) { this->InitTypeMap(); + this->InitPDBXTypeMap(); } virtual bool OnBeginData(const StringRef& data_name); @@ -62,6 +64,7 @@ public: } private: void InitTypeMap(); + void InitPDBXTypeMap(); conop::CompoundLibPtr lib_; conop::CompoundPtr compound_; typedef enum { @@ -80,6 +83,7 @@ private: int indices_[10]; bool insert_; static std::map<String, mol::ChemClass> tm_; + static std::map<String, mol::ChemType> xtm_; std::map<String, int> atom_map_; LoopType loop_type_; conop::AtomSpec atom_; @@ -90,4 +94,4 @@ private: }} -#endif \ No newline at end of file +#endif diff --git a/modules/mol/base/pymod/export_residue.cc b/modules/mol/base/pymod/export_residue.cc index 75d74bdb577c618094ee18223102dfa56f932abe..272c96d5c9352303759211e78441a9dc1f49d4c1 100644 --- a/modules/mol/base/pymod/export_residue.cc +++ b/modules/mol/base/pymod/export_residue.cc @@ -21,6 +21,7 @@ using namespace boost::python; #include <ost/mol/chem_class.hh> +#include <ost/mol/chem_type.hh> #include <ost/mol/mol.hh> #include <ost/geom/export_helper/vector.hh> using namespace ost; @@ -72,6 +73,21 @@ void export_Residue() ; implicitly_convertible<char, ChemClass>(); + class_<ChemType>("ChemType", init<char>(args("chem_type"))) + 
.def(self!=self) + .def(self==self) + .def(self_ns::str(self)) + .def("IsIon", &ChemType::IsIon) + .def("IsNucleotide", &ChemType::IsNucleotide) + .def("IsSaccharide", &ChemType::IsSaccharide) + .def("IsAminoAcid", &ChemType::IsAminoAcid) + .def("IsCoenzyme", &ChemType::IsCoenzyme) + .def("IsDrug", &ChemType::IsDrug) + .def("IsNonCanonical", &ChemType::IsNonCanonical) + .def("IsKnown", &ChemType::IsKnown) + ; + implicitly_convertible<char, ChemType>(); + class_<ResNum>("ResNum", init<int>(args("num"))) .def(init<int,char>(args("num", "ins_code"))) .def("GetNum", &ResNum::GetNum) @@ -151,13 +167,15 @@ void export_Residue() .def("GetKey", &ResidueBase::GetKey, return_value_policy<copy_const_reference>()) - .def("GetName", &ResidueBase::GetName, + .def("GetName", &ResidueBase::GetName, return_value_policy<copy_const_reference>()) .def("GetNumber", &ResidueBase::GetNumber, return_value_policy<copy_const_reference>()) .def("GetChemClass", &ResidueBase::GetChemClass) .add_property("chem_class", &ResidueBase::GetChemClass, set_chemclass) .def("SetChemClass", set_chemclass) + .def("GetChemType", &ResidueBase::GetChemType) + .add_property("chem_type", &ResidueBase::GetChemType) .add_property("is_ligand", &ResidueBase::IsLigand, &ResidueBase::SetIsLigand) .def("IsLigand", &ResidueBase::IsLigand) .def("SetIsLigand", &ResidueBase::SetIsLigand) diff --git a/modules/mol/base/src/CMakeLists.txt b/modules/mol/base/src/CMakeLists.txt index ac669907f13fa5d08a2b08cf8bdb146e6f9e2330..1c2649f70ea0230dc4c5021afd9f2a618af60dcd 100644 --- a/modules/mol/base/src/CMakeLists.txt +++ b/modules/mol/base/src/CMakeLists.txt @@ -50,6 +50,7 @@ chain_handle.hh chain_view.hh chain_type.hh chem_class.hh +chem_type.hh coord_group.hh coord_source.hh in_mem_coord_source.hh diff --git a/modules/mol/base/src/chem_type.hh b/modules/mol/base/src/chem_type.hh new file mode 100644 index 0000000000000000000000000000000000000000..eac806433400daeb1ead20ecf88ddff558b64fc7 --- /dev/null +++ 
/// \brief single-character classification code of a chemical component
///
/// Value type wrapping one of the character codes declared below. A
/// default-constructed ChemType is ChemType::UNKNOWN. The object converts
/// implicitly to its character code.
struct ChemType {
  const static char IONS             ='I';
  const static char NONCANONICALMOLS ='M';
  const static char SACCHARIDES      ='S';
  const static char NUCLEOTIDES      ='N';
  const static char AMINOACIDS       ='A';
  const static char COENZYMES        ='E';
  const static char WATERCOORDIONS   ='C';
  const static char DRUGS            ='D';
  const static char WATERS           ='W';
  const static char UNKNOWN          ='U';

  /// \brief construct from a character code; the code is stored as is,
  ///     without checking that it is one of the constants above
  explicit ChemType(char chem_type)
    : chem_type_(chem_type) {
  }

  /// \brief construct an unknown chemical type
  ChemType()
    : chem_type_(UNKNOWN) {
  }

  /// \brief two chemical types are equal if their codes are equal
  bool operator==(const ChemType& cc) const {
    return cc.chem_type_==chem_type_;
  }

  /// \brief negation of operator==
  bool operator!=(const ChemType& cc) const {
    // Must be expressed through operator==: calling operator!= from here
    // would recurse without end.
    return !this->operator==(cc);
  }

  /// \brief true for both IONS and WATERCOORDIONS
  bool IsIon() const {
    return (chem_type_==ChemType::IONS ||
            chem_type_==ChemType::WATERCOORDIONS);
  }

  bool IsNucleotide() const {
    return (chem_type_==ChemType::NUCLEOTIDES);
  }

  bool IsSaccharide() const {
    return (chem_type_==ChemType::SACCHARIDES);
  }

  bool IsAminoAcid() const {
    return (chem_type_==ChemType::AMINOACIDS);
  }

  bool IsCoenzyme() const {
    return (chem_type_==ChemType::COENZYMES);
  }

  bool IsDrug() const {
    return (chem_type_==ChemType::DRUGS);
  }

  bool IsWater() const {
    return (chem_type_==ChemType::WATERS);
  }

  bool IsNonCanonical() const {
    return (chem_type_==ChemType::NONCANONICALMOLS);
  }

  /// \brief true for every code other than UNKNOWN
  bool IsKnown() const {
    return (chem_type_!=ChemType::UNKNOWN);
  }

  /// \brief the underlying character code
  operator char() const {
    return chem_type_;
  }

private:
  char chem_type_;
};
// TODO: this should be fixed to be a enum'ed type aka diff --git a/modules/mol/base/src/residue_base.cc b/modules/mol/base/src/residue_base.cc index 7d15ad5f6fff00b62a223b8f41417f25d0703d8d..07950307d012a33735f6630a5d171d8e8ad8c7ce 100644 --- a/modules/mol/base/src/residue_base.cc +++ b/modules/mol/base/src/residue_base.cc @@ -104,6 +104,18 @@ void ResidueBase::SetChemClass(ChemClass cc) Impl()->SetChemClass(cc); } +void ResidueBase::SetChemType(ChemType ct) +{ + this->CheckValidity(); + Impl()->SetChemType(ct); +} + +ChemType ResidueBase::GetChemType() const +{ + this->CheckValidity(); + return Impl()->GetChemType(); +}; + SecStructure ResidueBase::GetSecStructure() const { this->CheckValidity(); diff --git a/modules/mol/base/src/residue_base.hh b/modules/mol/base/src/residue_base.hh index c994b9a3098b48479300c811d55583cc213ad16c..993683cb223aaf661704ce0c08c81d16f55f1bbf 100644 --- a/modules/mol/base/src/residue_base.hh +++ b/modules/mol/base/src/residue_base.hh @@ -27,6 +27,7 @@ #include <ost/mol/sec_structure.hh> #include <ost/mol/handle_type_fw.hh> #include <ost/mol/chem_class.hh> +#include <ost/mol/chem_type.hh> #include <ost/generic_property.hh> #include "property_id.hh" @@ -55,7 +56,12 @@ namespace ost { namespace mol { /// participate in a peptide bond. For nucleotides, the chemical class is either /// ChemClass::RNA_LINKING or ChemClass::DNA_LINKING. For unknown compounds the /// chemical class is ChemClass::UNKNOWN. -/// +/// +/// In addition, residues have a ChemType. A classification of all components into +/// nine categories (ions, non-canonical molecules, saccharides, nucleotides, +/// amino acids, co-enzymes, water coordinated ions, drugs, solvents) as obtained +/// from the PDB. +/// /// When loading an entity from file, the one-letter and chemical class of a /// residue are assigned by the \ref conop::Builder "default builder". 
class DLLEXPORT_OST_MOL ResidueBase: @@ -116,7 +122,12 @@ public: ChemClass GetChemClass() const; void SetChemClass(ChemClass cc); - + + /// \brief PDB ligand classification from component dictionary + ChemType GetChemType() const; + + void SetChemType(ChemType ct); + /// \brief Get secondary structure type. By default, the residue is in COIL /// conformation. SecStructure GetSecStructure() const;