diff --git a/modules/conop/pymod/export_builder.cc b/modules/conop/pymod/export_builder.cc index 1e070a04e88b3abe64fd57caf856ea1496850d45..9e23e05698fa91f3d5568e46e5885fb4879c4f70 100644 --- a/modules/conop/pymod/export_builder.cc +++ b/modules/conop/pymod/export_builder.cc @@ -25,8 +25,19 @@ using namespace boost::python; using namespace ost::conop; void export_Builder() { + + enum_<Dialect>("Dialect") + .value("PDB_DIALECT", PDB_DIALECT) + .value("CHARMM_DIALECT", CHARMM_DIALECT) + .export_values() + ; //TODO Export virtual calls as Default* (see export_visitor.cc) class_<Builder>("Builder", no_init) + .add_property("dialect", &Builder::GetDialect, &Builder::SetDialect) + .add_property("strict_hydrogens", &Builder::GetStrictHydrogenMode, + &Builder::SetStrictHydrogenMode) + .def("GetDialect", &Builder::GetDialect) + .def("SetDialect", &Builder::SetDialect) .def("CompleteAtoms", &Builder::CompleteAtoms) .def("CheckResidueCompleteness", &Builder::CheckResidueCompleteness) .def("IdentifyResidue", &Builder::IdentifyResidue) diff --git a/modules/conop/pymod/export_sanitizer.cc b/modules/conop/pymod/export_sanitizer.cc deleted file mode 100644 index 3ed3810c5176de7a83c4e9b8d11aec79c247a741..0000000000000000000000000000000000000000 --- a/modules/conop/pymod/export_sanitizer.cc +++ /dev/null @@ -1,61 +0,0 @@ -//------------------------------------------------------------------------------ -// This file is part of the OpenStructure project <www.openstructure.org> -// -// Copyright (C) 2008-2010 by the OpenStructure authors -// -// This library is free software; you can redistribute it and/or modify it under -// the terms of the GNU Lesser General Public License as published by the Free -// Software Foundation; either version 3.0 of the License, or (at your option) -// any later version. -// This library is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more -// details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with this library; if not, write to the Free Software Foundation, Inc., -// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -//------------------------------------------------------------------------------ -#include <boost/python.hpp> -#include <boost/python/register_ptr_to_python.hpp> -#include <boost/python/suite/indexing/vector_indexing_suite.hpp> -using namespace boost::python; -#include <ost/conop/sanitizer.hh> -using namespace ost; -using namespace ost::conop; - -namespace { -struct WrappedSanitizer : Sanitizer -{ - WrappedSanitizer(PyObject *p) - : Sanitizer(CompoundLibPtr()), self(p) { - } - - WrappedSanitizer(PyObject *p, const Sanitizer& s) - : Sanitizer(s), self(p) {} - - virtual void OnUnknownAtom(const mol::AtomHandle& atom) { - call_method<void, mol::AtomHandle>(self, "OnUnknownAtom", atom); - } - void OnUnknownAtomDefault(const mol::AtomHandle& atom) { - } - virtual void OnMissingAtom(const mol::ResidueHandle& residue, - const String& atom) { - call_method<void, mol::ResidueHandle, String>(self, "OnMissingAtom", - residue, atom); - } - void OnMissingAtomDefault(const mol::ResidueHandle& residue, - const String& atom) { - } - private: - PyObject* self; -}; - -} - -void export_Sanitizer() { - class_<Sanitizer, WrappedSanitizer, bases<mol::EntityVisitor>,boost::noncopyable >("Sanitizer", init<CompoundLibPtr>()) - .def("OnUnknownAtom", &WrappedSanitizer::OnUnknownAtomDefault) - .def("OnMissingAtom", &WrappedSanitizer::OnMissingAtomDefault) - ; -} diff --git a/modules/conop/src/CMakeLists.txt b/modules/conop/src/CMakeLists.txt index 4d0840ea34d18bbe4f983c24f17d8ad8603769fd..21749612f5c026516b74e85a5f4ff45638b928a0 100644 --- a/modules/conop/src/CMakeLists.txt +++ b/modules/conop/src/CMakeLists.txt @@ -8,9 +8,11 @@ compound_lib.hh module_config.hh rule_based_builder.hh ring_finder.hh +chemdict_parser.hh ) set(OST_CONOP_SOURCES +chemdict_parser.cc builder.cc conop.cc heuristic_builder.cc diff --git a/modules/conop/src/builder.hh b/modules/conop/src/builder.hh index 9ece21a6344cc97fc9823f52b9b4e09dbedd0a16..5312d80e9e455293b16d6568a8d66971934b8718 100644 --- a/modules/conop/src/builder.hh +++ b/modules/conop/src/builder.hh @@ -29,6 +29,10 @@ namespace ost { namespace conop { +typedef enum { + PDB_DIALECT, + CHARMM_DIALECT +} Dialect; /// \brief abstract builder interface /// /// A builder serves several purposes: \li knows how to connect atoms of @@ -49,14 +53,20 @@ namespace ost { namespace conop { /// behaviour. class DLLEXPORT_OST_CONOP Builder { public: -public: + + Builder(): dialect_(PDB_DIALECT), strict_(false) { } virtual ~Builder(); /// \brief add any missing atoms to the residue based on its key, /// with coordinates set to zero virtual void CompleteAtoms(mol::ResidueHandle rh); - + virtual void SetDialect(Dialect dialect) { dialect_=dialect; } + + virtual void SetStrictHydrogenMode(bool strict) { strict_=strict; } + bool GetStrictHydrogenMode() const { return strict_; } + + Dialect GetDialect() const { return dialect_; } /// \brief verify that the given residue has all atoms it /// is supposed to have based on its key virtual void CheckResidueCompleteness(const mol::ResidueHandle& rh); @@ -131,6 +141,9 @@ public: /// |brief Connect \p atom with all atoms for whith IsBondFeasible() and /// AreResiduesConsecutive() returns true void DistanceBasedConnect(mol::AtomHandle atom); +private: + Dialect dialect_; + bool strict_; }; diff --git a/modules/conop/src/chemdict_parser.cc b/modules/conop/src/chemdict_parser.cc index 2c7df33c0928978cb91371e82fa3ba5565d949f1..511add0a6a6fc8ee6618f66478c75b79f062cc7d 100644 --- a/modules/conop/src/chemdict_parser.cc +++ b/modules/conop/src/chemdict_parser.cc @@ -5,6 +5,7 @@ namespace ost { namespace conop { bool ChemdictParser::OnBeginData(const StringRef& data_name) { compound_.reset(new Compound(data_name.str())); + compound_->SetDialect(dialect_); if (last_!=data_name[0]) { last_=data_name[0]; std::cout << last_ << std::flush; @@ -76,6 +77,7 @@ void ChemdictParser::OnDataItem(const io::StarDataItem& item) // The type of water is set to "?". let's change it to water... if (compound_->GetID()=="HOH") { compound_->SetChemClass(mol::ChemClass(mol::ChemClass::Water)); + compound_->SetOneLetterCode('.'); } else { std::map<String, mol::ChemClass>::iterator i=tm_.find(type); if (i!=tm_.end()) { @@ -148,6 +150,7 @@ void ChemdictParser::InitTypeMap() tm_["NON-POLYMER"]=mol::ChemClass(mol::ChemClass::NonPolymer); tm_["RNA OH 3 PRIME TERMINUS"]=mol::ChemClass(mol::ChemClass::RNALinking); tm_["?"]=mol::ChemClass(mol::ChemClass::Unknown); + tm_["WATER"]=mol::ChemClass(mol::ChemClass::Water); } }} \ No newline at end of file diff --git a/modules/conop/src/chemdict_parser.hh b/modules/conop/src/chemdict_parser.hh index 4753a389acdfa2fa7e71dbee74c2debbbfbf9a77..f08b84b2cd649a64ae78040db3ae89b7444d3ccd 100644 --- a/modules/conop/src/chemdict_parser.hh +++ b/modules/conop/src/chemdict_parser.hh @@ -38,9 +38,9 @@ typedef enum { class DLLEXPORT_OST_CONOP ChemdictParser : public io::StarParser { public: - ChemdictParser(std::istream& stream): - io::StarParser(stream), - compound_(new Compound("UNK")), last_(0), loop_type_(DONT_KNOW) + ChemdictParser(std::istream& stream, Compound::Dialect dialect): + io::StarParser(stream), compound_(new Compound("UNK")), + last_(0), loop_type_(DONT_KNOW), dialect_(dialect) { this->InitTypeMap(); } @@ -83,7 +83,11 @@ private: std::map<String, int> atom_map_; LoopType loop_type_; AtomSpec atom_; + Compound::Dialect dialect_; }; }} + + +#endif \ No newline at end of file diff --git a/modules/conop/src/chemdict_tool.cc b/modules/conop/src/chemdict_tool.cc index 8671b4fa4ebcba3bd1ece97d8aceab47c58c522d..929c49745b0055c4fa20be52ab1a3535727f7b48 100644 --- a/modules/conop/src/chemdict_tool.cc +++ b/modules/conop/src/chemdict_tool.cc @@ -28,210 +28,14 @@ #include <boost/iostreams/filtering_stream.hpp> #include <boost/iostreams/filter/gzip.hpp> -#include <ost/mol/mol.hh> -#include <ost/io/mol/star_parser.hh> -#include <ost/conop/compound_lib.hh> - -using namespace ost; +#include <ost/conop/chemdict_parser.hh> -typedef enum { - ATOM_SPEC, - BOND_SPEC, - DONT_KNOW -} LoopType; - -class ChemDictParser : public io::StarParser { -public: - ChemDictParser(std::istream& stream): - io::StarParser(stream), - compound_(new conop::Compound("UNK")), last_(0), loop_type_(DONT_KNOW) - { - this->InitTypeMap(); - } - - virtual bool OnBeginData(const StringRef& data_name) - { - compound_.reset(new conop::Compound(data_name.str())); - if (last_!=data_name[0]) { - last_=data_name[0]; - std::cout << last_ << std::flush; - } - atom_map_.clear(); - insert_=true; - return true; - } - - virtual bool OnBeginLoop(const io::StarLoopDesc& header) - { - if (header.GetCategory()=="chem_comp_atom") { - loop_type_=ATOM_SPEC; - indices_[ATOM_NAME]=header.GetIndex("atom_id"); - indices_[ALT_ATOM_NAME]=header.GetIndex("alt_atom_id"); - indices_[ELE]=header.GetIndex("type_symbol"); - indices_[IS_LEAVING]=header.GetIndex("pdbx_leaving_atom_flag"); - indices_[IS_AROMATIC]=header.GetIndex("pdbx_aromatic_flag"); - indices_[ORDINAL]=header.GetIndex("pdbx_ordinal"); - return true; - } else if (header.GetCategory()=="chem_comp_bond") { - loop_type_=BOND_SPEC; - indices_[ATOM_ID1]=header.GetIndex("atom_id_1"); - indices_[ATOM_ID2]=header.GetIndex("atom_id_2"); - indices_[BOND_ORDER]=header.GetIndex("value_order"); - return true; - } - loop_type_=DONT_KNOW; - return false; - } - - virtual void OnDataRow(const io::StarLoopDesc& header, - const std::vector<StringRef>& columns) - { - if (loop_type_==ATOM_SPEC) { - // compound_->AddBond() - conop::AtomSpec atom; - atom.is_leaving=columns[indices_[IS_LEAVING]][0]=='Y'; - atom.name=columns[indices_[ATOM_NAME]].str(); - atom.alt_name=columns[indices_[ALT_ATOM_NAME]].str(); - atom.ordinal=columns[indices_[ORDINAL]].to_int().second-1; - atom.element=columns[indices_[ELE]].str(); - atom.is_aromatic=columns[indices_[IS_AROMATIC]][0]=='Y'; - compound_->AddAtom(atom); - atom_map_[atom.name]=atom.ordinal; - } else if (loop_type_==BOND_SPEC) { - conop::BondSpec bond; - String atom_one=columns[indices_[ATOM_ID1]].str(); - String atom_two=columns[indices_[ATOM_ID2]].str(); - bond.atom_one=atom_map_[atom_one]; - bond.atom_two=atom_map_[atom_two]; - char b=columns[indices_[BOND_ORDER]][0]; - bond.order=b=='D' ? 2 : (b=='S' ? 1 : 3); - if (bond.atom_one>bond.atom_two) { - std::swap(bond.atom_one, bond.atom_two); - } - compound_->AddBond(bond); - } - } - - virtual void OnDataItem(const io::StarDataItem& item) - { - if (item.GetCategory()==StringRef("chem_comp", 9)) { - if (item.GetName()==StringRef("type", 4)) { - // convert type to uppercase - String type=item.GetValue().str(); - for (String::iterator i=type.begin(), e=type.end(); i!=e; ++i) { - *i=toupper(*i); - } - // The type of water is set to "?". let's change it to water... - if (compound_->GetID()=="HOH") { - compound_->SetChemClass(mol::ChemClass(mol::ChemClass::Water)); - } else { - std::map<String, mol::ChemClass>::iterator i=tm_.find(type); - if (i!=tm_.end()) { - compound_->SetChemClass(i->second); - } else { - std::cout << "unknown type '" << type << "' for compound " - << compound_->GetID() << std::endl; - } - } - - } else if (item.GetName()==StringRef("formula", 7)) { - compound_->SetFormula(item.GetValue().str()); - } else if (item.GetName()==StringRef("one_letter_code", 15)) { - if (item.GetValue().length()==1) { - compound_->SetOneLetterCode(item.GetValue()[0]); - } - } else if (item.GetName()==StringRef("pdbx_initial_date", 17)) { - compound_->SetCreationDate(conop::Date::FromString(item.GetValue())); - } else if (item.GetName()==StringRef("pdbx_modified_date", 18)) { - compound_->SetModificationDate(conop::Date::FromString(item.GetValue())); - } - } else if (item.GetName()==StringRef("atom_id", 7)) { - atom_.name=item.GetValue().str(); - } else if (item.GetName()==StringRef("alt_atom_id", 11)) { - atom_.alt_name=item.GetValue().str(); - } else if (item.GetName()==StringRef("type_symbol", 11)) { - atom_.element=item.GetValue().str(); - } else if (item.GetName()==StringRef("pdbx_ordinal", 12)) { - atom_.ordinal=item.GetValue().to_int().second-1; - } - } - - virtual void OnEndData() - { - if (insert_) { - if (compound_->GetAtomSpecs().empty()) { - compound_->AddAtom(atom_); - } - lib_->AddCompound(compound_); - } - } - - void SetCompoundLib(const conop::CompoundLibPtr& lib) - { - lib_=lib; - } -private: - void InitTypeMap(); - conop::CompoundLibPtr lib_; - conop::CompoundPtr compound_; - typedef enum { - ATOM_NAME=0, - ALT_ATOM_NAME=1, - IS_AROMATIC=2, - ORDINAL=3, - IS_LEAVING=4, - ELE=5, - STEREO_CONF=6, - ATOM_ID1=0, - ATOM_ID2=1, - BOND_ORDER=2 - } PropIndex; - char last_; - int indices_[10]; - bool insert_; - static std::map<String, mol::ChemClass> tm_; - std::map<String, int> atom_map_; - LoopType loop_type_; - conop::AtomSpec atom_; -}; - -std::map<String, mol::ChemClass> ChemDictParser::tm_=std::map<String, mol::ChemClass>(); - -void ChemDictParser::InitTypeMap() -{ - if (!tm_.empty()) - return; - tm_["L-PEPTIDE COOH CARBOXY TERMINUS"]=mol::ChemClass(mol::ChemClass::LPeptideLinking); - tm_["L-PEPTIDE NH3 AMINO TERMINUS"]=mol::ChemClass(mol::ChemClass::LPeptideLinking); - tm_["D-PEPTIDE NH3 AMINO TERMINUS"]=mol::ChemClass(mol::ChemClass::DPeptideLinking); - tm_["L-SACCHARIDE 1,4 AND 1,4 LINKING"]=mol::ChemClass(mol::ChemClass::LSaccharide); - tm_["D-SACCHARIDE 1,4 AND 1,4 LINKING"]=mol::ChemClass(mol::ChemClass::DSaccharide); - tm_["L-SACCHARIDE"]=mol::ChemClass(mol::ChemClass::LSaccharide); - tm_["D-SACCHARIDE"]=mol::ChemClass(mol::ChemClass::DSaccharide); - tm_["SACCHARIDE"]=mol::ChemClass(mol::ChemClass::Saccharide); - tm_["D-PEPTIDE LINKING"]=mol::ChemClass(mol::ChemClass::DPeptideLinking); - tm_["L-PEPTIDE LINKING"]=mol::ChemClass(mol::ChemClass::LPeptideLinking); - tm_["L-PEPTIDE-LINKING"]=mol::ChemClass(mol::ChemClass::LPeptideLinking); - tm_["DNA LINKING"]=mol::ChemClass(mol::ChemClass::DNALinking); - tm_["RNA LINKING"]=mol::ChemClass(mol::ChemClass::RNALinking); - tm_["L-DNA LINKING"]=mol::ChemClass(mol::ChemClass::DNALinking); - tm_["L-RNA LINKING"]=mol::ChemClass(mol::ChemClass::RNALinking); - tm_["R-DNA LINKING"]=mol::ChemClass(mol::ChemClass::DNALinking); - tm_["R-RNA LINKING"]=mol::ChemClass(mol::ChemClass::RNALinking); - tm_["DNA OH 3 PRIME TERMINUS"]=mol::ChemClass(mol::ChemClass::DNALinking); - tm_["PEPTIDE-LIKE"]=mol::ChemClass(mol::ChemClass::PeptideLinking); - tm_["PEPTIDE LINKING"]=mol::ChemClass(mol::ChemClass::PeptideLinking); - tm_["PEPTIDE-LINKING"]=mol::ChemClass(mol::ChemClass::PeptideLinking); - tm_["NON-POLYMER"]=mol::ChemClass(mol::ChemClass::NonPolymer); - tm_["RNA OH 3 PRIME TERMINUS"]=mol::ChemClass(mol::ChemClass::RNALinking); - tm_["?"]=mol::ChemClass(mol::ChemClass::Unknown); - -} +using namespace ost; void PrintUsage() { - std::cout << "usage: chemdict_tool action compound-dict db" << std::endl; + std::cout << "usage: chemdict_tool action <compound-dict> <db> (pdb|charmm|amber|opls)" << std::endl; std::cout << "supported actions are:" << std::endl; std::cout << " create - creates a new db " << std::endl; std::cout << " update - update existing db" << std::endl; @@ -239,17 +43,34 @@ void PrintUsage() int main(int argc, char const *argv[]) { - if (argc!=4) { + if (argc!=4 && argc!=5) { PrintUsage(); return 0; } + conop::Compound::Dialect dialect=conop::Compound::PDB; + if (argc==5) { + String format=argv[4]; + + if (format=="charmm") { + dialect=conop::Compound::CHARMM; + } else if (format=="pdb") { + dialect=conop::Compound::PDB; + } else if (format=="opls") { + dialect=conop::Compound::OPLS; + } else if (format=="amber") { + dialect=conop::Compound::AMBER; + } else { + PrintUsage(); + return 0; + } + } boost::iostreams::filtering_stream<boost::iostreams::input> filtered_istream; std::ifstream istream(argv[2]); if (boost::iequals(".gz", boost::filesystem::extension(argv[2]))) { filtered_istream.push(boost::iostreams::gzip_decompressor()); } filtered_istream.push(istream); - ChemDictParser cdp(filtered_istream); + conop::ChemdictParser cdp(filtered_istream, dialect); conop::CompoundLibPtr compound_lib; if (!strcmp(argv[1], "create")) { compound_lib=conop::CompoundLib::Create(argv[3]); @@ -267,4 +88,4 @@ int main(int argc, char const *argv[]) cdp.Parse(); in_mem_lib->Copy(argv[3]); return 0; -} +} \ No newline at end of file diff --git a/modules/conop/src/compound.hh b/modules/conop/src/compound.hh index 6df62c8567e14acc00cbd45f0cdb8153ae081ea5..f381649ab30a3a77fe4ff06f6cf6b87a600faaff 100644 --- a/modules/conop/src/compound.hh +++ b/modules/conop/src/compound.hh @@ -112,15 +112,39 @@ typedef boost::shared_ptr<Compound> CompoundPtr; /// \brief Knows about the atoms and bonds of a chemical compounds class DLLEXPORT_OST_CONOP Compound { public: + typedef enum { + PDB ='P', + CHARMM ='C', + OPLS ='O', + AMBER ='A', + } Dialect; + Compound(const String& id) - : olc_('?'), tlc_(id), chem_class_() { + : olc_('?'), tlc_(id), chem_class_(), dialect_(Compound::PDB) { } /// \brief three-letter code that is unique for every compound const String& GetID() const { return tlc_; } - + Dialect GetDialect() const { return dialect_; } + + String GetDialectAsString() const { + switch (dialect_) { + case CHARMM: + return "CHARMM"; + case PDB: + return "PDB"; + case OPLS: + return "OPLS"; + case AMBER: + return "AMBER"; + default: + return ""; + } + } + void SetDialect(Dialect dialect) { dialect_=dialect; } + void SetOneLetterCode(char olc) { olc_=olc; } @@ -192,10 +216,11 @@ private: AtomSpecList atom_specs_; BondSpecList bond_specs_; mol::ChemClass chem_class_; + Dialect dialect_; Date creation_date_; Date mod_date_; }; }} -#endif +#endif \ No newline at end of file diff --git a/modules/conop/src/compound_lib.cc b/modules/conop/src/compound_lib.cc index 75cfbd804d642b8ea5140eb4417b6f70e3a877b3..119712d18ff4a09459c4d832cb4d716b973f071c 100644 --- a/modules/conop/src/compound_lib.cc +++ b/modules/conop/src/compound_lib.cc @@ -37,13 +37,16 @@ namespace { const char* CREATE_CMD[]={ "CREATE TABLE IF NOT EXISTS chem_compounds ( " " id INTEGER PRIMARY KEY AUTOINCREMENT, " -" tlc VARCHAR(3) UNIQUE NOT NULL, " +" tlc VARCHAR(3) NOT NULL, " " olc VARCHAR(1) NOT NULL, " +" dialect VARCHAR(1) NOT NULL, " " chem_class VARCHAR(1), " " formula VARCHAR(64) NOT NULL, " " pdb_initial TIMESTAMP, " " pdb_modified TIMESTAMP " ");", +" CREATE UNIQUE INDEX IF NOT EXISTS commpound_tlc_index ON chem_compounds " +" (tlc, dialect)", "CREATE TABLE IF NOT EXISTS atoms ( " " id INTEGER PRIMARY KEY AUTOINCREMENT, " " compound_id INTEGER REFERENCES chem_compounds (id) ON DELETE CASCADE, " @@ -76,8 +79,8 @@ const char* CREATE_CMD[]={ const char* INSERT_COMPOUND_STATEMENT="INSERT INTO chem_compounds " -" (tlc, olc, chem_class, formula, pdb_initial, pdb_modified) " -" VALUES (?, ?, ?, ?, DATE(?), DATE(?))"; +" (tlc, olc, dialect, chem_class, formula, pdb_initial, pdb_modified) " +" VALUES (?, ?, ?, ?, ?, DATE(?), DATE(?))"; const char* INSERT_ATOM_STATEMENT="INSERT INTO atoms " " (compound_id, name, alt_name, element, is_aromatic, stereo_conf, " @@ -104,8 +107,10 @@ void CompoundLib::AddCompound(const CompoundPtr& compound) char olc=compound->GetOneLetterCode(); sqlite3_bind_text(stmt, 2, &olc, 1, NULL); char chem_type=compound->GetChemClass(); - sqlite3_bind_text(stmt, 3, &chem_type, 1, NULL); - sqlite3_bind_text(stmt, 4, compound->GetFormula().c_str(), + char dialect=compound->GetDialect(); + sqlite3_bind_text(stmt, 3, &dialect, 1, NULL); + sqlite3_bind_text(stmt, 4, &chem_type, 1, NULL); + sqlite3_bind_text(stmt, 5, compound->GetFormula().c_str(), compound->GetFormula().length(), NULL); std::stringstream ss; ss << compound->GetCreationDate().year << "-" @@ -116,9 +121,9 @@ void CompoundLib::AddCompound(const CompoundPtr& compound) ss << compound->GetModificationDate().year << "-" << compound->GetModificationDate().month << "-" << compound->GetModificationDate().day; - sqlite3_bind_text(stmt, 5, date.c_str(), date.length(), NULL); + sqlite3_bind_text(stmt, 6, date.c_str(), date.length(), NULL); date=ss.str(); - sqlite3_bind_text(stmt, 6, date.c_str(), date.length(), NULL); + sqlite3_bind_text(stmt, 7, date.c_str(), date.length(), NULL); } else { LOG_ERROR(sqlite3_errmsg(conn_)); sqlite3_finalize(stmt); @@ -127,7 +132,8 @@ void CompoundLib::AddCompound(const CompoundPtr& compound) retval=sqlite3_step(stmt); if (SQLITE_DONE!=retval) { if (sqlite3_errcode(conn_)==SQLITE_CONSTRAINT) { - LOG_ERROR("Compound '" << compound->GetID() << "' already exists."); + LOG_ERROR("Compound '" << compound->GetID() << "' already exists for the " + << compound->GetDialectAsString() << " dialect."); } else { LOG_ERROR(sqlite3_errmsg(conn_)); } @@ -193,12 +199,12 @@ CompoundLibPtr CompoundLib::Copy(const String& filename) const } int rc=sqlite3_errcode(clone->conn_); if (rc!=SQLITE_OK) { - std::cout << sqlite3_errmsg(clone->conn_) << std::endl; + LOG_ERROR(sqlite3_errmsg(clone->conn_)); return CompoundLibPtr(); } return clone; } - std::cout << sqlite3_errmsg(clone->conn_) << std::endl; + LOG_ERROR(sqlite3_errmsg(clone->conn_)); return CompoundLibPtr(); } @@ -218,7 +224,7 @@ CompoundLibPtr CompoundLib::Create(const String& database) sqlite3_finalize(stmt); assert(SQLITE_DONE==retval); } else { - std::cout << sqlite3_errmsg(lib->conn_) << std::endl; + LOG_ERROR(sqlite3_errmsg(lib->conn_)); sqlite3_finalize(stmt); return CompoundLibPtr(); } @@ -226,7 +232,7 @@ CompoundLibPtr CompoundLib::Create(const String& database) } return lib; } - std::cout << sqlite3_errmsg(lib->conn_) << std::endl; + LOG_ERROR(sqlite3_errmsg(lib->conn_)); return CompoundLibPtr(); } @@ -238,12 +244,14 @@ CompoundLibPtr CompoundLib::Load(const String& database) if (SQLITE_OK==retval) { return lib; } - std::cout << sqlite3_errmsg(lib->conn_) << std::endl; + LOG_ERROR(sqlite3_errmsg(lib->conn_)); return CompoundLibPtr(); } void CompoundLib::LoadAtomsFromDB(CompoundPtr comp, int pk) { - String aq=str(format("SELECT name, alt_name, element, ordinal, is_leaving FROM atoms WHERE compound_id=%d ORDER BY ordinal ASC") % pk); + String aq=str(format("SELECT name, alt_name, element, ordinal, is_leaving " + "FROM atoms WHERE compound_id=%d " + "ORDER BY ordinal ASC") % pk); sqlite3_stmt* stmt; int retval=sqlite3_prepare_v2(conn_, aq.c_str(), static_cast<int>(aq.length()), @@ -259,6 +267,8 @@ void CompoundLib::LoadAtomsFromDB(CompoundPtr comp, int pk) { atom_sp.is_leaving=bool(sqlite3_column_int(stmt, 4)!=0); comp->AddAtom(atom_sp); } + } else { + LOG_ERROR(sqlite3_errmsg(conn_)); } sqlite3_finalize(stmt); } @@ -285,16 +295,20 @@ void CompoundLib::LoadBondsFromDB(CompoundPtr comp, int pk) { bond_sp.order=sqlite3_column_int(stmt, 2); comp->AddBond(bond_sp); } - } + } else { + LOG_ERROR(sqlite3_errmsg(conn_)); + } sqlite3_finalize(stmt); } -CompoundPtr CompoundLib::FindCompound(const String& id) { +CompoundPtr CompoundLib::FindCompound(const String& id, + Compound::Dialect dialect) { CompoundMap::iterator i=compound_cache_.find(id); if (i!=compound_cache_.end()) { return i->second; } - String query="select id, tlc, olc, chem_class from chem_compounds where tlc='"+id+"'"; + String query="SELECT id, tlc, olc, chem_class, dialect FROM chem_compounds" + " WHERE tlc='"+id+"' AND dialect='"+String(1, char(dialect))+"'"; sqlite3_stmt* stmt; int retval=sqlite3_prepare_v2(conn_, query.c_str(), static_cast<int>(query.length()), @@ -307,10 +321,12 @@ CompoundPtr CompoundLib::FindCompound(const String& id) { } if (SQLITE_ROW==ret) { int pk=sqlite3_column_int(stmt, 0); + std::cout << pk << std::endl; const char* id=reinterpret_cast<const char*>(sqlite3_column_text(stmt, 1)); CompoundPtr compound(new Compound(id)); compound->SetOneLetterCode((sqlite3_column_text(stmt, 2))[0]); compound->SetChemClass(mol::ChemClass(sqlite3_column_text(stmt, 3)[0])); + compound->SetDialect(Compound::Dialect(sqlite3_column_text(stmt, 4)[0])); // Load atoms and bonds this->LoadAtomsFromDB(compound, pk); this->LoadBondsFromDB(compound, pk); @@ -320,7 +336,7 @@ CompoundPtr CompoundLib::FindCompound(const String& id) { } assert(SQLITE_DONE==sqlite3_step(stmt)); } else { - std::cout << "ERROR: " << sqlite3_errmsg(conn_) << std::endl; + LOG_ERROR("ERROR: " << sqlite3_errmsg(conn_)); sqlite3_finalize(stmt); return CompoundPtr(); } diff --git a/modules/conop/src/compound_lib.hh b/modules/conop/src/compound_lib.hh index f8d1b6c2e82163def3472176046903627da3c92e..8948e3479585f61e0fd9fa3d8b4b5c747fa97ccf 100644 --- a/modules/conop/src/compound_lib.hh +++ b/modules/conop/src/compound_lib.hh @@ -41,7 +41,7 @@ public: static CompoundLibPtr Create(const String& database); ~CompoundLib(); - CompoundPtr FindCompound(const String& id); + CompoundPtr FindCompound(const String& id, Compound::Dialect dialect); void AddCompound(const CompoundPtr& compound); CompoundLibPtr Copy(const String& filename) const; void ClearCache(); diff --git a/modules/conop/src/rule_based_builder.cc b/modules/conop/src/rule_based_builder.cc index d79dbf75b5838e2dd80c21c8b27bd7e59b83cb3a..daaaa050a2b03a539b7534615a5ec9eb883bc7f3 100644 --- a/modules/conop/src/rule_based_builder.cc +++ b/modules/conop/src/rule_based_builder.cc @@ -71,7 +71,8 @@ bool RuleBasedBuilder::HasUnknownAtoms(mol::ResidueHandle res) mol::AtomHandleList::iterator i=atoms.begin(); for (mol::AtomHandleList::iterator i=atoms.begin(), e=atoms.end(); i!=e; ++i) { - if ((*i).Impl()->GetState()==std::numeric_limits<unsigned int>::max()) { + if ((*i).Impl()->GetState()==std::numeric_limits<unsigned int>::max() && + this->GetStrictHydrogenMode() && (*i).GetElement()!="H") { return true; } } @@ -113,11 +114,14 @@ struct OrdinalComp { void RuleBasedBuilder::LookupCompound(const mol::ResidueHandle& rh) { - if ((last_compound_) && (rh.GetName()==last_compound_->GetID())) { - return; - } else { - last_compound_= compound_lib_->FindCompound(rh.GetName()); - } + Compound::Dialect dialect=this->GetDialect()==PDB_DIALECT ? Compound::PDB : Compound::CHARMM; + if ((last_compound_) && (rh.GetName()==last_compound_->GetID())) { + return; + } + last_compound_=compound_lib_->FindCompound(rh.GetName(), dialect); + if (!last_compound_ && this->GetDialect()!=PDB_DIALECT) { + last_compound_=compound_lib_->FindCompound(rh.GetName(), Compound::PDB); + } } void RuleBasedBuilder::ReorderAtoms(mol::ResidueHandle residue, @@ -211,9 +215,18 @@ void RuleBasedBuilder::ConnectAtomsOfResidue(mol::ResidueHandle rh) mol::AtomHandle a1=this->LocateAtom(atoms, bond.atom_one); mol::AtomHandle a2=this->LocateAtom(atoms, bond.atom_two); if (a1.IsValid() && a2.IsValid() && this->IsBondFeasible(a1, a2)) { + if (this->GetStrictHydrogenMode() && + (a1.GetElement()=="H" || a2.GetElement()=="H")) { + continue; + } e.Connect(a1, a2, bond.order); } } + for (mol::AtomHandleList::iterator i=atoms.begin(), e=atoms.end(); i!=e; ++i) { + if ((*i).GetElement()=="H" && (*i).GetBondCount()==0) { + this->DistanceBasedConnect(*i); + } + } } void RuleBasedBuilder::ConnectResidueToNext(mol::ResidueHandle rh, @@ -222,9 +235,10 @@ void RuleBasedBuilder::ConnectResidueToNext(mol::ResidueHandle rh, if (!next.IsValid()) { return; } + Compound::Dialect dialect=this->GetDialect()==PDB_DIALECT ? Compound::PDB : Compound::CHARMM; mol::XCSEditor e=rh.GetEntity().RequestXCSEditor(mol::BUFFERED_EDIT); - CompoundPtr mc=compound_lib_->FindCompound(rh.GetName()); - CompoundPtr nc=compound_lib_->FindCompound(next.GetName()); + CompoundPtr mc=compound_lib_->FindCompound(rh.GetName(), dialect); + CompoundPtr nc=compound_lib_->FindCompound(next.GetName(), dialect); if (!(mc && nc)) return; // check if both of the residues are able to form a peptide bond. @@ -300,10 +314,12 @@ void RuleBasedBuilder::FillAtomProps(mol::AtomHandle atom) { LookupCompound(atom.GetResidue()); if (!last_compound_) { + this->OnUnknownAtom(atom); return; } int index=last_compound_->GetAtomSpecIndex(atom.GetName()); if (index==-1) { + this->OnUnknownAtom(atom); return; } const AtomSpec& atom_spec=last_compound_->GetAtomSpecs()[index]; diff --git a/modules/conop/src/rule_based_builder.hh b/modules/conop/src/rule_based_builder.hh index d04f422727d7edf80718a61f2195a933d83f4a29..ff35e0adb1cd47f79b36fcc3ec80fa941a2f39cf 100644 --- a/modules/conop/src/rule_based_builder.hh +++ b/modules/conop/src/rule_based_builder.hh @@ -43,6 +43,14 @@ public: : compound_lib_(compound_lib), last_compound_() { } + virtual void SetDialect(Dialect dialect) { + if (this->GetDialect()!=dialect) { + Builder::SetDialect(dialect); + last_compound_=CompoundPtr(); + compound_lib_->ClearCache(); + } + } + /// \brief fill atom properties such as element and radius virtual void FillAtomProps(mol::AtomHandle atom); diff --git a/modules/conop/src/sanitizer.cc b/modules/conop/src/sanitizer.cc deleted file mode 100644 index 6f53ea8bcda7d8f865f069b060f0938f162fe69b..0000000000000000000000000000000000000000 --- a/modules/conop/src/sanitizer.cc +++ /dev/null @@ -1,108 +0,0 @@ -//------------------------------------------------------------------------------ -// This file is part of the OpenStructure project <www.openstructure.org> -// -// Copyright (C) 2008-2010 by the OpenStructure authors -// -// This library is free software; you can redistribute it and/or modify it under -// the terms of the GNU Lesser General Public License as published by the Free -// Software Foundation; either version 3.0 of the License, or (at your option) -// any later version. -// This library is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more -// details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with this library; if not, write to the Free Software Foundation, Inc., -// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -//------------------------------------------------------------------------------ -#include "sanitizer.hh" -#include <ost/mol/impl/atom_impl.hh> -#include <ost/log.hh> -#include <limits> -#include <ost/mol/impl/residue_impl.hh> -namespace ost { namespace conop { - - -Sanitizer::Sanitizer(const CompoundLibPtr& lib) - :compound_lib_(lib) { -} -bool Sanitizer::VisitAtom(const mol::AtomHandle& atom) { - return true; -} - -bool Sanitizer::VisitResidue(const mol::ResidueHandle& residue) { - if (CompoundPtr comp=this->IdentifyResidue(residue)) { - this->ReorderAtoms(residue, comp); - this->VerifyCompleteness(residue, comp); - return true; - } - LOG_ERROR("Unknown residue with name " << residue.GetName()); - return false; -} - -void Sanitizer::FillAtomProps(mol::AtomHandle atom, const AtomSpec& spec) { - mol::AtomProp props=atom.GetAtomProps(); - if (props.element!=spec.element) { - props.element=spec.element; - LOG_INFO("Correcting element for " << atom.GetQualifiedName() << - " (now " << spec.element << ", was " << props.element << ")"); - atom.SetAtomProps(props); - } -} - -void Sanitizer::VerifyCompleteness(const mol::ResidueHandle& residue, - CompoundPtr compound) { - AtomSpecList::const_iterator j=compound->GetAtomSpecs().begin(); - mol::AtomHandleList atoms=residue.GetAtomList(); - mol::AtomHandleList::iterator i=atoms.begin(); - for (; j!=compound->GetAtomSpecs().end() && i!=atoms.end(); ++j) { - if ((*j).is_leaving || (*j).element=="H") - continue; - if ((*j).ordinal!=static_cast<int>((*i).Impl()->GetState())) { - this->OnMissingAtom(residue, (*j).name); - } else { - this->FillAtomProps(*i, *j); - ++i; - } - } -} - -struct OrdinalComp { - bool operator()(const mol::impl::AtomImplPtr& a, - const mol::impl::AtomImplPtr& b) const { - return a->GetState()<b->GetState(); - } -}; - -void Sanitizer::ReorderAtoms(const mol::ResidueHandle& residue, - CompoundPtr compound) { - mol::impl::ResidueImplPtr impl=residue.Impl(); - mol::impl::AtomImplList::iterator i=impl->GetAtomList().begin(); - for (; i!=impl->GetAtomList().end(); ++i) { - mol::impl::AtomImplPtr atom=*i; - int index=compound->GetAtomSpecIndex(atom->GetName()); - if (index==-1) { - - this->OnUnknownAtom(mol::AtomHandle(atom)); - atom->SetState(std::numeric_limits<int>::max()); - continue; - } - atom->SetState((compound->GetAtomSpecs())[index].ordinal); - } - std::sort(impl->GetAtomList().begin(), impl->GetAtomList().end(), - OrdinalComp()); -} - -bool Sanitizer::VisitChain(const mol::ChainHandle& chain) { - return true; -} - -CompoundPtr Sanitizer::IdentifyResidue(const mol::ResidueHandle& residue) { - CompoundPtr comp=compound_lib_->FindCompound(residue.GetName()); - if (!comp) { - return this->OnUnknownResidue(residue); - } - return comp; -} -}} diff --git a/modules/conop/src/sanitizer.hh b/modules/conop/src/sanitizer.hh deleted file mode 100644 index 609414c7c420a59963bac2747d57cb7edfd58e95..0000000000000000000000000000000000000000 --- a/modules/conop/src/sanitizer.hh +++ /dev/null @@ -1,65 +0,0 @@ -//------------------------------------------------------------------------------ -// This file is part of the OpenStructure project <www.openstructure.org> -// -// Copyright (C) 2008-2010 by the OpenStructure authors -// -// This library is free software; you can redistribute it and/or modify it under -// the terms of the GNU Lesser General Public License as published by the Free -// Software Foundation; either version 3.0 of the License, or (at your option) -// any later version. -// This library is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more -// details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with this library; if not, write to the Free Software Foundation, Inc., -// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -//------------------------------------------------------------------------------ -#ifndef OST_CONOP_SANITIZER_HH -#define OST_CONOP_SANITIZER_HH - -#include <ost/mol/mol.hh> -#include <ost/conop/compound_lib.hh> - -namespace ost { namespace conop { - -/// \brief Structural cleanup -/// -/// \todo Sanitizer acts both as the checker and the instance responsible for -/// cleaning up structural problems. Is this desired? -class DLLEXPORT_OST_CONOP Sanitizer : public mol::EntityVisitor { -public: - Sanitizer(const CompoundLibPtr& compound_lib); - - virtual bool VisitAtom(const mol::AtomHandle& atom); - - virtual bool VisitResidue(const mol::ResidueHandle& residue); - - virtual bool VisitChain(const mol::ChainHandle& chain); - - virtual CompoundPtr OnUnknownResidue(const mol::ResidueHandle& residue) { - return CompoundPtr(); - } - - virtual void OnUnknownAtom(const mol::AtomHandle& atom) { } - virtual void OnMissingAtom(const mol::ResidueHandle& residue, - const String& atom_name) { } -private: - void FillAtomProps(mol::AtomHandle atom, const AtomSpec& spec); - /// Change internal order of atoms in residue to the order given by compound - void ReorderAtoms(const mol::ResidueHandle& residue, CompoundPtr compound); - /// Verify all atoms are present. - /// - /// The check exludes hydrogen atoms as well as "leaving atoms", i.e. - /// carboxy-terminal atoms that leave when the peptide bond is formed. - - void VerifyCompleteness(const mol::ResidueHandle& residue, CompoundPtr compound); - CompoundPtr IdentifyResidue(const mol::ResidueHandle& residue); - - - CompoundLibPtr compound_lib_; -}; - -}} -#endif diff --git a/modules/io/pymod/__init__.py b/modules/io/pymod/__init__.py index 2683ed3612290df79fa70d0f9cb75446aaa65dd6..2cf6900a10edd878f78a2b748830d33f2f102afe 100644 --- a/modules/io/pymod/__init__.py +++ b/modules/io/pymod/__init__.py @@ -47,7 +47,8 @@ def __GetModelFromPDB(model_id, output_dir, file_pattern='pdb%s.ent.gz'): def LoadPDB(filename, restrict_chains="", no_hetatms=False, fault_tolerant=False, load_multi=False, - join_spread_atom_records=False, calpha_only=False, remote=False): + join_spread_atom_records=False, calpha_only=False, + remote=False, dialect='PDB', strict_hydrogens=False): """ Load PDB file from disk and returns one or more entities. Several options allow to customize the exact behaviour of the PDB import. @@ -72,10 +73,26 @@ def LoadPDB(filename, restrict_chains="", no_hetatms=False, pdb repository www.pdb.org. :rtype: :class:`~ost.mol.EntityHandle` or a list thereof if `load_multi` is True. - + :param dialect: Specifies the particular dialect to use. By default, the + official PDB format is used. Alternatively, by setting the dialect to + CHARMM, the loading is optimized for CHARMM PDB files. + :type dialect: :class:`str` + + :param strict_hydrogens: whether hydrogen names should be strictly checked. + It is very common for PDB files to not follow the correct naming + conventions for hydrogen atoms. That's why by default, the names of the + hydrogens are not required to be correct. Rather, the connectivity is + inferred with distance-based checks. By turning this flag on, the names + of the hydrogen atoms are checked against the names in the database like + all other atom types. + :raises: :exc:`~ost.io.IOException` if the import fails due to an erroneous or inexistent file """ + + if dialect not in ('PDB', 'CHARMM',): + raise ValueError('dialect must be PDB or CHARMM') + if remote: output_dir = tempfile.gettempdir() if __GetModelFromPDB(filename, output_dir): @@ -85,8 +102,12 @@ def LoadPDB(filename, restrict_chains="", no_hetatms=False, conop_inst=conop.Conopology.Instance() builder=conop_inst.GetBuilder("DEFAULT") + if dialect=='PDB': + builder.dialect=conop.PDB_DIALECT + elif dialect=='CHARMM': + builder.dialect=conop.CHARMM_DIALECT + builder.strict_hydrogens=strict_hydrogens reader=PDBReader(filename) - flags=0 if calpha_only: flags|=PDB.CALPHA_ONLY @@ -96,6 +117,8 @@ def LoadPDB(filename, restrict_chains="", no_hetatms=False, flags|=PDB.NO_HETATMS if join_spread_atom_records: flags|=PDB.JOIN_SPREAD_ATOM_RECORDS + if dialect=='CHARMM': + flags|=PDB.CHARMM_FORMAT try: PDB.PushFlags(PDB.Flags() | flags) if load_multi: diff --git a/modules/io/pymod/export_pdb_io.cc b/modules/io/pymod/export_pdb_io.cc index 1aec7e6fe5415aea60d813955b924b271c3159db..1275ea7a62fdc15025c646046d468a80ac1f6dd6 100644 --- a/modules/io/pymod/export_pdb_io.cc +++ b/modules/io/pymod/export_pdb_io.cc @@ -54,6 +54,7 @@ void export_pdb_io() pdb_scope.attr("WRITE_MULTIPLE_MODELS")=PDB::WRITE_MULTIPLE_MODELS; pdb_scope.attr("JOIN_SPREAD_ATOM_RECORDS")=PDB::JOIN_SPREAD_ATOM_RECORDS; pdb_scope.attr("CALPHA_ONLY")=PDB::CALPHA_ONLY; + pdb_scope.attr("CHARMM_FORMAT")=PDB::CHARMM_FORMAT; } class_<PDBReader, boost::noncopyable>("PDBReader", init<String>()) diff --git a/modules/io/src/mol/pdb_io.cc b/modules/io/src/mol/pdb_io.cc index 30b6dc17eb3dc7012c11afd4b5098be28b9f67be..eba9c8877083eaea23bbb1ad81269269fe8d6d96 100644 --- a/modules/io/src/mol/pdb_io.cc +++ b/modules/io/src/mol/pdb_io.cc @@ -31,6 +31,7 @@ namespace ost { namespace io { const unsigned int PDB::PQR_FORMAT=0x8; const unsigned int PDB::JOIN_SPREAD_ATOM_RECORDS=0x10; const unsigned int PDB::CALPHA_ONLY=0x20; + const unsigned int PDB::CHARMM_FORMAT=0x40; void PDB::PushFlags(unsigned int flags) {PDB::Instance().fstack.push(flags);} unsigned int PDB::Flags() {return PDB::Instance().fstack.empty() ? 0 : PDB::Instance().fstack.top();} diff --git a/modules/io/src/mol/pdb_io.hh b/modules/io/src/mol/pdb_io.hh index b2d1e00bf32f866a7c917332fd08e4af751140dc..c4fc19e035f2b6bf02d135facf06091cb1c38250 100644 --- a/modules/io/src/mol/pdb_io.hh +++ b/modules/io/src/mol/pdb_io.hh @@ -42,28 +42,37 @@ struct DLLEXPORT_OST_IO PDB { /// \brief enable for PQR static const unsigned int PQR_FORMAT; - /// \brief Join atom records into one residue, even if the atom records - /// are not sequential. - /// - /// This is useful in the following case: - /// - /// \verbatim - /// ATOM 43 N AALA P 4 - /// ATOM 45 CA AALA P 4 - /// ATOM 47 C AALA P 4 - /// ATOM 48 O AALA P 4 - /// ATOM 49 N APRO P 5 - /// ATOM 50 CD APRO P 5 - /// ATOM 53 CA APRO P 5 - /// ATOM 55 CB APRO P 5 - /// ATOM 58 CG APRO P 5 - /// ATOM 61 C APRO P 5 - /// ATOM 62 O APRO P 5 - /// ATOM 550 CB AALA P 4 - /// \endverbatim - /// - /// By default, the atom 550 will start a new residue instead of being - /// joined with atoms 43-48 into one residue. + /// \brief enables parsing of charmm-style PDBs + /// + /// CHARMM files don't use the chain column to mark different chains, but + /// rather put the name in the last columns that is isually used for the atom + /// element an occupancy. Note that it is perfectly possible to parse CHARRM + /// PDB files without this flag, but the mileage may vary as some of the + /// elements are incorrectly assigned. + static const unsigned int CHARMM_FORMAT; + + /// \brief Join atom records into one residue, even if the atom records + /// are not sequential. + /// + /// This is useful in the following case: + /// + /// \verbatim + /// ATOM 43 N AALA P 4 + /// ATOM 45 CA AALA P 4 + /// ATOM 47 C AALA P 4 + /// ATOM 48 O AALA P 4 + /// ATOM 49 N APRO P 5 + /// ATOM 50 CD APRO P 5 + /// ATOM 53 CA APRO P 5 + /// ATOM 55 CB APRO P 5 + /// ATOM 58 CG APRO P 5 + /// ATOM 61 C APRO P 5 + /// ATOM 62 O APRO P 5 + /// ATOM 550 CB AALA P 4 + /// \endverbatim + /// + /// By default, the atom 550 will start a new residue instead of being + /// joined with atoms 43-48 into one residue. static const unsigned int JOIN_SPREAD_ATOM_RECORDS; /// \brief only import C-alpha atoms diff --git a/modules/io/src/mol/pdb_reader.cc b/modules/io/src/mol/pdb_reader.cc index 8ed929e08630b3d24c94ced21f2aee5edffa814c..5a628c2ccfe769d5c6144281a03966cd6adc8adc 100644 --- a/modules/io/src/mol/pdb_reader.cc +++ b/modules/io/src/mol/pdb_reader.cc @@ -149,8 +149,10 @@ void PDBReader::Import(mol::EntityHandle& ent, this->ParseAndAddAtom(curr_line, line_num_, ent, StringRef("ATOM", 4)); } else if (IEquals(curr_line.substr(0, 6), StringRef("ANISOU", 6))) { - LOG_TRACE("processing ANISOU entry"); - this->ParseAnisou(curr_line, line_num_, ent); + if (!(PDB::Flags() & PDB::CHARMM_FORMAT)) { + LOG_TRACE("processing ANISOU entry"); + this->ParseAnisou(curr_line, line_num_, ent); + } } break; case 'E': @@ -183,7 +185,9 @@ void PDBReader::Import(mol::EntityHandle& ent, this->ParseAndAddAtom(curr_line, line_num_, ent, StringRef("HETATM", 6)); } else if (IEquals(curr_line.substr(0, 6), StringRef("HELIX ", 6))) { - this->ParseHelixEntry(curr_line); + if (!(PDB::Flags() & PDB::CHARMM_FORMAT)) { + this->ParseHelixEntry(curr_line); + } } break; case 'M': @@ -211,7 +215,9 @@ void PDBReader::Import(mol::EntityHandle& ent, continue; } if (IEquals(curr_line.substr(0, 6), StringRef("SHEET ", 6))) { - this->ParseStrandEntry(curr_line); + if (!(PDB::Flags() & PDB::CHARMM_FORMAT)) { + this->ParseStrandEntry(curr_line); + } } break; default: @@ -291,18 +297,26 @@ bool PDBReader::EnsureLineLength(const StringRef& line, size_t size) } bool PDBReader::ParseAtomIdent(const StringRef& line, int line_num, - char& chain_name, StringRef& res_name, + String& chain_name, StringRef& res_name, mol::ResNum& resnum, StringRef& atom_name, char& alt_loc, const StringRef& record_type) { if (!this->EnsureLineLength(line, 27)) { return false; } - chain_name=line[21]; - if (restrict_chains_.size()>0 && - restrict_chains_.find(chain_name)==String::npos) { - return false; + if (PDB::Flags() & PDB::CHARMM_FORMAT) { + if (line.size()>73) { + size_t width=std::min(line.size()-72, size_t(4)); + chain_name=line.substr(72, width).trim().str(); + } + } else { + chain_name=String(1, line[21]); + if (restrict_chains_.size()>0 && + restrict_chains_.find(chain_name)==String::npos) { + return false; + } } + std::pair<bool, int> a_num=line.substr(6, 5).ltrim().to_int(); if (!a_num.first) { if (PDB::Flags() & PDB::SKIP_FAULTY_RECORDS) { @@ -312,7 +326,7 @@ bool PDBReader::ParseAtomIdent(const StringRef& line, int line_num, } atom_name=line.substr(12, 4).trim(); alt_loc=line[16]; - res_name=line.substr(17, 3).trim(); + res_name=line.substr(17, (PDB::Flags() & PDB::CHARMM_FORMAT) ? 4 : 3).trim(); std::pair<bool, int> res_num=line.substr(22, 4).ltrim().to_int();; if (!res_num.first) { if (PDB::Flags() & PDB::SKIP_FAULTY_RECORDS) { @@ -341,7 +355,7 @@ void PDBReader::ParseAnisou(const StringRef& line, int line_num, if (!this->EnsureLineLength(line, 77)) { return; } - char chain_name=0; + String chain_name; char alt_loc=0; StringRef res_name, atom_name; mol::ResNum res_num(0); @@ -399,7 +413,7 @@ void PDBReader::ParseAndAddAtom(const StringRef& line, int line_num, } mol::XCSEditor editor=ent.RequestXCSEditor(mol::BUFFERED_EDIT); char alt_loc=0; - char chain_name=0; + String chain_name; StringRef res_name, atom_name; mol::ResNum res_num(0); if (!this->ParseAtomIdent(line, line_num, chain_name, res_name, res_num, @@ -440,31 +454,28 @@ void PDBReader::ParseAndAddAtom(const StringRef& line, int line_num, } LOG_TRACE( "line: [" << line << "]" ); String s_ele; - String s_chain(1, chain_name); - String aname(atom_name.str()); - // determine element from element column (77-78, right justified) if present - // otherwise set to empty String. It is up to the builder to determine the - // correct element in that case. - if (line.length()>=78) { // element column present - if(line[76]==' ' && line[77]==' ') { // both characters are empty - s_ele=""; - } else if(line[76]!=' ' || line[77]!=' ') { // at least one character not - // empty - if(line[76]==' ' && line[77]!=' ') { // single character element, - // right justified - s_ele=line.substr(77,1).str(); - } else if(line[76]!=' ' && line[77]==' ') { // single character element, - // left justified - s_ele=line.substr(76,1).str(); - } else { // Real character element - s_ele=line.substr(76,2).str(); + if (!(PDB::Flags() & PDB::CHARMM_FORMAT)) { + // determine element from element column (77-78, right justified) if + // present otherwise set to empty String. It is up to the builder to + // determine the correct element in that case. + if (line.length()>=78) { // element column present + if(line[76]==' ' && line[77]==' ') { // both characters are empty + s_ele=""; + } else if(line[76]!=' ' || line[77]!=' ') { // at least one character not + // empty + if(line[76]==' ' && line[77]!=' ') { // single character element, + // right justified + s_ele=line.substr(77,1).str(); + } else if(line[76]!=' ' && line[77]==' ') {// single character element, + // left justified + s_ele=line.substr(76,1).str(); + } else { // Real character element + s_ele=line.substr(76,2).str(); + } } - } - } - else { - s_ele=""; // no element column present + } } - + String aname(atom_name.str()); // some postprocessing LOG_TRACE( "s_chain: [" << chain_name << "]" ); @@ -474,7 +485,7 @@ void PDBReader::ParseAndAddAtom(const StringRef& line, int line_num, if(!curr_chain_) { update_chain=true; update_residue=true; - } else if(curr_chain_.GetName()!=s_chain) { + } else if(curr_chain_.GetName()!=chain_name) { update_chain=true; update_residue=true; } @@ -493,11 +504,11 @@ void PDBReader::ParseAndAddAtom(const StringRef& line, int line_num, curr_chain_=ent.FindChain(s_chain); } #else - curr_chain_=ent.FindChain(s_chain); + curr_chain_=ent.FindChain(chain_name); #endif if(!curr_chain_.IsValid()) { - LOG_DEBUG("new chain " << s_chain); - curr_chain_=editor.InsertChain(s_chain); + LOG_DEBUG("new chain " << chain_name); + curr_chain_=editor.InsertChain(chain_name); ++chain_count_; } assert(curr_chain_.IsValid()); @@ -554,22 +565,10 @@ void PDBReader::ParseAndAddAtom(const StringRef& line, int line_num, } else { mol::AtomHandle ah=editor.InsertAltAtom(curr_residue_, aname, String(1, alt_loc), apos, aprop); - /* - for now, this is not needed - the cg will use an global atom id sorted - list insteadx - if (PDB::Flags() & PDB::SEQUENTIAL_ATOM_IMPORT) { - sequential_atom_list_.push_back(ah); - } - */ ++atom_count_; } } else { mol::AtomHandle ah = editor.InsertAtom(curr_residue_, aname, apos, aprop); - /* - if (PDB::Flags() & PDB::SEQUENTIAL_ATOM_IMPORT) { - sequential_atom_list_.push_back(ah); - } - */ ++atom_count_; } } diff --git a/modules/io/src/mol/pdb_reader.hh b/modules/io/src/mol/pdb_reader.hh index c2a624105c31fa2921cbcf2f0763a5cb6ed0779a..2c2ca2c1a16eadd110ad79aca78dcf31e8afc06b 100644 --- a/modules/io/src/mol/pdb_reader.hh +++ b/modules/io/src/mol/pdb_reader.hh @@ -57,7 +57,7 @@ private: /// \brief parses the common part of ATOM, HETATM and ANISOU records bool ParseAtomIdent(const StringRef& line, int line_num, - char& chain_name, StringRef& res, + String& chain_name, StringRef& res, mol::ResNum& resnum, StringRef& atom_name, char& alt_loc, const StringRef& record_type); void ParseAnisou(const StringRef& line, int line_num,