Skip to content
Snippets Groups Projects
Commit 78606b84 authored by marco's avatar marco
Browse files

improved behaviour of rule-based builder

as soon as an atom is unknown, we switch back to a distance-based
connect mechanism for the full residue. This avoids problems with
residues coming from CHARMM/GROMACS and other force fields that do
not follow the naming convention of the PDB. To really become useful,
some additional changes to the compound library are required that
will be implemented in a next commit

git-svn-id: https://dng.biozentrum.unibas.ch/svn/openstructure/trunk@2714 5a81b35b-ba03-0410-adc8-b2c5c5119f08
parent 9f0fc39f
No related branches found
No related tags found
No related merge requests found
#include <ost/conop/chemdict_parser.hh>
namespace ost { namespace conop {
bool ChemdictParser::OnBeginData(const StringRef& data_name)
{
compound_.reset(new Compound(data_name.str()));
if (last_!=data_name[0]) {
last_=data_name[0];
std::cout << last_ << std::flush;
}
atom_map_.clear();
insert_=true;
return true;
}
bool ChemdictParser::OnBeginLoop(const io::StarLoopDesc& header)
{
if (header.GetCategory()=="chem_comp_atom") {
loop_type_=ATOM_SPEC;
indices_[ATOM_NAME]=header.GetIndex("atom_id");
indices_[ALT_ATOM_NAME]=header.GetIndex("alt_atom_id");
indices_[ELE]=header.GetIndex("type_symbol");
indices_[IS_LEAVING]=header.GetIndex("pdbx_leaving_atom_flag");
indices_[IS_AROMATIC]=header.GetIndex("pdbx_aromatic_flag");
indices_[ORDINAL]=header.GetIndex("pdbx_ordinal");
return true;
} else if (header.GetCategory()=="chem_comp_bond") {
loop_type_=BOND_SPEC;
indices_[ATOM_ID1]=header.GetIndex("atom_id_1");
indices_[ATOM_ID2]=header.GetIndex("atom_id_2");
indices_[BOND_ORDER]=header.GetIndex("value_order");
return true;
}
loop_type_=DONT_KNOW;
return false;
}
void ChemdictParser::OnDataRow(const io::StarLoopDesc& header,
const std::vector<StringRef>& columns)
{
if (loop_type_==ATOM_SPEC) {
AtomSpec atom;
atom.is_leaving=columns[indices_[IS_LEAVING]][0]=='Y';
atom.name=columns[indices_[ATOM_NAME]].str();
atom.alt_name=columns[indices_[ALT_ATOM_NAME]].str();
atom.ordinal=columns[indices_[ORDINAL]].to_int().second-1;
atom.element=columns[indices_[ELE]].str();
atom.is_aromatic=columns[indices_[IS_AROMATIC]][0]=='Y';
compound_->AddAtom(atom);
atom_map_[atom.name]=atom.ordinal;
} else if (loop_type_==BOND_SPEC) {
BondSpec bond;
String atom_one=columns[indices_[ATOM_ID1]].str();
String atom_two=columns[indices_[ATOM_ID2]].str();
bond.atom_one=atom_map_[atom_one];
bond.atom_two=atom_map_[atom_two];
char b=columns[indices_[BOND_ORDER]][0];
bond.order=b=='D' ? 2 : (b=='S' ? 1 : 3);
if (bond.atom_one>bond.atom_two) {
std::swap(bond.atom_one, bond.atom_two);
}
compound_->AddBond(bond);
}
}
void ChemdictParser::OnDataItem(const io::StarDataItem& item)
{
if (item.GetCategory()==StringRef("chem_comp", 9)) {
if (item.GetName()==StringRef("type", 4)) {
// convert type to uppercase
String type=item.GetValue().str();
for (String::iterator i=type.begin(), e=type.end(); i!=e; ++i) {
*i=toupper(*i);
}
// The type of water is set to "?". let's change it to water...
if (compound_->GetID()=="HOH") {
compound_->SetChemClass(mol::ChemClass(mol::ChemClass::Water));
} else {
std::map<String, mol::ChemClass>::iterator i=tm_.find(type);
if (i!=tm_.end()) {
compound_->SetChemClass(i->second);
} else {
std::cout << "unknown type '" << type << "' for compound "
<< compound_->GetID() << std::endl;
}
}
} else if (item.GetName()==StringRef("formula", 7)) {
compound_->SetFormula(item.GetValue().str());
} else if (item.GetName()==StringRef("one_letter_code", 15)) {
if (item.GetValue().length()==1) {
compound_->SetOneLetterCode(item.GetValue()[0]);
}
} else if (item.GetName()==StringRef("pdbx_initial_date", 17)) {
compound_->SetCreationDate(Date::FromString(item.GetValue()));
} else if (item.GetName()==StringRef("pdbx_modified_date", 18)) {
compound_->SetModificationDate(Date::FromString(item.GetValue()));
}
} else if (item.GetName()==StringRef("atom_id", 7)) {
atom_.name=item.GetValue().str();
} else if (item.GetName()==StringRef("alt_atom_id", 11)) {
atom_.alt_name=item.GetValue().str();
} else if (item.GetName()==StringRef("type_symbol", 11)) {
atom_.element=item.GetValue().str();
} else if (item.GetName()==StringRef("pdbx_ordinal", 12)) {
atom_.ordinal=item.GetValue().to_int().second-1;
}
}
void ChemdictParser::OnEndData()
{
if (insert_) {
if (compound_->GetAtomSpecs().empty()) {
compound_->AddAtom(atom_);
}
lib_->AddCompound(compound_);
}
}
std::map<String, mol::ChemClass> ChemdictParser::tm_=std::map<String, mol::ChemClass>();
void ChemdictParser::InitTypeMap()
{
if (!tm_.empty())
return;
tm_["L-PEPTIDE COOH CARBOXY TERMINUS"]=mol::ChemClass(mol::ChemClass::LPeptideLinking);
tm_["L-PEPTIDE NH3 AMINO TERMINUS"]=mol::ChemClass(mol::ChemClass::LPeptideLinking);
tm_["D-PEPTIDE NH3 AMINO TERMINUS"]=mol::ChemClass(mol::ChemClass::DPeptideLinking);
tm_["L-SACCHARIDE 1,4 AND 1,4 LINKING"]=mol::ChemClass(mol::ChemClass::LSaccharide);
tm_["D-SACCHARIDE 1,4 AND 1,4 LINKING"]=mol::ChemClass(mol::ChemClass::DSaccharide);
tm_["L-SACCHARIDE"]=mol::ChemClass(mol::ChemClass::LSaccharide);
tm_["D-SACCHARIDE"]=mol::ChemClass(mol::ChemClass::DSaccharide);
tm_["SACCHARIDE"]=mol::ChemClass(mol::ChemClass::Saccharide);
tm_["D-PEPTIDE LINKING"]=mol::ChemClass(mol::ChemClass::DPeptideLinking);
tm_["L-PEPTIDE LINKING"]=mol::ChemClass(mol::ChemClass::LPeptideLinking);
tm_["L-PEPTIDE-LINKING"]=mol::ChemClass(mol::ChemClass::LPeptideLinking);
tm_["DNA LINKING"]=mol::ChemClass(mol::ChemClass::DNALinking);
tm_["RNA LINKING"]=mol::ChemClass(mol::ChemClass::RNALinking);
tm_["L-DNA LINKING"]=mol::ChemClass(mol::ChemClass::DNALinking);
tm_["L-RNA LINKING"]=mol::ChemClass(mol::ChemClass::RNALinking);
tm_["R-DNA LINKING"]=mol::ChemClass(mol::ChemClass::DNALinking);
tm_["R-RNA LINKING"]=mol::ChemClass(mol::ChemClass::RNALinking);
tm_["DNA OH 3 PRIME TERMINUS"]=mol::ChemClass(mol::ChemClass::DNALinking);
tm_["PEPTIDE-LIKE"]=mol::ChemClass(mol::ChemClass::PeptideLinking);
tm_["PEPTIDE LINKING"]=mol::ChemClass(mol::ChemClass::PeptideLinking);
tm_["PEPTIDE-LINKING"]=mol::ChemClass(mol::ChemClass::PeptideLinking);
tm_["NON-POLYMER"]=mol::ChemClass(mol::ChemClass::NonPolymer);
tm_["RNA OH 3 PRIME TERMINUS"]=mol::ChemClass(mol::ChemClass::RNALinking);
tm_["?"]=mol::ChemClass(mol::ChemClass::Unknown);
}
}}
\ No newline at end of file
//------------------------------------------------------------------------------
// This file is part of the OpenStructure project <www.openstructure.org>
//
// Copyright (C) 2008-2010 by the OpenStructure authors
//
// This library is free software; you can redistribute it and/or modify it under
// the terms of the GNU Lesser General Public License as published by the Free
// Software Foundation; either version 3.0 of the License, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
// details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
//------------------------------------------------------------------------------
#ifndef OST_CONOP_CHEMDICT_PARSER_HH
#define OST_CONOP_CHEMDICT_PARSER_HH
/*
Author: Marco Biasini
*/
#include <ost/mol/mol.hh>
#include <ost/io/mol/star_parser.hh>
#include <ost/conop/compound_lib.hh>
namespace ost { namespace conop {
typedef enum {
ATOM_SPEC,
BOND_SPEC,
DONT_KNOW
} LoopType;
class DLLEXPORT_OST_CONOP ChemdictParser : public io::StarParser {
public:
ChemdictParser(std::istream& stream):
io::StarParser(stream),
compound_(new Compound("UNK")), last_(0), loop_type_(DONT_KNOW)
{
this->InitTypeMap();
}
virtual bool OnBeginData(const StringRef& data_name);
virtual bool OnBeginLoop(const io::StarLoopDesc& header);
virtual void OnDataRow(const io::StarLoopDesc& header,
const std::vector<StringRef>& columns);
virtual void OnDataItem(const io::StarDataItem& item);
virtual void OnEndData();
void SetCompoundLib(const CompoundLibPtr& lib)
{
lib_=lib;
}
private:
void InitTypeMap();
CompoundLibPtr lib_;
CompoundPtr compound_;
typedef enum {
ATOM_NAME=0,
ALT_ATOM_NAME=1,
IS_AROMATIC=2,
ORDINAL=3,
IS_LEAVING=4,
ELE=5,
STEREO_CONF=6,
ATOM_ID1=0,
ATOM_ID2=1,
BOND_ORDER=2
} PropIndex;
char last_;
int indices_[10];
bool insert_;
static std::map<String, mol::ChemClass> tm_;
std::map<String, int> atom_map_;
LoopType loop_type_;
AtomSpec atom_;
};
}}
......@@ -214,10 +214,13 @@ namespace {
class PropAssigner: public mol::EntityVisitor {
public:
PropAssigner(const BuilderP& builder): builder_(builder) {}
virtual bool VisitResidue(const mol::ResidueHandle& res)
{
String key=builder_->IdentifyResidue(res);
builder_->CheckResidueCompleteness(res);
if (key=="UNK") {
unk_res_[res.GetKey()]+=1;
}
builder_->FillResidueProps(res);
return true;
}
......@@ -227,8 +230,23 @@ public:
builder_->FillAtomProps(atom);
return false;
}
virtual void OnExit()
{
for (std::map<String, int>::iterator i=unk_res_.begin(),
e=unk_res_.end(); i!=e; ++i) {
if (i->second>1) {
LOG_WARNING("structure contains unknown residues with name " << i->first
<< " ("<< i->second << "x)");
} else {
LOG_WARNING("structure contains unknown residue with name " << i->first);
}
}
}
private:
BuilderP builder_;
std::map<String, int> unk_res_;
};
......@@ -247,8 +265,8 @@ public:
virtual bool VisitResidue(const mol::ResidueHandle& res) {
builder_->ConnectAtomsOfResidue(res);
if(prev_) {
builder_->ConnectResidueToPrev(res,prev_);
if (prev_) {
builder_->ConnectResidueToPrev(res,prev_);
}
prev_=res;
return false;
......
......@@ -33,8 +33,9 @@ namespace ost { namespace conop {
void RuleBasedBuilder::CompleteAtoms(mol::ResidueHandle rh) {}
void RuleBasedBuilder::CheckResidueCompleteness(const mol::ResidueHandle& rh) {
LookupCompound(rh);
void RuleBasedBuilder::CheckResidueCompleteness(const mol::ResidueHandle& rh)
{
this->LookupCompound(rh);
if (!last_compound_) {
mol::AtomHandleList atoms=rh.GetAtomList();
for (mol::AtomHandleList::const_iterator i=atoms.begin(),
......@@ -43,7 +44,7 @@ void RuleBasedBuilder::CheckResidueCompleteness(const mol::ResidueHandle& rh) {
}
return;
}
ReorderAtoms(rh , last_compound_);
this->ReorderAtoms(rh , last_compound_);
AtomSpecList::const_iterator j=last_compound_->GetAtomSpecs().begin();
mol::AtomHandleList atoms=rh.GetAtomList();
mol::AtomHandleList::iterator i=atoms.begin();
......@@ -53,12 +54,30 @@ void RuleBasedBuilder::CheckResidueCompleteness(const mol::ResidueHandle& rh) {
if ((*j).ordinal!=static_cast<int>((*i).Impl()->GetState())) {
this->OnMissingAtom(rh, (*j).name);
} else {
this->FillAtomProps(*i, *j);
++i;
}
}
}
bool RuleBasedBuilder::HasUnknownAtoms(mol::ResidueHandle res)
{
this->LookupCompound(res);
if (!last_compound_) {
return true;
}
this->ReorderAtoms(res, last_compound_);
AtomSpecList::const_iterator j=last_compound_->GetAtomSpecs().begin();
mol::AtomHandleList atoms=res.GetAtomList();
mol::AtomHandleList::iterator i=atoms.begin();
for (mol::AtomHandleList::iterator
i=atoms.begin(), e=atoms.end(); i!=e; ++i) {
if ((*i).Impl()->GetState()==std::numeric_limits<unsigned int>::max()) {
return true;
}
}
return false;
}
void RuleBasedBuilder::FillAtomProps(mol::AtomHandle atom, const AtomSpec& spec)
{
mol::AtomProp props=atom.GetAtomProps();
......@@ -75,8 +94,9 @@ void RuleBasedBuilder::FillAtomProps(mol::AtomHandle atom, const AtomSpec& spec)
atom.SetAtomProps(props);
}
void RuleBasedBuilder::FillResidueProps(mol::ResidueHandle residue) {
LookupCompound(residue);
void RuleBasedBuilder::FillResidueProps(mol::ResidueHandle residue)
{
this->LookupCompound(residue);
if (!last_compound_)
return;
residue.SetChemClass(last_compound_->GetChemClass());
......@@ -103,6 +123,9 @@ void RuleBasedBuilder::LookupCompound(const mol::ResidueHandle& rh)
void RuleBasedBuilder::ReorderAtoms(mol::ResidueHandle residue,
CompoundPtr compound)
{
if (last_residue_==residue) {
return;
}
mol::impl::ResidueImplPtr impl=residue.Impl();
mol::impl::AtomImplList::iterator i=impl->GetAtomList().begin();
for (; i!=impl->GetAtomList().end(); ++i) {
......@@ -110,7 +133,7 @@ void RuleBasedBuilder::ReorderAtoms(mol::ResidueHandle residue,
int index=compound->GetAtomSpecIndex(atom->GetName());
if (index==-1) {
if (!this->OnUnknownAtom(mol::AtomHandle(atom))) {
atom->SetState(std::numeric_limits<int>::max());
atom->SetState(std::numeric_limits<unsigned int>::max());
}
continue;
}
......@@ -118,6 +141,14 @@ void RuleBasedBuilder::ReorderAtoms(mol::ResidueHandle residue,
}
std::sort(impl->GetAtomList().begin(), impl->GetAtomList().end(),
OrdinalComp());
last_residue_=residue;
unknown_atoms_=this->HasUnknownAtoms(residue);
if (unknown_atoms_) {
LOG_WARNING("residue " << residue << " doesn't look like a standard "
<< residue.GetKey());
residue.SetChemClass(mol::ChemClass(mol::ChemClass::Unknown));
residue.SetOneLetterCode('?');
}
}
......@@ -132,7 +163,9 @@ mol::ResidueKey RuleBasedBuilder::IdentifyResidue(const mol::ResidueHandle& rh)
}
}
mol::AtomHandle RuleBasedBuilder::LocateAtom(const mol::AtomHandleList& ahl, int ordinal) {
mol::AtomHandle RuleBasedBuilder::LocateAtom(const mol::AtomHandleList& ahl,
int ordinal)
{
if (ahl.empty())
return mol::AtomHandle();
const mol::AtomHandle* r_it=&ahl.back();
......@@ -148,14 +181,26 @@ mol::AtomHandle RuleBasedBuilder::LocateAtom(const mol::AtomHandleList& ahl, int
return not_found ? mol::AtomHandle() : *r_it;
}
void RuleBasedBuilder::ConnectAtomsOfResidue(mol::ResidueHandle rh) {
inline void dist_connect(RuleBasedBuilder* b, mol::AtomHandleList atoms)
{
for (mol::AtomHandleList::const_iterator i=atoms.begin(),
e=atoms.end(); i!=e; ++i) {
b->DistanceBasedConnect(*i);
}
}
void RuleBasedBuilder::ConnectAtomsOfResidue(mol::ResidueHandle rh)
{
LookupCompound(rh);
if (!last_compound_) {
mol::AtomHandleList atoms=rh.GetAtomList();
for (mol::AtomHandleList::const_iterator i=atoms.begin(),
e=atoms.end(); i!=e; ++i) {
this->DistanceBasedConnect(*i);
}
dist_connect(this, rh.GetAtomList());
return;
}
this->ReorderAtoms(rh, last_compound_);
if (unknown_atoms_) {
dist_connect(this, rh.GetAtomList());
return;
}
mol::XCSEditor e=rh.GetEntity().RequestXCSEditor(mol::BUFFERED_EDIT);
......@@ -166,19 +211,14 @@ void RuleBasedBuilder::ConnectAtomsOfResidue(mol::ResidueHandle rh) {
mol::AtomHandle a1=this->LocateAtom(atoms, bond.atom_one);
mol::AtomHandle a2=this->LocateAtom(atoms, bond.atom_two);
if (a1.IsValid() && a2.IsValid() && this->IsBondFeasible(a1, a2)) {
e.Connect(a1, a2, bond.order);
e.Connect(a1, a2, bond.order);
}
}
for (mol::AtomHandleList::const_iterator i=atoms.begin(),
e=atoms.end(); i!=e; ++i) {
if (int((*i).Impl()->GetState())==std::numeric_limits<int>::max()) {
this->DistanceBasedConnect(*i);
}
}
}
void RuleBasedBuilder::ConnectResidueToNext(mol::ResidueHandle rh,
mol::ResidueHandle next) {
mol::ResidueHandle next)
{
if (!next.IsValid()) {
return;
}
......@@ -208,7 +248,8 @@ void RuleBasedBuilder::ConnectResidueToNext(mol::ResidueHandle rh,
}
void RuleBasedBuilder::AssignTorsions(mol::ChainHandle chain) {
void RuleBasedBuilder::AssignTorsions(mol::ChainHandle chain)
{
if (chain.GetResidueCount()==0)
return;
std::vector<mol::ResidueHandle> rlist = chain.GetResidueList();
......@@ -219,7 +260,8 @@ void RuleBasedBuilder::AssignTorsions(mol::ChainHandle chain) {
}
}
void RuleBasedBuilder::AssignTorsionsToResidue(mol::ResidueHandle residue) {
void RuleBasedBuilder::AssignTorsionsToResidue(mol::ResidueHandle residue)
{
/// The only components having named torsions are the standard set of amino
/// acids, plus some of compounds derived from them such as selenium
/// methionine. Things are simplified a lot by only storing the torsions
......
......@@ -108,14 +108,16 @@ public:
/// \brief Set residue properties such as chemical class
virtual void FillResidueProps(mol::ResidueHandle residue);
/// \brief whether the residue has unknown atoms
bool HasUnknownAtoms(mol::ResidueHandle res);
/// \brief Check whether the residue has all required atoms. This does not
/// include hydrogens and leaving atoms such as the terminal OXT.
virtual bool IsResidueComplete(const mol::ResidueHandle& residue);
private:
CompoundLibPtr compound_lib_;
CompoundPtr last_compound_;
mol::ResidueHandle last_residue_;
bool unknown_atoms_;
void LookupCompound(const mol::ResidueHandle& rh);
/// Change internal order of atoms in residue to the order given by compound
void ReorderAtoms(mol::ResidueHandle residue, CompoundPtr compound);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment