diff --git a/modules/conop/pymod/export_heuristic.cc b/modules/conop/pymod/export_heuristic.cc index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..11efd96d82517aaf4f94744614fac49a82383a74 100644 --- a/modules/conop/pymod/export_heuristic.cc +++ b/modules/conop/pymod/export_heuristic.cc @@ -0,0 +1,34 @@ + +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2011 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ + +#include <boost/python.hpp> +using namespace boost::python; +#include <ost/conop/heuristic.hh> + +using namespace ost::conop; + +void export_heuristic() { + + class_<HeuristicProcessor, HeuristicProcessorPtr, + boost::noncopyable, bases<Processor> >("HeuristicProcessor", + init<>()) + ; +} + diff --git a/modules/conop/pymod/export_processor.cc b/modules/conop/pymod/export_processor.cc index b051c78ab5e8b7861b7ebcee0259b73de04c88e2..c6f1cf2a236ce0a49bfb1df5e9f3873de04d607e 100644 --- a/modules/conop/pymod/export_processor.cc +++ b/modules/conop/pymod/export_processor.cc @@ -57,16 +57,10 @@ void export_processor() { .add_property("check_bond_feasibility", &Processor::GetCheckBondFeasibility, &Processor::SetCheckBondFeasibility) - .add_property("strict_hydrogens", &Processor::GetStrictHydrogens, - &Processor::SetStrictHydrogens) .add_property("connect", &Processor::GetConnect, &Processor::SetConnect) .add_property("assign_torsions", &Processor::GetAssignTorsions, &Processor::SetAssignTorsions) - .add_property("unk_res_treatment", &Processor::GetUnkResidueTreatment, - &Processor::SetUnkResidueTreatment) - .add_property("unk_atom_treatment", &Processor::GetUnkAtomTreatment, - &Processor::SetUnkAtomTreatment) .def("Process", &Processor::Process, (arg("ent"), arg("log_diags")=true)) ; diff --git a/modules/conop/pymod/export_rule_based.cc b/modules/conop/pymod/export_rule_based.cc index ab3aefa415aa9323f2da3bcef6dd21d9acb8bf7a..5331dd917a80e82d16847e3b34457aab5ec81c95 100644 --- a/modules/conop/pymod/export_rule_based.cc +++ b/modules/conop/pymod/export_rule_based.cc @@ -30,6 +30,12 @@ void export_rule_based() { init<CompoundLibPtr>()) .add_property("fix_element", &RuleBasedProcessor::GetFixElement, &RuleBasedProcessor::SetFixElement) + .add_property("unk_res_treatment", &RuleBasedProcessor::GetUnkResidueTreatment, + &RuleBasedProcessor::SetUnkResidueTreatment) + .add_property("unk_atom_treatment", &RuleBasedProcessor::GetUnkAtomTreatment, + &RuleBasedProcessor::SetUnkAtomTreatment) + .add_property("strict_hydrogens", &RuleBasedProcessor::GetStrictHydrogens, + &RuleBasedProcessor::SetStrictHydrogens) ; } diff --git a/modules/conop/pymod/wrap_conop.cc b/modules/conop/pymod/wrap_conop.cc index b76739548e84353e4c8db1c03d38b9714cd77108..119c86c325f3ce6fc48cf88298838f441cb04a36 100644 --- a/modules/conop/pymod/wrap_conop.cc +++ b/modules/conop/pymod/wrap_conop.cc @@ -28,13 +28,16 @@ void export_AminoAcids(); void export_NonStandard(); void export_processor(); void export_rule_based(); +void export_heuristic(); void export_diag(); + BOOST_PYTHON_MODULE(_ost_conop) { // export_Builder(); export_Conop(); export_processor(); export_rule_based(); + export_heuristic(); export_Compound(); export_RingFinder(); export_AminoAcids(); diff --git a/modules/conop/src/heuristic.cc b/modules/conop/src/heuristic.cc index 12942520823772bb88c723cf3fb15314c474e678..335f14f123348f36bdf740ede1ab3cbd6987cc4b 100644 --- a/modules/conop/src/heuristic.cc +++ b/modules/conop/src/heuristic.cc @@ -26,11 +26,66 @@ #include <ost/mol/impl/atom_impl.hh> #include <ost/mol/residue_handle.hh> #include "heuristic.hh" +#include "conop.hh" namespace ost { namespace conop { +void HeuristicProcessor::ProcessUnkResidue(DiagnosticsPtr diags, + mol::ResidueHandle res) const { + res.SetChemClass(GuessChemClass(res)); + mol::AtomHandleList atoms = res.GetAtomList(); + for (mol::AtomHandleList::iterator + i = atoms.begin(), e = atoms.end(); i != e; ++i) { + mol::AtomHandle a = *i; + if (!Conopology::Instance().IsValidElement(a.GetElement())) + a.SetElement(GuessAtomElement(a.GetName(), a.IsHetAtom())); + } + if (!this->GetConnect()) { return; } + for (mol::AtomHandleList::iterator + i = atoms.begin(), e = atoms.end(); i != e; ++i) { + this->DistanceBasedConnect(*i); + } +} + void HeuristicProcessor::DoProcess(DiagnosticsPtr diags, - mol::EntityHandle ent) const { + mol::EntityHandle ent) const +{ + Profile prof("HeuristicProcessor::Process"); + mol::ChainHandleList chains=ent.GetChainList(); + for (mol::ChainHandleList::iterator + i = chains.begin(), e = chains.end(); i!= e; ++i) { + mol::ChainHandle& chain = *i; + mol::ResidueHandleList residues = chain.GetResidueList(); + mol::ResidueHandle prev; + for (mol::ResidueHandleList::iterator + j = residues.begin(), e2 = residues.end(); j != e2; ++j) { + mol::ResidueHandle residue = *j; + CompoundPtr compound = lib_.FindCompound(residue.GetName(), Compound::PDB); + if (!compound) { + // process unknown residue... + this->ProcessUnkResidue(diags, residue); + continue; + } + this->ReorderAtoms(residue, compound, true); + this->FillResidueProps(residue, compound); + if (this->GetConnect()) { + this->ConnectAtomsOfResidue(residue, compound, false); + this->ConnectResidues(prev, residue); + } + prev = residue; + if (!this->GetConnect()) { continue; } + mol::AtomHandleList atoms = residue.GetAtomList(); + for (mol::AtomHandleList::iterator + i = atoms.begin(), e = atoms.end(); i != e; ++i) { + if (i->GetBondCount() == 0) + this->DistanceBasedConnect(*i); + } + } + if (residues.empty() || !this->GetAssignTorsions()) { + continue; + } + AssignBackboneTorsions(residues); + } } }} diff --git a/modules/conop/src/heuristic.hh b/modules/conop/src/heuristic.hh index 07f2d6e96a8cad443576cf697e2e7a9e8e9d78e5..568b05d0df535479c45f3ead5df2b958a8c767ba 100644 --- a/modules/conop/src/heuristic.hh +++ b/modules/conop/src/heuristic.hh @@ -20,7 +20,7 @@ #define OST_CONOP_HEURISTIC_HH #include <ost/mol/entity_handle.hh> -#include "compound_lib.hh" +#include "minimal_compound_lib.hh" #include "diag.hh" #include "processor.hh" @@ -39,9 +39,11 @@ public: return ProcessorPtr(new HeuristicProcessor(*this)); } protected: + void ProcessUnkResidue(DiagnosticsPtr diags, mol::ResidueHandle res) const; virtual void DoProcess(DiagnosticsPtr diags, mol::EntityHandle ent) const; private: + MinimalCompoundLib lib_; }; diff --git a/modules/conop/src/processor.cc b/modules/conop/src/processor.cc index 57dbddc419dbbc0c7dc5f454912bbe3409ea6a31..d9fbf418692ebcd7bfc69bd894f6d878de2faf7a 100644 --- a/modules/conop/src/processor.cc +++ b/modules/conop/src/processor.cc @@ -20,10 +20,19 @@ #include <ost/mol/xcs_editor.hh> #include <ost/mol/bond_handle.hh> #include <ost/mol/torsion_handle.hh> +#include <ost/mol/impl/residue_impl.hh> +#include <ost/mol/impl/atom_impl.hh> +#include <ost/mol/residue_handle.hh> #include "processor.hh" namespace ost { namespace conop { +struct OrdinalAtomComp { + bool operator()(const mol::impl::AtomImplPtr& a, + const mol::impl::AtomImplPtr& b) const { + return a->GetState()<b->GetState(); + } +}; DiagnosticsPtr Processor::Process(mol::EntityHandle ent, bool log_diags) const { @@ -105,69 +114,93 @@ void AssignBackboneTorsions(mol::ResidueHandle prev, } } } - -void Processor::ProcessUnkResidue(DiagnosticsPtr diags, - mol::ResidueHandle res) const +mol::ChemClass GuessChemClass(mol::ResidueHandle res) { - // Don't do anything if treatment is set to SILENT - if (this->GetUnkResidueTreatment() == CONOP_SILENT) { - return; - } - switch (this->GetUnkResidueTreatment()) { - case CONOP_WARN: - diags->AddDiag(DIAG_UNK_RESIDUE, "unknown residue %0") - .AddResidue(res); - break; - case CONOP_FATAL: - // FIXME: Implement a ConopError based on Diag... - break; - case CONOP_REMOVE_RESIDUE: - case CONOP_REMOVE: - diags->AddDiag(DIAG_UNK_RESIDUE, "removed unknown residue %0") - .AddResidue(res); - res.GetEntity().EditXCS().DeleteResidue(res); - return; - default: - assert(0 && "unhandled switch"); + mol::AtomHandle ca=res.FindAtom("CA"); + if (!ca.IsValid() || ca.GetElement()!="C") return mol::ChemClass(); + mol::AtomHandle n=res.FindAtom("N"); + if (!n.IsValid() || n.GetElement()!="N") return mol::ChemClass(); + mol::AtomHandle c=res.FindAtom("C"); + if (!c.IsValid() || c.GetElement()!="C") return mol::ChemClass(); + mol::AtomHandle o=res.FindAtom("O"); + if (!o.IsValid() || o.GetElement()!="O") return mol::ChemClass(); + if (IsBondFeasible(n, ca) && IsBondFeasible(ca, c) && + IsBondFeasible(c, o)) { + return mol::ChemClass(mol::ChemClass::PEPTIDE_LINKING); } + return mol::ChemClass(); } - -void Processor::ProcessUnkAtoms(DiagnosticsPtr diags, - mol::AtomHandleList unks) const +String GuessAtomElement(const String& aname, bool hetatm) { - // Don't do anything if treatment is set to SILENT - if (this->GetUnkAtomTreatment() == CONOP_SILENT) { - return; + static String l1[] = { + "H","C","N","O","P","S","K" + }; + static int l1c=7; + static String l2[] = { + "NA","MG","AL","SI","CL", + "CA","CR","MN","FE","CO","NI","CU","ZN","AS","SE","BR","MO" + }; + static int l2c=17; + static String l3[] = { + "B","F","V" + }; + static int l3c=3; + static String l4[] = { + "HE","LI","BE","AR","SC","TI","GA","GE","KR" + }; + static int l4c=9; + + String ele=aname.substr(0,2); + // hydrogen hack + if(ele[0]=='H') { + return "H"; } - - assert(!unks.empty() && "empty unk list"); - mol::XCSEditor edi = unks.front().GetEntity().EditXCS(); - for (mol::AtomHandleList::iterator - i = unks.begin(), e = unks.end(); i != e; ++i) { - switch (this->GetUnkAtomTreatment()) { - case CONOP_REMOVE_ATOM: - case CONOP_REMOVE: - edi.DeleteAtom(*i); - diags->AddDiag(DIAG_UNK_ATOM, "removed unknown atom %0 from residue %1") - .AddString(i->GetName()).AddResidue(i->GetResidue()); - break; - case CONOP_WARN: - diags->AddDiag(DIAG_UNK_ATOM, "residue %0 contains unknown atom %1") - .AddResidue(i->GetResidue()).AddString(i->GetName()); - break; - case CONOP_FATAL: - // FIXME: Implement a ConopError based on Diag... - break; - case CONOP_REMOVE_RESIDUE: - diags->AddDiag(DIAG_UNK_ATOM, "removed residue %0 with unknown atom %1") - .AddString(i->GetResidue().GetQualifiedName()) - .AddString(i->GetName()); - edi.DeleteResidue(i->GetResidue()); - return; - default: - assert(0 && "unhandled switch"); + if (hetatm==false) { + if (ele=="CA" || ele=="CB") { + return "C"; } } + + // two characters + if(aname.size()==2) { + for(int i=0;i<l2c;i++) { + if(ele==l2[i]) return ele; + } + // check second character for match + for(int i=0;i<l1c;i++) { + if(ele[1]==l1[i][0]) { + return l1[i]; + } + } + // still no match, repeat with less likely tables + for(int i=0;i<l4c;i++) { + if(ele==l4[i]) return ele; + } + // check second character for match + for(int i=0;i<l3c;i++) { + if(ele[1]==l3[i][0]) { + return l3[i]; + } + } + // check second character for match + for(int i=0;i<l1c;i++) { + if(ele[0]==l1[i][0]) { + return l1[i]; + } + } + } else { + for(int i=0;i<l1c;i++) { + if(ele==l1[i]) return ele; + } + for(int i=0;i<l3c;i++) { + if(ele==l3[i]) return ele; + } + } + size_t i=0; + while (i<aname.size() && isdigit(aname[i])) { + ++i; + } + return i<aname.size() ? String(1, aname[i]) : ""; } bool IsBondFeasible(const mol::AtomHandle& atom_a, @@ -190,4 +223,202 @@ bool IsBondFeasible(const mol::AtomHandle& atom_a, return (len<=upper_bound && len>=lower_bound); } +void Processor::ConnectResidues(mol::ResidueHandle rh, + mol::ResidueHandle next) const +{ + if (!next.IsValid() || !rh.IsValid()) { + return; + } + + mol::XCSEditor e=rh.GetEntity().EditXCS(mol::BUFFERED_EDIT); + + // check if both of the residues are able to form a peptide bond. + if (rh.IsPeptideLinking() && next.IsPeptideLinking()) { + // If we have an OXT then there is no peptide bond connecting the two + // residues. + if (rh.FindAtom("OXT")) + return; + mol::AtomHandle c=rh.FindAtom("C"); + mol::AtomHandle n=next.FindAtom("N"); + // Give subclasses a chance to give us their opinions on the feasibility of + // the peptide bond. + if (c.IsValid() && n.IsValid() && IsBondFeasible(c, n)) { + e.Connect(c, n, 1); + rh.SetIsProtein(true); + next.SetIsProtein(true); + } + } else if (rh.IsNucleotideLinking() && next.IsNucleotideLinking()) { + mol::AtomHandle c=rh.FindAtom("O3'"); + mol::AtomHandle n=next.FindAtom("P"); + if (c.IsValid() && n.IsValid() && IsBondFeasible(c, n)) { + e.Connect(c, n, 1); + } + } +} + + +mol::AtomHandle Processor::LocateAtom(const mol::AtomHandleList& ahl, + int ordinal) const +{ + if (ahl.empty()) + return mol::AtomHandle(); + const mol::AtomHandle* r_it=&ahl.back(); + if (static_cast<int>(ahl.size())>ordinal) { + r_it=&ahl.front()+ordinal; + } + while ((r_it>=&ahl.front()) && + (static_cast<int>(r_it->Impl()->GetState())>ordinal)) { + --r_it; + } + bool not_found=(r_it<&ahl.front() || + static_cast<int>(r_it->Impl()->GetState())!=ordinal); + return not_found ? mol::AtomHandle() : *r_it; +} + +void Processor::FillResidueProps(mol::ResidueHandle residue, + CompoundPtr compound) const +{ + residue.SetChemClass(compound->GetChemClass()); + residue.SetChemType(compound->GetChemType()); + residue.SetOneLetterCode(compound->GetOneLetterCode()); +} + +void Processor::ConnectAtomsOfResidue(mol::ResidueHandle rh, + CompoundPtr compound, + bool strict_hydrogens) const +{ + + //if (!compound) { + // dist_connect(this, rh.GetAtomList()); + // return; + //} + //if (unknown_atoms_) { + // dist_connect(this, rh.GetAtomList()); + // return; + //} + mol::XCSEditor e=rh.GetEntity().EditXCS(mol::BUFFERED_EDIT); + BondSpecList::const_iterator j=compound->GetBondSpecs().begin(); + mol::AtomHandleList atoms=rh.GetAtomList(); + for(; j!=compound->GetBondSpecs().end(); ++j) { + const BondSpec& bond=*j; + mol::AtomHandle a1=this->LocateAtom(atoms, bond.atom_one); + mol::AtomHandle a2=this->LocateAtom(atoms, bond.atom_two); + if (a1.IsValid() && a2.IsValid()) { + if (!this->GetCheckBondFeasibility()) { + if (strict_hydrogens && (a1.GetElement()=="H" || + a2.GetElement()=="D")) { + continue; + } + e.Connect(a1, a2, bond.order); + } else { + if (IsBondFeasible(a1, a2)) { + if (strict_hydrogens && (a1.GetElement()=="H" || + a2.GetElement()=="D")) { + continue; + } + e.Connect(a1, a2, bond.order); + } + } + } + } +} + + +void Processor::ReorderAtoms(mol::ResidueHandle residue, + CompoundPtr compound, + bool fix_element) const +{ + mol::impl::ResidueImplPtr impl=residue.Impl(); + mol::impl::AtomImplList::iterator i=impl->GetAtomList().begin(); + for (; i!=impl->GetAtomList().end(); ++i) { + mol::impl::AtomImplPtr atom=*i; + atom->SetState(std::numeric_limits<unsigned int>::max()); + int index=compound->GetAtomSpecIndex(atom->GetName()); + if (index==-1) { + atom->SetState(std::numeric_limits<unsigned int>::max()); + continue; + } + atom->SetState((compound->GetAtomSpecs())[index].ordinal); + // override element + if (fix_element) { + atom->SetElement((compound->GetAtomSpecs())[index].element); + } + } + std::sort(impl->GetAtomList().begin(), impl->GetAtomList().end(), + OrdinalAtomComp()); +} + +bool Processor::HasUnknownAtoms(mol::ResidueHandle res, + CompoundPtr compound, + bool strict_hydrogens) const +{ + AtomSpecList::const_iterator j=compound->GetAtomSpecs().begin(); + mol::AtomHandleList atoms=res.GetAtomList(); + mol::AtomHandleList::iterator i=atoms.begin(); + for (mol::AtomHandleList::iterator + i=atoms.begin(), e=atoms.end(); i!=e; ++i) { + if ((*i).Impl()->GetState()==std::numeric_limits<unsigned int>::max()) { + if (((*i).GetElement()=="H" || (*i).GetElement()=="D") && + !strict_hydrogens) { + continue; + } + return true; + } + } + return false; +} + +mol::AtomHandleList GetUnknownAtomsOfResidue(mol::ResidueHandle res, + CompoundPtr compound, + bool strict_hydrogens) +{ + mol::AtomHandleList atoms=res.GetAtomList(); + mol::AtomHandleList unks; + const AtomSpecList& atom_specs=compound->GetAtomSpecs(); + for (mol::AtomHandleList::const_iterator + j=atoms.begin(), e2=atoms.end(); j!=e2; ++j) { + bool found=false; + for (AtomSpecList::const_iterator + k=atom_specs.begin(), e3=atom_specs.end(); k!=e3; ++k) { + if (k->name==j->GetName() || k->alt_name==j->GetName()) { + found=true; + break; + } + } + if (!found) { + unks.push_back(*j); + } + } + return unks; +} + +void Processor::DistanceBasedConnect(mol::AtomHandle atom) const +{ + mol::EntityHandle ent=atom.GetEntity(); + mol::XCSEditor editor=ent.EditXCS(mol::BUFFERED_EDIT); + mol::AtomHandleList alist = ent.FindWithin(atom.GetPos(),4.0); + mol::ResidueHandle res_a=atom.GetResidue(); + for (mol::AtomHandleList::const_iterator it=alist.begin(), + e=alist.end();it!=e;++it) { + if (*it!=atom) { + if (IsBondFeasible(atom, *it)) { + if (Processor::AreResiduesConsecutive(res_a, it->GetResidue()) || + it->GetResidue()==res_a) { + editor.Connect(*it, atom); + } + } + } + } +} + +bool Processor::AreResiduesConsecutive(mol::ResidueHandle r1, + mol::ResidueHandle r2) +{ + if (!r1 || !r2) return false; // protect against invalid handles + if(r1.GetChain() != r2.GetChain()) return false; + return r2.GetNumber().GetNum()==r1.GetNumber().GetNum()+1 || + r2.GetNumber().GetInsCode()==r1.GetNumber().NextInsertionCode().GetInsCode(); +} + + }} diff --git a/modules/conop/src/processor.hh b/modules/conop/src/processor.hh index a63a5105f5cd549d8d8ac0584834b757c4d2fdeb..0bf71a89676cdefa4b8c615a98157ab5f6afbe06 100644 --- a/modules/conop/src/processor.hh +++ b/modules/conop/src/processor.hh @@ -22,7 +22,7 @@ #include <ost/mol/entity_handle.hh> #include "module_config.hh" #include "diag.hh" - +#include "compound.hh" namespace ost { namespace conop { typedef enum { @@ -53,14 +53,21 @@ protected: mol::EntityHandle ent) const { return true; } virtual bool EndProcessing(DiagnosticsPtr diags, mol::EntityHandle ent) const { return true; } - void ProcessUnkResidue(DiagnosticsPtr diags, - mol::ResidueHandle res) const; - void ProcessUnkAtoms(DiagnosticsPtr diags, - mol::AtomHandleList unks) const; + bool HasUnknownAtoms(mol::ResidueHandle residue, CompoundPtr compound, + bool strict_hydrogens) const; + void ReorderAtoms(mol::ResidueHandle residue, CompoundPtr compound, + bool fix_element) const; + void FillResidueProps(mol::ResidueHandle residue, CompoundPtr compound) const; + void ConnectAtomsOfResidue(mol::ResidueHandle residue, + CompoundPtr compound, bool strict_hydrogens) const; + void ConnectResidues(mol::ResidueHandle residue, mol::ResidueHandle next) const; + static bool AreResiduesConsecutive(mol::ResidueHandle a, mol::ResidueHandle b); + void DistanceBasedConnect(mol::AtomHandle atom) const; + mol::AtomHandle LocateAtom(const mol::AtomHandleList&, int ordinal) const; public: - Processor(): strict_hydrogens_(false), check_bond_feasibility_(false), - assign_torsions_(false), connect_(true), unk_atom_treatment_(CONOP_WARN), - unk_res_treatment_(CONOP_WARN), zero_occ_treatment_(CONOP_SILENT) {} + Processor(): check_bond_feasibility_(false), + assign_torsions_(false), connect_(true), + zero_occ_treatment_(CONOP_SILENT) {} void SetConnect(bool connect) { connect_ = connect; } @@ -74,37 +81,16 @@ public: bool GetAssignTorsions() const { return assign_torsions_; } - ConopAction GetUnkResidueTreatment() const { - return unk_res_treatment_; - } - - ConopAction GetUnkAtomTreatment() const { - return unk_atom_treatment_; - } bool GetCheckBondFeasibility() const { return check_bond_feasibility_; } - bool GetStrictHydrogens() const { - return strict_hydrogens_; - } - - void SetStrictHydrogens(bool flag) { - strict_hydrogens_ = flag; - } void SetCheckBondFeasibility(bool flag) { check_bond_feasibility_ = flag; } - void SetUnkResidueTreatment(ConopAction action) { - unk_res_treatment_ = action; - } - - void SetUnkAtomTreatment(ConopAction action) { - unk_atom_treatment_ = action; - } ConopAction GetZeroOccTreatment() const { return zero_occ_treatment_; @@ -114,18 +100,21 @@ public: zero_occ_treatment_ = action; } private: - bool strict_hydrogens_; bool check_bond_feasibility_; bool assign_torsions_; bool connect_; - ConopAction unk_atom_treatment_; - ConopAction unk_res_treatment_; ConopAction zero_occ_treatment_; }; ConopAction DLLEXPORT_OST_CONOP ConopActionFromString(const String& name); +/// \brief guess element of atom based on name and hetatm flag +String DLLEXPORT_OST_CONOP GuessAtomElement(const String& atom_name, bool hetatm); + +/// \brief guess chemclass based on atoms of residue +mol::ChemClass DLLEXPORT_OST_CONOP GuessChemClass(mol::ResidueHandle res); + /// \brief assigns phi/psi/omega to all residues marked peptide-linking of /// the chain /// @@ -138,6 +127,10 @@ void DLLEXPORT_OST_CONOP AssignBackboneTorsions(mol::ResidueHandle prev, bool DLLEXPORT_OST_CONOP IsBondFeasible(const mol::AtomHandle&, const mol::AtomHandle&); +mol::AtomHandleList DLLEXPORT_OST_CONOP +GetUnknownAtomsOfResidue(mol::ResidueHandle residue, + CompoundPtr compound, + bool strict_hydrogens=false); }} #endif diff --git a/modules/conop/src/rule_based.cc b/modules/conop/src/rule_based.cc index dffb8c8c411e247338b65dc27efbd89c34eee1fd..b9e3a78387076661d088c8b9780081093d8f30f5 100644 --- a/modules/conop/src/rule_based.cc +++ b/modules/conop/src/rule_based.cc @@ -29,12 +29,6 @@ namespace ost { namespace conop { -struct OrdinalAtomComp { - bool operator()(const mol::impl::AtomImplPtr& a, - const mol::impl::AtomImplPtr& b) const { - return a->GetState()<b->GetState(); - } -}; void RuleBasedProcessor::DoProcess(DiagnosticsPtr diags, @@ -56,8 +50,9 @@ void RuleBasedProcessor::DoProcess(DiagnosticsPtr diags, this->ProcessUnkResidue(diags, residue); continue; } - this->ReorderAtoms(residue, compound); - bool unks = this->HasUnknownAtoms(residue, compound); + this->ReorderAtoms(residue, compound, this->GetFixElement()); + bool unks = this->HasUnknownAtoms(residue, compound, + this->GetStrictHydrogens()); if (unks) { mol::AtomHandleList unk_atoms; unk_atoms = GetUnknownAtomsOfResidue(residue, compound, @@ -70,7 +65,8 @@ void RuleBasedProcessor::DoProcess(DiagnosticsPtr diags, } this->FillResidueProps(residue, compound); if (this->GetConnect()) { - this->ConnectAtomsOfResidue(residue, compound); + this->ConnectAtomsOfResidue(residue, compound, + this->GetStrictHydrogens()); this->ConnectResidues(prev, residue); } prev = residue; @@ -82,170 +78,71 @@ void RuleBasedProcessor::DoProcess(DiagnosticsPtr diags, } } -void RuleBasedProcessor::ConnectResidues(mol::ResidueHandle rh, - mol::ResidueHandle next) const +void RuleBasedProcessor::ProcessUnkResidue(DiagnosticsPtr diags, + mol::ResidueHandle res) const { - if (!next.IsValid() || !rh.IsValid()) { + // Don't do anything if treatment is set to SILENT + if (this->GetUnkResidueTreatment() == CONOP_SILENT) { return; } - - mol::XCSEditor e=rh.GetEntity().EditXCS(mol::BUFFERED_EDIT); - - // check if both of the residues are able to form a peptide bond. - if (rh.IsPeptideLinking() && next.IsPeptideLinking()) { - // If we have an OXT then there is no peptide bond connecting the two - // residues. - if (rh.FindAtom("OXT")) + switch (this->GetUnkResidueTreatment()) { + case CONOP_WARN: + diags->AddDiag(DIAG_UNK_RESIDUE, "unknown residue %0") + .AddResidue(res); + break; + case CONOP_FATAL: + // FIXME: Implement a ConopError based on Diag... + break; + case CONOP_REMOVE_RESIDUE: + case CONOP_REMOVE: + diags->AddDiag(DIAG_UNK_RESIDUE, "removed unknown residue %0") + .AddResidue(res); + res.GetEntity().EditXCS().DeleteResidue(res); return; - mol::AtomHandle c=rh.FindAtom("C"); - mol::AtomHandle n=next.FindAtom("N"); - // Give subclasses a chance to give us their opinions on the feasibility of - // the peptide bond. - if (c.IsValid() && n.IsValid() && IsBondFeasible(c, n)) { - e.Connect(c, n, 1); - rh.SetIsProtein(true); - next.SetIsProtein(true); - } - } else if (rh.IsNucleotideLinking() && next.IsNucleotideLinking()) { - mol::AtomHandle c=rh.FindAtom("O3'"); - mol::AtomHandle n=next.FindAtom("P"); - if (c.IsValid() && n.IsValid() && IsBondFeasible(c, n)) { - e.Connect(c, n, 1); - } + default: + assert(0 && "unhandled switch"); } } -void RuleBasedProcessor::FillResidueProps(mol::ResidueHandle residue, - CompoundPtr compound) const -{ - residue.SetChemClass(compound->GetChemClass()); - residue.SetChemType(compound->GetChemType()); - residue.SetOneLetterCode(compound->GetOneLetterCode()); -} - -mol::AtomHandle RuleBasedProcessor::LocateAtom(const mol::AtomHandleList& ahl, - int ordinal) const -{ - if (ahl.empty()) - return mol::AtomHandle(); - const mol::AtomHandle* r_it=&ahl.back(); - if (static_cast<int>(ahl.size())>ordinal) { - r_it=&ahl.front()+ordinal; - } - while ((r_it>=&ahl.front()) && - (static_cast<int>(r_it->Impl()->GetState())>ordinal)) { - --r_it; - } - bool not_found=(r_it<&ahl.front() || - static_cast<int>(r_it->Impl()->GetState())!=ordinal); - return not_found ? mol::AtomHandle() : *r_it; -} - - -void RuleBasedProcessor::ConnectAtomsOfResidue(mol::ResidueHandle rh, - CompoundPtr compound) const -{ - - //if (!compound) { - // dist_connect(this, rh.GetAtomList()); - // return; - //} - //if (unknown_atoms_) { - // dist_connect(this, rh.GetAtomList()); - // return; - //} - mol::XCSEditor e=rh.GetEntity().EditXCS(mol::BUFFERED_EDIT); - BondSpecList::const_iterator j=compound->GetBondSpecs().begin(); - mol::AtomHandleList atoms=rh.GetAtomList(); - for(; j!=compound->GetBondSpecs().end(); ++j) { - const BondSpec& bond=*j; - mol::AtomHandle a1=this->LocateAtom(atoms, bond.atom_one); - mol::AtomHandle a2=this->LocateAtom(atoms, bond.atom_two); - if (a1.IsValid() && a2.IsValid()) { - if (!this->GetCheckBondFeasibility()) { - if (this->GetStrictHydrogens() && (a1.GetElement()=="H" || - a2.GetElement()=="D")) { - continue; - } - e.Connect(a1, a2, bond.order); - } else { - if (IsBondFeasible(a1, a2)) { - if (this->GetStrictHydrogens() && (a1.GetElement()=="H" || - a2.GetElement()=="D")) { - continue; - } - e.Connect(a1, a2, bond.order); - } - } - } - } -} - - -void RuleBasedProcessor::ReorderAtoms(mol::ResidueHandle residue, - CompoundPtr compound) const +void RuleBasedProcessor::ProcessUnkAtoms(DiagnosticsPtr diags, + mol::AtomHandleList unks) const { - mol::impl::ResidueImplPtr impl=residue.Impl(); - mol::impl::AtomImplList::iterator i=impl->GetAtomList().begin(); - for (; i!=impl->GetAtomList().end(); ++i) { - mol::impl::AtomImplPtr atom=*i; - atom->SetState(std::numeric_limits<unsigned int>::max()); - int index=compound->GetAtomSpecIndex(atom->GetName()); - if (index==-1) { - atom->SetState(std::numeric_limits<unsigned int>::max()); - continue; - } - atom->SetState((compound->GetAtomSpecs())[index].ordinal); - // override element - if (this->GetFixElement()) { - atom->SetElement((compound->GetAtomSpecs())[index].element); - } + // Don't do anything if treatment is set to SILENT + if (this->GetUnkAtomTreatment() == CONOP_SILENT) { + return; } - std::sort(impl->GetAtomList().begin(), impl->GetAtomList().end(), - OrdinalAtomComp()); -} - -bool RuleBasedProcessor::HasUnknownAtoms(mol::ResidueHandle res, - CompoundPtr compound) const -{ - AtomSpecList::const_iterator j=compound->GetAtomSpecs().begin(); - mol::AtomHandleList atoms=res.GetAtomList(); - mol::AtomHandleList::iterator i=atoms.begin(); + + assert(!unks.empty() && "empty unk list"); + mol::XCSEditor edi = unks.front().GetEntity().EditXCS(); for (mol::AtomHandleList::iterator - i=atoms.begin(), e=atoms.end(); i!=e; ++i) { - if ((*i).Impl()->GetState()==std::numeric_limits<unsigned int>::max()) { - if (((*i).GetElement()=="H" || (*i).GetElement()=="D") && - !this->GetStrictHydrogens()) { - continue; - } - return true; - } - } - return false; -} - -mol::AtomHandleList GetUnknownAtomsOfResidue(mol::ResidueHandle res, - CompoundPtr compound, - bool strict_hydrogens) -{ - mol::AtomHandleList atoms=res.GetAtomList(); - mol::AtomHandleList unks; - const AtomSpecList& atom_specs=compound->GetAtomSpecs(); - for (mol::AtomHandleList::const_iterator - j=atoms.begin(), e2=atoms.end(); j!=e2; ++j) { - bool found=false; - for (AtomSpecList::const_iterator - k=atom_specs.begin(), e3=atom_specs.end(); k!=e3; ++k) { - if (k->name==j->GetName() || k->alt_name==j->GetName()) { - found=true; + i = unks.begin(), e = unks.end(); i != e; ++i) { + switch (this->GetUnkAtomTreatment()) { + case CONOP_REMOVE_ATOM: + case CONOP_REMOVE: + edi.DeleteAtom(*i); + diags->AddDiag(DIAG_UNK_ATOM, "removed unknown atom %0 from residue %1") + .AddString(i->GetName()).AddResidue(i->GetResidue()); break; - } - } - if (!found) { - unks.push_back(*j); + case CONOP_WARN: + diags->AddDiag(DIAG_UNK_ATOM, "residue %0 contains unknown atom %1") + .AddResidue(i->GetResidue()).AddString(i->GetName()); + break; + case CONOP_FATAL: + // FIXME: Implement a ConopError based on Diag... + break; + case CONOP_REMOVE_RESIDUE: + diags->AddDiag(DIAG_UNK_ATOM, "removed residue %0 with unknown atom %1") + .AddString(i->GetResidue().GetQualifiedName()) + .AddString(i->GetName()); + edi.DeleteResidue(i->GetResidue()); + return; + default: + assert(0 && "unhandled switch"); } } - return unks; } + + + }} diff --git a/modules/conop/src/rule_based.hh b/modules/conop/src/rule_based.hh index 2b857398e3caa3d57ceda1397816b2c010b1148d..6c17d7343a717846987aeda0301ebea77f29bfb9 100644 --- a/modules/conop/src/rule_based.hh +++ b/modules/conop/src/rule_based.hh @@ -37,38 +37,58 @@ typedef boost::shared_ptr<RuleBasedProcessor> RuleBasedProcessorPtr; class DLLEXPORT_OST_CONOP RuleBasedProcessor : public Processor { public: RuleBasedProcessor(CompoundLibPtr compound_lib): - lib_(compound_lib) + lib_(compound_lib), fix_element_(true), strict_hydrogens_(false), + unk_res_treatment_(CONOP_WARN), unk_atom_treatment_(CONOP_WARN) { } + ConopAction GetUnkResidueTreatment() const { + return unk_res_treatment_; + } + + ConopAction GetUnkAtomTreatment() const { + return unk_atom_treatment_; + } + bool GetFixElement() const { return fix_element_; } void SetFixElement(bool flag) { fix_element_ = flag; } + bool GetStrictHydrogens() const { + return strict_hydrogens_; + } + + void SetStrictHydrogens(bool flag) { + strict_hydrogens_ = flag; + } + void SetUnkResidueTreatment(ConopAction action) { + unk_res_treatment_ = action; + } + + void SetUnkAtomTreatment(ConopAction action) { + unk_atom_treatment_ = action; + } virtual ProcessorPtr Copy() const { return ProcessorPtr(new RuleBasedProcessor(*this)); } protected: + void ProcessUnkResidue(DiagnosticsPtr diags, + mol::ResidueHandle res) const; + void ProcessUnkAtoms(DiagnosticsPtr diags, + mol::AtomHandleList unks) const; virtual void DoProcess(DiagnosticsPtr diags, mol::EntityHandle ent) const; private: - bool HasUnknownAtoms(mol::ResidueHandle residue, CompoundPtr compound) const; - void ReorderAtoms(mol::ResidueHandle residue, CompoundPtr compound) const; - void FillResidueProps(mol::ResidueHandle residue, CompoundPtr compound) const; - void ConnectAtomsOfResidue(mol::ResidueHandle residue, CompoundPtr compound) const; - void ConnectResidues(mol::ResidueHandle residue, mol::ResidueHandle next) const; - mol::AtomHandle LocateAtom(const mol::AtomHandleList&, int ordinal) const; CompoundLibPtr lib_; bool fix_element_; + bool strict_hydrogens_; + ConopAction unk_res_treatment_; + ConopAction unk_atom_treatment_; }; -mol::AtomHandleList DLLEXPORT_OST_CONOP -GetUnknownAtomsOfResidue(mol::ResidueHandle residue, - CompoundPtr compound, - bool strict_hydrogens=false); }} #endif diff --git a/modules/conop/tests/CMakeLists.txt b/modules/conop/tests/CMakeLists.txt index d65212779729e7e12c306c30ee5084d67923a526..29140807826e25e812d312371013f9ca96e2b3bd 100644 --- a/modules/conop/tests/CMakeLists.txt +++ b/modules/conop/tests/CMakeLists.txt @@ -1,9 +1,8 @@ set(OST_CONOP_UNIT_TESTS - # test_heuristic_builder.cc + test_heuristic_conop.cc tests.cc - test_rule_based_conop.cc - # test_builder.cc - helper.cc + test_rule_based_conop.cc + helper.cc test_compound.py test_cleanup.py test_processor.py diff --git a/modules/conop/tests/helper.cc b/modules/conop/tests/helper.cc index 9090a74532bf058f332f73d4f1e486ebbe5cea2d..bc3b4af781adbda8eae79437d0b9d552b28cb0fa 100644 --- a/modules/conop/tests/helper.cc +++ b/modules/conop/tests/helper.cc @@ -327,5 +327,55 @@ void verify_1zk_connectivity(const ResidueHandle& r1) BOOST_CHECK(BondExists(r1.FindAtom("CD"), r1.FindAtom("N3"))); } +ResidueHandle make_arg(ChainHandle chain) +{ + XCSEditor e=chain.GetEntity().EditXCS(); + ResidueHandle res = e.AppendResidue(chain, "ARG"); + e.InsertAtom(res, "N",geom::Vec3(20.202,33.112,58.011)); + e.InsertAtom(res, "CA",geom::Vec3(19.396,31.903,58.033)); + e.InsertAtom(res, "C",geom::Vec3(18.608,31.739,59.328)); + e.InsertAtom(res, "O",geom::Vec3(17.651,30.965,59.381)); + e.InsertAtom(res, "CB",geom::Vec3(20.284,30.681,57.801)); + e.InsertAtom(res, "CG",geom::Vec3(20.665,30.488,56.342)); + e.InsertAtom(res, "CD",geom::Vec3(21.557,29.281,56.154)); + e.InsertAtom(res, "NE",geom::Vec3(22.931,29.557,56.551)); + e.InsertAtom(res, "CZ",geom::Vec3(23.901,28.653,56.528)); + e.InsertAtom(res, "NH1",geom::Vec3(23.640,27.417,56.130)); + e.InsertAtom(res, "NH2",geom::Vec3(25.132,28.980,56.893)); + return res; +} + +ResidueHandle make_leu(ChainHandle chain) +{ + XCSEditor e=chain.GetEntity().EditXCS(); + ResidueHandle res=e.AppendResidue(chain, "LEU"); + + e.InsertAtom(res, "N", geom::Vec3(19.003,32.473,60.366)); + e.InsertAtom(res, "CA", geom::Vec3(18.330,32.402,61.664)); + e.InsertAtom(res, "C", geom::Vec3(17.884,33.787,62.117)); + e.InsertAtom(res, "O", geom::Vec3(17.853,34.091,63.308)); + e.InsertAtom(res, "CB", geom::Vec3(19.269,31.793,62.710)); + e.InsertAtom(res, "CG", geom::Vec3(19.695,30.340,62.501)); + e.InsertAtom(res, "CD1", geom::Vec3(20.585,29.897,63.648)); + e.InsertAtom(res, "CD2", geom::Vec3(18.461,29.459,62.420)); + return res; +} + +ResidueHandle make_defective_leu(ChainHandle chain) +{ + XCSEditor e=chain.GetEntity().EditXCS(); + ResidueHandle res=e.AppendResidue(chain, "LEU"); + + e.InsertAtom(res, "N", geom::Vec3(19.003,32.473,60.366)); + e.InsertAtom(res, "CA", geom::Vec3(18.330,32.402,61.664)); + e.InsertAtom(res, "C", geom::Vec3(17.884,33.787,62.117)); + e.InsertAtom(res, "O", geom::Vec3(17.853,34.091,63.308)); + e.InsertAtom(res, "CB", geom::Vec3(19.269,31.793,102.710)); + e.InsertAtom(res, "CG", geom::Vec3(19.695,30.340,62.501)); + e.InsertAtom(res, "CD1", geom::Vec3(20.585,29.897,63.648)); + e.InsertAtom(res, "CD2", geom::Vec3(18.461,29.459,62.420)); + return res; +} + }} diff --git a/modules/conop/tests/helper.hh b/modules/conop/tests/helper.hh index 43b70ed306ead19127f2f5552f51ec0509210447..a4ad2309c340f35149579c58dc872ced07c27cf6 100644 --- a/modules/conop/tests/helper.hh +++ b/modules/conop/tests/helper.hh @@ -12,6 +12,9 @@ mol::ResidueHandle make_cytosine(mol::ChainHandle chain); mol::ResidueHandle make_uracil1(mol::ChainHandle chain); mol::ResidueHandle make_uracil2(mol::ChainHandle chain); mol::ResidueHandle make_defective_uracil2(mol::ChainHandle chain); +mol::ResidueHandle make_leu(mol::ChainHandle chain); +mol::ResidueHandle make_arg(mol::ChainHandle chain); +mol::ResidueHandle make_defective_leu(mol::ChainHandle chain); mol::ResidueHandle make_1zk(mol::ChainHandle chain); void verify_1zk_connectivity(const mol::ResidueHandle& r1); void verify_nucleotide_connectivity(const mol::ResidueHandle& res); diff --git a/modules/conop/tests/test_heuristic_conop.cc b/modules/conop/tests/test_heuristic_conop.cc new file mode 100644 index 0000000000000000000000000000000000000000..4943393161d1e3f8911073d67a6f261ccc091f2f --- /dev/null +++ b/modules/conop/tests/test_heuristic_conop.cc @@ -0,0 +1,144 @@ +// ------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> + +// Copyright (C) 2008-2011 by the OpenStructure authors + +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. + +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +// ------------------------------------------------------------------------------ + +#include <ost/log.hh> +#include <ost/mol/mol.hh> +#include <ost/conop/heuristic.hh> + +#define BOOST_TEST_DYN_LINK +#include <boost/test/unit_test.hpp> +#include <boost/test/auto_unit_test.hpp> +#include "helper.hh" +using boost::unit_test_framework::test_suite; +using namespace ost; +using namespace ost::conop; +using namespace ost::mol; + + + +void verify_connectivity_x(const ResidueHandle& res) +{ + BOOST_CHECK(BondExists(res.FindAtom("XN"), + res.FindAtom("XCA"))); + BOOST_CHECK(BondExists(res.FindAtom("XCA"), + res.FindAtom("XC"))); + BOOST_CHECK(BondExists(res.FindAtom("XC"), + res.FindAtom("XO"))); + BOOST_CHECK(BondExists(res.FindAtom("XCA"), + res.FindAtom("XCB"))); + BOOST_CHECK(BondExists(res.FindAtom("XCB"), + res.FindAtom("XCG"))); + if (res.GetKey()=="ARG") { + + BOOST_CHECK(BondExists(res.FindAtom("XCB"), + res.FindAtom("XCG"))); + BOOST_CHECK(BondExists(res.FindAtom("XCG"), + res.FindAtom("XCD"))); + BOOST_CHECK(BondExists(res.FindAtom("XCD"), + res.FindAtom("XNE"))); + BOOST_CHECK(BondExists(res.FindAtom("XNE"), + res.FindAtom("XCZ"))); + BOOST_CHECK(BondExists(res.FindAtom("XCZ"), + res.FindAtom("XNH1"))); + BOOST_CHECK(BondExists(res.FindAtom("XCZ"), + res.FindAtom("XNH2"))); + // TODO: Check that no other atoms are connected! + } + if (res.GetKey()=="ILE") { + BOOST_CHECK(BondExists(res.FindAtom("XCG"), + res.FindAtom("XCD1"))); + BOOST_CHECK(BondExists(res.FindAtom("XCG"), + res.FindAtom("XCD2"))); + // TODO: Check that no other atoms are connected! + } +} + +void verify_connectivity(const ResidueHandle& res) +{ + BOOST_CHECK(BondExists(res.FindAtom("N"), + res.FindAtom("CA"))); + BOOST_CHECK(BondExists(res.FindAtom("CA"), + res.FindAtom("C"))); + BOOST_CHECK(BondExists(res.FindAtom("C"), + res.FindAtom("O"))); + BOOST_CHECK(BondExists(res.FindAtom("CA"), + res.FindAtom("CB"))); + BOOST_CHECK(BondExists(res.FindAtom("CB"), + res.FindAtom("CG"))); + if (res.GetKey()=="ARG") { + + BOOST_CHECK(BondExists(res.FindAtom("CB"), + res.FindAtom("CG"))); + BOOST_CHECK(BondExists(res.FindAtom("CG"), + res.FindAtom("CD"))); + BOOST_CHECK(BondExists(res.FindAtom("CD"), + res.FindAtom("NE"))); + BOOST_CHECK(BondExists(res.FindAtom("NE"), + res.FindAtom("CZ"))); + BOOST_CHECK(BondExists(res.FindAtom("CZ"), + res.FindAtom("NH1"))); + BOOST_CHECK(BondExists(res.FindAtom("CZ"), + res.FindAtom("NH2"))); + // TODO: Check that no other atoms are connected! + } + if (res.GetKey()=="ILE") { + BOOST_CHECK(BondExists(res.FindAtom("CG"), + res.FindAtom("CD1"))); + BOOST_CHECK(BondExists(res.FindAtom("CG"), + res.FindAtom("CD2"))); + // TODO: Check that no other atoms are connected! + } +} + +BOOST_AUTO_TEST_SUITE( conop ); + + +BOOST_AUTO_TEST_CASE(does_name_based_connect) +{ + EntityHandle e=CreateEntity(); + ChainHandle c=e.EditXCS().InsertChain("A"); + ResidueHandle ile=make_leu(c); + ResidueHandle arg=make_arg(c); + HeuristicProcessor proc; + proc.Process(e); + + verify_connectivity(arg); + verify_connectivity(ile); +} + +BOOST_AUTO_TEST_CASE(does_assign_torsions) { + EntityHandle e=CreateEntity(); + ChainHandle c=e.EditXCS().InsertChain("A"); + ResidueHandle l1=make_leu(c); + ResidueHandle a2=make_arg(c); + ResidueHandle l3=make_leu(c); + e.EditXCS().Connect(l1.FindAtom("C"), a2.FindAtom("N")); + e.EditXCS().Connect(a2.FindAtom("C"), l3.FindAtom("N")); + HeuristicProcessor proc; + proc.SetAssignTorsions(true); + proc.Process(e); + BOOST_CHECK(a2.GetPhiTorsion().IsValid()); + BOOST_CHECK(l3.GetPhiTorsion().IsValid()); + + BOOST_CHECK(l1.GetPsiTorsion().IsValid()); + BOOST_CHECK(a2.GetPsiTorsion().IsValid()); +} + +BOOST_AUTO_TEST_SUITE_END(); + diff --git a/modules/io/pymod/__init__.py b/modules/io/pymod/__init__.py index 616f04c087feb24b1347353f69f161464b0ea194..c0ea293daa1fde9184b54443e213464b695b8ec6 100644 --- a/modules/io/pymod/__init__.py +++ b/modules/io/pymod/__init__.py @@ -44,12 +44,16 @@ class IOProfiles: if not profiles: profiles=IOProfiles() + if conop.GetDefaultLib(): + processor = conop.RuleBasedProcessor(conop.GetDefaultLib()) + else: + processor = conop.HeuristicProcessor() profiles['STRICT']=IOProfile(dialect='PDB', fault_tolerant=False, - strict_hydrogens=False, quack_mode=False) + quack_mode=False, processor=processor) profiles['SLOPPY']=IOProfile(dialect='PDB', fault_tolerant=True, - strict_hydrogens=False, quack_mode=True) + quack_mode=True, processor=processor) profiles['CHARMM']=IOProfile(dialect='CHARMM', fault_tolerant=True, - strict_hydrogens=False, quack_mode=False) + quack_mode=False, processor=processor) profiles['DEFAULT']='STRICT' def _override(val1, val2): @@ -86,7 +90,7 @@ def LoadPDB(filename, restrict_chains="", no_hetatms=None, fault_tolerant=None, load_multi=False, quack_mode=None, join_spread_atom_records=None, calpha_only=None, profile='DEFAULT', remote=False, dialect=None, - strict_hydrogens=None, seqres=False, bond_feasibility_check=None): + seqres=False, bond_feasibility_check=None): """ Load PDB file from disk and return one or more entities. Several options allow to customize the exact behaviour of the PDB import. For more information @@ -124,8 +128,6 @@ def LoadPDB(filename, restrict_chains="", no_hetatms=None, :type dialect: :class:`str` - :param strict_hydrogens: If set, overrides the value of - :attr:`IOProfile.strict_hydrogens`. :raises: :exc:`~ost.io.IOException` if the import fails due to an erroneous or inexistent file @@ -149,9 +151,7 @@ def LoadPDB(filename, restrict_chains="", no_hetatms=None, prof.dialect=_override(prof.dialect, dialect) prof.quack_mode=_override(prof.quack_mode, quack_mode) if prof.processor: - prof.processor.strict_hydrogens=_override(prof.processor.strict_hydrogens, - strict_hydrogens) - prof.processor.check_bond_feasibilityk=_override(prof.processor.check_bond_feasibility, + prof.processor.check_bond_feasibility=_override(prof.processor.check_bond_feasibility, bond_feasibility_check) prof.fault_tolerant=_override(prof.fault_tolerant, fault_tolerant) prof.join_spread_atom_records=_override(prof.join_spread_atom_records, @@ -281,7 +281,7 @@ def LoadCHARMMTraj(crd, dcd_file=None, profile='CHARMM', raise ValueError("No DCD filename given") return LoadCHARMMTraj_(crd, dcd_file, stride, lazy_load, detect_swap, swap_bytes) -def LoadMMCIF(filename, restrict_chains="", fault_tolerant=None, calpha_only=None, profile='DEFAULT', remote=False, strict_hydrogens=None, seqres=False, info=False): +def LoadMMCIF(filename, restrict_chains="", fault_tolerant=None, calpha_only=None, profile='DEFAULT', remote=False, seqres=False, info=False): """ Load MMCIF file from disk and return one or more entities. Several options allow to customize the exact behaviour of the MMCIF import. For more @@ -301,9 +301,6 @@ def LoadMMCIF(filename, restrict_chains="", fault_tolerant=None, calpha_only=Non :rtype: :class:`~ost.mol.EntityHandle`. - :param strict_hydrogens: If set, overrides the value of - :attr:`IOProfile.strict_hydrogens`. - :param seqres: Whether to read SEQRES records. If set to True, the loaded entity and seqres entry will be returned as second item. @@ -324,9 +321,6 @@ def LoadMMCIF(filename, restrict_chains="", fault_tolerant=None, calpha_only=Non prof = profile.Copy() prof.calpha_only=_override(prof.calpha_only, calpha_only) - if prof.processor: - proc = prof.processor - proc.strict_hydrogens=_override(proc.strict_hydrogens, strict_hydrogens) prof.fault_tolerant=_override(prof.fault_tolerant, fault_tolerant) if remote: diff --git a/modules/io/pymod/export_pdb_io.cc b/modules/io/pymod/export_pdb_io.cc index 55c842c875415c8d81c412ac8431b53105c50ced..a80aff690a098556f857a7536d4dfdd4e814c47d 100644 --- a/modules/io/pymod/export_pdb_io.cc +++ b/modules/io/pymod/export_pdb_io.cc @@ -36,9 +36,8 @@ void (PDBWriter::*write_b)(const mol::EntityView&)=&PDBWriter::Write; void export_pdb_io() { class_<IOProfile>("IOProfile", - init<String,bool,bool,bool,bool,bool,bool,bool, + init<String,bool,bool,bool,bool,bool,bool, conop::ProcessorPtr>((arg("dialect")="PDB", - arg("strict_hydrogens")=false, arg("quack_mode")=false, arg("fault_tolerant")=false, arg("join_spread_atom_records")=false, @@ -49,12 +48,10 @@ void export_pdb_io() .def_readwrite("dialect", &IOProfile::dialect) .def_readwrite("fault_tolerant", &IOProfile::fault_tolerant) .def_readwrite("quack_mode", &IOProfile::quack_mode) - //.def_readwrite("strict_hydrogens", &IOProfile::strict_hydrogens) .def_readwrite("no_hetatms", &IOProfile::no_hetatms) .def_readwrite("calpha_only", &IOProfile::calpha_only) .def_readwrite("join_spread_atom_records", &IOProfile::join_spread_atom_records) .def_readwrite("processor", &IOProfile::processor) - //.def_readwrite("bond_feasibility_check", &IOProfile::bond_feasibility_check) .def("Copy", &IOProfile::Copy) .def(self_ns::str(self)) ; diff --git a/modules/io/src/mol/io_profile.hh b/modules/io/src/mol/io_profile.hh index 0bec6ba70c81a7935cac2bd226db36a57116603e..b3c43b6db4dca42b2575a355b42200ede7409a51 100644 --- a/modules/io/src/mol/io_profile.hh +++ b/modules/io/src/mol/io_profile.hh @@ -30,16 +30,15 @@ namespace ost { namespace io { struct DLLEXPORT IOProfile { public: - IOProfile(String d, bool sh, bool qm, bool ft, bool js, bool nh, + IOProfile(String d, bool qm, bool ft, bool js, bool nh, bool co, bool bf, conop::ProcessorPtr proc=conop::ProcessorPtr()): dialect(d), quack_mode(qm), fault_tolerant(ft), join_spread_atom_records(js), no_hetatms(nh), calpha_only(co), processor(proc) { if (!processor) return; processor->SetCheckBondFeasibility(bf); - processor->SetStrictHydrogens(sh); - //processor->SetQuackMode(qm); } + IOProfile(): dialect("PDB"), quack_mode(false), fault_tolerant(false), join_spread_atom_records(false), no_hetatms(false), calpha_only(false), processor() @@ -55,8 +54,7 @@ public: IOProfile Copy() { - return IOProfile(dialect, processor ? processor->GetStrictHydrogens() : false, - quack_mode, fault_tolerant, join_spread_atom_records, + return IOProfile(dialect, quack_mode, fault_tolerant, join_spread_atom_records, no_hetatms, calpha_only, processor ? processor->GetCheckBondFeasibility() : false, processor ? processor->Copy() : conop::ProcessorPtr()); diff --git a/modules/io/tests/test_io_pdb.cc b/modules/io/tests/test_io_pdb.cc index 09e0b44748e26d70659c5d43ceb7c32a92ed35be..111d797981ad92135fd7ac4d3320113e4d496f63 100644 --- a/modules/io/tests/test_io_pdb.cc +++ b/modules/io/tests/test_io_pdb.cc @@ -924,7 +924,7 @@ BOOST_AUTO_TEST_CASE(charmm_rname) { { PDBWriter writer(String("testfiles/pdb/charmm_rname-out.pdb"), - IOProfile("CHARMM", true, false, false, + IOProfile("CHARMM", false, false, false, false, false, true)); mol::EntityHandle ent=mol::CreateEntity(); @@ -944,7 +944,7 @@ BOOST_AUTO_TEST_CASE(charmm_longcname) { { PDBWriter writer(String("testfiles/pdb/charmm_longcname-out.pdb"), - IOProfile("CHARMM", true, false, false, + IOProfile("CHARMM", false, false, false, false, false, true)); mol::EntityHandle ent=mol::CreateEntity(); @@ -964,7 +964,7 @@ BOOST_AUTO_TEST_CASE(write_charmm_ter) { { PDBWriter writer(String("testfiles/pdb/charmm_ter-out.pdb"), - IOProfile("CHARMM", true, false, false, + IOProfile("CHARMM", false, false, false, false, false, true)); mol::EntityHandle ent=mol::CreateEntity(); diff --git a/modules/io/tests/test_io_pdb.py b/modules/io/tests/test_io_pdb.py index 348fde1d929669405e612cb92d3a274ceb5636f1..b446099a88feaea32650809080bb9a76bee2abab 100644 --- a/modules/io/tests/test_io_pdb.py +++ b/modules/io/tests/test_io_pdb.py @@ -11,15 +11,13 @@ class TestPDB(unittest.TestCase): ch = e.FindChain("A"); self.assertEquals(ch.GetIntProp("mol_id"), 1) -class TestPDB(unittest.TestCase): - def setUp(self): - pass - - def test_compnd_parser(self): + def test_no_bond_feasibility(self): profiles=io.IOProfiles() - profiles['NO_FEAS_CHECK']=io.IOProfile(bond_feasibility_check=False) + profiles['NO_FEAS_CHECK']=io.IOProfile(bond_feasibility_check=False, + processor=conop.HeuristicProcessor()) - e=io.LoadPDB('testfiles/pdb/simple_defective.pdb', restrict_chains="A",profile='NO_FEAS_CHECK') + e=io.LoadPDB('testfiles/pdb/simple_defective.pdb', restrict_chains="A", + profile='NO_FEAS_CHECK') res=e.FindResidue('A',3) diff --git a/modules/io/tests/testfiles/pdb/ter4.pdb b/modules/io/tests/testfiles/pdb/ter4.pdb index a088dd23a776b33aacab610917ab1cbb5b99c53a..cb2209f24a71bd13022a324d48fbeaafd679b6d6 100644 --- a/modules/io/tests/testfiles/pdb/ter4.pdb +++ b/modules/io/tests/testfiles/pdb/ter4.pdb @@ -1,24 +1,24 @@ -ATOM 1 CB ARG A 5 54.221 28.817 46.886 1.00 80.23 C -ATOM 2 CG ARG A 5 54.014 28.661 48.384 1.00 79.76 C -ATOM 3 CD ARG A 5 53.901 29.993 49.091 1.00 79.93 C -ATOM 4 NE ARG A 5 55.153 30.743 49.053 1.00 79.60 N -ATOM 5 CZ ARG A 5 55.375 31.851 49.753 1.00 79.44 C -ATOM 6 NH1 ARG A 5 54.425 32.333 50.545 1.00 79.21 N -ATOM 7 NH2 ARG A 5 56.543 32.475 49.661 1.00 79.57 N -ATOM 8 C ARG A 5 54.203 27.642 44.665 1.00 80.18 C -ATOM 9 O ARG A 5 54.697 26.854 43.860 1.00 80.24 O -ATOM 10 N ARG A 5 55.792 26.946 46.486 1.00 80.39 N -ATOM 11 CA ARG A 5 54.437 27.479 46.167 1.00 80.33 C +ATOM 1 N ARG A 5 55.792 26.946 46.486 1.00 80.39 N +ATOM 2 CA ARG A 5 54.437 27.479 46.167 1.00 80.33 C +ATOM 3 C ARG A 5 54.203 27.642 44.665 1.00 80.18 C +ATOM 4 O ARG A 5 54.697 26.854 43.860 1.00 80.24 O +ATOM 5 CB ARG A 5 54.221 28.817 46.886 1.00 80.23 C +ATOM 6 CG ARG A 5 54.014 28.661 48.384 1.00 79.76 C +ATOM 7 CD ARG A 5 53.901 29.993 49.091 1.00 79.93 C +ATOM 8 NE ARG A 5 55.153 30.743 49.053 1.00 79.60 N +ATOM 9 CZ ARG A 5 55.375 31.851 49.753 1.00 79.44 C +ATOM 10 NH1 ARG A 5 54.425 32.333 50.545 1.00 79.21 N +ATOM 11 NH2 ARG A 5 56.543 32.475 49.661 1.00 79.57 N ATOM 12 N SER A 6 53.441 28.666 44.297 1.00 80.44 N ATOM 13 CA SER A 6 53.129 28.922 42.897 1.00 80.22 C -ATOM 14 CB SER A 6 52.450 30.285 42.745 1.00 79.46 C -ATOM 15 OG SER A 6 52.075 30.513 41.398 1.00 79.08 O -ATOM 16 C SER A 6 54.371 28.872 42.015 1.00 80.20 C -ATOM 17 O SER A 6 54.274 28.620 40.812 1.00 80.66 O +ATOM 14 C SER A 6 54.371 28.872 42.015 1.00 80.20 C +ATOM 15 O SER A 6 54.274 28.620 40.812 1.00 80.66 O +ATOM 16 CB SER A 6 52.450 30.285 42.745 1.00 79.46 C +ATOM 17 OG SER A 6 52.075 30.513 41.398 1.00 79.08 O TER 18 SER A 6 ATOM 19 C44 MC3 B 264 39.239 27.459 24.810 1.00 79.54 C ATOM 20 C43 MC3 B 264 38.743 26.476 23.747 1.00 80.47 C ATOM 21 C44 MC3 C 265 62.635 29.009 23.846 1.00 81.10 C ATOM 22 C43 MC3 C 265 63.065 29.658 22.529 1.00 79.41 C ATOM 23 C42 MC3 C 265 61.857 29.941 21.636 1.00 79.32 C -END \ No newline at end of file +END