diff --git a/.gitignore b/.gitignore index ae5ec6bffbcb2ec43587ee1e400c83dcc2758019..999865f42d6427ffadb3da685ee7fee4c340848d 100644 --- a/.gitignore +++ b/.gitignore @@ -43,6 +43,7 @@ components.cif compounds.chemlib /deployment/macos/openstructure.dmg /scripts/dng +/modules/gui/src/module_config.hh *.vcproj /scripts/ost_config *.user @@ -62,3 +63,6 @@ Debug /modules/io/tests/temp_img.tmp PYTEST-*.xml ost_*_tests_log.xml +rules.ninja +build.ninja +modules/gui/src/dngr.qrc.depends diff --git a/CMakeLists.txt b/CMakeLists.txt index e85e030914f16a4f017af71db0a176646cb0b798..c8651ec834cfe3a60a435c603e234a1c6dcaf615 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -315,6 +315,10 @@ set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES #ost_match_boost_python_version(${PYTHON_LIBRARIES}) +if (CMAKE_COMPILER_IS_GNUCXX) + set(HIDDEN_VIS_MSG + "\n Hidden object visibility (-DHIDDEN_VISIBILITY) : ${_HIDDEN_VIS}") +endif() message(STATUS "OpenStructure will be built with the following options:\n" " Install Prefix (-DPREFIX) : ${CMAKE_INSTALL_PREFIX}\n" @@ -333,9 +337,6 @@ message(STATUS " Compound Lib (-DCOMPOUND_LIB) : ${_COMP_LIB}\n" " TMAlign and TMScore (-DCOMPILE_TMTOOLS) : ${_TM_TOOLS}\n" " Static Libraries (-DENABLE_STATIC) : ${ENABLE_STATIC}\n" - " Debian-style 'libexec' (-DDEBIAN_STYLE_LIBEXEC) : ${_DEBIAN_STYLE_LIBEXEC}") -if (CMAKE_COMPILER_IS_GNUCXX) - message(STATUS - " Hidden object visibility (-DHIDDEN_VISIBILITY) : ${_HIDDEN_VIS}") -endif() + " Debian-style 'libexec' (-DDEBIAN_STYLE_LIBEXEC) : ${_DEBIAN_STYLE_LIBEXEC}" + ${HIDDEN_VIS_MSG}) diff --git a/modules/conop/doc/functions.rst b/modules/conop/doc/functions.rst new file mode 100644 index 0000000000000000000000000000000000000000..661dc904b53881edea90faf70b76c0757a28362e --- /dev/null +++ b/modules/conop/doc/functions.rst @@ -0,0 +1,34 @@ +Conop Functions +======================================================================= + + +.. function:: AssignBackboneTorsions(prev, res, next) + AssignBackboneTorsions(chain) + AssignBackboneTorsions(residues) + + Assigns the backbone torsions PHI, PSI and OMEGA. The backbone atoms + are required to be connected for the torsions to be added. In addition, + only residues for which :method:`ResidueHandle.IsPeptideLinking` returns + true are considered. + + The first signature assigns the torsions to *res*, assuming prev is + the amino acid before, and *next* is the amino acid next to *res*. + Both *next* and *prev* may be invalid residues. + + The second and third signature assign the torsions to all peptidic + residues of the chain/the residue list. The residues in the chain, + the residue list are thought to run from N to C terminus. + + :param prev: The amino acid before *res* + :type prev: :class:`~mol.ResidueHandle` + :type next: :class:`~mol.ResidueHandle` + :param next: The amino acid after *res* + :type res: :class:`~mol.ResidueHandle` + :type res: The central amino acid. Must be a valid handle + +.. function:: GetUnknownAtomsOfResidue(residue, compound, strict_hydrogens=False) + + Returns the list of atoms present in *residue* that are not part of the + atom specifications in compound. + + :param strict_hydrogens: When set to true, hydrogen atoms are checked as well. diff --git a/modules/conop/pymod/export_rule_based.cc b/modules/conop/pymod/export_rule_based.cc index 74112149bc8bd43b5b5bf403258a01f4cd6b296b..06b0e7b26d344f0d46755f39a775d42907838001 100644 --- a/modules/conop/pymod/export_rule_based.cc +++ b/modules/conop/pymod/export_rule_based.cc @@ -6,7 +6,7 @@ using namespace ost::conop; void export_rule_based() { - class_<RuleBasedConop>("RuleBasedConop", init<CompoundLibPtr>()) - .def("Process", &RuleBasedConop::Process) + class_<RuleBasedProcessor>("RuleBasedProcessor", init<CompoundLibPtr>()) + .def("Process", &RuleBasedProcessor::Process) ; } diff --git a/modules/conop/src/CMakeLists.txt b/modules/conop/src/CMakeLists.txt index 55eb837f9ed69195ba9d667a87b39b514274ef48..dfa77f4490545802c73b5cfa1ebd14a5fa511601 100644 --- a/modules/conop/src/CMakeLists.txt +++ b/modules/conop/src/CMakeLists.txt @@ -2,6 +2,7 @@ set(OST_CONOP_HEADERS builder.hh builder_fw.hh conop.hh +processor.hh heuristic_builder.hh amino_acids.hh diag.hh @@ -17,6 +18,7 @@ ring_finder.hh set(OST_CONOP_SOURCES builder.cc +processor.cc amino_acids.cc conop.cc diag.cc diff --git a/modules/conop/src/diag.cc b/modules/conop/src/diag.cc index 75245fc93aa9414b0a8e1229a3b106957dd8ec30..470673bf73f3c6bcb7acc0836839c25c7d740336 100644 --- a/modules/conop/src/diag.cc +++ b/modules/conop/src/diag.cc @@ -29,7 +29,7 @@ String Diag::Format(bool colored) const switch (i->type) { case DIAG_ARG_TYPE_ATOM: if (colored) { - strings.push_back("\033[0;30m"+ + strings.push_back("\033[0;32m"+ atoms_[i->index].GetQualifiedName()+"\033[0m"); } else { strings.push_back(atoms_[i->index].GetQualifiedName()); @@ -37,7 +37,7 @@ String Diag::Format(bool colored) const break; case DIAG_ARG_TYPE_RESIDUE: if (colored) { - strings.push_back("\033[0;30m"+ + strings.push_back("\033[0;32m"+ residues_[i->index].GetQualifiedName()+"\033[0m"); } else { strings.push_back(residues_[i->index].GetQualifiedName()); @@ -45,7 +45,7 @@ String Diag::Format(bool colored) const break; case DIAG_ARG_TYPE_CHAIN: if (colored) { - strings.push_back("\033[0;30m"+ + strings.push_back("\033[0;32m"+ chains_[i->index].GetName()+"\033[0m"); } else { strings.push_back(chains_[i->index].GetName()); diff --git a/modules/conop/src/diag.hh b/modules/conop/src/diag.hh index 58f4765543c389b27c5925c9df7068bc2708e9dd..1e749f3d8953c5db0e673f335aa2134567b13480 100644 --- a/modules/conop/src/diag.hh +++ b/modules/conop/src/diag.hh @@ -1,4 +1,4 @@ -#//------------------------------------------------------------------------------ +//------------------------------------------------------------------------------ // This file is part of the OpenStructure project <www.openstructure.org> // // Copyright (C) 2008-2011 by the OpenStructure authors @@ -18,6 +18,7 @@ //------------------------------------------------------------------------------ #ifndef OST_CONOP_DIAG_HH #define OST_CONOP_DIAG_HH + #include <ost/mol/atom_handle.hh> #include <ost/mol/residue_handle.hh> #include <ost/mol/chain_handle.hh> @@ -26,11 +27,11 @@ namespace ost { namespace conop { typedef enum { - DIAG_ARG_TYPE_ATOM, - DIAG_ARG_TYPE_RESIDUE, - DIAG_ARG_TYPE_CHAIN, - DIAG_ARG_TYPE_STRING, - DIAG_ARG_TYPE_INT + DIAG_ARG_TYPE_ATOM, + DIAG_ARG_TYPE_RESIDUE, + DIAG_ARG_TYPE_CHAIN, + DIAG_ARG_TYPE_STRING, + DIAG_ARG_TYPE_INT } DiagArgType; typedef enum { @@ -42,39 +43,39 @@ typedef enum { class DLLEXPORT_OST_CONOP Diag { public: - Diag(DiagType typ, const char* fmt): type_(typ), format_(fmt) {} + Diag(DiagType typ, const char* fmt): type_(typ), format_(fmt) {} DiagType GetType() const { return type_; } - Diag& AddAtom(mol::AtomHandle atom) - { - atoms_.push_back(atom); - args_.push_back(ArgDesc(atoms_.size()-1, DIAG_ARG_TYPE_ATOM)); - return *this; - } + Diag& AddAtom(mol::AtomHandle atom) + { + atoms_.push_back(atom); + args_.push_back(ArgDesc(atoms_.size()-1, DIAG_ARG_TYPE_ATOM)); + return *this; + } - Diag& AddResidue(mol::ResidueHandle res) - { - residues_.push_back(res); - args_.push_back(ArgDesc(residues_.size()-1, DIAG_ARG_TYPE_RESIDUE)); - return *this; - } - Diag& AddChain(mol::ChainHandle chain) - { - chains_.push_back(chain); - args_.push_back(ArgDesc(chains_.size()-1, DIAG_ARG_TYPE_CHAIN)); - return *this; - } - Diag& AddInt(int int_val) - { - ints_.push_back(int_val); - args_.push_back(ArgDesc(ints_.size()-1, DIAG_ARG_TYPE_INT)); - return *this; - } - Diag& AddString(const String& str) - { - strings_.push_back(str); - args_.push_back(ArgDesc(strings_.size()-1, DIAG_ARG_TYPE_STRING)); - return *this; - } + Diag& AddResidue(mol::ResidueHandle res) + { + residues_.push_back(res); + args_.push_back(ArgDesc(residues_.size()-1, DIAG_ARG_TYPE_RESIDUE)); + return *this; + } + Diag& AddChain(mol::ChainHandle chain) + { + chains_.push_back(chain); + args_.push_back(ArgDesc(chains_.size()-1, DIAG_ARG_TYPE_CHAIN)); + return *this; + } + Diag& AddInt(int int_val) + { + ints_.push_back(int_val); + args_.push_back(ArgDesc(ints_.size()-1, DIAG_ARG_TYPE_INT)); + return *this; + } + Diag& AddString(const String& str) + { + strings_.push_back(str); + args_.push_back(ArgDesc(strings_.size()-1, DIAG_ARG_TYPE_STRING)); + return *this; + } mol::AtomHandle GetAtom(size_t index) const { assert(index<args_.size()); @@ -90,44 +91,53 @@ public: assert(index<args_.size()); return chains_[args_[index].index]; } - String Format(bool colored=true) const; + String Format(bool colored=true) const; private: - struct ArgDesc { - ArgDesc(size_t i, DiagArgType t): index(i), type(t) { } - size_t index; - DiagArgType type; - }; - DiagType type_; - String format_; - mol::AtomHandleList atoms_; - mol::ResidueHandleList residues_; + struct ArgDesc { + ArgDesc(size_t i, DiagArgType t): index(i), type(t) { } + size_t index; + DiagArgType type; + }; + DiagType type_; + String format_; + mol::AtomHandleList atoms_; + mol::ResidueHandleList residues_; mol::ChainHandleList chains_; - std::vector<String> strings_; - std::vector<int> ints_; - std::vector<ArgDesc> args_; + std::vector<String> strings_; + std::vector<int> ints_; + std::vector<ArgDesc> args_; }; -class DLLEXPORT_OST_CONOP DiagEngine { -public: - DiagEngine() {} +class Diagnostics; - ~DiagEngine() - { - for(std::vector<Diag*>::iterator - i=diags_.begin(), e=diags_.end(); i!=e;++i) { - delete *i; - } - } +typedef boost::shared_ptr<Diagnostics> DiagnosticsPtr; - Diag& AddDiag(DiagType type, const char* fmt) - { - diags_.push_back(new Diag(type, fmt)); - return *diags_.back(); - } +class DLLEXPORT_OST_CONOP Diagnostics { +public: + typedef std::vector<Diag*>::iterator diag_iterator; + typedef std::vector<Diag*>::const_iterator const_diag_iterator; + Diagnostics() {} - const std::vector<Diag*>& GetDiags() const { return diags_; } + ~Diagnostics() + { + for(std::vector<Diag*>::iterator + i=diags_.begin(), e=diags_.end(); i!=e;++i) { + delete *i; + } + } + + Diag& AddDiag(DiagType type, const char* fmt) + { + diags_.push_back(new Diag(type, fmt)); + return *diags_.back(); + } + diag_iterator diags_begin() { return diags_.begin(); } + diag_iterator diags_end() { return diags_.end(); } + const_diag_iterator diags_begin() const { return diags_.begin(); } + const_diag_iterator diags_end() const { return diags_.end(); } + size_t diag_count() const { return diags_.size(); } private: - std::vector<Diag*> diags_; + std::vector<Diag*> diags_; }; }} /* ost::conop */ diff --git a/modules/conop/src/model_check.hh b/modules/conop/src/model_check.hh index 25bdfb88b85e92094e28c717bcf3840d78d64ce2..8b4d65b05b98525ec4d09ef51f2a09722af10fad 100644 --- a/modules/conop/src/model_check.hh +++ b/modules/conop/src/model_check.hh @@ -8,9 +8,9 @@ namespace ost { namespace conop { class DLLEXPORT_OST_CONOP Checker { public: - Checker(CompoundLibPtr lib, const mol::EntityHandle& ent, - DiagEngine& diags): lib_(lib), ent_(ent), diags_(diags), - checked_unk_res_(false) + Checker(CompoundLibPtr lib, const mol::EntityHandle& ent, + Diagnostics& diags): lib_(lib), ent_(ent), diags_(diags), + checked_unk_res_(false) {} void CheckForUnknownAtoms(); void CheckForCompleteness(bool require_hydrogens=false); @@ -18,12 +18,11 @@ public: mol::AtomHandleList GetHydrogens(); mol::AtomHandleList GetZeroOccupancy(); - const std::vector<Diag*>& GetDiags() const { return diags_.GetDiags(); } private: - CompoundLibPtr lib_; - mol::EntityHandle ent_; - DiagEngine& diags_; - bool checked_unk_res_; + CompoundLibPtr lib_; + mol::EntityHandle ent_; + Diagnostics& diags_; + bool checked_unk_res_; }; }} /* ost::conop */ diff --git a/modules/conop/src/processor.cc b/modules/conop/src/processor.cc new file mode 100644 index 0000000000000000000000000000000000000000..e1a3e6b6607dd68491cdfbb99a5350190ce8156f --- /dev/null +++ b/modules/conop/src/processor.cc @@ -0,0 +1,101 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2011 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ +#include <ost/mol/xcs_editor.hh> +#include <ost/mol/bond_handle.hh> +#include <ost/mol/torsion_handle.hh> +#include "processor.hh" + +namespace ost { namespace conop { + +DiagnosticsPtr Processor::Process(mol::EntityHandle ent) const +{ + DiagnosticsPtr diags(new Diagnostics); + if (!this->BeginProcessing(diags, ent)) { + return diags; + } + this->DoProcess(diags, ent); + + this->EndProcessing(diags, ent); + return diags; +} + +void AssignBackboneTorsions(mol::ChainHandle chain) +{ + AssignBackboneTorsions(chain.GetResidueList()); +} + +void AssignBackboneTorsions(mol::ResidueHandleList residues) +{ + if (residues.empty()) { return; } + mol::ResidueHandle r1; + mol::ResidueHandle r2; + mol::ResidueHandle r3 = residues.front(); + for (mol::ResidueHandleList::iterator + j = residues.begin()+1, e2 = residues.end(); j != e2; ++j) { + r1 = r2; + r2 = r3; + r3 = *j; + AssignBackboneTorsions(r1, r2, r3); + } + AssignBackboneTorsions(r2, r3, mol::ResidueHandle()); +} + + +void AssignBackboneTorsions(mol::ResidueHandle prev, + mol::ResidueHandle res, + mol::ResidueHandle next) +{ + + mol::XCSEditor e=res.GetEntity().EditXCS(mol::BUFFERED_EDIT); + //psi + if (next.IsValid() && next.IsPeptideLinking()){ + mol::AtomHandle ca_this=res.FindAtom("CA"); + mol::AtomHandle n_this=res.FindAtom("N"); + mol::AtomHandle c_this=res.FindAtom("C"); + mol::AtomHandle n_next=next.FindAtom("N"); + if ((ca_this && n_this && c_this && n_next && BondExists(c_this, n_next)) + && !res.GetPsiTorsion()) { + e.AddTorsion("PSI", n_this, ca_this, c_this, n_next); + } + }; + //phi + if (prev.IsValid() && prev.IsPeptideLinking()) { + mol::AtomHandle c_prev=prev.FindAtom("C"); + mol::AtomHandle n_this=res.FindAtom("N"); + mol::AtomHandle ca_this=res.FindAtom("CA"); + mol::AtomHandle c_this=res.FindAtom("C"); + if ((c_prev && n_this && ca_this && c_this && BondExists(c_prev, n_this)) + && !res.GetPhiTorsion()) { + e.AddTorsion("PHI", c_prev, n_this, ca_this, c_this); + } + } + //omega + if (prev.IsValid() && prev.IsPeptideLinking()) { + mol::AtomHandle ca_prev=prev.FindAtom("CA"); + mol::AtomHandle c_prev=prev.FindAtom("C"); + mol::AtomHandle n=res.FindAtom("N"); + mol::AtomHandle ca=res.FindAtom("CA"); + if ((ca_prev && c_prev && n && ca && BondExists(c_prev, n)) + && !res.GetOmegaTorsion()) { + e.AddTorsion("OMEGA",ca_prev , c_prev, n, ca); + } + } +} + +}} diff --git a/modules/conop/src/processor.hh b/modules/conop/src/processor.hh new file mode 100644 index 0000000000000000000000000000000000000000..daba929a3bf3d615c1e7a70410cd39c5c657c8ac --- /dev/null +++ b/modules/conop/src/processor.hh @@ -0,0 +1,159 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2011 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ +#ifndef OST_CONOP_PROCESSOR_HH +#define OST_CONOP_PROCESSOR_HH + +#include <ost/mol/entity_handle.hh> +#include "module_config.hh" +#include "diag.hh" + +namespace ost { namespace conop { + +enum ConopAction { + CONOP_WARN = 0, + CONOP_SILENT, + CONOP_REMOVE, + CONOP_REMOVE_ATOM, + CONOP_REMOVE_RESIDUE, + CONOP_FATAL +}; + +struct ProcessorOptions { + ProcessorOptions(): check_bond_feasibility(false), assign_torsions(false), + connect(true), unk_atom_treatment(CONOP_WARN), unk_res_treatment(CONOP_WARN), + zero_occ_treatment(CONOP_SILENT) + { } + bool check_bond_feasibility; + bool assign_torsions; + bool connect; + ConopAction unk_atom_treatment; + ConopAction unk_res_treatment; + ConopAction zero_occ_treatment; +}; + +// the base class for all options +class DLLEXPORT_OST_CONOP Processor { +public: + DiagnosticsPtr Process(mol::EntityHandle ent) const; + + + virtual void PushFlags() = 0; + virtual void PopFlags() = 0; +protected: + virtual void DoProcess(DiagnosticsPtr diags, + mol::EntityHandle ent) const = 0; + virtual bool BeginProcessing(DiagnosticsPtr diags, + mol::EntityHandle ent) const { return true; } + virtual bool EndProcessing(DiagnosticsPtr diags, + mol::EntityHandle ent) const { return true; } +}; + +// Provides accessor methods for the basic Processor options. All +// processors should derive from this class +template <typename O> +class ProcessorBase : public Processor { +public: + typedef O option_type; + ProcessorBase(): option_stack_(1, O()) + {} + + void SetConnect(bool connect) { + this->GetOptions().connect = connect; + } + + bool GetConnect() const { + return this->GetOptions().connect; + } + void SetAssignTorsions(bool flag) { + this->GetOptions().assign_torsions = flag; + } + bool GetAssignTorsions() const { + return this->GetOptions().assign_torsions; + } + ConopAction GetUnkResidueTreatment() const { + return this->GetOptions().unk_res_treatment; + } + + ConopAction GetUnkAtomTreatment() const { + return this->GetOptions().unk_atom_treatment; + } + + bool GetCheckBondFeasibility() const { + return this->GetOptions().check_bond_feasibility; + } + + bool GetStrictHydrogens() const { + return this->GetOptions().strict_hydrogens; + } + + void SetStrictHydrogens(bool flag) { + this->GetOptions().strict_hydrogens = flag; + } + + void SetCheckBondFeasibility(bool flag) { + this->GetOptions().check_bond_feasibility = flag; + } + + void SetUnkResidueTreatment(ConopAction action) { + this->GetOptions().unk_res_treatment = action; + } + + void SetUnkAtomTreatment(ConopAction action) { + this->GetOptions().unk_atom_treatment = action; + } + + ConopAction GetZeroOccTreatment() const { + return this->GetOptions().zero_occ_treatment; + } + + void SetZeroOccTreatment(ConopAction action) { + this->GetOptions().zero_occ_treatment = action; + } + + virtual void PushFlags() { + option_stack_.push_back(option_stack_.back()); + } + + virtual void PopFlags() { + if (option_stack_.size() == 1) { + throw std::runtime_error("Can't pop from stack with one item left"); + } + option_stack_.pop_back(); + } +protected: + const option_type& GetOptions() const { return option_stack_.back(); } + option_type& GetOptions() { return option_stack_.back(); } +private: + std::vector<option_type> option_stack_; +}; + + +/// \brief assigns phi/psi/omega to all residues marked peptide-linking of +/// the chain +/// +/// Requires the atoms to be connected +void DLLEXPORT_OST_CONOP AssignBackboneTorsions(mol::ChainHandle chain); +void DLLEXPORT_OST_CONOP AssignBackboneTorsions(mol::ResidueHandleList residues); +void DLLEXPORT_OST_CONOP AssignBackboneTorsions(mol::ResidueHandle prev, + mol::ResidueHandle res, + mol::ResidueHandle next); + +}} + +#endif diff --git a/modules/conop/src/rule_based.cc b/modules/conop/src/rule_based.cc index 941ecade438239c733d03326389c831f551d44dd..3425fb076f84711621e5dc6c87e4fe124244608a 100644 --- a/modules/conop/src/rule_based.cc +++ b/modules/conop/src/rule_based.cc @@ -1,10 +1,31 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2011 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ #include <ost/log.hh> +#include <ost/profile.hh> #include <ost/mol/xcs_editor.hh> #include <ost/mol/bond_handle.hh> +#include <ost/mol/torsion_handle.hh> #include <ost/mol/impl/residue_impl.hh> #include <ost/mol/impl/atom_impl.hh> #include <ost/mol/residue_handle.hh> #include "rule_based.hh" + namespace ost { namespace conop { struct OrdinalAtomComp { @@ -14,8 +35,74 @@ struct OrdinalAtomComp { } }; -void RuleBasedConop::Process(mol::EntityHandle ent) +void RuleBasedProcessor::ProcessUnkResidue(DiagnosticsPtr diags, + mol::ResidueHandle res) const +{ + // Don't do anything if treatment is set to SILENT + if (this->GetUnkResidueTreatment() == CONOP_SILENT) { + return; + } + switch (this->GetUnkResidueTreatment()) { + case CONOP_WARN: + diags->AddDiag(DIAG_UNK_RESIDUE, "unknown residue %0") + .AddResidue(res); + break; + case CONOP_FATAL: + // FIXME: Implement a ConopError based on Diag... + break; + case CONOP_REMOVE_RESIDUE: + case CONOP_REMOVE: + diags->AddDiag(DIAG_UNK_RESIDUE, "removed unknown residue %0") + .AddResidue(res); + res.GetEntity().EditXCS().DeleteResidue(res); + return; + default: + assert(0 && "unhandled switch"); + } +} + +void RuleBasedProcessor::ProcessUnkAtoms(DiagnosticsPtr diags, + mol::AtomHandleList unks) const +{ + // Don't do anything if treatment is set to SILENT + if (this->GetUnkAtomTreatment() == CONOP_SILENT) { + return; + } + + assert(unks.empty() && "empty unk list"); + mol::XCSEditor edi = unks.front().GetEntity().EditXCS(); + for (mol::AtomHandleList::iterator + i = unks.begin(), e = unks.end(); i != e; ++i) { + switch (this->GetUnkAtomTreatment()) { + case CONOP_REMOVE_ATOM: + case CONOP_REMOVE: + edi.DeleteAtom(*i); + diags->AddDiag(DIAG_UNK_ATOM, "removed unknown atom %0 from residue %1") + .AddString(i->GetName()).AddResidue(i->GetResidue()); + break; + case CONOP_WARN: + diags->AddDiag(DIAG_UNK_ATOM, "residue %0 contains unknown atom %1") + .AddResidue(i->GetResidue()).AddString(i->GetName()); + break; + case CONOP_FATAL: + // FIXME: Implement a ConopError based on Diag... + break; + case CONOP_REMOVE_RESIDUE: + diags->AddDiag(DIAG_UNK_ATOM, "removed residue %0 with unknown atom %1") + .AddString(i->GetResidue().GetQualifiedName()) + .AddString(i->GetName()); + edi.DeleteResidue(i->GetResidue()); + return; + default: + assert(0 && "unhandled switch"); + } + } +} + +void RuleBasedProcessor::DoProcess(DiagnosticsPtr diags, + mol::EntityHandle ent) const { + Profile prof("RuleBasedProcessor::Process"); mol::ChainHandleList chains=ent.GetChainList(); for (mol::ChainHandleList::iterator i = chains.begin(), e = chains.end(); i!= e; ++i) { @@ -28,28 +115,37 @@ void RuleBasedConop::Process(mol::EntityHandle ent) CompoundPtr compound = lib_->FindCompound(residue.GetName(), Compound::PDB); if (!compound) { // process unknown residue... + this->ProcessUnkResidue(diags, residue); continue; } this->ReorderAtoms(residue, compound); bool unks = this->HasUnknownAtoms(residue, compound); if (unks) { - LOG_WARNING("residue " << residue << " doesn't look like a standard " - << residue.GetKey() << " (" << compound->GetFormula() << ")"); + mol::AtomHandleList unk_atoms; + unk_atoms = GetUnknownAtomsOfResidue(residue, compound, + this->GetStrictHydrogens()); + this->ProcessUnkAtoms(diags, unk_atoms); residue.SetChemClass(mol::ChemClass(mol::ChemClass::UNKNOWN)); residue.SetChemType(mol::ChemType(mol::ChemType::UNKNOWN)); residue.SetOneLetterCode('?'); continue; } this->FillResidueProps(residue, compound); - this->ConnectAtomsOfResidue(residue, compound); - this->ConnectResidues(prev, residue); + if (this->GetConnect()) { + this->ConnectAtomsOfResidue(residue, compound); + this->ConnectResidues(prev, residue); + } prev = residue; } + if (residues.empty() || !this->GetAssignTorsions()) { + continue; + } + AssignBackboneTorsions(residues); } } -void RuleBasedConop::ConnectResidues(mol::ResidueHandle rh, - mol::ResidueHandle next) +void RuleBasedProcessor::ConnectResidues(mol::ResidueHandle rh, + mol::ResidueHandle next) const { if (!next.IsValid() || !rh.IsValid()) { return; @@ -81,16 +177,16 @@ void RuleBasedConop::ConnectResidues(mol::ResidueHandle rh, } } -void RuleBasedConop::FillResidueProps(mol::ResidueHandle residue, - CompoundPtr compound) +void RuleBasedProcessor::FillResidueProps(mol::ResidueHandle residue, + CompoundPtr compound) const { residue.SetChemClass(compound->GetChemClass()); residue.SetChemType(compound->GetChemType()); residue.SetOneLetterCode(compound->GetOneLetterCode()); } -mol::AtomHandle RuleBasedConop::LocateAtom(const mol::AtomHandleList& ahl, - int ordinal) +mol::AtomHandle RuleBasedProcessor::LocateAtom(const mol::AtomHandleList& ahl, + int ordinal) const { if (ahl.empty()) return mol::AtomHandle(); @@ -108,7 +204,8 @@ mol::AtomHandle RuleBasedConop::LocateAtom(const mol::AtomHandleList& ahl, } -void RuleBasedConop::ConnectAtomsOfResidue(mol::ResidueHandle rh, CompoundPtr compound) +void RuleBasedProcessor::ConnectAtomsOfResidue(mol::ResidueHandle rh, + CompoundPtr compound) const { //if (!compound) { @@ -127,16 +224,16 @@ void RuleBasedConop::ConnectAtomsOfResidue(mol::ResidueHandle rh, CompoundPtr co mol::AtomHandle a1=this->LocateAtom(atoms, bond.atom_one); mol::AtomHandle a2=this->LocateAtom(atoms, bond.atom_two); if (a1.IsValid() && a2.IsValid()) { - if (!check_bond_feasibility_) { - if (strict_hydrogens_ && (a1.GetElement()=="H" || - a2.GetElement()=="D")) { + if (!this->GetCheckBondFeasibility()) { + if (this->GetStrictHydrogens() && (a1.GetElement()=="H" || + a2.GetElement()=="D")) { continue; } e.Connect(a1, a2, bond.order); } else { if (this->IsBondFeasible(a1, a2)) { - if (strict_hydrogens_ && (a1.GetElement()=="H" || - a2.GetElement()=="D")) { + if (this->GetStrictHydrogens() && (a1.GetElement()=="H" || + a2.GetElement()=="D")) { continue; } e.Connect(a1, a2, bond.order); @@ -144,16 +241,10 @@ void RuleBasedConop::ConnectAtomsOfResidue(mol::ResidueHandle rh, CompoundPtr co } } } - //for (mol::AtomHandleList::iterator i=atoms.begin(), e=atoms.end(); i!=e; ++i) { - // if (((*i).GetElement()=="H" || (*i).GetElement()=="D") && - // (*i).GetBondCount()==0) { - // this->DistanceBasedConnect(*i); - // } - // } } -bool RuleBasedConop::IsBondFeasible(const mol::AtomHandle& atom_a, - const mol::AtomHandle& atom_b) +bool RuleBasedProcessor::IsBondFeasible(const mol::AtomHandle& atom_a, + const mol::AtomHandle& atom_b) const { Real radii=0.0; if (atom_a.GetRadius()>0.0) { @@ -171,8 +262,9 @@ bool RuleBasedConop::IsBondFeasible(const mol::AtomHandle& atom_a, Real upper_bound=lower_bound*6.0; return (len<=upper_bound && len>=lower_bound); } -void RuleBasedConop::ReorderAtoms(mol::ResidueHandle residue, - CompoundPtr compound) + +void RuleBasedProcessor::ReorderAtoms(mol::ResidueHandle residue, + CompoundPtr compound) const { mol::impl::ResidueImplPtr impl=residue.Impl(); mol::impl::AtomImplList::iterator i=impl->GetAtomList().begin(); @@ -185,12 +277,17 @@ void RuleBasedConop::ReorderAtoms(mol::ResidueHandle residue, continue; } atom->SetState((compound->GetAtomSpecs())[index].ordinal); + // override element + if (this->GetFixElement()) { + atom->SetElement((compound->GetAtomSpecs())[index].element); + } } std::sort(impl->GetAtomList().begin(), impl->GetAtomList().end(), OrdinalAtomComp()); } -bool RuleBasedConop::HasUnknownAtoms(mol::ResidueHandle res, CompoundPtr compound) +bool RuleBasedProcessor::HasUnknownAtoms(mol::ResidueHandle res, + CompoundPtr compound) const { AtomSpecList::const_iterator j=compound->GetAtomSpecs().begin(); mol::AtomHandleList atoms=res.GetAtomList(); @@ -199,7 +296,7 @@ bool RuleBasedConop::HasUnknownAtoms(mol::ResidueHandle res, CompoundPtr compoun i=atoms.begin(), e=atoms.end(); i!=e; ++i) { if ((*i).Impl()->GetState()==std::numeric_limits<unsigned int>::max()) { if (((*i).GetElement()=="H" || (*i).GetElement()=="D") && - !strict_hydrogens_) { + !this->GetStrictHydrogens()) { continue; } return true; @@ -207,4 +304,29 @@ bool RuleBasedConop::HasUnknownAtoms(mol::ResidueHandle res, CompoundPtr compoun } return false; } + +mol::AtomHandleList GetUnknownAtomsOfResidue(mol::ResidueHandle res, + CompoundPtr compound, + bool strict_hydrogens) +{ + mol::AtomHandleList atoms=res.GetAtomList(); + mol::AtomHandleList unks; + const AtomSpecList& atom_specs=compound->GetAtomSpecs(); + for (mol::AtomHandleList::const_iterator + j=atoms.begin(), e2=atoms.end(); j!=e2; ++j) { + bool found=false; + for (AtomSpecList::const_iterator + k=atom_specs.begin(), e3=atom_specs.end(); k!=e3; ++k) { + if (k->name==j->GetName() || k->alt_name==j->GetName()) { + found=true; + break; + } + } + if (!found) { + unks.push_back(*j); + } + } + return unks; +} + }} diff --git a/modules/conop/src/rule_based.hh b/modules/conop/src/rule_based.hh index 2a8c95b8dfe8b8a85412342f8b7955e9386238b9..62b2417f384da072fc110ffa7464f75f35fa444f 100644 --- a/modules/conop/src/rule_based.hh +++ b/modules/conop/src/rule_based.hh @@ -1,32 +1,89 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2011 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ #ifndef OST_CONOP_RULE_BASED_HH #define OST_CONOP_RULE_BASED_HH #include <ost/mol/entity_handle.hh> #include "compound_lib.hh" #include "diag.hh" +#include "processor.hh" + namespace ost { namespace conop { +struct RuleBasedProcessorOptions : ProcessorOptions { + RuleBasedProcessorOptions(): ProcessorOptions(), + strict_hydrogens(false), fix_element(true) + {} + bool strict_hydrogens; + bool fix_element; +}; + +/// \brief returns all atoms not listed in the specifictaion of compound +mol::AtomHandleList DLLEXPORT_OST_CONOP GetUnknownAtoms(mol::ResidueHandle res, + CompoundPtr compound); -class DLLEXPORT_OST_CONOP RuleBasedConop { +class DLLEXPORT_OST_CONOP RuleBasedProcessor : + public ProcessorBase<RuleBasedProcessorOptions> { public: - RuleBasedConop(CompoundLibPtr compound_lib): lib_(compound_lib), - strict_hydrogens_(false), check_bond_feasibility_(false) {} - - void Process(mol::EntityHandle ent); + RuleBasedProcessor(CompoundLibPtr compound_lib): + ProcessorBase<RuleBasedProcessorOptions>(), lib_(compound_lib) + { + } + + bool GetStrictHydrogens() const { + return this->GetOptions().strict_hydrogens; + } + void SetStrictHydrogens(bool flag) { + this->GetOptions().strict_hydrogens = flag; + } + + bool GetFixElement() const { + return this->GetOptions().fix_element; + } + void SetFixElement(bool flag) { + this->GetOptions().fix_element = flag; + } +protected: + virtual void DoProcess(DiagnosticsPtr diags, + mol::EntityHandle ent) const; private: - bool HasUnknownAtoms(mol::ResidueHandle residue, CompoundPtr compound); - void ReorderAtoms(mol::ResidueHandle residue, CompoundPtr compound); - void FillResidueProps(mol::ResidueHandle residue, CompoundPtr compound); - void ConnectAtomsOfResidue(mol::ResidueHandle residue, CompoundPtr compound); - void ConnectResidues(mol::ResidueHandle residue, mol::ResidueHandle next); - bool IsBondFeasible(const mol::AtomHandle&, const mol::AtomHandle&); - mol::AtomHandle LocateAtom(const mol::AtomHandleList&, int ordinal); + void ProcessUnkResidue(DiagnosticsPtr diags, + mol::ResidueHandle res) const; + void ProcessUnkAtoms(DiagnosticsPtr diags, + mol::AtomHandleList unks) const; + bool HasUnknownAtoms(mol::ResidueHandle residue, CompoundPtr compound) const; + void ReorderAtoms(mol::ResidueHandle residue, CompoundPtr compound) const; + void FillResidueProps(mol::ResidueHandle residue, CompoundPtr compound) const; + void ConnectAtomsOfResidue(mol::ResidueHandle residue, CompoundPtr compound) const; + void ConnectResidues(mol::ResidueHandle residue, mol::ResidueHandle next) const; + bool IsBondFeasible(const mol::AtomHandle&, const mol::AtomHandle&) const; + mol::AtomHandle LocateAtom(const mol::AtomHandleList&, int ordinal) const; CompoundLibPtr lib_; - bool strict_hydrogens_; - bool check_bond_feasibility_; }; + +mol::AtomHandleList DLLEXPORT_OST_CONOP +GetUnknownAtomsOfResidue(mol::ResidueHandle residue, + CompoundPtr compound, + bool strict_hydrogens=false); + }} #endif diff --git a/modules/conop/tests/CMakeLists.txt b/modules/conop/tests/CMakeLists.txt index 6a832115b5933b66241c9dec5ce23c21dd532c48..a318766e1c6355c55063b487c1a9c6c5b1a51e95 100644 --- a/modules/conop/tests/CMakeLists.txt +++ b/modules/conop/tests/CMakeLists.txt @@ -1,8 +1,9 @@ set(OST_CONOP_UNIT_TESTS test_heuristic_builder.cc - test_rule_based_builder.cc tests.cc + test_rule_based_conop.cc test_builder.cc + helper.cc test_compound.py test_cleanup.py test_nonstandard.py diff --git a/modules/conop/tests/helper.cc b/modules/conop/tests/helper.cc new file mode 100644 index 0000000000000000000000000000000000000000..9090a74532bf058f332f73d4f1e486ebbe5cea2d --- /dev/null +++ b/modules/conop/tests/helper.cc @@ -0,0 +1,331 @@ +#define BOOST_TEST_DYN_LINK +#include <boost/test/unit_test.hpp> +#include <ost/mol/xcs_editor.hh> +#include <ost/mol/bond_handle.hh> + +#include "helper.hh" + +using namespace ost::mol; + +namespace ost { namespace conop { + +ResidueHandle make_cytosine(ChainHandle chain) +{ + XCSEditor e=chain.GetEntity().EditXCS(); + ResidueHandle res=e.AppendResidue(chain, "DC"); + + e.InsertAtom(res, "P", geom::Vec3(21.412, 34.391, 37.142)); + e.InsertAtom(res, "OP1", geom::Vec3(22.938, 34.599, 36.988)); + e.InsertAtom(res, "OP2", geom::Vec3(20.690, 35.640, 37.689)); + e.InsertAtom(res, "O5'", geom::Vec3(21.215, 33.299, 38.339)); + e.InsertAtom(res, "C5'", geom::Vec3(20.524, 33.660, 39.548)); + e.InsertAtom(res, "C4'", geom::Vec3(19.064, 33.285, 39.452)); + e.InsertAtom(res, "O4'", geom::Vec3(18.246, 34.411, 39.034)); + e.InsertAtom(res, "C3'", geom::Vec3(18.778, 32.150, 38.469)); + e.InsertAtom(res, "O3'", geom::Vec3(17.930, 31.199, 39.081)); + e.InsertAtom(res, "C2'", geom::Vec3(18.046, 32.827, 37.330)); + e.InsertAtom(res, "C1'", geom::Vec3(17.326, 33.955, 38.048)); + e.InsertAtom(res, "N1", geom::Vec3(16.891, 35.100, 37.196)); + e.InsertAtom(res, "C2", geom::Vec3(15.709, 35.782, 37.543)); + e.InsertAtom(res, "O2", geom::Vec3(15.137, 35.489, 38.610)); + e.InsertAtom(res, "N3", geom::Vec3(15.226, 36.742, 36.710)); + e.InsertAtom(res, "C4", geom::Vec3(15.878, 37.041, 35.582)); + e.InsertAtom(res, "N4", geom::Vec3(15.324, 37.943, 34.762)); + e.InsertAtom(res, "C5", geom::Vec3(17.116, 36.415, 35.238)); + e.InsertAtom(res, "C6", geom::Vec3(17.584, 35.464, 36.067)); + + return res; +} + +ResidueHandle make_uracil1(ChainHandle chain) +{ + XCSEditor e=chain.GetEntity().EditXCS(); + ResidueHandle res = e.AppendResidue(chain, "U"); + + e.InsertAtom(res, "P", geom::Vec3(18.533, 30.238, 40.226)); + e.InsertAtom(res, "OP1", geom::Vec3(20.012, 30.098, 40.003)); + e.InsertAtom(res, "OP2", geom::Vec3(17.680, 29.017, 40.258)); + e.InsertAtom(res, "O5'", geom::Vec3(18.237, 31.056, 41.570)); + e.InsertAtom(res, "C5'", geom::Vec3(17.104, 31.923, 41.642)); + e.InsertAtom(res, "C4'", geom::Vec3(16.332, 31.729, 42.937)); + e.InsertAtom(res, "O4'", geom::Vec3(15.180, 32.616, 42.841)); + e.InsertAtom(res, "C3'", geom::Vec3(15.785, 30.319, 43.194)); + e.InsertAtom(res, "O3'", geom::Vec3(15.960, 29.834, 44.527)); + e.InsertAtom(res, "C2'", geom::Vec3(14.311, 30.371, 42.766)); + e.InsertAtom(res, "O2'", geom::Vec3(13.442, 29.708, 43.677)); + e.InsertAtom(res, "C1'", geom::Vec3(13.972, 31.863, 42.765)); + e.InsertAtom(res, "N1", geom::Vec3(13.282, 32.212, 41.499)); + e.InsertAtom(res, "C2", geom::Vec3(12.072, 32.896, 41.568)); + e.InsertAtom(res, "O2", geom::Vec3(11.510, 33.142, 42.626)); + e.InsertAtom(res, "N3", geom::Vec3(11.535, 33.257, 40.349)); + e.InsertAtom(res, "C4", geom::Vec3(12.048, 32.976, 39.096)); + e.InsertAtom(res, "O4", geom::Vec3(11.490, 33.447, 38.087)); + e.InsertAtom(res, "C5", geom::Vec3(13.268, 32.207, 39.106)); + e.InsertAtom(res, "C6", geom::Vec3(13.831, 31.872, 40.261)); + + return res; +} + +ResidueHandle make_uracil2(ChainHandle chain) +{ + XCSEditor e=chain.GetEntity().EditXCS(); + ResidueHandle res = e.AppendResidue(chain, "U"); + + e.InsertAtom(res, "P", geom::Vec3(16.249, 28.254, 44.759)); + e.InsertAtom(res, "OP1", geom::Vec3(16.654, 28.055, 46.181)); + e.InsertAtom(res, "OP2", geom::Vec3(17.115, 27.731, 43.656)); + e.InsertAtom(res, "O5'", geom::Vec3(14.826, 27.572, 44.574)); + e.InsertAtom(res, "C5'", geom::Vec3(14.711, 26.206, 44.216)); + e.InsertAtom(res, "C4'", geom::Vec3(13.279, 25.889, 43.887)); + e.InsertAtom(res, "O4'", geom::Vec3(12.832, 26.793, 42.838)); + e.InsertAtom(res, "C3'", geom::Vec3(13.155, 24.434, 43.329)); + e.InsertAtom(res, "O3'", geom::Vec3(12.269, 23.625, 44.098)); + e.InsertAtom(res, "C2'", geom::Vec3(12.871, 24.595, 41.875)); + e.InsertAtom(res, "O2'", geom::Vec3(11.811, 23.752, 41.462)); + e.InsertAtom(res, "C1'", geom::Vec3(12.424, 26.056, 41.694)); + e.InsertAtom(res, "N1", geom::Vec3(13.030, 26.692, 40.497)); + e.InsertAtom(res, "C2", geom::Vec3(12.517, 26.365, 39.228)); + e.InsertAtom(res, "O2", geom::Vec3(11.579, 25.594, 39.068)); + e.InsertAtom(res, "N3", geom::Vec3(13.141, 26.987, 38.161)); + e.InsertAtom(res, "C4", geom::Vec3(14.197, 27.888, 38.210)); + e.InsertAtom(res, "O4", geom::Vec3(14.627, 28.368, 37.156)); + e.InsertAtom(res, "C5", geom::Vec3(14.671, 28.189, 39.542)); + e.InsertAtom(res, "C6", geom::Vec3(14.087, 27.597, 40.612)); + + return res; +} + +ResidueHandle make_defective_uracil2(ChainHandle chain) +{ + XCSEditor e=chain.GetEntity().EditXCS(); + ResidueHandle res = e.AppendResidue(chain, "U"); + + e.InsertAtom(res, "P", geom::Vec3(16.249, 28.254, 44.759)); + e.InsertAtom(res, "OP1", geom::Vec3(16.654, 28.055, 46.181)); + e.InsertAtom(res, "OP2", geom::Vec3(17.115, 27.731, 43.656)); + e.InsertAtom(res, "O5'", geom::Vec3(14.826, 27.572, 44.574)); + e.InsertAtom(res, "C5'", geom::Vec3(14.711, 26.206, 44.216)); + e.InsertAtom(res, "C4'", geom::Vec3(13.279, 25.889, 43.887)); + e.InsertAtom(res, "O4'", geom::Vec3(12.832, 26.793, 42.838)); + e.InsertAtom(res, "C3'", geom::Vec3(13.155, 24.434, 43.329)); + e.InsertAtom(res, "O3'", geom::Vec3(12.269, 23.625, 44.098)); + e.InsertAtom(res, "C2'", geom::Vec3(12.871, 24.595, 41.875)); + e.InsertAtom(res, "O2'", geom::Vec3(11.811, 23.752, 41.462)); + e.InsertAtom(res, "C1'", geom::Vec3(12.424, 26.056, 41.694)); + e.InsertAtom(res, "N1", geom::Vec3(13.030, 26.692, 40.497)); + e.InsertAtom(res, "C2", geom::Vec3(12.517, 26.365, 39.228)); + e.InsertAtom(res, "O2", geom::Vec3(11.579, 25.594, 39.068)); + e.InsertAtom(res, "N3", geom::Vec3(13.141, 26.987, 38.161)); + e.InsertAtom(res, "C4", geom::Vec3(14.197, 27.888, 38.210)); + e.InsertAtom(res, "O4", geom::Vec3(14.627, 28.368, 37.156)); + e.InsertAtom(res, "C5", geom::Vec3(14.671, 28.189, 39.542)); + e.InsertAtom(res, "C6", geom::Vec3(14.087, 27.597, 80.612)); + + return res; +} +ResidueHandle make_1zk(ChainHandle chain) +{ + XCSEditor edi=chain.GetEntity().EditXCS(); + ResidueHandle r=edi.AppendResidue(chain, "1ZK"); + edi.InsertAtom(r, "C", geom::Vec3(3.946, 3.520, 10.861),"C"); + edi.InsertAtom(r, "C1", geom::Vec3(6.308, 3.824, 8.036),"C"); + edi.InsertAtom(r, "CA", geom::Vec3(4.856, 4.282, 9.931),"C"); + edi.InsertAtom(r, "C2", geom::Vec3(6.770, 5.099, 8.019),"C"); + edi.InsertAtom(r, "C3", geom::Vec3(7.805, 5.484, 7.142),"C"); + edi.InsertAtom(r, "C4", geom::Vec3(8.363, 4.590, 6.284),"C"); + edi.InsertAtom(r, "C5", geom::Vec3(8.469, 2.345, 5.318),"C"); + edi.InsertAtom(r, "C6", geom::Vec3(7.986, 1.072, 5.322),"C"); + edi.InsertAtom(r, "C4A", geom::Vec3(7.885, 3.277, 6.208),"C"); + edi.InsertAtom(r, "C7", geom::Vec3(6.949, 0.682, 6.190),"C"); + edi.InsertAtom(r, "C8", geom::Vec3(6.398, 1.548, 7.074),"C"); + edi.InsertAtom(r, "C9", geom::Vec3(4.018, 0.269, 12.183),"C"); + edi.InsertAtom(r, "C1A", geom::Vec3(6.824, 2.882, 7.117),"C"); + edi.InsertAtom(r, "O", geom::Vec3(3.224, 4.213, 11.565),"O"); + edi.InsertAtom(r, "O1", geom::Vec3(5.331, 3.476, 8.904),"O"); + edi.InsertAtom(r, "O3", geom::Vec3(5.748, -4.044, 14.612),"O"); + edi.InsertAtom(r, "N", geom::Vec3(3.855, 2.201, 10.790),"N"); + edi.InsertAtom(r, "N1", geom::Vec3(4.814, 0.719, 13.173),"N"); + edi.InsertAtom(r, "CA1", geom::Vec3(3.077, 1.346, 11.644),"C"); + edi.InsertAtom(r, "O2", geom::Vec3(4.203, -0.810, 11.651),"O"); + edi.InsertAtom(r, "O4", geom::Vec3(7.311, -5.667, 18.880),"O"); + edi.InsertAtom(r, "CB", geom::Vec3(1.856, 0.712, 11.049),"C"); + edi.InsertAtom(r, "CG", geom::Vec3(1.015, 1.845, 10.511),"C"); + edi.InsertAtom(r, "ND1", geom::Vec3(1.467, 2.439, 9.321),"N"); + edi.InsertAtom(r, "N2", geom::Vec3(6.478, -3.958, 16.751),"N"); + edi.InsertAtom(r, "CD2", geom::Vec3(-0.105, 2.479, 10.887),"C"); + edi.InsertAtom(r, "CE1", geom::Vec3(0.638, 3.428, 9.002),"C"); + edi.InsertAtom(r, "NE2", geom::Vec3(-0.372, 3.461, 9.881),"N"); + edi.InsertAtom(r, "N3", geom::Vec3(6.871, -7.136, 17.332),"N"); + edi.InsertAtom(r, "CA2", geom::Vec3(5.881, -0.001, 13.808),"C"); + edi.InsertAtom(r, "CB1", geom::Vec3(7.140, 0.860, 13.743),"C"); + edi.InsertAtom(r, "CG1", geom::Vec3(7.503, 1.324, 12.299),"C"); + edi.InsertAtom(r, "C21", geom::Vec3(5.126, -8.557, 18.179),"C"); + edi.InsertAtom(r, "CD1", geom::Vec3(8.185, 0.142, 11.507),"C"); + edi.InsertAtom(r, "CD21", geom::Vec3(8.420, 2.537, 12.325),"C"); + edi.InsertAtom(r, "CE11", geom::Vec3(8.381, 0.689, 10.066),"C"); + edi.InsertAtom(r, "CE2", geom::Vec3(8.907, 2.979, 10.922),"C"); + edi.InsertAtom(r, "CZ", geom::Vec3(9.409, 1.807, 10.075),"C"); + edi.InsertAtom(r, "CH", geom::Vec3(5.592, -0.511, 15.204),"C"); + edi.InsertAtom(r, "OH", geom::Vec3(5.225, 0.377, 16.238),"O"); + edi.InsertAtom(r, "CB11", geom::Vec3(4.426, -1.543, 15.170),"C"); + edi.InsertAtom(r, "CA'", geom::Vec3(4.451, -2.730, 16.152),"C"); + edi.InsertAtom(r, "CB'", geom::Vec3(3.124, -3.441, 16.281),"C"); + edi.InsertAtom(r, "CG11", geom::Vec3(2.553, -3.986, 14.933),"C"); + edi.InsertAtom(r, "C31", geom::Vec3(4.413, -7.811, 19.117),"C"); + edi.InsertAtom(r, "CG2", geom::Vec3(3.204, -4.586, 17.345),"C"); + edi.InsertAtom(r, "OB1", geom::Vec3(3.249, -0.875, 15.134),"O"); + edi.InsertAtom(r, "CC", geom::Vec3(5.603, -3.655, 15.782),"C"); + edi.InsertAtom(r, "CA3", geom::Vec3(7.592, -4.867, 16.603),"C"); + edi.InsertAtom(r, "CD", geom::Vec3(7.274, -5.947, 17.691),"C"); + edi.InsertAtom(r, "CB2", geom::Vec3(8.986, -4.351, 16.803),"C"); + edi.InsertAtom(r, "CG12", geom::Vec3(9.488, -3.108, 16.016),"C"); + edi.InsertAtom(r, "CG21", geom::Vec3(10.000, -5.461, 16.472),"C"); + edi.InsertAtom(r, "CD11", geom::Vec3(9.099, -3.257, 14.571),"C"); + edi.InsertAtom(r, "CM", geom::Vec3(6.587, -8.286, 18.106),"C"); + edi.InsertAtom(r, "C41", geom::Vec3(3.045, -7.980, 19.287),"C"); + edi.InsertAtom(r, "C51", geom::Vec3(2.423, -8.911, 18.456),"C"); + edi.InsertAtom(r, "C61", geom::Vec3(3.164, -9.631, 17.518),"C"); + edi.InsertAtom(r, "N11", geom::Vec3(4.497, -9.459, 17.386),"N"); + return r; +} + +void verify_nucleotide_connectivity(const ResidueHandle& res) +{ + BOOST_CHECK(BondExists(res.FindAtom("P"), + res.FindAtom("OP1"))); + BOOST_CHECK(BondExists(res.FindAtom("P"), + res.FindAtom("OP2"))); + BOOST_CHECK(BondExists(res.FindAtom("P"), + res.FindAtom("O5'"))); + BOOST_CHECK(BondExists(res.FindAtom("O5'"), + res.FindAtom("C5'"))); + BOOST_CHECK(BondExists(res.FindAtom("C5'"), + res.FindAtom("C4'"))); + BOOST_CHECK(BondExists(res.FindAtom("C4'"), + res.FindAtom("O4'"))); + BOOST_CHECK(BondExists(res.FindAtom("C4'"), + res.FindAtom("C3'"))); + BOOST_CHECK(BondExists(res.FindAtom("C3'"), + res.FindAtom("O3'"))); + BOOST_CHECK(BondExists(res.FindAtom("C3'"), + res.FindAtom("C2'"))); + BOOST_CHECK(BondExists(res.FindAtom("C2'"), + res.FindAtom("C1'"))); + BOOST_CHECK(BondExists(res.FindAtom("C1'"), + res.FindAtom("O4'"))); + + if (res.GetKey()=="DC") { + BOOST_CHECK(BondExists(res.FindAtom("C1'"), + res.FindAtom("N1"))); + BOOST_CHECK(BondExists(res.FindAtom("N1"), + res.FindAtom("C2"))); + BOOST_CHECK(BondExists(res.FindAtom("C2"), + res.FindAtom("O2"))); + BOOST_CHECK(BondExists(res.FindAtom("C2"), + res.FindAtom("N3"))); + BOOST_CHECK(BondExists(res.FindAtom("N3"), + res.FindAtom("C4"))); + BOOST_CHECK(BondExists(res.FindAtom("C4"), + res.FindAtom("N4"))); + BOOST_CHECK(BondExists(res.FindAtom("C4"), + res.FindAtom("C5"))); + BOOST_CHECK(BondExists(res.FindAtom("C5"), + res.FindAtom("C6"))); + BOOST_CHECK(BondExists(res.FindAtom("C6"), + res.FindAtom("N1"))); + // TODO: Check that no other atoms are connected! + } + + if (res.GetKey()=="U") { + BOOST_CHECK(BondExists(res.FindAtom("O2'"), + res.FindAtom("C2'"))); + BOOST_CHECK(BondExists(res.FindAtom("C1'"), + res.FindAtom("N1"))); + BOOST_CHECK(BondExists(res.FindAtom("N1"), + res.FindAtom("C2"))); + BOOST_CHECK(BondExists(res.FindAtom("C2"), + res.FindAtom("O2"))); + BOOST_CHECK(BondExists(res.FindAtom("C2"), + res.FindAtom("N3"))); + BOOST_CHECK(BondExists(res.FindAtom("N3"), + res.FindAtom("C4"))); + BOOST_CHECK(BondExists(res.FindAtom("C4"), + res.FindAtom("O4"))); + BOOST_CHECK(BondExists(res.FindAtom("C4"), + res.FindAtom("C5"))); + BOOST_CHECK(BondExists(res.FindAtom("C5"), + res.FindAtom("C6"))); + BOOST_CHECK(BondExists(res.FindAtom("C6"), + res.FindAtom("N1"))); + // TODO: Check that no other atoms are connected! + } +} +void verify_1zk_connectivity(const ResidueHandle& r1) +{ + BOOST_CHECK(BondExists(r1.FindAtom("C"), r1.FindAtom("CA"))); + BOOST_CHECK(BondExists(r1.FindAtom("C"), r1.FindAtom("O"))); + BOOST_CHECK(BondExists(r1.FindAtom("CA"), r1.FindAtom("O1"))); + BOOST_CHECK(BondExists(r1.FindAtom("C1"), r1.FindAtom("C2"))); + BOOST_CHECK(BondExists(r1.FindAtom("C1"), r1.FindAtom("C1A"))); + BOOST_CHECK(BondExists(r1.FindAtom("C1"), r1.FindAtom("O1"))); + BOOST_CHECK(BondExists(r1.FindAtom("C2"), r1.FindAtom("C3"))); + BOOST_CHECK(BondExists(r1.FindAtom("C3"), r1.FindAtom("C4"))); + BOOST_CHECK(BondExists(r1.FindAtom("C4"), r1.FindAtom("C4A"))); + BOOST_CHECK(BondExists(r1.FindAtom("C4A"), r1.FindAtom("C5"))); + BOOST_CHECK(BondExists(r1.FindAtom("C4A"), r1.FindAtom("C1A"))); + BOOST_CHECK(BondExists(r1.FindAtom("C5"), r1.FindAtom("C6"))); + BOOST_CHECK(BondExists(r1.FindAtom("C6"), r1.FindAtom("C7"))); + BOOST_CHECK(BondExists(r1.FindAtom("C7"), r1.FindAtom("C8"))); + BOOST_CHECK(BondExists(r1.FindAtom("C8"), r1.FindAtom("C1A"))); + BOOST_CHECK(BondExists(r1.FindAtom("N"), r1.FindAtom("CA1"))); + BOOST_CHECK(BondExists(r1.FindAtom("CA1"), r1.FindAtom("C9"))); + BOOST_CHECK(BondExists(r1.FindAtom("CA1"), r1.FindAtom("CB"))); + BOOST_CHECK(BondExists(r1.FindAtom("C9"), r1.FindAtom("O2"))); + BOOST_CHECK(BondExists(r1.FindAtom("CB"), r1.FindAtom("CG"))); + BOOST_CHECK(BondExists(r1.FindAtom("CG"), r1.FindAtom("ND1"))); + BOOST_CHECK(BondExists(r1.FindAtom("CG"), r1.FindAtom("CD2"))); + BOOST_CHECK(BondExists(r1.FindAtom("ND1"), r1.FindAtom("CE1"))); + BOOST_CHECK(BondExists(r1.FindAtom("CD2"), r1.FindAtom("NE2"))); + BOOST_CHECK(BondExists(r1.FindAtom("CE1"), r1.FindAtom("NE2"))); + BOOST_CHECK(BondExists(r1.FindAtom("N1"), r1.FindAtom("CA2"))); + BOOST_CHECK(BondExists(r1.FindAtom("CA2"), r1.FindAtom("CB1"))); + BOOST_CHECK(BondExists(r1.FindAtom("CA2"), r1.FindAtom("CH"))); + BOOST_CHECK(BondExists(r1.FindAtom("CB1"), r1.FindAtom("CG1"))); + BOOST_CHECK(BondExists(r1.FindAtom("CG1"), r1.FindAtom("CD1"))); + BOOST_CHECK(BondExists(r1.FindAtom("CG1"), r1.FindAtom("CD21"))); + BOOST_CHECK(BondExists(r1.FindAtom("CD1"), r1.FindAtom("CE11"))); + BOOST_CHECK(BondExists(r1.FindAtom("CD21"), r1.FindAtom("CE2"))); + BOOST_CHECK(BondExists(r1.FindAtom("CE11"), r1.FindAtom("CZ"))); + BOOST_CHECK(BondExists(r1.FindAtom("CE2"), r1.FindAtom("CZ"))); + BOOST_CHECK(BondExists(r1.FindAtom("CH"), r1.FindAtom("OH"))); + BOOST_CHECK(BondExists(r1.FindAtom("CH"), r1.FindAtom("CB11"))); + BOOST_CHECK(BondExists(r1.FindAtom("CB11"), r1.FindAtom("CA'"))); + BOOST_CHECK(BondExists(r1.FindAtom("CB11"), r1.FindAtom("OB1"))); + BOOST_CHECK(BondExists(r1.FindAtom("CA'"), r1.FindAtom("CB'"))); + BOOST_CHECK(BondExists(r1.FindAtom("CA'"), r1.FindAtom("CC"))); + BOOST_CHECK(BondExists(r1.FindAtom("CB'"), r1.FindAtom("CG11"))); + BOOST_CHECK(BondExists(r1.FindAtom("CB'"), r1.FindAtom("CG2"))); + BOOST_CHECK(BondExists(r1.FindAtom("CC"), r1.FindAtom("O3"))); + BOOST_CHECK(BondExists(r1.FindAtom("N2"), r1.FindAtom("CA3"))); + BOOST_CHECK(BondExists(r1.FindAtom("CA3"), r1.FindAtom("CD"))); + BOOST_CHECK(BondExists(r1.FindAtom("CA3"), r1.FindAtom("CB2"))); + BOOST_CHECK(BondExists(r1.FindAtom("CD"), r1.FindAtom("O4"))); + BOOST_CHECK(BondExists(r1.FindAtom("CB2"), r1.FindAtom("CG12"))); + BOOST_CHECK(BondExists(r1.FindAtom("CB2"), r1.FindAtom("CG21"))); + BOOST_CHECK(BondExists(r1.FindAtom("CG12"), r1.FindAtom("CD11"))); + BOOST_CHECK(BondExists(r1.FindAtom("N3"), r1.FindAtom("CM"))); + BOOST_CHECK(BondExists(r1.FindAtom("CM"), r1.FindAtom("C21"))); + BOOST_CHECK(BondExists(r1.FindAtom("C21"), r1.FindAtom("C31"))); + BOOST_CHECK(BondExists(r1.FindAtom("C21"), r1.FindAtom("N11"))); + BOOST_CHECK(BondExists(r1.FindAtom("C31"), r1.FindAtom("C41"))); + BOOST_CHECK(BondExists(r1.FindAtom("C41"), r1.FindAtom("C51"))); + BOOST_CHECK(BondExists(r1.FindAtom("C51"), r1.FindAtom("C61"))); + BOOST_CHECK(BondExists(r1.FindAtom("C61"), r1.FindAtom("N11"))); + BOOST_CHECK(BondExists(r1.FindAtom("C"), r1.FindAtom("N"))); + BOOST_CHECK(BondExists(r1.FindAtom("C9"), r1.FindAtom("N1"))); + BOOST_CHECK(BondExists(r1.FindAtom("CC"), r1.FindAtom("N2"))); + BOOST_CHECK(BondExists(r1.FindAtom("CD"), r1.FindAtom("N3"))); +} + +}} + diff --git a/modules/conop/tests/helper.hh b/modules/conop/tests/helper.hh new file mode 100644 index 0000000000000000000000000000000000000000..43b70ed306ead19127f2f5552f51ec0509210447 --- /dev/null +++ b/modules/conop/tests/helper.hh @@ -0,0 +1,24 @@ +#ifndef OST_CONOP_TEST_HELPERS_HH +#define OST_CONOP_TEST_HELPERS_HH + +#include <ost/mol/entity_handle.hh> +#include <ost/mol/residue_handle.hh> + +namespace ost { namespace conop { + +mol::ResidueHandle make_uracyl(mol::ChainHandle chain); +void verify_1zk_connectivity(const mol::ResidueHandle& r1); +mol::ResidueHandle make_cytosine(mol::ChainHandle chain); +mol::ResidueHandle make_uracil1(mol::ChainHandle chain); +mol::ResidueHandle make_uracil2(mol::ChainHandle chain); +mol::ResidueHandle make_defective_uracil2(mol::ChainHandle chain); +mol::ResidueHandle make_1zk(mol::ChainHandle chain); +void verify_1zk_connectivity(const mol::ResidueHandle& r1); +void verify_nucleotide_connectivity(const mol::ResidueHandle& res); +void verify_nucleotide_link(const mol::ResidueHandle& p3, + const mol::ResidueHandle& p5); +void verify_nucleotide_nolink(const mol::ResidueHandle& p3, + const mol::ResidueHandle& p5); +}} + +#endif diff --git a/modules/conop/tests/test_rule_based_conop.cc b/modules/conop/tests/test_rule_based_conop.cc new file mode 100644 index 0000000000000000000000000000000000000000..7c0af27354801cc7a866b0d3f33e60e49ce0c1f8 --- /dev/null +++ b/modules/conop/tests/test_rule_based_conop.cc @@ -0,0 +1,162 @@ + +// ----------------------------------------------------------------------------- +// This file is part of the OpenStructure project <www.openstructure.org> + +// Copyright (C) 2008-2011 by the OpenStructure authors + +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. + +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +// ----------------------------------------------------------------------------- + +#define BOOST_TEST_DYN_LINK +#include <boost/test/unit_test.hpp> +#include <boost/test/auto_unit_test.hpp> + +#include <ost/mol/mol.hh> +#include <ost/platform.hh> +#include <ost/log.hh> +#include <ost/conop/rule_based.hh> +#include <ost/conop/conop.hh> +#include "helper.hh" + +using boost::unit_test_framework::test_suite; +using namespace ost; +using namespace ost::conop; +//using namespace ost::io; +using namespace ost::mol; + +CompoundLibPtr load_lib() +{ + if (!getenv("OST_ROOT")) { + LOG_ERROR("OST_ROOT environment variable not set. Can't load " + "compound library without a proper OST_ROOT"); + return CompoundLibPtr(); + } + SetPrefixPath(getenv("OST_ROOT")); + String lib_path=GetSharedDataPath()+"/compounds.chemlib"; + CompoundLibPtr compound_lib=CompoundLib::Load(lib_path); + return compound_lib; +} + +BOOST_AUTO_TEST_SUITE(conop); + +BOOST_AUTO_TEST_CASE(rule_based_set_get_flags) +{ + CompoundLibPtr lib=load_lib(); + if (!lib) { return; } + RuleBasedProcessor rbc(lib); + // check the defaults + BOOST_CHECK_EQUAL(rbc.GetConnect(), true); + BOOST_CHECK_EQUAL(rbc.GetCheckBondFeasibility(), false); + BOOST_CHECK_EQUAL(rbc.GetUnkAtomTreatment(), CONOP_WARN); + BOOST_CHECK_EQUAL(rbc.GetUnkResidueTreatment(), CONOP_WARN); + BOOST_CHECK_EQUAL(rbc.GetStrictHydrogens(), false); + rbc.PushFlags(); + rbc.SetConnect(false); + rbc.SetCheckBondFeasibility(true); + rbc.SetUnkResidueTreatment(CONOP_FATAL); + rbc.SetUnkAtomTreatment(CONOP_REMOVE); + BOOST_CHECK_EQUAL(rbc.GetConnect(), false); + BOOST_CHECK_EQUAL(rbc.GetCheckBondFeasibility(),true); + BOOST_CHECK_EQUAL(rbc.GetUnkResidueTreatment(),CONOP_FATAL); + BOOST_CHECK_EQUAL(rbc.GetUnkAtomTreatment(),CONOP_REMOVE); + rbc.PopFlags(); +} + +BOOST_AUTO_TEST_CASE(rule_based_push_pop_flags) +{ + CompoundLibPtr lib=load_lib(); + if (!lib) { return; } + RuleBasedProcessor rbc(lib); + BOOST_CHECK_THROW(rbc.PopFlags(), std::runtime_error); + rbc.PushFlags(); + rbc.PopFlags(); + rbc.PushFlags(); + rbc.SetConnect(false); + BOOST_CHECK(!rbc.GetConnect()); + rbc.PopFlags(); + BOOST_CHECK(rbc.GetConnect()); +} + +BOOST_AUTO_TEST_CASE(rule_based_connect) +{ + CompoundLibPtr lib=load_lib(); + if (!lib) { return; } + RuleBasedProcessor rbc(lib); + EntityHandle ent = CreateEntity(); + ChainHandle ch=ent.EditXCS().InsertChain("A"); + ResidueHandle r = make_cytosine(ch); + rbc.SetConnect(false); + rbc.Process(ent); + BOOST_CHECK_EQUAL(static_cast<size_t>(0), + ent.GetBondList().size()); + rbc.SetConnect(true); + rbc.Process(ent); + verify_nucleotide_connectivity(r); +} + + +BOOST_AUTO_TEST_CASE(rule_based_unk_atoms) +{ + CompoundLibPtr lib = load_lib(); + if (!lib) { return; } + RuleBasedProcessor rbc(lib); + EntityHandle ent = CreateEntity(); + XCSEditor edi=ent.EditXCS(); + ChainHandle ch = edi.InsertChain("A"); + ResidueHandle gly = edi.AppendResidue(ch, "GLY"); + edi.InsertAtom(gly, "N", geom::Vec3()); + edi.InsertAtom(gly, "CA", geom::Vec3()); + edi.InsertAtom(gly, "C", geom::Vec3()); + edi.InsertAtom(gly, "O", geom::Vec3()); + edi.InsertAtom(gly, "CB", geom::Vec3()); + EntityHandle ent2 = ent.Copy(); + rbc.SetUnkAtomTreatment(CONOP_SILENT); + DiagnosticsPtr diags=rbc.Process(ent2); + BOOST_CHECK_EQUAL(diags->diag_count(), static_cast<size_t>(0)); + rbc.SetUnkAtomTreatment(CONOP_WARN); + diags = rbc.Process(ent2); + BOOST_CHECK_EQUAL(diags->diag_count(), static_cast<size_t>(1)); + ent2 = ent.Copy(); + rbc.SetUnkAtomTreatment(CONOP_REMOVE); + rbc.Process(ent2); + BOOST_CHECK(!ent2.FindAtom("A", 1, "CB")); + ent2 = ent.Copy(); + rbc.SetUnkAtomTreatment(CONOP_REMOVE_RESIDUE); + rbc.Process(ent2); + BOOST_CHECK_EQUAL(ent2.GetResidueCount(), 0); +} + +BOOST_AUTO_TEST_CASE(rule_based_unk_res) +{ + CompoundLibPtr lib = load_lib(); + if (!lib) { return; } + RuleBasedProcessor rbc(lib); + EntityHandle ent = CreateEntity(); + XCSEditor edi=ent.EditXCS(); + ChainHandle ch = edi.InsertChain("A"); + ResidueHandle gly = edi.AppendResidue(ch, "???"); + EntityHandle ent2 = ent.Copy(); + rbc.SetUnkResidueTreatment(CONOP_SILENT); + DiagnosticsPtr diags=rbc.Process(ent2); + BOOST_CHECK_EQUAL(diags->diag_count(), static_cast<size_t>(0)); + rbc.SetUnkResidueTreatment(CONOP_WARN); + diags = rbc.Process(ent2); + BOOST_CHECK_EQUAL(diags->diag_count(), static_cast<size_t>(1)); + ent2 = ent.Copy(); + rbc.SetUnkResidueTreatment(CONOP_REMOVE); + rbc.Process(ent2); + BOOST_CHECK(!ent2.FindResidue("A", 1)); +} + +BOOST_AUTO_TEST_SUITE_END(); diff --git a/modules/mol/base/src/editor_base.cc b/modules/mol/base/src/editor_base.cc index d75827806359c8dc5036fa7cf819abb0ce7ffcc6..c358a020475c36fb90b44aa48464b21dabc64342 100644 --- a/modules/mol/base/src/editor_base.cc +++ b/modules/mol/base/src/editor_base.cc @@ -147,6 +147,14 @@ void EditorBase::DeleteChain(const ChainHandle& chain) ent_.Impl()->DeleteChain(chain.Impl()); } +void EditorBase::DeleteAtoms(const AtomHandleList& atoms) +{ + for (AtomHandleList::const_iterator i = atoms.begin(), e = atoms.end(); + i != e; ++i) { + i->GetResidue().Impl()->DeleteAtom(i->Impl()); + } +} + void EditorBase::DeleteAtom(const AtomHandle& atom_handle) { CheckHandleValidity(atom_handle); diff --git a/modules/mol/base/src/editor_base.hh b/modules/mol/base/src/editor_base.hh index 504311d547cf00f43e11fb5b1c49925a16564e34..576b3cdb7867bafb0191c642f8b891bc4af4d86b 100644 --- a/modules/mol/base/src/editor_base.hh +++ b/modules/mol/base/src/editor_base.hh @@ -185,6 +185,10 @@ public: /// is the atom to remove. If no such atom exists, this method will /// have no effect void DeleteAtom(const AtomHandle& atom); + /// \ brief Delete a set of atoms + /// + /// All associated torsions and bonds will be removed as well + void DeleteAtoms(const AtomHandleList& atoms); /// \brief Add named torsion to entity TorsionHandle AddTorsion(const String& name, const AtomHandle& a1, diff --git a/modules/mol/base/src/impl/entity_impl.cc b/modules/mol/base/src/impl/entity_impl.cc index 6942ce00fee8b3a93f45e31c083b90c51c80bdd3..3019a0806e0dc3f8838157e09fcd69d93fb2888a 100644 --- a/modules/mol/base/src/impl/entity_impl.cc +++ b/modules/mol/base/src/impl/entity_impl.cc @@ -35,6 +35,7 @@ #include <ost/geom/geom.hh> #include <ost/log.hh> +#include <ost/profile.hh> #include "atom_impl.hh" #include "entity_impl.hh" #include <ost/mol/entity_visitor.hh> @@ -115,6 +116,7 @@ int EntityImpl::GetResidueCount() const EntityImplPtr EntityImpl::Copy() { + Profile prof("EntityImpl::Copy"); #if MAKE_SHARED_AVAILABLE EntityImplPtr ent_p=boost::make_shared<EntityImpl>(); #else diff --git a/tools/molck/main.cc b/tools/molck/main.cc index 07baf9496ca1a9a3aee7bd2deb03502206f010fe..9dd212a51c9dd2bc616f5f9e24a88c4015ff2533 100644 --- a/tools/molck/main.cc +++ b/tools/molck/main.cc @@ -65,14 +65,13 @@ CompoundLibPtr load_compound_lib(const String& custom_path) exe_path = std::string( result, (count > 0) ? count : 0 ); #endif if (exe_path.empty()) { - std::cerr << "Could not determine the path of the molck executable. Will only look for compounds.chemlib in the current working directory" << std::endl; + std::cerr << "Could not determine the path of the molck executable. Will only " + "look for compounds.chemlib in the current working directory" << std::endl; } else { fs::path path_and_exe(exe_path); fs::path path_only=path_and_exe.branch_path(); fs::path share_path = path_only.branch_path(); - share_path/="share"; - share_path/="openstructure"; - share_path/="compounds.chemlib"; + share_path = share_path / "share" / "openstructure" / "compounds.chemlib"; #if BOOST_FILESYSTEM_VERSION==3 || BOOST_VERSION<103400 String share_path_string=share_path.string(); @@ -91,37 +90,33 @@ CompoundLibPtr load_compound_lib(const String& custom_path) return CompoundLibPtr(); } +const char* USAGE= +"this is molck - the molecule checker\n" +"usage: molck [options] file1.pdb [file2.pdb [...]]\n" +"options \n" +" --complib=path location of the compound library file. If not provided, the \n" +" following locations are searched in this order: \n" +" 1. Working directory, 2. OpenStructure standard library location (if the \n" +" executable is part of a standard OpenStructure installation) \n" +" --rm=<a>,<b> remove atoms and residues matching some criteria \n" +" zeroocc - Remove atoms with zero occupancy \n" +" hyd - Remove hydrogen atoms \n" +" oxt - Remove terminal oxygens \n" +" nonstd - Remove all residues not one of the 20 standard amino acids \n" +" unk - Remove unknown and atoms not following the nomenclature\n" +" --fix-ele clean up element column\n" +" --stdout write cleaned file(s) to stdout \n" +" --out=filename write cleaned file(s) to disk. % characters in the filename are \n" +" replaced with the basename of the input file without extension. \n" +" Default: %-molcked.pdb \n" +" --color=auto|on|off \n" +" whether output should be colored\n" +" --map-nonstd maps modified residues back to the parent amino acid, for example\n" +" MSE -> MET, SEP -> SER.\n"; + void usage() { - std::cerr << "usage: molck [options] file1.pdb [file2.pdb [...]]" << std::endl; - std::cerr << "options" << std::endl; - std::cerr << " --complib=path location of the compound library file" << std::endl; - std::cerr << " If not provided, the following locations are searched" << std::endl; - std::cerr << " in this order:" << std::endl; - std::cerr << " 1. Working directory" << std::endl; - std::cerr << " 2. OpenStructure standard library location" << std::endl; - std::cerr << " (if the executable is part of a standard" << std::endl; - std::cerr << " OpenStructure installation)" << std::endl; - std::cerr << " --rm=<a>,<b> remove atoms and residues matching some criteria" << std::endl; - std::cerr << " zeroocc - Remove atoms with zero occupancy" << std::endl; - std::cerr << " hyd - Remove hydrogen atoms" << std::endl; - std::cerr << " oxt - Remove terminal oxygens" << std::endl; - std::cerr << " nonstd - Remove all residues not " << std::endl - << " one of the 20 standard amino acids" << std::endl; - std::cerr << " unk - Remove unknown atoms and atoms that " << std::endl - << " are not supposed to be part of a residue" << std::endl; - std::cerr << " --fix-ele clean up element column" << std::endl; - std::cerr << " --stdout write cleaned file(s) to stdout" << std::endl; - std::cerr << " --fileout=blueprint write cleaned file(s) to disk" << std::endl; - std::cerr << " The blueprint string, which must contain a % character," << std::endl; - std::cerr << " is used to generate the output filename and path" << std::endl; - std::cerr << " by replacing % with the input file name without" << std::endl; - std::cerr << " the extension. Output files automatically add" << std::endl; - std::cerr << " '.pdb' extension" << std::endl; - std::cerr << " --color=auto|on|off " << std::endl - << " whether output should be colored" << std::endl; - std::cerr << " --map-nonstd maps modified residues back to the parent amino " << std::endl - << " acid, e.g. MSE -> MET, SEP -> SER." << std::endl; + std::cerr << USAGE << std::endl; exit(0); } @@ -155,7 +150,8 @@ int main(int argc, char *argv[]) "whether the output should be colored.") ("files", po::value< std::vector<String> >(), "input file(s)") ("stdout", "write cleaned file(s) to stdout") - ("fileout", po::value<String>(&output_blueprint_string), "write cleaned file to output using blueprint to determine path") + ("out,o", po::value<String>(&output_blueprint_string)->default_value("%-molcked.pdb"), + "write cleaned file to output using blueprint to determine path") ("map-nonstd", "map non standard residues back to standard ones (e.g.: MSE->MET,SEP->SER,etc.)") ("fix-ele", "insert element") ("complib", po::value<String>(&custom_path)->default_value(""),"location of the compound library file") @@ -186,11 +182,10 @@ int main(int argc, char *argv[]) } if (vm.count("stdout")) { write_to_stdout = true; - } - if (vm.count("fileout")) { + } else { write_to_file = true; - output_blueprint_string = vm["fileout"].as<String>(); - } + output_blueprint_string = vm["out"].as<String>(); + } if (vm.count("map-nonstd")) { map_nonstd_res = true; } @@ -268,7 +263,7 @@ int main(int argc, char *argv[]) } XCSEditor edi=ent.EditXCS(); - DiagEngine diags; + Diagnostics diags; Checker checker(lib, ent, diags); if (rm_zero_occ_atoms) { std::cerr << "removing atoms with zero occupancy" << std::endl; @@ -320,8 +315,9 @@ int main(int argc, char *argv[]) checker.CheckForCompleteness(); checker.CheckForUnknownAtoms(); checker.CheckForNonStandard(); - for (size_t j=0; j<diags.GetDiags().size(); ++j) { - const Diag* diag=diags.GetDiags()[j]; + for (Diagnostics::const_diag_iterator + j = diags.diags_begin(), e = diags.diags_end(); j != e; ++j) { + const Diag* diag=*j; std::cerr << diag->Format(colored); switch (diag->GetType()) { case DIAG_UNK_ATOM: @@ -373,28 +369,21 @@ int main(int argc, char *argv[]) if (write_to_file) { fs::path input_file_path(files[i]); fs::path input_filename = input_file_path.stem(); - - #if BOOST_FILESYSTEM_VERSION==3 || BOOST_VERSION<103400 String input_filename_string=input_filename.string(); #else String input_filename_string=input_filename.file_string(); #endif - size_t replstart =output_blueprint_string.find('%'); - - if (replstart == String::npos) { - std::cerr << "The output blueprint string does not contain a % character" << std::endl; - exit(-1); - } String output_blueprint_string_copy = output_blueprint_string; - output_blueprint_string_copy.replace(replstart,1,input_filename_string); - output_blueprint_string_copy+=".pdb"; - + if (replstart != String::npos) { + output_blueprint_string_copy.replace(replstart,1,input_filename_string); + } try { fs::path out_path(output_blueprint_string_copy); - if (!exists(out_path)) { - std::cerr << "Output path does not exist: " << output_blueprint_string_copy << std::endl; + if (out_path.has_parent_path() && !exists(out_path.parent_path())) { + std::cerr << "Output path does not exist: " + << output_blueprint_string_copy << std::endl; exit(-1); } } catch (std::exception& e) { @@ -402,7 +391,8 @@ int main(int argc, char *argv[]) size_t perden = String(e.what()).find("Permission denied"); if (perden != String::npos) { - std::cerr << "Cannot write into output directory: " << output_blueprint_string_copy << std::endl; + std::cerr << "Cannot write into output directory: " + << output_blueprint_string_copy << std::endl; exit(-1); } else { std::cerr << e.what() << std::endl; @@ -411,7 +401,7 @@ int main(int argc, char *argv[]) } std::cerr << "Writing out file: " << output_blueprint_string_copy << std::endl; PDBWriter writer(output_blueprint_string_copy, prof); - writer.Write(ent); + writer.Write(ent); } }