diff --git a/modules/mol/base/src/impl/CMakeLists.txt b/modules/mol/base/src/impl/CMakeLists.txt
index 45a453ac1901307c628aeccb2f427a0293d480d3..4ee2d05bc2157ad462dbac93391d579a85cef903 100644
--- a/modules/mol/base/src/impl/CMakeLists.txt
+++ b/modules/mol/base/src/impl/CMakeLists.txt
@@ -30,6 +30,7 @@ entity_impl_fw.hh
 fragment_impl.hh
 fragment_impl_fw.hh
 query_ast.hh
+query_ast_fw.hh
 query_impl.hh
 query_impl_fw.hh
 residue_impl.hh
diff --git a/modules/mol/base/src/impl/query_ast.cc b/modules/mol/base/src/impl/query_ast.cc
index 45cd0dabc71895672921dbff36077d051fde7de2..697ff173f1271b36e576d37acff20e9e50df5d58 100644
--- a/modules/mol/base/src/impl/query_ast.cc
+++ b/modules/mol/base/src/impl/query_ast.cc
@@ -24,7 +24,11 @@
 
 
 namespace ost { namespace mol { namespace impl {
-
+
+LogicOPNode::LogicOPNode(LogicOP op)
+  : lhs_(NULL), rhs_(NULL), op_(op)
+{}
+
 LogicOPNode::~LogicOPNode() {
   if (rhs_)
     delete rhs_;
@@ -109,10 +113,54 @@ const geom::Vec3& WithinParam::GetCenter() const {
   return center_;
 }
 
-LogicOPNode::LogicOPNode(LogicOP op)
-  : lhs_(NULL), rhs_(NULL), op_(op) {
-
+/// Creates an empty parameter that is compared as a plain string.
+StringOrRegexParam::StringOrRegexParam():
+  is_regex_(false), r_(), s_()
+{}
+
+/// Creates a parameter from the glob-style string s. If s contains '?' (any
+/// single character) or '*' (any run of characters, including none) it is
+/// translated into a regular expression; otherwise s is kept as a plain
+/// string and compared verbatim.
+StringOrRegexParam::StringOrRegexParam(const String& s):
+  is_regex_(s.find_first_of("?*")!=String::npos), r_(), s_(s)
+{
+  if(!is_regex_) return;
+
+  // Only the two glob wildcards may act as patterns, so every character that
+  // is special in a regular expression is escaped. Unescaped, a quoted value
+  // such as "(*" makes the boost::regex constructor throw, and "A.*" matches
+  // more than the glob asks for.
+  static const String meta_chars(".[]{}()\\+|^$");
+  String expr;
+  for(String::const_iterator it=s.begin();it!=s.end();++it) {
+    if((*it)=='?') {
+      expr+=".";
+    } else if((*it)=='*') {
+      expr+=".*";
+    } else {
+      if(meta_chars.find(*it)!=String::npos) expr+="\\";
+      expr+=*it;
+    }
+  }
+  r_=boost::regex(expr);
+}
+
+/// Returns true if s matches this parameter: the whole of s has to match the
+/// regular expression for glob patterns, plain strings have to be equal.
+bool StringOrRegexParam::Match(const String& s) const
+{
+  if(is_regex_) return boost::regex_match(s,r_);
+  return s==s_;
+}
+
+/// Two parameters are equal if they were built from the same string. Both
+/// is_regex_ and r_ are derived from s_ alone, so comparing s_ is sufficient.
+bool StringOrRegexParam::operator==(const StringOrRegexParam& o) const
+{
+  return s_==o.s_;
 }
+
 Node* Node::GetParent() {
   return parent_;
 }
diff --git a/modules/mol/base/src/impl/query_ast.hh b/modules/mol/base/src/impl/query_ast.hh
index 50cd4a670a16ebb77cb7ad47ef1db28289d3fa70..2f46ec4996f8fad405f024f265aa321acd002ab8 100644
--- a/modules/mol/base/src/impl/query_ast.hh
+++ b/modules/mol/base/src/impl/query_ast.hh
@@ -19,18 +19,22 @@
 #ifndef OST_QUERY_AST_HH
 #define OST_QUERY_AST_HH
 
+#include <boost/regex.hpp>
+
 #include <ost/mol/module_config.hh>
 #include <boost/variant.hpp>
 #include <ost/geom/vec3.hh>
 #include <ost/mol/view_type_fw.hh>
 #include <ost/mol/property_id.hh>
 
+#include "query_ast_fw.hh"
+
 namespace ost { namespace mol { namespace impl {
 
 /// holds the right and left hand operand of a within statement. The lefthand
-/// operand is a distance in Anstrom, the righthand parameter may both be
-/// a lazily bound reference to a point cloud (that is only known at)
-/// instantiation time or a vector.
+/// operand is a distance in Angstrom, the righthand parameter may either be
+/// a lazily bound reference to a point cloud (that is only known at
+/// instantiation time) or a vector.
class DLLEXPORT_OST_MOL WithinParam { public: WithinParam(const geom::Vec3& center, float radius); @@ -50,7 +54,21 @@ private: int lazily_bound_ref_; }; -typedef boost::variant<int, float, String, WithinParam> ParamType; +// holds either a simple string or a full regex, constructed from a glob-style string +class DLLEXPORT_OST_MOL StringOrRegexParam { + public: + StringOrRegexParam(); + explicit StringOrRegexParam(const String& s); + bool Match(const String& s) const; + bool operator==(const StringOrRegexParam&) const; + const String& str() const {return s_;} + private: + bool is_regex_; + boost::regex r_; + String s_; +}; + +typedef boost::variant<int, float, WithinParam, StringOrRegexParam> ParamType; // AST node, used internally for building the AST tree. class DLLEXPORT_OST_MOL Node { @@ -66,10 +84,6 @@ private: Node* parent_; }; -typedef enum { - LOP_OR, LOP_AND -} LogicOP; - class DLLEXPORT_OST_MOL LogicOPNode : public Node { @@ -104,10 +118,6 @@ private: LogicOP op_; }; -typedef enum { - COP_EQ, COP_NEQ, COP_GE, COP_LE, COP_LT, COP_GT -} CompOP; - class DLLEXPORT_OST_MOL SelNode : public Node { public: diff --git a/modules/mol/base/src/impl/query_ast_fw.hh b/modules/mol/base/src/impl/query_ast_fw.hh new file mode 100644 index 0000000000000000000000000000000000000000..4d69a9bf610206f3d61c0f41e8dafd0c0a6bf3f4 --- /dev/null +++ b/modules/mol/base/src/impl/query_ast_fw.hh @@ -0,0 +1,41 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2010 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. 
+// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ +#ifndef OST_QUERY_AST_FW_HH +#define OST_QUERY_AST_FW_HH + +namespace ost { namespace mol { namespace impl { + + class WithinParam; + class StringOrRegexParam; + class Node; + class LogicOpNode; + class SelNode; + + typedef enum { + LOP_OR, LOP_AND + } LogicOP; + + typedef enum { + COP_EQ, COP_NEQ, COP_GE, COP_LE, COP_LT, COP_GT + } CompOP; + + +}}} + +#endif diff --git a/modules/mol/base/src/impl/query_impl.cc b/modules/mol/base/src/impl/query_impl.cc index 463daac1c6284aa39b8e436510eab3601d62ebca..25d4e200a1fa9ada743f7c41260cbb45bfb62f4c 100644 --- a/modules/mol/base/src/impl/query_impl.cc +++ b/modules/mol/base/src/impl/query_impl.cc @@ -148,7 +148,7 @@ QueryToken QueryLexer::LexNumericToken() { } bool is_ident_or_str(char c) { - static String allowed_chars("_"); + static String allowed_chars("_*?"); return isalnum(c) || allowed_chars.find_first_of(c)!=String::npos; } @@ -460,7 +460,7 @@ bool QueryImpl::ParseValue(const Prop& sel, const QueryToken& op, } return false; } else { - value=value_string; + value=StringOrRegexParam(value_string); } break; @@ -471,13 +471,13 @@ bool QueryImpl::ParseValue(const Prop& sel, const QueryToken& op, error_desc_.range=v.GetRange(); return false; } else if (sel.type==Prop::STRING) { - value=value_string; + value=StringOrRegexParam(value_string); } else value=ParamType(float(atof(value_string.c_str()))); break; case tok::IntegralValue: if (sel.type==Prop::STRING) { - 
value=value_string; + value=StringOrRegexParam(value_string); } else { if (sel.type==Prop::INT) { value=ParamType(atoi(value_string.c_str())); @@ -644,12 +644,12 @@ Node* QueryImpl::ParsePropValueExpr(QueryLexer& lexer) { } LogicOP lop=inversion_stack_.back() ? LOP_OR : LOP_AND; CompOP cop=inversion_stack_.back() ? COP_NEQ : COP_EQ; - ParamType cname_val(query_string_.substr(cname.GetValueRange().Loc, - cname.GetValueRange().Length).c_str()); + ParamType cname_val(StringOrRegexParam(query_string_.substr(cname.GetValueRange().Loc, + cname.GetValueRange().Length).c_str())); Prop cname_prop(Prop::CNAME, Prop::STRING, Prop::CHAIN); SelNode* cname_node=new SelNode(cname_prop, cop, cname_val); - ParamType aname_val(query_string_.substr(aname.GetValueRange().Loc, - aname.GetValueRange().Length).c_str()); + ParamType aname_val(StringOrRegexParam(query_string_.substr(aname.GetValueRange().Loc, + aname.GetValueRange().Length).c_str())); Prop aname_prop(Prop::ANAME, Prop::STRING, Prop::ATOM); SelNode* aname_node=new SelNode(aname_prop, cop, aname_val); ParamType rnum_val(atoi(query_string_.substr(rnum.GetValueRange().Loc, diff --git a/modules/mol/base/src/query_state.cc b/modules/mol/base/src/query_state.cc index 2f04aeecc9c9d02ef318e6b22cb20b5702a87e22..1e5c8d3a2841b79658ac7b350275d0582a96d5fb 100644 --- a/modules/mol/base/src/query_state.cc +++ b/modules/mol/base/src/query_state.cc @@ -26,9 +26,12 @@ #include <ost/mol/impl/residue_impl.hh> #include <ost/mol/impl/atom_impl.hh> #include <ost/mol/impl/query_impl.hh> +#include <ost/mol/impl/query_ast.hh> namespace ost { namespace mol { +using namespace impl; + struct LazilyBoundRef { LazilyBoundRef& operator=(const LazilyBoundRef& rhs); //EntityView for now, will be generalized to a point cloud later on. 
@@ -41,21 +44,19 @@ struct LazilyBoundData { -using namespace impl; - -bool cmp_string(CompOP op,const String& lhs, const String& rhs) { +bool cmp_string(CompOP op,const String& lhs, const StringOrRegexParam& rhs) { switch (op) { case COP_EQ: - return lhs == rhs; + return rhs.Match(lhs); case COP_NEQ: - return lhs != rhs; + return !rhs.Match(lhs); default: assert(0 && "should be checked during ast generation"); return false; } } -bool QueryState::do_within(const geom::Vec3& pos, const impl::WithinParam& p, +bool QueryState::do_within(const geom::Vec3& pos, const WithinParam& p, CompOP op) { if (!p.HasValidRef()) { @@ -142,7 +143,7 @@ const LazilyBoundRef& QueryState::GetBoundObject(int i) const { -boost::logic::tribool QueryState::EvalChain(const impl::ChainImplPtr& c) { +boost::logic::tribool QueryState::EvalChain(const ChainImplPtr& c) { if (q_.empty_optimize_) return true; const std::set<size_t>& indices = q_.indices_[(int)Prop::CHAIN]; @@ -155,7 +156,7 @@ boost::logic::tribool QueryState::EvalChain(const impl::ChainImplPtr& c) { case Prop::CNAME: value = c->GetName(); s_[*i] = cmp_string(ss.comp_op, - boost::get<String>(ss.param),value); + value,boost::get<StringOrRegexParam>(ss.param)); continue; default: if (ss.sel_id>=Prop::CUSTOM) { @@ -176,7 +177,7 @@ boost::logic::tribool QueryState::EvalChain(const impl::ChainImplPtr& c) { return this->EvalStack(Prop::CHAIN); } -boost::logic::tribool QueryState::EvalResidue(const impl::ResidueImplPtr& r) { +boost::logic::tribool QueryState::EvalResidue(const ResidueImplPtr& r) { if (q_.empty_optimize_) return true; const std::set<size_t>& indices = q_.indices_[(int)Prop::RESIDUE]; @@ -191,7 +192,7 @@ boost::logic::tribool QueryState::EvalResidue(const impl::ResidueImplPtr& r) { case Prop::RNAME: str_value = r->GetKey(); s_[*i] = cmp_string(ss.comp_op,str_value, - boost::get<String>(ss.param)); + boost::get<StringOrRegexParam>(ss.param)); continue; case Prop::RNUM: int_value=r->GetNumber().GetNum(); @@ -222,7 +223,7 @@ 
boost::logic::tribool QueryState::EvalResidue(const impl::ResidueImplPtr& r) { s_[*i]=cmp_num<int>(ss.comp_op,int_value,boost::get<int>(ss.param)); break; case Prop::RTYPE: - p=boost::get<String>(ss.param); + p=boost::get<StringOrRegexParam>(ss.param).str(); if (p.length()>1) { bool b=false; if (p=="helix") { @@ -238,7 +239,7 @@ boost::logic::tribool QueryState::EvalResidue(const impl::ResidueImplPtr& r) { } else { str_value= String(1, (char)r->GetSecStructure()); s_[*i]=cmp_string(ss.comp_op,str_value, - boost::get<String>(ss.param)); + boost::get<StringOrRegexParam>(ss.param)); } break; case Prop::RINDEX: @@ -306,7 +307,7 @@ QueryState::QueryState() : s_(), q_(dummy_query_impl) { } -boost::logic::tribool QueryState::EvalAtom(const impl::AtomImplPtr& a) { +boost::logic::tribool QueryState::EvalAtom(const AtomImplPtr& a) { if (q_.empty_optimize_) return true; const std::set<size_t>& indices = q_.indices_[(int)Prop::ATOM]; @@ -320,32 +321,32 @@ boost::logic::tribool QueryState::EvalAtom(const impl::AtomImplPtr& a) { case Prop::ANAME: str_value = a->GetName(); s_[*i] = cmp_string(ss.comp_op,str_value, - boost::get<String>(ss.param)); + boost::get<StringOrRegexParam>(ss.param)); break; case Prop::AX: float_value=(a->GetPos())[0]; s_[*i]=cmp_num<Real>(ss.comp_op, float_value, - boost::get<float>(ss.param)); + boost::get<float>(ss.param)); break; case Prop::AY: float_value=(a->GetPos())[1]; s_[*i]=cmp_num<Real>(ss.comp_op, float_value, - boost::get<float>(ss.param)); + boost::get<float>(ss.param)); break; case Prop::AZ: float_value=(a->GetPos())[2]; s_[*i]=cmp_num<Real>(ss.comp_op, float_value, - boost::get<float>(ss.param)); + boost::get<float>(ss.param)); break; case Prop::OCC: float_value=a->GetOccupancy(); s_[*i]=cmp_num<Real>(ss.comp_op, float_value, - boost::get<float>(ss.param)); + boost::get<float>(ss.param)); break; case Prop::ELE: str_value = a->GetElement(); s_[*i] = cmp_string(ss.comp_op,str_value, - boost::get<String>(ss.param)); + 
boost::get<StringOrRegexParam>(ss.param)); break; case Prop::ABFAC: float_value=a->GetBFactor(); diff --git a/modules/mol/base/src/query_state.hh b/modules/mol/base/src/query_state.hh index 30c1cd0692a15bfc67002cd4aa6381cdb28d5e13..ad6feffd277af599affe3f4f4745610e5991bbf9 100644 --- a/modules/mol/base/src/query_state.hh +++ b/modules/mol/base/src/query_state.hh @@ -22,9 +22,11 @@ /* Author: Marco Biasini */ + +#include <ost/geom/geom.hh> #include <ost/mol/impl/query_impl_fw.hh> -#include <ost/mol/impl/query_ast.hh> +#include <ost/mol/impl/query_ast_fw.hh> #include <ost/mol/handle_type_fw.hh> #include <boost/logic/tribool.hpp> @@ -37,7 +39,8 @@ namespace ost { namespace mol { - +class EntityHandle; +class EntityView; struct LazilyBoundData; struct LazilyBoundRef; diff --git a/modules/mol/base/tests/test_query.cc b/modules/mol/base/tests/test_query.cc index e8d56b6a1ee8873f168631cfd8056790e0112483..b41ae5db0f17e2618681415b0d443b8b6b039cf1 100644 --- a/modules/mol/base/tests/test_query.cc +++ b/modules/mol/base/tests/test_query.cc @@ -85,6 +85,7 @@ void ensure_counts(EntityHandle e, const String& qs, int cc, int rc, int ac) { " for query String " << qs); } + void ensure_counts_v(EntityView src, const String& qs, int cc, int rc, int ac) { EntityView v; @@ -271,4 +272,14 @@ BOOST_AUTO_TEST_CASE(test_query_throw) BOOST_CHECK_NO_THROW(e.Select("gcnotsetprop:0=1")); } +BOOST_AUTO_TEST_CASE(test_glob) +{ + EntityHandle e=make_query_test_entity(); + ensure_counts(e, "rname=MET and aname=C*", 1, 1, 5); + ensure_counts(e, "rname=ARG and aname=N?1", 1, 1, 1); + ensure_counts(e, "rname=ARG and aname=NH?", 1, 1, 2); + ensure_counts(e, "rname=ARG and aname=\"*2\"", 1, 1, 1); + //ensure_counts(e, "rname=ARG and aname=N?", 1, 1, 1); +} + BOOST_AUTO_TEST_SUITE_END() diff --git a/modules/mol/base/tests/test_query_standalone.cc b/modules/mol/base/tests/test_query_standalone.cc new file mode 100644 index 0000000000000000000000000000000000000000..abca4ee21c7ec8e1a1fc881224360eca1fefce02 
--- /dev/null
+++ b/modules/mol/base/tests/test_query_standalone.cc
@@ -0,0 +1,51 @@
+#include <iostream>
+
+#include <ost/mol/mol.hh>
+
+using namespace ost;
+using namespace ost::mol;
+
+// Standalone driver for the glob-style ('*', '?') atom name queries: builds a
+// small MET/ARG entity and prints the number of atoms selected by each query
+// to stderr, one count per line. The counts are only printed, not checked.
+int main()
+{
+  EntityHandle eh = CreateEntity();
+  XCSEditor e=eh.EditXCS();
+  ChainHandle chain = e.InsertChain("A");
+  ResidueHandle res = e.AppendResidue(chain, "MET");
+  e.InsertAtom(res, "N",geom::Vec3(21.609,35.384,56.705), "N");
+  e.InsertAtom(res, "CA",geom::Vec3(20.601,35.494,57.793), "C");
+  e.InsertAtom(res, "C",geom::Vec3(19.654,34.300,57.789), "C");
+  e.InsertAtom(res, "O",geom::Vec3(18.447,34.456,57.595), "O");
+  e.InsertAtom(res, "CB",geom::Vec3(19.789,36.783,57.639), "C");
+  e.InsertAtom(res, "CG",geom::Vec3(20.629,38.055,57.606), "C");
+  e.InsertAtom(res, "SD",geom::Vec3(21.638,38.325,59.084), "S");
+  e.InsertAtom(res, "CE",geom::Vec3(23.233,37.697,58.529), "C");
+  res = e.AppendResidue(chain, "ARG");
+  e.InsertAtom(res, "N",geom::Vec3(20.202,33.112,58.011), "N");
+  e.InsertAtom(res, "CA",geom::Vec3(19.396,31.903,58.033), "C");
+  e.InsertAtom(res, "C",geom::Vec3(18.608,31.739,59.328), "C");
+  e.InsertAtom(res, "O",geom::Vec3(17.651,30.965,59.381), "O");
+  e.InsertAtom(res, "CB",geom::Vec3(20.284,30.681,57.801), "C");
+  e.InsertAtom(res, "CG",geom::Vec3(20.665,30.488,56.342), "C");
+  e.InsertAtom(res, "CD",geom::Vec3(21.557,29.281,56.154), "C");
+  e.InsertAtom(res, "NE",geom::Vec3(22.931,29.557,56.551), "N");
+  e.InsertAtom(res, "CZ",geom::Vec3(23.901,28.653,56.528), "C");
+  e.InsertAtom(res, "NH1",geom::Vec3(23.640,27.417,56.130), "N");
+  e.InsertAtom(res, "NH2",geom::Vec3(25.132,28.980,56.893), "N");
+
+  const char* queries[] = {
+    "rname=MET and aname=C*",
+    "rname=ARG and aname=N?1",
+    "rname=ARG and aname=NH?",
+    "rname=ARG and aname=\"*2\"",
+    "rname=ARG and aname=N?"
+  };
+  const unsigned int query_count=sizeof(queries)/sizeof(queries[0]);
+  for (unsigned int i=0; i<query_count; ++i) {
+    EntityView v=eh.Select(queries[i]);
+    std::cerr << v.GetAtomCount() << std::endl;
+  }
+  return 0;
+}