diff --git a/core/src/portable_binary_serializer.hh b/core/src/portable_binary_serializer.hh index b45ba31ae6c91c319fb45d05263ebbeca3af32e7..b1637d4eb8eeeaff57f698a55d747e8a2814b0c2 100644 --- a/core/src/portable_binary_serializer.hh +++ b/core/src/portable_binary_serializer.hh @@ -10,6 +10,7 @@ #include <promod3/core/portable_binary_data_source.hh> #include <promod3/core/message.hh> #include <boost/type_traits/is_fundamental.hpp> +#include <boost/type_traits/is_enum.hpp> #include <vector> #include <utility> @@ -68,6 +69,20 @@ inline void Serialize(DS& ds, SERIALIZABLE& object) { object.Serialize(ds); } +/// \brief Wrapper for ost sources/sinks: enums are cast to BINARY_TYPE before writing and cast back after reading, all other types go through "ds & value" without conversion +template <typename BINARY_TYPE, typename DS, typename SERIALIZABLE> +inline void ConvertBaseType(DS& ds, SERIALIZABLE& value) { + // enums always take the detour via a BINARY_TYPE temporary (for any serializer) + if (boost::is_enum<SERIALIZABLE>::value) { + BINARY_TYPE b_value; + if (!ds.IsSource()) b_value = static_cast<BINARY_TYPE>(value); + ds & b_value; + if (ds.IsSource()) value = static_cast<SERIALIZABLE>(b_value); + } else { + ds & value; + } +} + /// \brief Base type writing with conversion. 
/// Call as "ConvertBaseType<BINARY_TYPE>(sink, value)" /// Req: "BINARY_TYPE b_value = static_cast<BINARY_TYPE>(value)" must work diff --git a/doc/tests/scripts/loop_fragger.py b/doc/tests/scripts/loop_fragger.py index 1dcd41a6fcbdc9010f89b4c8b23ab6145bdd2f59..79e8942e5e51a43aa525a72d0ce589ed470e8b62 100644 --- a/doc/tests/scripts/loop_fragger.py +++ b/doc/tests/scripts/loop_fragger.py @@ -36,3 +36,10 @@ for i in range(len(fragger)): fraction = float(below_three)/len(fragger) print "Fraction of fragments below 3A: %.2f" % fraction + +# add into a cached map with ID based on frag_pos +fragger_map = loop.FraggerMap() +if not fragger_map.Contains(frag_pos): + fragger_map[frag_pos] = fragger +# store it for future use +fragger_map.SaveBB("frag_map.dat") diff --git a/doc/tests/test_doctests.py b/doc/tests/test_doctests.py index ae71e8b3709e223e96d8c6d1c3338e58af67a54a..3254b475206024e65bad180238664c8f70ca0b32 100644 --- a/doc/tests/test_doctests.py +++ b/doc/tests/test_doctests.py @@ -245,8 +245,13 @@ class DocTests(unittest.TestCase): self.assertEqual(rcode, 0) out_lines = sout.splitlines() self.assertEqual(len(out_lines), 101) + # NOTE: this last output depends on the structure-db! self.assertEqual(out_lines[-1].strip(), 'Fraction of fragments below 3A: 0.48') + # check that result exists and is readable + loop.FraggerMap.LoadBB('frag_map.dat') + # clean up + os.remove('frag_map.dat') def testLoopTorsionSampler(self): # fail-safe: exclude test if python-libs missing diff --git a/loop/doc/structure_db.rst b/loop/doc/structure_db.rst index 5374433604ca2c0169da27b7d73e99d1d5713afe..df85102ebabbe6081037a2bfc64a048c0402861e 100644 --- a/loop/doc/structure_db.rst +++ b/loop/doc/structure_db.rst @@ -613,17 +613,14 @@ arbitrary linear combination of following components: .. literalinclude:: ../../../tests/doc/scripts/loop_fragger.py -.. class:: Fragger +.. class:: Fragger(seq) - .. 
method:: Fragger(seq) + A Fragger object to search a :class:`StructureDB` for fragments with **seq** + as target sequence. You need to add some score components before you can + finally call the Fill function. - Initialize a Fragger object to search a :class:`StructureDB` for fragments with - **seq** as target sequence. You need to add some score components before - you can finally call the Fill function. - - :param seq: Sequence of fragments to be searched - - :type seq: :class:`str` + :param seq: Sequence of fragments to be searched + :type seq: :class:`str` .. method:: Fill(db, rmsd_thresh, num_fragments) @@ -728,6 +725,75 @@ arbitrary linear combination of following components: :type parameter_index: :class:`int` :type index: :class:`int` +.. class:: FraggerMap + + A simple storable map of Fragger objects. The idea is that one can use the map + to cache fragger lists that have already been generated. + + You can use :meth:`Contains` to check if an item with a given key + (:class:`int`) already exists and access items with the [] operator (see + :meth:`__getitem__` and :meth:`__setitem__`). + + Serialization is meant to be temporary and is not guaranteed to be portable. + + .. method:: Load(filename, db) + + Loads raw binary file generated with :meth:`Save`. + + :param filename: Path to the file. + :type filename: :class:`str` + :param db: Source of structural data used when filling the fragments. + :type db: :class:`StructureDB` + + :returns: The loaded map. + :rtype: :class:`FraggerMap` + + :raises: :exc:`~exceptions.RuntimeError` if file cannot be opened. + + .. method:: Save(filename) + + Saves raw binary representation of this map. Only fragment infos and scores + are stored and not the parameters for scoring. The coordinates are to be + reread from a structure db. + + :param filename: Path to the file. + :type filename: :class:`str` + + :raises: :exc:`~exceptions.RuntimeError` if file cannot be opened. + + .. 
method:: LoadBB(filename) + + Loads raw binary file generated with :meth:`SaveBB`. + + :param filename: Path to the file. + :type filename: :class:`str` + + :returns: The loaded map. + :rtype: :class:`FraggerMap` + + :raises: :exc:`~exceptions.RuntimeError` if file cannot be opened. + + .. method:: SaveBB(filename) + + Saves raw binary representation of this map. Only fragments and scores + are stored and not the parameters for scoring. Here, we also store the + coordinates. This file will hence be much larger than the one saved with + :meth:`Save`. + + :param filename: Path to the file. + :type filename: :class:`str` + + :raises: :exc:`~exceptions.RuntimeError` if file cannot be opened. + + .. method:: Contains(id) + + :return: True, iff a fragger object for this id is already in the map. + :rtype: :class:`bool` + + .. method:: __getitem__(id) + __setitem__(id, fragger) + + Allow read/write access (with [*id*]) to fragger object with given ID. .. [soding2005] Söding J (2005). Protein homology detection by HMM-HMM comparison. Bioinformatics 21 (7): 951–960. .. [sanner1996] Sanner M, Olson AJ, Spehner JC (1996). Reduced Surface: an Efficient Way to Compute Molecular Surfaces. Biopolymers 38 (3): 305-320. 
diff --git a/loop/pymod/export_fragger.cc b/loop/pymod/export_fragger.cc index 8c3a6017647307dbd66eac24e283f4ab047bd24c..9e73aef379495c2b10f906449f8d8e2ba8af24a2 100644 --- a/loop/pymod/export_fragger.cc +++ b/loop/pymod/export_fragger.cc @@ -59,11 +59,14 @@ namespace{ } fragger.AddTorsionProbabilityParameters(w,v_t_s,before,after); } + + FraggerPtr fm_getitem(FraggerMap& fm, int i) { return fm[i]; } + void fm_setitem(FraggerMap& fm, int i, FraggerPtr item) { fm[i] = item; } } -void export_Fragger(){ +void export_Fragger() { - class_<Fragger> ("Fragger", init<String>()) + class_<Fragger, FraggerPtr>("Fragger", init<String>()) .def("AddSeqIDParameters",&Fragger::AddSeqIDParameters,(arg("weight"))) .def("AddSeqSimParameters",&Fragger::AddSeqSimParameters,(arg("weight"),arg("subst_matrix"))) .def("AddSSAgreeParameters",&Fragger::AddSSAgreeParameters,(arg("weight"),arg("psipred_prediction"))) @@ -79,5 +82,15 @@ void export_Fragger(){ .def("GetScore",&wrap_GetSingleParameterScore,(arg("parameter_index"),arg("index"))) ; - register_ptr_to_python<FraggerPtr>(); + class_<FraggerMap, FraggerMapPtr>("FraggerMap", init<>()) + .def("Load", &FraggerMap::Load, (arg("filename"), arg("db"))) + .staticmethod("Load") + .def("Save", &FraggerMap::Save, (arg("filename"))) + .def("LoadBB", &FraggerMap::LoadBB, (arg("filename"))) + .staticmethod("LoadBB") + .def("SaveBB", &FraggerMap::SaveBB, (arg("filename"))) + .def("Contains", &FraggerMap::Contains, (arg("id"))) + .def("__getitem__", &fm_getitem, (arg("id"))) + .def("__setitem__", &fm_setitem, (arg("id"), arg("fragger"))) + ; } diff --git a/loop/pymod/export_structure_db.cc b/loop/pymod/export_structure_db.cc index 60fb66df095763aea5955e71c133575322ae7033..d17411d3d0357e6eb6d52e0b6b39eef3f8eeb0ae 100644 --- a/loop/pymod/export_structure_db.cc +++ b/loop/pymod/export_structure_db.cc @@ -99,7 +99,9 @@ void export_StructureDB(){ class_<FragmentInfo>("FragmentInfo",init<unsigned short, unsigned short, unsigned short>()) 
.def_readwrite("chain_index",&FragmentInfo::chain_index) .def_readwrite("offset",&FragmentInfo::offset) - .add_property("length", &GetFragInfoLength, &SetFragInfoLength); + .add_property("length", &GetFragInfoLength, &SetFragInfoLength) + .def(self == self) + .def(self != self) ; class_<StructureDB, boost::noncopyable> ("StructureDB", init<>()) diff --git a/loop/src/backbone.hh b/loop/src/backbone.hh index a9ddee8a3a1e9060be89b9c67d8da185d4481469..6b4fda3b1217b964562a365115ef6483571b5b96 100644 --- a/loop/src/backbone.hh +++ b/loop/src/backbone.hh @@ -15,6 +15,7 @@ #include <promod3/core/geom_base.hh> #include <promod3/core/superpose.hh> +#include <promod3/core/portable_binary_serializer.hh> namespace promod3 { namespace loop { @@ -50,6 +51,26 @@ struct Backbone { bool operator!=(const Backbone& rhs) const { return !this->operator==(rhs); } + // portable serialization: coordinates go component by component through + // core::ConvertBaseType<float>; aa is not stored but rebuilt from one_letter_code when reading + template <typename DS> + void SerializeVec3(DS& ds, geom::Vec3& vec) { + core::ConvertBaseType<float>(ds, vec.x); + core::ConvertBaseType<float>(ds, vec.y); + core::ConvertBaseType<float>(ds, vec.z); + } + template <typename DS> + void Serialize(DS& ds) { + SerializeVec3(ds, n_coord); + SerializeVec3(ds, ca_coord); + SerializeVec3(ds, cb_coord); + SerializeVec3(ds, c_coord); + SerializeVec3(ds, o_coord); + ds & one_letter_code; + if (ds.IsSource()) { + aa = ost::conop::OneLetterCodeToAminoAcid(one_letter_code); + } + } }; class BackboneList{ @@ -166,6 +187,11 @@ public: geom::Mat4 GetTransform(const BackboneList& other) const; + template <typename DS> + void Serialize(DS& ds) { + ds & bb_list_; + } + private: std::vector<Backbone> bb_list_; }; diff --git a/loop/src/fragger.cc b/loop/src/fragger.cc index 7436d34f53934e9a36dd281b4b0b86651736d5fc..f896ee0accb94717f61f8bb1ebd7f54e775a4d95 100644 --- a/loop/src/fragger.cc +++ b/loop/src/fragger.cc @@ -1,5 +1,11 @@ #include <promod3/loop/fragger.hh> +// for raw serialization +#include 
<ost/io/binary_data_source.hh> +#include <ost/io/binary_data_sink.hh> +#include <ost/io/container_serialization.hh> +#include <fstream> + namespace{ const Real ss_agreements[8][3][10] = @@ -691,4 +697,87 @@ void Fragger::GenerateProfileProfile(StructureDBPtr db, } } +FraggerMapPtr FraggerMap::Load(const String& file_name, StructureDBPtr db) { + return Load_(file_name, db); +} +FraggerMapPtr FraggerMap::LoadBB(const String& file_name) { + return Load_(file_name, StructureDBPtr()); +} +void FraggerMap::Save(const String& file_name) { + Save_(file_name, false); +} +void FraggerMap::SaveBB(const String& file_name) { + Save_(file_name, true); +} + +FraggerMapPtr FraggerMap::Load_(const String& file_name, StructureDBPtr db) { + // open file + std::ifstream in_stream_(file_name.c_str(), std::ios::binary); + if (!in_stream_) { + std::stringstream ss; + ss << "The file '" << file_name << "' does not exist."; + throw promod3::Error(ss.str()); + } + ost::io::BinaryDataSource ds(in_stream_); + + // fill data + FraggerMapPtr fmp(new FraggerMap); + FraggerMap& fragger_map = *fmp; + size_t map_size; + ds & map_size; + for (size_t i = 0; i < map_size; ++i) { + int id; + String sequence; + ds & id; + ds & sequence; + FraggerPtr fragger(new Fragger(sequence)); + ds & fragger->fragment_infos_; + ds & fragger->scores_; + ds & fragger->single_scores_; + ds & fragger->weights_; + ds & fragger->score_types_; + // get from db or file? 
+ if (db) { + BackboneList bb_list; + fragger->fragments_.clear(); + for (size_t j = 0; j < fragger->fragment_infos_.size(); ++j) { + db->FillBackbone(bb_list, fragger->fragment_infos_[j]); + fragger->fragments_.push_back(bb_list); + } + // fix sequences + fragger->AssignCorrectSequences(); + } else { + ds & fragger->fragments_; + } + fragger_map[id] = fragger; + } + + return fmp; +} + +void FraggerMap::Save_(const String& file_name, bool with_bb) { + // open file + std::ofstream out_stream_(file_name.c_str(), std::ios::binary); + if (!out_stream_) { + std::stringstream ss; + ss << "The file '" << file_name << "' cannot be opened."; + throw promod3::Error(ss.str()); + } + ost::io::BinaryDataSink ds(out_stream_); + + // save map + ds & map_.size(); + for (std::map<int, FraggerPtr>::iterator i = map_.begin(); + i != map_.end(); ++i) { + ds & i->first; + ds & i->second->frag_sequence_; + ds & i->second->fragment_infos_; + ds & i->second->scores_; + ds & i->second->single_scores_; + ds & i->second->weights_; + ds & i->second->score_types_; + if (with_bb) ds & i->second->fragments_; + } +} + }} //ns diff --git a/loop/src/fragger.hh b/loop/src/fragger.hh index c4209bb82aaa05bcc2fcebbc98ac4c34d43f5371..2af7f5a14868fa9c860892d85b41d4b7b0531a73 100644 --- a/loop/src/fragger.hh +++ b/loop/src/fragger.hh @@ -5,16 +5,22 @@ #include <promod3/loop/backbone.hh> #include <promod3/loop/torsion_sampler.hh> #include <promod3/loop/psipred_prediction.hh> +#include <ost/io/binary_data_source.hh> +#include <ost/io/binary_data_sink.hh> +#include <promod3/core/portable_binary_serializer.hh> #include <ost/seq/profile_handle.hh> #include <ost/conop/amino_acids.hh> #include <list> +#include <map> namespace promod3 { namespace loop { class Fragger; +class FraggerMap; typedef boost::shared_ptr<Fragger> FraggerPtr; +typedef boost::shared_ptr<FraggerMap> FraggerMapPtr; typedef enum{ SeqSim, @@ -25,6 +31,13 @@ typedef enum{ StructureProfile } FraggerScoreType; +// portable serialization (with 
fixed-width base-types) +inline void Serialize(ost::io::BinaryDataSource& ds, FraggerScoreType& st) { + core::ConvertBaseType<uint8_t>(ds, st); +} +inline void Serialize(ost::io::BinaryDataSink& ds, FraggerScoreType& st) { + core::ConvertBaseType<uint8_t>(ds, st); +} class Fragger{ @@ -123,6 +136,27 @@ private: std::vector<Real> weights_; std::vector<FraggerScoreType> score_types_; String frag_sequence_; + + // internal access to FraggerMap for storage + friend class FraggerMap; +}; + +/// \brief Simple storable map of Fragger objects. +/// Idea is that one can use the map to cache fragger lists that have already +/// been generated. Serialization is meant to be temporary and is not portable. +class FraggerMap { +public: + static FraggerMapPtr Load(const String& file_name, StructureDBPtr db); + void Save(const String& filename); + static FraggerMapPtr LoadBB(const String& file_name); + void SaveBB(const String& filename); + bool Contains(int id) const { return map_.find(id) != map_.end(); } + // NOTE: like std::map, this inserts an empty FraggerPtr if id is not stored yet + FraggerPtr& operator[](int id) { return map_[id]; } +private: + static FraggerMapPtr Load_(const String& file_name, StructureDBPtr db); + void Save_(const String& filename, bool with_bb); + std::map<int, FraggerPtr> map_; }; }}