Skip to content
Snippets Groups Projects
Commit fb34475e authored by Studer Gabriel's avatar Studer Gabriel
Browse files

Add a simple HMM database

parent 91f4cdd8
Branches
Tags
No related merge requests found
......@@ -519,6 +519,49 @@ single columns containing amino acid frequencies and transition probabilities.
Average entropy of all the columns
.. class:: HMMDB
A simple database to gather :class:`HMM` objects. It is possible
to save them to disk in a compressed format with limited accuracy
(4 digits for freq values).
.. method:: Save(filename)
:param filename: Name of file that will be generated on disk.
:type filename: :class:`str`
.. method:: Load(filename)
Static loading method
:param filename: Name of file from which the database should be loaded.
:type filename: :class:`str`
:returns: The loaded database
.. method:: AddHMM(name, hmm)
:param name: Name of HMM to be added
:param hmm: HMM to be added
:type name: :class:`str`
:type hmm: :class:`HMM`
:raises: :class:`Exception` when filename is longer than 255 characters.
.. method:: GetHMM(name)
:param name: Name of HMM to be returned
:type name: :class:`str`
:returns: The requested :class:`HMM`
:raises: :class:`Exception` when no :class:`HMM` for **name** exists.
.. method:: Size()
:returns: Number of :class:`HMM` objects in the database
.. method:: GetNames()
:returns: A nonsorted list of the names of all :class:`HMM` objects in the database
......
......@@ -4,6 +4,20 @@
using namespace ost::seq;
using namespace boost::python;
namespace{
boost::python::list wrap_get_names(HMMDBPtr db){
std::vector<String> v_names = db->GetNames();
boost::python::list names;
for(std::vector<String>::iterator i = v_names.begin();
i != v_names.end(); ++i){
names.append(*i);
}
return names;
}
}
void export_hmm()
{
......@@ -46,4 +60,14 @@ void export_hmm()
.add_property("avg_entropy", &HMM::GetAverageEntropy)
.add_property("sequence",&HMM::GetSequence)
;
class_<HMMDB, HMMDBPtr>("HMMDB", init<>())
.def("Load", &HMMDB::Load).staticmethod("Load")
.def("Save", &HMMDB::Save)
.def("AddHMM", &HMMDB::AddHMM)
.def("GetHMM", &HMMDB::GetHMM)
.def("Size", &HMMDB::size)
.def("GetNames",&wrap_get_names)
;
}
......@@ -171,4 +171,85 @@ Real HMM::GetAverageEntropy() const {
return (n > 0) ? n_eff/n : 0.0;
}
void HMMDB::Save(const String& filename) const{
std::ofstream out_stream(filename.c_str(), std::ios::binary);
//write out total size
uint32_t total_size = data_.size();
out_stream.write(reinterpret_cast<char*>(&total_size),4);
//write out the data elements
char string_size;
for(std::map<String,HMMPtr>::const_iterator i = data_.begin();
i != data_.end(); ++i){
string_size = static_cast<char>(i->first.size());
out_stream.write(reinterpret_cast<char*>(&string_size),1);
out_stream.write(i->first.c_str(),string_size);
out_stream << *i->second;
}
out_stream.close();
}
HMMDBPtr HMMDB::Load(const String& filename){
std::ifstream in_stream(filename.c_str(), std::ios::binary);
if (!in_stream){
std::stringstream ss;
ss << "the file '" << filename << "' does not exist.";
throw std::runtime_error(ss.str());
}
HMMDBPtr db(new HMMDB);
//read in the total size
uint32_t total_size;
in_stream.read(reinterpret_cast<char*>(&total_size),4);
//read in the single hmms
char string_size;
for(uint i = 0; i < total_size; ++i){
in_stream.read(&string_size,1);
String name(string_size,'X');
in_stream.read(&name[0],string_size);
HMMPtr hmm(new HMM);
in_stream >> *hmm;
db->AddHMM(name,hmm);
}
return db;
}
void HMMDB::AddHMM(const String& name, HMMPtr hmm){
if(name.size() > 255){
throw std::runtime_error("Name of HMM must be smaller than 256!");
}
if(name.empty()){
throw std::runtime_error("Name must not be empty!");
}
data_[name] = hmm;
}
HMMPtr HMMDB::GetHMM(const String& name) const{
std::map<String,HMMPtr>::const_iterator i = data_.find(name);
if(i == data_.end()){
std::stringstream ss;
ss << "HMM database does not contain an entry with name ";
ss << name <<"!";
throw std::runtime_error(ss.str());
}
return i->second;
}
std::vector<String> HMMDB::GetNames() const{
std::vector<String> return_vec;
return_vec.reserve(this->size());
for(std::map<String,HMMPtr>::const_iterator i = data_.begin();
i != data_.end(); ++i){
return_vec.push_back(i->first);
}
return return_vec;
}
}} //ns
......@@ -3,11 +3,20 @@
#include <string>
#include <vector>
#include <map>
#include <fstream>
#include <boost/shared_ptr.hpp>
#include <ost/base.hh>
namespace ost { namespace seq {
class HMM;
class HMMColumn;
class HMMDB;
typedef boost::shared_ptr<HMM> HMMPtr;
typedef boost::shared_ptr<HMMDB> HMMDBPtr;
typedef std::vector<HMMColumn> HMMColumnList;
typedef enum {
HMM_MATCH=0,
HMM_INSERT=1,
......@@ -171,11 +180,6 @@ class HMMColumn {
};
class HMM;
typedef boost::shared_ptr<HMM> HMMPtr;
typedef std::vector<HMMColumn> HMMColumnList;
// Represents a HHsearch profile
// Minimalistic on Purpose.
class HMM {
public:
HMM() {}
......@@ -249,13 +253,33 @@ class HMM {
return is;
}
private:
HMMColumn null_model_;
HMMColumnList columns_;
};
class HMMDB{
public:
void Save(const String& filename) const;
static HMMDBPtr Load(const String& filename);
void AddHMM(const String& name, HMMPtr hmm);
HMMPtr GetHMM(const String& name) const;
size_t size() const { return data_.size(); }
std::vector<String> GetNames() const;
private:
std::map<String,HMMPtr> data_;
};
}}
#endif
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment