diff --git a/modules/seq/base/src/hmm.cc b/modules/seq/base/src/hmm.cc index 795e6cd55c89bbdd080ec52cb419fb310ed8d9c9..07555a3c5bccac942a7e4912d6a92bab3b4a1dbb 100644 --- a/modules/seq/base/src/hmm.cc +++ b/modules/seq/base/src/hmm.cc @@ -149,6 +149,7 @@ HMMPtr HMM::Load(const std::string& filename) { return hmm; } + Real HMM::GetAverageEntropy() const { Real n_eff=0.0; int n = 0; diff --git a/modules/seq/base/src/hmm.hh b/modules/seq/base/src/hmm.hh index 13c24a1c7277b8bd25dc98af4747fda1f869ee92..9f1871827a730a57b62e51d034f162b17f559b1a 100644 --- a/modules/seq/base/src/hmm.hh +++ b/modules/seq/base/src/hmm.hh @@ -3,6 +3,7 @@ #include <string> #include <vector> +#include <fstream> #include <boost/shared_ptr.hpp> #include <ost/base.hh> @@ -63,6 +64,92 @@ class HMMColumn { Real GetEntropy() const; static HMMColumn BLOSUMNullModel(); + + //functions to feed streams with limited accuracy of internal data + //not intended for python export + + friend std::ofstream& operator<<(std::ofstream& os, HMMColumn& col){ + + char data[61]; + char* data_ptr = &data[0]; + + //transform aa_freq + for(uint i = 0; i < 20; ++i){ + *(reinterpret_cast<int16_t*>(data_ptr)) = static_cast<int16_t>(col.freq_[i]*10000); + data_ptr+=2; + } + //transform transition freq + *(reinterpret_cast<int16_t*>(data_ptr)) = static_cast<int16_t>(col.trans_[0][0]*10000); //M-M + data_ptr+=2; + *(reinterpret_cast<int16_t*>(data_ptr)) = static_cast<int16_t>(col.trans_[0][1]*10000); //M-I + data_ptr+=2; + *(reinterpret_cast<int16_t*>(data_ptr)) = static_cast<int16_t>(col.trans_[0][2]*10000); //M-D + data_ptr+=2; + *(reinterpret_cast<int16_t*>(data_ptr)) = static_cast<int16_t>(col.trans_[1][0]*10000); //I-M + data_ptr+=2; + *(reinterpret_cast<int16_t*>(data_ptr)) = static_cast<int16_t>(col.trans_[1][1]*10000); //I-I + data_ptr+=2; + *(reinterpret_cast<int16_t*>(data_ptr)) = static_cast<int16_t>(col.trans_[2][0]*10000); //D-M + data_ptr+=2; + *(reinterpret_cast<int16_t*>(data_ptr)) = static_cast<int16_t>(col.trans_[2][2]*10000); //D-D + data_ptr+=2; + + //transform neff values + *(reinterpret_cast<int16_t*>(data_ptr)) = static_cast<int16_t>(col.n_eff_*1000); + data_ptr+=2; + *(reinterpret_cast<int16_t*>(data_ptr)) = static_cast<int16_t>(col.n_eff_ins_*1000); + data_ptr+=2; + *(reinterpret_cast<int16_t*>(data_ptr)) = static_cast<int16_t>(col.n_eff_del_*1000); + data_ptr+=2; + + //finally write one letter code + *data_ptr = col.olc_; + + os.write(data,61); + return os; + } + + friend std::ifstream& operator>>(std::ifstream& is, HMMColumn& col){ + + char data[61]; + char* data_ptr = &data[0]; + is.read(data,61); + + //transform aa_freq + for(uint i = 0; i < 20; ++i){ + col.freq_[i] = *(reinterpret_cast<int16_t*>(data_ptr)) * 0.0001; + data_ptr+=2; + } + //transform transition freq + col.trans_[0][0] = *(reinterpret_cast<int16_t*>(data_ptr)) * 0.0001; //M-M + data_ptr+=2; + col.trans_[0][1] = *(reinterpret_cast<int16_t*>(data_ptr)) * 0.0001; //M-I + data_ptr+=2; + col.trans_[0][2] = *(reinterpret_cast<int16_t*>(data_ptr)) * 0.0001; //M-D + data_ptr+=2; + col.trans_[1][0] = *(reinterpret_cast<int16_t*>(data_ptr)) * 0.0001; //I-M + data_ptr+=2; + col.trans_[1][1] = *(reinterpret_cast<int16_t*>(data_ptr)) * 0.0001; //I-I + data_ptr+=2; + col.trans_[2][0] = *(reinterpret_cast<int16_t*>(data_ptr)) * 0.0001; //D-M + data_ptr+=2; + col.trans_[2][2] = *(reinterpret_cast<int16_t*>(data_ptr)) * 0.0001; //D-D + data_ptr+=2; + + //transform neff values + col.n_eff_ = *(reinterpret_cast<int16_t*>(data_ptr)) * 0.001; + data_ptr+=2; + col.n_eff_ins_ = *(reinterpret_cast<int16_t*>(data_ptr)) * 0.001; + data_ptr+=2; + col.n_eff_del_ = *(reinterpret_cast<int16_t*>(data_ptr)) * 0.001; + data_ptr+=2; + + //finally read one letter code + col.olc_ = *data_ptr; + + return is; + } + private: int GetIndex(char ch) const; char olc_; @@ -87,7 +174,6 @@ class HMM { const HMMColumn& GetNullModel() const { return null_model_; } void SetNullModel(const HMMColumn& null_model) { null_model_ = null_model; } - //some functions to make it behave like a vector size_t size() const { return columns_.size(); } @@ -115,6 +201,39 @@ class HMM { HMMColumnList::const_iterator columns_begin() const { return columns_.begin(); } HMMColumnList::iterator columns_begin() { return columns_.begin(); } Real GetAverageEntropy() const; + + friend std::ofstream& operator<<(std::ofstream& os, HMM& hmm){ + + os << hmm.null_model_; + + uint32_t size = hmm.size(); + os.write(reinterpret_cast<char*>(&size),4); + + for(uint i = 0; i < size; ++i){ + os << hmm.columns_[i]; + } + + return os; + } + + friend std::ifstream& operator>>(std::ifstream& is, HMM& hmm){ + + is >> hmm.null_model_; + + uint32_t size; + is.read(reinterpret_cast<char*>(&size),4); + + hmm.columns_.resize(size); + + for(uint i = 0; i < size; ++i){ + is >> hmm.columns_[i]; + } + + return is; + } + + + private: HMMColumn null_model_; HMMColumnList columns_;