From 22f082ddbc50b836dbcbc10a1a261bc37f733589 Mon Sep 17 00:00:00 2001
From: Gerardo Tauriello <gerardo.tauriello@unibas.ch>
Date: Tue, 9 Feb 2016 17:50:22 +0100
Subject: [PATCH] Added seq.ProfileHandle class and io.LoadSequenceProfile to
 work with profiles.

---
 modules/io/pymod/wrap_io.cc                   |   3 +
 modules/io/src/io_manager.cc                  |  47 +++-
 modules/io/src/io_manager.hh                  |  24 +-
 modules/io/src/seq/CMakeLists.txt             |   5 +
 modules/io/src/seq/hhm_io_handler.cc          | 179 ++++++++++++
 modules/io/src/seq/hhm_io_handler.hh          |  52 ++++
 modules/io/src/seq/load.cc                    |  10 +
 modules/io/src/seq/load.hh                    |   6 +-
 modules/io/src/seq/profile_io_handler.hh      |  93 +++++++
 modules/io/src/seq/pssm_io_handler.cc         | 147 ++++++++++
 modules/io/src/seq/pssm_io_handler.hh         |  54 ++++
 modules/io/tests/CMakeLists.txt               |   1 +
 modules/io/tests/test_io_sequence_profile.cc  | 261 ++++++++++++++++++
 modules/io/tests/testfiles/test_hmm.hhm       |  31 +++
 modules/io/tests/testfiles/test_pssm.pssm     |  14 +
 modules/seq/base/pymod/CMakeLists.txt         |   1 +
 .../seq/base/pymod/export_profile_handle.cc   |  80 ++++++
 modules/seq/base/pymod/wrap_seq.cc            |   3 +-
 modules/seq/base/src/CMakeLists.txt           |   2 +
 modules/seq/base/src/hmm.hh                   |   1 +
 modules/seq/base/src/profile_handle.cc        | 220 +++++++++++++++
 modules/seq/base/src/profile_handle.hh        | 250 +++++++++++++++++
 modules/seq/base/tests/CMakeLists.txt         |   2 +-
 modules/seq/base/tests/test_profile.cc        | 144 ++++++++++
 24 files changed, 1618 insertions(+), 12 deletions(-)
 create mode 100644 modules/io/src/seq/hhm_io_handler.cc
 create mode 100644 modules/io/src/seq/hhm_io_handler.hh
 create mode 100644 modules/io/src/seq/profile_io_handler.hh
 create mode 100644 modules/io/src/seq/pssm_io_handler.cc
 create mode 100644 modules/io/src/seq/pssm_io_handler.hh
 create mode 100644 modules/io/tests/test_io_sequence_profile.cc
 create mode 100644 modules/io/tests/testfiles/test_hmm.hhm
 create mode 100644 modules/io/tests/testfiles/test_pssm.pssm
 create mode 100644 modules/seq/base/pymod/export_profile_handle.cc
 create mode 100644 modules/seq/base/src/profile_handle.cc
 create mode 100644 modules/seq/base/src/profile_handle.hh
 create mode 100644 modules/seq/base/tests/test_profile.cc

diff --git a/modules/io/pymod/wrap_io.cc b/modules/io/pymod/wrap_io.cc
index 1e2b51869..6d16af3e9 100644
--- a/modules/io/pymod/wrap_io.cc
+++ b/modules/io/pymod/wrap_io.cc
@@ -99,6 +99,9 @@ BOOST_PYTHON_MODULE(_ost_io)
   def("SequenceListFromString", &SequenceListFromString);
   def("SequenceFromString", &SequenceFromString);  
   def("SaveAlignment", &SaveAlignment, arg("format")="auto");
+  
+  def("LoadSequenceProfile", &LoadSequenceProfile, arg("format")="auto");
+
   def("LoadSurface",LoadSurface,load_surface_ov());
   def("LoadManagedSurface",LoadManagedSurface,load_msurface_ov());
 
diff --git a/modules/io/src/io_manager.cc b/modules/io/src/io_manager.cc
index 6edc7fe43..cfd288c75 100644
--- a/modules/io/src/io_manager.cc
+++ b/modules/io/src/io_manager.cc
@@ -25,6 +25,8 @@
 #include <ost/io/seq/fasta_io_handler.hh>
 #include <ost/io/seq/pir_io_handler.hh>
 #include <ost/io/seq/promod_io_handler.hh>
+#include <ost/io/seq/hhm_io_handler.hh>
+#include <ost/io/seq/pssm_io_handler.hh>
 #include <ost/io/mol/surface_io_msms_handler.hh>
 #include <ost/io/seq/clustal_io_handler.hh>
 #if OST_IMG_ENABLED
@@ -50,10 +52,12 @@ IOManager::IOManager()
   RegisterFactory(EntityIOHandlerFactoryBaseP(new EntityIOCRDHandlerFactory));
   RegisterFactory(EntityIOHandlerFactoryBaseP(new EntityIOSDFHandlerFactory));
   RegisterFactory(EntityIOHandlerFactoryBaseP(new EntityIOMAEHandlerFactory));
-  RegisterFactory(SequenceIOHandlerFactoryBasePtr(new FastaIOHandlerFactory));  
+  RegisterFactory(SequenceIOHandlerFactoryBasePtr(new FastaIOHandlerFactory));
   RegisterFactory(SequenceIOHandlerFactoryBasePtr(new PirIOHandlerFactory));
-  RegisterFactory(SequenceIOHandlerFactoryBasePtr(new ClustalIOHandlerFactory));  
-  RegisterFactory(SequenceIOHandlerFactoryBasePtr(new PromodIOHandlerFactory));    
+  RegisterFactory(SequenceIOHandlerFactoryBasePtr(new ClustalIOHandlerFactory));
+  RegisterFactory(SequenceIOHandlerFactoryBasePtr(new PromodIOHandlerFactory));
+  RegisterFactory(ProfileIOHandlerFactoryBasePtr(new HhmIOHandlerFactory));
+  RegisterFactory(ProfileIOHandlerFactoryBasePtr(new PssmIOHandlerFactory));
   RegisterFactory(SurfaceIOHandlerFactoryBasePtr(new SurfaceIOMSMSHandlerFactory));
 #if OST_IMG_ENABLED  
   RegisterFactory(MapIOHandlerFactoryBasePtr(new MapIODxHandlerFactory));
@@ -103,7 +107,7 @@ EntityIOHandlerP IOManager::FindEntityExportHandler(const String& filename,
 }
 
 SequenceIOHandlerPtr IOManager::FindAlignmentImportHandler(const String& filename,
-                                                        const String& format) {
+                                                           const String& format) {
   for(AlignmentIOFList::const_iterator it=alignment_io_list_.begin();
       it!=alignment_io_list_.end();++it) {
     if( (*it)->ProvidesImport(filename,format)) {
@@ -114,7 +118,7 @@ SequenceIOHandlerPtr IOManager::FindAlignmentImportHandler(const String& filenam
 }
 
 SequenceIOHandlerPtr IOManager::FindAlignmentExportHandler(const String& filename,
-                                                 const String& format)
+                                                           const String& format)
 {
   for(AlignmentIOFList::const_iterator it=alignment_io_list_.begin();
       it!=alignment_io_list_.end();++it) {
@@ -125,6 +129,29 @@ SequenceIOHandlerPtr IOManager::FindAlignmentExportHandler(const String& filenam
   throw IOUnknownFormatException("no suitable alignment io handler found for "+filename);
 }
 
+ProfileIOHandlerPtr IOManager::FindProfileImportHandler(const String& filename,
+                                                        const String& format) {
+  for(ProfileIOFList::const_iterator it=profile_io_list_.begin();
+      it!=profile_io_list_.end();++it) {
+    if( (*it)->ProvidesImport(filename,format)) {
+      return (*it)->Create();
+    }
+  }
+  throw IOUnknownFormatException("no suitable profile io handler found for "+filename);
+}
+
+ProfileIOHandlerPtr IOManager::FindProfileExportHandler(const String& filename,
+                                                        const String& format)
+{
+  for(ProfileIOFList::const_iterator it=profile_io_list_.begin();
+      it!=profile_io_list_.end();++it) {
+    if( (*it)->ProvidesExport(filename,format)) {
+      return (*it)->Create();
+    }
+  }
+  throw IOUnknownFormatException("no suitable profile io handler found for "+filename);
+}
+
 SurfaceIOHandlerPtr IOManager::FindSurfaceImportHandler(const String& filename,
                                                         const String& format)
 {
@@ -267,6 +294,11 @@ void IOManager::RegisterFactory(const SequenceIOHandlerFactoryBasePtr& f)
   alignment_io_list_.push_back(f);
 }
 
+void IOManager::RegisterFactory(const ProfileIOHandlerFactoryBasePtr& f) 
+{
+  profile_io_list_.push_back(f);
+}
+
 void IOManager::RegisterFactory(const SurfaceIOHandlerFactoryBasePtr& f) 
 {
   surface_io_list_.push_back(f);
@@ -282,6 +314,11 @@ const AlignmentIOFList& IOManager::GetAvailableAlignmentHandler() const
   return alignment_io_list_;
 }
 
+const ProfileIOFList& IOManager::GetAvailableProfileHandler() const
+{
+  return profile_io_list_;
+}
+
 const SurfaceIOFList& IOManager::GetAvailableSurfaceHandler() const
 {
   return surface_io_list_;
diff --git a/modules/io/src/io_manager.hh b/modules/io/src/io_manager.hh
index bfebe32bd..18297af6d 100644
--- a/modules/io/src/io_manager.hh
+++ b/modules/io/src/io_manager.hh
@@ -26,6 +26,7 @@
 
 #include <ost/io/mol/entity_io_handler.hh>
 #include <ost/io/seq/sequence_io_handler.hh>
+#include <ost/io/seq/profile_io_handler.hh>
 #include <ost/io/mol/surface_io_handler.hh>
 
 #if OST_IMG_ENABLED
@@ -33,10 +34,12 @@
 #endif
 
 #include <ost/io/io_exception.hh>
+
 namespace ost { namespace io {
 
 typedef std::vector<EntityIOHandlerFactoryBaseP> EntityIOHFList;
 typedef std::vector<SequenceIOHandlerFactoryBasePtr> AlignmentIOFList;
+typedef std::vector<ProfileIOHandlerFactoryBasePtr> ProfileIOFList;
 typedef std::vector<SurfaceIOHandlerFactoryBasePtr> SurfaceIOFList;
 
 #if OST_IMG_ENABLED
@@ -77,11 +80,17 @@ public:
   /// \brief get sequence io handler that is able to import the given file.
   /// \sa FindEntityImportHandler(filename)       
   SequenceIOHandlerPtr FindAlignmentImportHandler(const String& filename,
-                                                   const String& format="auto");
+                                                  const String& format="auto");
                                                    
   SequenceIOHandlerPtr FindAlignmentExportHandler(const String& filename,
-                                                   const String& format="auto");                                                   
-  
+                                                  const String& format="auto");
+
+  ProfileIOHandlerPtr FindProfileImportHandler(const String& filename,
+                                               const String& format="auto");
+                                                   
+  ProfileIOHandlerPtr FindProfileExportHandler(const String& filename,
+                                               const String& format="auto");
+
   SurfaceIOHandlerPtr FindSurfaceImportHandler(const String& filename,
                                                const String& format="auto");
   //@}
@@ -90,7 +99,10 @@ public:
   void RegisterFactory(const EntityIOHandlerFactoryBaseP&);
   /// \brief register aligment io handler factory
   /// \sa adding_io_handler
-  void RegisterFactory(const SequenceIOHandlerFactoryBasePtr&);  
+  void RegisterFactory(const SequenceIOHandlerFactoryBasePtr&);
+  /// \brief register profile io handler factory
+  /// \sa adding_io_handler
+  void RegisterFactory(const ProfileIOHandlerFactoryBasePtr&);
   /// \brief register surface io handler factory
   /// \sa adding_io_handler
   void RegisterFactory(const SurfaceIOHandlerFactoryBasePtr&);  
@@ -101,6 +113,9 @@ public:
   /// \brief Get a list with all available AlignmentHandler
   const AlignmentIOFList& GetAvailableAlignmentHandler() const;
 
+  /// \brief Get a list with all available ProfileHandler
+  const ProfileIOFList& GetAvailableProfileHandler() const;
+
   /// \brief Get a list with all available SurfaceHandler
   const SurfaceIOFList& GetAvailableSurfaceHandler() const;
 
@@ -137,6 +152,7 @@ private:
 
   EntityIOHFList entity_iohf_list_;
   AlignmentIOFList alignment_io_list_;
+  ProfileIOFList profile_io_list_;
   SurfaceIOFList surface_io_list_;
 
 #if OST_IMG_ENABLED
diff --git a/modules/io/src/seq/CMakeLists.txt b/modules/io/src/seq/CMakeLists.txt
index 9dd14b52a..605eb9546 100644
--- a/modules/io/src/seq/CMakeLists.txt
+++ b/modules/io/src/seq/CMakeLists.txt
@@ -5,6 +5,8 @@ save.cc
 fasta_io_handler.cc
 pir_io_handler.cc
 promod_io_handler.cc
+hhm_io_handler.cc
+pssm_io_handler.cc
 PARENT_SCOPE
 )
 
@@ -13,6 +15,9 @@ sequence_io_handler.hh
 fasta_io_handler.hh
 pir_io_handler.hh
 promod_io_handler.hh
+profile_io_handler.hh
+hhm_io_handler.hh
+pssm_io_handler.hh
 clustal_io_handler.hh
 load.hh
 save.hh
diff --git a/modules/io/src/seq/hhm_io_handler.cc b/modules/io/src/seq/hhm_io_handler.cc
new file mode 100644
index 000000000..395c97bd1
--- /dev/null
+++ b/modules/io/src/seq/hhm_io_handler.cc
@@ -0,0 +1,179 @@
+//------------------------------------------------------------------------------
+// This file is part of the OpenStructure project <www.openstructure.org>
+//
+// Copyright (C) 2008-2015 by the OpenStructure authors
+//
+// This library is free software; you can redistribute it and/or modify it under
+// the terms of the GNU Lesser General Public License as published by the Free
+// Software Foundation; either version 3.0 of the License, or (at your option)
+// any later version.
+// This library is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with this library; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+//------------------------------------------------------------------------------
+
+/*
+  Author: Gerardo Tauriello, Gabriel Studer
+ */
+  
+#include "hhm_io_handler.hh"
+
+#include <boost/algorithm/string/predicate.hpp>
+#include <boost/filesystem/convenience.hpp>
+#include <boost/filesystem/fstream.hpp>
+#include <boost/iostreams/filter/gzip.hpp>
+#include <boost/iostreams/filtering_stream.hpp>
+
+#include <ost/string_ref.hh>
+#include <ost/io/io_exception.hh>
+#include <ost/io/io_utils.hh>
+
+namespace ost { namespace io {
+
+namespace {
+// reset pc with freqs in chunks[i+offset] (i in [0,20[)
+// checks for consistency of chunks and values read
+seq::ProfileColumn GetColumn(const std::vector<ost::StringRef>& chunks,
+                             const uint offset, const char* olcs,
+                             const String& exception) {
+  // check chunks size
+  if (chunks.size() < offset+20) {
+    throw IOException(exception);
+  }
+  // reset pc
+  seq::ProfileColumn pc; // all 0
+  for (uint i = 0; i < 20; ++i) {
+    if (chunks[i+offset] != ost::StringRef("*", 1)) {
+      std::pair<bool, int> pbi = chunks[i+offset].to_int();
+      if (!pbi.first) {
+        throw IOException(exception);
+      }
+      pc.SetFreq(olcs[i], pow(2.0, -0.001 * pbi.second));
+    }
+  }
+  return pc;
+}
+} // anon ns
+
+void HhmIOHandler::Import(seq::ProfileHandle& prof,
+                          const boost::filesystem::path& loc) {
+  // open it up
+  boost::iostreams::filtering_stream<boost::iostreams::input> in;
+  boost::filesystem::ifstream stream(loc);
+  if (!stream) {
+    throw IOException("Could not open " + loc.string());
+  }
+  // add unzip if necessary
+  if (boost::iequals(".gz", boost::filesystem::extension(loc))) {
+    in.push(boost::iostreams::gzip_decompressor());
+  }
+  in.push(stream);
+
+  // reset profile
+  prof.clear();
+  
+  // tmp. storage
+  std::string line;
+  ost::StringRef sline;
+  std::string null_line;
+  char olcs[20];
+  std::vector<ost::StringRef> chunks;
+
+  // read line-by-line looking for NULL
+  bool null_found = false;
+  while (std::getline(in, line)) {
+    sline = ost::StringRef(line.c_str(), line.length());
+    if (sline.length()>4 && 
+        sline.substr(0, 5) == ost::StringRef("NULL ", 5)) {
+      null_line = line;
+      null_found = true;
+      break;
+    }
+  }
+  if (!null_found) {
+    throw IOException("No NULL found in file " + loc.string());
+  }
+
+  // read until we hit HMM, extract olcs, then skip 2 more lines
+  bool hmm_found = false;
+  while (std::getline(in, line)) {
+    sline = ost::StringRef(line.c_str(), line.length());
+    chunks = sline.split();
+    if (chunks[0] == ost::StringRef("HMM", 3)) {
+      // extract olcs
+      if (chunks.size() != 21) {
+        throw IOException("Badly formatted HMM line in " + loc.string());
+      }
+      for (uint i = 1; i < 21; ++i) {
+        if (chunks[i].length() != 1) {
+          throw IOException("Badly formatted HMM line in " + loc.string());
+        }
+        olcs[i-1] = chunks[i][0];
+      }
+      // skip 2 lines and get out
+      std::getline(in, line);
+      std::getline(in, line);
+      hmm_found = true;
+      break;
+    }
+  }
+  if (!hmm_found) {
+    throw IOException("No HMM found in file " + loc.string());
+  }
+
+  // set null model
+  chunks = ost::StringRef(null_line.c_str(), null_line.length()).split();
+  prof.SetNullModel(GetColumn(chunks, 1, olcs, "Badly formatted NULL line\n"
+                              + null_line + "\n in " + loc.string()));
+
+  // set columns
+  String seqres;
+  while (std::getline(in, line)) {
+    sline = ost::StringRef(line.c_str(), line.length());
+    if (sline.trim().empty()) continue;
+    if (sline.trim() == ost::StringRef("//", 2)) break;
+    chunks = sline.split();
+    // one letter code
+    seqres += chunks[0][0];
+    // frequencies
+    prof.push_back(GetColumn(chunks, 2, olcs, "Badly formatted line\n"
+                             + line + "\n in " + loc.string()));
+    // skip line (trans. matrix)
+    std::getline(in, line);
+  }
+  prof.SetSequence(seqres);
+}
+
+void HhmIOHandler::Export(const seq::ProfileHandle& prof,
+                          const boost::filesystem::path& loc) const {
+  throw IOException("Cannot write hhm files.");
+}
+
+bool HhmIOHandler::ProvidesImport(const boost::filesystem::path& loc, 
+                                  const String& format) {
+  if (format=="auto") {
+    String match_suf_string = loc.string();
+    std::transform(match_suf_string.begin(), match_suf_string.end(),
+                   match_suf_string.begin(), tolower);
+    if (   detail::FilenameEndsWith(match_suf_string,".hhm")
+        || detail::FilenameEndsWith(match_suf_string,".hhm.gz")) {
+      return true;
+    }
+  } else if (format == "hhm") {
+    return true;
+  }
+  return false;
+}
+
+bool HhmIOHandler::ProvidesExport(const boost::filesystem::path& loc, 
+                                  const String& format) {
+  // no writers here
+  return false;
+}
+
+}}
diff --git a/modules/io/src/seq/hhm_io_handler.hh b/modules/io/src/seq/hhm_io_handler.hh
new file mode 100644
index 000000000..a3acd275c
--- /dev/null
+++ b/modules/io/src/seq/hhm_io_handler.hh
@@ -0,0 +1,52 @@
+//------------------------------------------------------------------------------
+// This file is part of the OpenStructure project <www.openstructure.org>
+//
+// Copyright (C) 2008-2015 by the OpenStructure authors
+//
+// This library is free software; you can redistribute it and/or modify it under
+// the terms of the GNU Lesser General Public License as published by the Free
+// Software Foundation; either version 3.0 of the License, or (at your option)
+// any later version.
+// This library is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with this library; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+//------------------------------------------------------------------------------
+
+/*
+  Author: Gerardo Tauriello
+ */
+
+#ifndef OST_HHM_IO_HANDLER_HH
+#define OST_HHM_IO_HANDLER_HH
+
+#include <ost/io/module_config.hh>
+#include "profile_io_handler.hh"
+
+namespace ost { namespace io {
+
+class DLLEXPORT_OST_IO HhmIOHandler : public ProfileIOHandler {
+public:
+  virtual void Import(seq::ProfileHandle& prof,
+                      const boost::filesystem::path& loc);
+  virtual void Export(const seq::ProfileHandle& prof,
+                      const boost::filesystem::path& loc) const;         
+
+  static bool ProvidesImport(const boost::filesystem::path& loc, 
+                             const String& format="auto");
+  static bool ProvidesExport(const boost::filesystem::path& loc, 
+                             const String& format="auto");
+
+  static String GetFormatName() { return String("HHM"); }
+  static String GetFormatDescription() { return String("HHM output of HHblits"); }
+};
+
+typedef ProfileIOHandlerFactory<HhmIOHandler> HhmIOHandlerFactory; 
+
+}}
+
+#endif
diff --git a/modules/io/src/seq/load.cc b/modules/io/src/seq/load.cc
index 896c5fa97..9bca49efe 100644
--- a/modules/io/src/seq/load.cc
+++ b/modules/io/src/seq/load.cc
@@ -101,4 +101,14 @@ seq::SequenceHandle SequenceFromString(const String& data,
  return get_one_seq(SequenceListFromString(data, format));
 }
 
+seq::ProfileHandlePtr LoadSequenceProfile(const String& file_name,
+                                          const String& format)
+{
+  seq::ProfileHandlePtr prof(new seq::ProfileHandle);
+  IOManager& m = IOManager::Instance();
+  ProfileIOHandlerPtr prof_io = m.FindProfileImportHandler(file_name, format);
+  prof_io->Import(*prof, file_name);
+  return prof;
+}
+
 }}
diff --git a/modules/io/src/seq/load.hh b/modules/io/src/seq/load.hh
index 485fef1d8..a72cee4b2 100644
--- a/modules/io/src/seq/load.hh
+++ b/modules/io/src/seq/load.hh
@@ -26,8 +26,9 @@
 #include <ost/io/module_config.hh>
 #include <ost/seq/sequence_list.hh>
 #include <ost/seq/alignment_handle.hh>
-namespace ost { namespace io {
+#include <ost/seq/profile_handle.hh>
 
+namespace ost { namespace io {
 
 seq::AlignmentHandle DLLEXPORT_OST_IO 
 LoadAlignment(const String& file_name, const String& format="auto");
@@ -57,6 +58,9 @@ SequenceFromStream(std::istream&  stream, const String& format);
 seq::SequenceHandle DLLEXPORT_OST_IO 
 SequenceFromString(const String& data, const String& format);
 
+seq::ProfileHandlePtr DLLEXPORT_OST_IO 
+LoadSequenceProfile(const String& file_name, const String& format="auto");
+
 }}
 
 #endif
diff --git a/modules/io/src/seq/profile_io_handler.hh b/modules/io/src/seq/profile_io_handler.hh
new file mode 100644
index 000000000..783d69762
--- /dev/null
+++ b/modules/io/src/seq/profile_io_handler.hh
@@ -0,0 +1,93 @@
+//------------------------------------------------------------------------------
+// This file is part of the OpenStructure project <www.openstructure.org>
+//
+// Copyright (C) 2008-2015 by the OpenStructure authors
+//
+// This library is free software; you can redistribute it and/or modify it under
+// the terms of the GNU Lesser General Public License as published by the Free
+// Software Foundation; either version 3.0 of the License, or (at your option)
+// any later version.
+// This library is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with this library; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+//------------------------------------------------------------------------------
+
+/*
+  Author: Gerardo Tauriello
+ */
+
+#ifndef OST_IO_PROFILE_IO_HANDLER_HH
+#define OST_IO_PROFILE_IO_HANDLER_HH
+
+#include <boost/shared_ptr.hpp>
+#include <boost/filesystem/operations.hpp>
+
+#include <ost/io/module_config.hh>
+#include <ost/io/io_utils.hh>
+#include <ost/seq/profile_handle.hh>
+
+namespace ost { namespace io {
+
+//! pure abstract base class for profile io handlers
+class DLLEXPORT_OST_IO ProfileIOHandler {
+public:
+  virtual ~ProfileIOHandler() {}
+
+  virtual void Import(seq::ProfileHandle& prof,
+                      const boost::filesystem::path& loc) = 0;
+
+  virtual void Export(const seq::ProfileHandle& prof,
+                      const boost::filesystem::path& loc) const = 0;
+};
+
+typedef boost::shared_ptr<ProfileIOHandler> ProfileIOHandlerPtr;
+
+//! pure abstract base class for creation of a specfic io handler
+class DLLEXPORT_OST_IO ProfileIOHandlerFactoryBase {
+public:
+  virtual ~ProfileIOHandlerFactoryBase() {}
+  virtual bool ProvidesImport(const boost::filesystem::path& loc, 
+                              const String& format="auto") const = 0;
+  virtual bool ProvidesExport(const boost::filesystem::path& loc, 
+                              const String& format="auto") const = 0;
+  virtual ProfileIOHandlerPtr Create() const = 0;
+  virtual String GetFormatName() const =0;
+  virtual String GetFormatDescription() const =0;
+};
+
+typedef boost::shared_ptr<ProfileIOHandlerFactoryBase> ProfileIOHandlerFactoryBasePtr;
+
+template <class HANDLER>
+class ProfileIOHandlerFactory: public ProfileIOHandlerFactoryBase {
+  virtual bool ProvidesImport(const boost::filesystem::path& loc, 
+                              const String& format="auto") const {
+    return HANDLER::ProvidesImport(loc, format);
+  }
+
+  virtual bool ProvidesExport(const boost::filesystem::path& loc, 
+                              const String& format="auto") const {
+    return HANDLER::ProvidesExport(loc, format);
+  }
+
+  virtual ProfileIOHandlerPtr Create() const {
+    return ProfileIOHandlerPtr(new HANDLER);
+  }
+
+  virtual String GetFormatName() const {
+     return HANDLER::GetFormatName();
+  }
+
+  virtual String GetFormatDescription() const {
+     return HANDLER::GetFormatDescription();
+  }
+
+};
+
+}} // ns
+
+#endif
diff --git a/modules/io/src/seq/pssm_io_handler.cc b/modules/io/src/seq/pssm_io_handler.cc
new file mode 100644
index 000000000..4b16ed6cb
--- /dev/null
+++ b/modules/io/src/seq/pssm_io_handler.cc
@@ -0,0 +1,147 @@
+//------------------------------------------------------------------------------
+// This file is part of the OpenStructure project <www.openstructure.org>
+//
+// Copyright (C) 2008-2015 by the OpenStructure authors
+//
+// This library is free software; you can redistribute it and/or modify it under
+// the terms of the GNU Lesser General Public License as published by the Free
+// Software Foundation; either version 3.0 of the License, or (at your option)
+// any later version.
+// This library is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with this library; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+//------------------------------------------------------------------------------
+
+/*
+  Author: Gerardo Tauriello
+ */
+
+#include "pssm_io_handler.hh"
+
+#include <boost/algorithm/string/predicate.hpp>
+#include <boost/filesystem/convenience.hpp>
+#include <boost/filesystem/fstream.hpp>
+#include <boost/iostreams/filter/gzip.hpp>
+#include <boost/iostreams/filtering_stream.hpp>
+
+#include <ost/string_ref.hh>
+#include <ost/io/io_exception.hh>
+#include <ost/io/io_utils.hh>
+
+namespace ost { namespace io {
+
+void PssmIOHandler::Import(seq::ProfileHandle& prof,
+                          const boost::filesystem::path& loc) {
+  // open it up
+  boost::iostreams::filtering_stream<boost::iostreams::input> in;
+  boost::filesystem::ifstream stream(loc);
+  if (!stream) {
+    throw IOException("Could not open " + loc.string());
+  }
+  // add unzip if necessary
+  if (boost::iequals(".gz", boost::filesystem::extension(loc))) {
+    in.push(boost::iostreams::gzip_decompressor());
+  }
+  in.push(stream);
+
+  // reset profile
+  prof.clear();
+
+  // set null model
+  prof.SetNullModel(seq::ProfileColumn::BLOSUMNullModel());
+
+  // tmp. storage
+  std::string line;
+  ost::StringRef sline;
+  char olcs[20];
+  std::vector<ost::StringRef> chunks;
+
+  // read line-by-line looking for table
+  // -> start = line with 40 1-char tokens
+  bool table_found = false;
+  while (std::getline(in, line)) {
+    sline = ost::StringRef(line.c_str(), line.length());
+    chunks = sline.split();
+    if (chunks.size() == 40) {
+      bool got_it = true;
+      for (uint i = 0; i < 40; ++i) {
+        if (chunks[i].length() != 1) {
+          got_it = false;
+          break;
+        }
+        if (i >= 20) {
+          olcs[i-20] = chunks[i][0];
+        }
+      }
+      if (got_it) {
+        table_found = true;
+        break;
+      }
+    }
+  }
+  if (!table_found) {
+    throw IOException("No ASCII table found in file " + loc.string());
+  }
+
+  // parse table (assume: index olc 20xscore 20xfreq)
+  String seqres;
+  while (std::getline(in, line)) {
+    sline = ost::StringRef(line.c_str(), line.length());
+    chunks = sline.split();
+    // stop if not enough entries
+    if (chunks.size() < 42) break;
+    // parse row (freq in % as integers)
+    seqres += chunks[1][0];
+    Real sum_freq = 0;
+    Real freqs[20];
+    for (uint i = 22; i < 42; ++i) {
+      std::pair<bool, int> pbi = chunks[i].to_int();
+      if (!pbi.first) {
+        throw IOException("Badly formatted line\n" + line + "\n in " + loc.string());
+      }
+      sum_freq += pbi.second;
+      freqs[i-22] = pbi.second;
+    }
+    // fill normalized frequencies
+    seq::ProfileColumn pc;
+    for (uint i = 0; i < 20; ++i) {
+      pc.SetFreq(olcs[i], freqs[i]/sum_freq);
+    }
+    prof.push_back(pc);
+  }
+  prof.SetSequence(seqres);
+}
+
+void PssmIOHandler::Export(const seq::ProfileHandle& prof,
+                          const boost::filesystem::path& loc) const {
+  throw IOException("Cannot write pssm files.");
+}
+
+bool PssmIOHandler::ProvidesImport(const boost::filesystem::path& loc, 
+                                  const String& format) {
+  if (format=="auto") {
+    String match_suf_string = loc.string();
+    std::transform(match_suf_string.begin(), match_suf_string.end(),
+                   match_suf_string.begin(), tolower);
+    if (   detail::FilenameEndsWith(match_suf_string,".pssm")
+        || detail::FilenameEndsWith(match_suf_string,".pssm.gz")) {
+      return true;
+    }
+  } else if (format == "pssm") {
+    return true;
+  }
+  return false;
+}
+
+bool PssmIOHandler::ProvidesExport(const boost::filesystem::path& loc, 
+                                  const String& format) {
+  // no writers here
+  return false;
+}
+
+}}
diff --git a/modules/io/src/seq/pssm_io_handler.hh b/modules/io/src/seq/pssm_io_handler.hh
new file mode 100644
index 000000000..87a00673b
--- /dev/null
+++ b/modules/io/src/seq/pssm_io_handler.hh
@@ -0,0 +1,54 @@
+//------------------------------------------------------------------------------
+// This file is part of the OpenStructure project <www.openstructure.org>
+//
+// Copyright (C) 2008-2015 by the OpenStructure authors
+//
+// This library is free software; you can redistribute it and/or modify it under
+// the terms of the GNU Lesser General Public License as published by the Free
+// Software Foundation; either version 3.0 of the License, or (at your option)
+// any later version.
+// This library is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with this library; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+//------------------------------------------------------------------------------
+
+/*
+  Author: Gerardo Tauriello
+ */
+
+#ifndef OST_PSSM_IO_HANDLER_HH
+#define OST_PSSM_IO_HANDLER_HH
+
+#include <ost/io/module_config.hh>
+#include "profile_io_handler.hh"
+
+namespace ost { namespace io {
+
+class DLLEXPORT_OST_IO PssmIOHandler : public ProfileIOHandler {
+public:
+  virtual void Import(seq::ProfileHandle& prof,
+                      const boost::filesystem::path& loc);
+  virtual void Export(const seq::ProfileHandle& prof,
+                      const boost::filesystem::path& loc) const;         
+
+  static bool ProvidesImport(const boost::filesystem::path& loc, 
+                             const String& format="auto");
+  static bool ProvidesExport(const boost::filesystem::path& loc, 
+                             const String& format="auto");
+
+  static String GetFormatName() { return String("PSSM"); }
+  static String GetFormatDescription() {
+    return String("ASCII Table (PSSM) output of PSI-BLAST (flag -Q)");
+  }
+};
+
+typedef ProfileIOHandlerFactory<PssmIOHandler> PssmIOHandlerFactory; 
+
+}}
+
+#endif
diff --git a/modules/io/tests/CMakeLists.txt b/modules/io/tests/CMakeLists.txt
index 64a310415..0efbc1058 100644
--- a/modules/io/tests/CMakeLists.txt
+++ b/modules/io/tests/CMakeLists.txt
@@ -6,6 +6,7 @@ set(OST_IO_UNIT_TESTS
   test_io_crd.cc
   test_io_dcd.cc
   test_io_sdf.cc
+  test_io_sequence_profile.cc
   test_pir.cc
   test_iomanager.cc
   tests.cc
diff --git a/modules/io/tests/test_io_sequence_profile.cc b/modules/io/tests/test_io_sequence_profile.cc
new file mode 100644
index 000000000..85d2dc602
--- /dev/null
+++ b/modules/io/tests/test_io_sequence_profile.cc
@@ -0,0 +1,261 @@
+//------------------------------------------------------------------------------
+// This file is part of the OpenStructure project <www.openstructure.org>
+//
+// Copyright (C) 2008-2015 by the OpenStructure authors
+//
+// This library is free software; you can redistribute it and/or modify it under
+// the terms of the GNU Lesser General Public License as published by the Free
+// Software Foundation; either version 3.0 of the License, or (at your option)
+// any later version.
+// This library is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with this library; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+//------------------------------------------------------------------------------
+
+/*
+  Author: Gerardo Tauriello, Gabriel Studer
+ */
+
+#define BOOST_TEST_DYN_LINK
+#include <boost/test/unit_test.hpp>
+#include <boost/test/auto_unit_test.hpp>
+#include <iostream>
+
+#include <ost/seq/profile_handle.hh>
+#include <ost/io/seq/load.hh>
+#include <ost/io/seq/hhm_io_handler.hh>
+#include <ost/io/seq/pssm_io_handler.hh>
+
+using namespace ost::seq;
+using namespace ost::io;
+
+BOOST_AUTO_TEST_SUITE( io );
+
+// helper to compare profiles element by element (with delta)
+// delta given in % !
+template <typename T, typename T2>
+void compareProfiles(T& t, T2& t2, Real delta = 0) {
+  BOOST_REQUIRE_EQUAL(t.size(), t2.size());
+  BOOST_CHECK_EQUAL(t.GetSequence(), t2.GetSequence());
+  BOOST_CHECK_CLOSE(t.GetAverageEntropy(), t2.GetAverageEntropy(), delta*20);
+  const Real* f = t.GetNullModel().freqs_begin();
+  const Real* f2 = t2.GetNullModel().freqs_begin();
+  for (uint j = 0; j < 20; ++j) {
+    BOOST_CHECK_CLOSE(f[j], f2[j], delta);
+  }
+  uint N = t.size();
+  for (uint i = 0; i < N; ++i) {
+    f = t[i].freqs_begin();
+    f2 = t2[i].freqs_begin();
+    for (uint j = 0; j < 20; ++j) {
+      BOOST_CHECK_CLOSE(f[j], f2[j], delta);
+    }
+  }
+}
+
+BOOST_AUTO_TEST_CASE(hhm_loading)
+{
+  // check hhm factory
+  ProfileIOHandlerFactoryBasePtr fac(new HhmIOHandlerFactory);
+  BOOST_CHECK_EQUAL(fac->ProvidesImport("test.hhm"),     true);
+  BOOST_CHECK_EQUAL(fac->ProvidesImport("test.hhm.gz"),  true);
+  BOOST_CHECK_EQUAL(fac->ProvidesImport("test.pssm"),    false);
+  BOOST_CHECK_EQUAL(fac->ProvidesImport("test.pssm.gz"), false);
+  BOOST_CHECK_EQUAL(fac->ProvidesImport("test", "hhm"),  true);
+  BOOST_CHECK_EQUAL(fac->ProvidesExport("test.hhm"),     false);
+  BOOST_CHECK_EQUAL(fac->ProvidesExport("test.hhm.gz"),  false);
+  BOOST_CHECK_EQUAL(fac->ProvidesExport("test", "hhm"),  false);
+  ProfileIOHandlerPtr io_handler = fac->Create();
+
+  // load file
+  ProfileHandlePtr prof(new ProfileHandle);
+  io_handler->Import(*prof, "testfiles/test_hmm.hhm");
+  BOOST_CHECK_EQUAL(prof->size(), size_t(5));
+  BOOST_CHECK_EQUAL(prof->GetSequence(), "ETGSP");
+
+  // we first check, whether the null model has been read correctly
+  // and then randomly pick a column to check
+
+  ProfileColumn null_model = prof->GetNullModel();
+
+  String olc = "ACDEFGHIKLMNPQRSTVWY";
+
+  Real correct_aa_freqs[] = {pow(2.0,-0.001*3706), pow(2.0,-0.001*5728), 
+                             pow(2.0,-0.001*4211), pow(2.0,-0.001*4064), 
+                             pow(2.0,-0.001*4839), pow(2.0,-0.001*3729),  
+                             pow(2.0,-0.001*4763), pow(2.0,-0.001*4308), 
+                             pow(2.0,-0.001*4069), pow(2.0,-0.001*3323), 
+                             pow(2.0,-0.001*5509), pow(2.0,-0.001*4640),  
+                             pow(2.0,-0.001*4464), pow(2.0,-0.001*4937), 
+                             pow(2.0,-0.001*4285), pow(2.0,-0.001*4423), 
+                             pow(2.0,-0.001*3815), pow(2.0,-0.001*3783),  
+                             pow(2.0,-0.001*6325), pow(2.0,-0.001*4665)};
+
+  for (int i = 0; i < 20; ++i) {
+    BOOST_CHECK_CLOSE(null_model.GetFreq(olc[i]), correct_aa_freqs[i],
+                      Real(1e-5));
+  }
+
+  // let's check, whether a particular column has been read correctly  
+  ProfileColumn col = (*prof)[2];
+
+  memset(correct_aa_freqs, 0, 20*sizeof(Real));
+  correct_aa_freqs[0] = pow(2.0,-0.001*3676);
+  correct_aa_freqs[1] = pow(2.0,-0.001*2597);
+  correct_aa_freqs[5] = pow(2.0,-0.001*5359);
+  correct_aa_freqs[6] = pow(2.0,-0.001*3275);
+  correct_aa_freqs[11] = pow(2.0,-0.001*3292);
+  correct_aa_freqs[12] = pow(2.0,-0.001*5077);
+  correct_aa_freqs[13] = pow(2.0,-0.001*3826);
+  correct_aa_freqs[15] = pow(2.0,-0.001*2409);
+  correct_aa_freqs[16] = pow(2.0,-0.001*3733);
+  correct_aa_freqs[17] = pow(2.0,-0.001*4503);
+  correct_aa_freqs[19] = pow(2.0,-0.001*3070);
+
+  for(int i = 0; i < 20; ++i){
+    BOOST_CHECK_CLOSE(col.GetFreq(olc[i]), correct_aa_freqs[i], Real(1e-5));
+  }
+
+  // check, whether the entropy of the column above gets correctly calculated
+  Real entropy = 0.0;
+  entropy -= correct_aa_freqs[0] * log(correct_aa_freqs[0]);
+  entropy -= correct_aa_freqs[1] * log(correct_aa_freqs[1]);
+  entropy -= correct_aa_freqs[5] * log(correct_aa_freqs[5]);
+  entropy -= correct_aa_freqs[6] * log(correct_aa_freqs[6]);
+  entropy -= correct_aa_freqs[11] * log(correct_aa_freqs[11]);
+  entropy -= correct_aa_freqs[12] * log(correct_aa_freqs[12]);
+  entropy -= correct_aa_freqs[13] * log(correct_aa_freqs[13]);
+  entropy -= correct_aa_freqs[15] * log(correct_aa_freqs[15]);
+  entropy -= correct_aa_freqs[16] * log(correct_aa_freqs[16]);
+  entropy -= correct_aa_freqs[17] * log(correct_aa_freqs[17]);
+  entropy -= correct_aa_freqs[19] * log(correct_aa_freqs[19]);
+  BOOST_CHECK_CLOSE((*prof)[2].GetEntropy(), entropy, Real(1e-5));
+
+  // look at avg entropy...
+  BOOST_CHECK_CLOSE(prof->GetAverageEntropy(), 1.48704, Real(1e-2));
+
+  // reload with LoadSequenceProfile
+  ProfileHandlePtr prof2 = LoadSequenceProfile("testfiles/test_hmm.hhm");
+  BOOST_CHECK(*prof == *prof2);
+
+  // reload
+  io_handler->Import(*prof2, "testfiles/test_hmm.hhm");
+  BOOST_CHECK(*prof == *prof2);
+}
+
+BOOST_AUTO_TEST_CASE(pssm_loading)
+{
+  // check pssm factory
+  ProfileIOHandlerFactoryBasePtr fac(new PssmIOHandlerFactory);
+  BOOST_CHECK_EQUAL(fac->ProvidesImport("test.hhm"),     false);
+  BOOST_CHECK_EQUAL(fac->ProvidesImport("test.hhm.gz"),  false);
+  BOOST_CHECK_EQUAL(fac->ProvidesImport("test.pssm"),    true);
+  BOOST_CHECK_EQUAL(fac->ProvidesImport("test.pssm.gz"), true);
+  BOOST_CHECK_EQUAL(fac->ProvidesImport("test", "pssm"), true);
+  BOOST_CHECK_EQUAL(fac->ProvidesExport("test.pssm"),    false);
+  BOOST_CHECK_EQUAL(fac->ProvidesExport("test.pssm.gz"), false);
+  BOOST_CHECK_EQUAL(fac->ProvidesExport("test", "pssm"), false);
+  ProfileIOHandlerPtr io_handler = fac->Create();
+
+  // load file
+  ProfileHandlePtr prof(new ProfileHandle);
+  io_handler->Import(*prof, "testfiles/test_pssm.pssm");
+
+  // compare manually
+  ProfileHandle prof_pssm_ref;
+  prof_pssm_ref.SetSequence("RRSPP");
+  prof_pssm_ref.SetNullModel(ProfileColumn::BLOSUMNullModel());
+  ProfileColumn pc1, pc2, pc3, pc4, pc5;
+  pc1.SetFreq('R', 0.74);
+  pc1.SetFreq('Q', 0.01);
+  pc1.SetFreq('K', 0.25);
+
+  pc2.SetFreq('A', 0.02/0.99);
+  pc2.SetFreq('R', 0.61/0.99);
+  pc2.SetFreq('N', 0.03/0.99);
+  pc2.SetFreq('Q', 0.02/0.99);
+  pc2.SetFreq('E', 0.02/0.99);
+  pc2.SetFreq('G', 0.02/0.99);
+  pc2.SetFreq('H', 0.01/0.99);
+  pc2.SetFreq('I', 0.01/0.99);
+  pc2.SetFreq('L', 0.01/0.99);
+  pc2.SetFreq('K', 0.13/0.99);
+  pc2.SetFreq('P', 0.03/0.99);
+  pc2.SetFreq('S', 0.06/0.99);
+  pc2.SetFreq('T', 0.01/0.99);
+  pc2.SetFreq('V', 0.01/0.99);
+  pc3.SetFreq('A', 0.17/0.99);
+  pc3.SetFreq('R', 0.07/0.99);
+  pc3.SetFreq('N', 0.03/0.99);
+
+  pc3.SetFreq('A', 0.17);
+  pc3.SetFreq('R', 0.07);
+  pc3.SetFreq('N', 0.03);
+  pc3.SetFreq('D', 0.02);
+  pc3.SetFreq('C', 0.01);
+  pc3.SetFreq('Q', 0.03);
+  pc3.SetFreq('E', 0.02);
+  pc3.SetFreq('G', 0.03);
+  pc3.SetFreq('L', 0.02);
+  pc3.SetFreq('K', 0.04);
+  pc3.SetFreq('M', 0.01);
+  pc3.SetFreq('F', 0.01);
+  pc3.SetFreq('P', 0.06);
+  pc3.SetFreq('S', 0.37);
+  pc3.SetFreq('T', 0.08);
+  pc3.SetFreq('V', 0.03);
+
+  pc4.SetFreq('A', 0.11/0.98);
+  pc4.SetFreq('R', 0.06/0.98);
+  pc4.SetFreq('D', 0.01/0.98);
+  pc4.SetFreq('Q', 0.02/0.98);
+  pc4.SetFreq('E', 0.02/0.98);
+  pc4.SetFreq('I', 0.01/0.98);
+  pc4.SetFreq('L', 0.03/0.98);
+  pc4.SetFreq('K', 0.04/0.98);
+  pc4.SetFreq('F', 0.01/0.98);
+  pc4.SetFreq('P', 0.58/0.98);
+  pc4.SetFreq('S', 0.03/0.98);
+  pc4.SetFreq('T', 0.05/0.98);
+  pc4.SetFreq('V', 0.01/0.98);
+
+  pc5.SetFreq('A', 0.04);
+  pc5.SetFreq('R', 0.01);
+  pc5.SetFreq('N', 0.02);
+  pc5.SetFreq('D', 0.03);
+  pc5.SetFreq('Q', 0.02);
+  pc5.SetFreq('E', 0.03);
+  pc5.SetFreq('H', 0.01);
+  pc5.SetFreq('I', 0.01);
+  pc5.SetFreq('L', 0.04);
+  pc5.SetFreq('K', 0.05);
+  pc5.SetFreq('M', 0.01);
+  pc5.SetFreq('F', 0.01);
+  pc5.SetFreq('P', 0.63);
+  pc5.SetFreq('S', 0.03);
+  pc5.SetFreq('T', 0.02);
+  pc5.SetFreq('V', 0.04);
+
+  prof_pssm_ref.push_back(pc1);
+  prof_pssm_ref.push_back(pc2);
+  prof_pssm_ref.push_back(pc3);
+  prof_pssm_ref.push_back(pc4);
+  prof_pssm_ref.push_back(pc5);
+
+  compareProfiles(*prof, prof_pssm_ref, 1e-3);
+
+  // reload with LoadSequenceProfile
+  ProfileHandlePtr prof2 = LoadSequenceProfile("testfiles/test_pssm.pssm");
+  BOOST_CHECK(*prof == *prof2);
+
+  // reload
+  io_handler->Import(*prof2, "testfiles/test_pssm.pssm");
+  BOOST_CHECK(*prof == *prof2);
+}
+
+BOOST_AUTO_TEST_SUITE_END();
diff --git a/modules/io/tests/testfiles/test_hmm.hhm b/modules/io/tests/testfiles/test_hmm.hhm
new file mode 100644
index 000000000..1b9a665d1
--- /dev/null
+++ b/modules/io/tests/testfiles/test_hmm.hhm
@@ -0,0 +1,31 @@
+HHsearch 1.5
+I don't get parsed
+I don't get parsed
+I don't get parsed
+I don't get parsed
+I don't get parsed
+I don't get parsed
+I don't get parsed
+I don't get parsed
+I don't get parsed
+#
+NULL   3706	5728	4211	4064	4839	3729	4763	4308	4069	3323	5509	4640	4464	4937	4285	4423	3815	3783	6325	4665	
+HMM    A	C	D	E	F	G	H	I	K	L	M	N	P	Q	R	S	T	V	W	Y	
+       M->M	M->I	M->D	I->M	I->I	D->M	D->D	Neff	Neff_I	Neff_D
+       0	*	*	0	*	0	*	*	*	*	
+E 1    1780	*	*	497	*	*	*	*	*	*	*	*	*	*	*	*	*	*	*	*	1
+       0	*	*	*	*	*	*	1610	0	0	
+
+T 2    1615	*	*	*	*	2652	*	*	*	*	*	*	*	*	*	*	3992	1147	*	*	2
+       0	*	*	*	*	*	*	2652	0	0	
+
+G 3    3676	2597	*	*	*	5359	3275	*	*	*	*	3292	5077	3826	*	2409	3733	4503	*	3070	3
+       46	4979	*	2006	413	*	*	3076	1097	0	
+
+S 4    2579	*	*	*	*	*	*	*	*	4320	*	4532	3895	4519	*	951	5140	3595	*	*	4
+       58	4660	*	2833	218	*	*	3050	1146	0	
+
+P 5    2585	*	4184	*	*	2345	4748	*	*	*	*	*	2839	1766	5056	3635	*	*	*	*	5
+       0	*	*	*	*	*	*	3036	0	0		
+
+//
diff --git a/modules/io/tests/testfiles/test_pssm.pssm b/modules/io/tests/testfiles/test_pssm.pssm
new file mode 100644
index 000000000..e3f72045c
--- /dev/null
+++ b/modules/io/tests/testfiles/test_pssm.pssm
@@ -0,0 +1,14 @@
+
+Last position-specific scoring matrix computed, weighted observed percentages rounded down, information per position, and relative weight of gapless real matches to pseudocounts
+           A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V   A   R   N   D   C   Q   E   G   H   I   L   K   M   F   P   S   T   W   Y   V
+    1 R   -4  7 -3 -4 -7 -1 -3 -5 -3 -6 -5  4 -4 -6 -5 -4 -4 -6 -5 -6    0  74   0   0   0   1   0   0   0   0   0  25   0   0   0   0   0   0   0   0  1.96 0.65
+    2 R   -2  7  0 -4 -6  0 -1 -3 -1 -4 -4  2 -4 -6 -1  0 -3 -6 -5 -4    2  61   3   0   0   2   2   2   1   1   1  13   0   0   3   6   1   0   0   1  1.22 0.52
+    3 S    2  1  0 -1 -2  0 -1 -1 -2 -4 -3  0 -2 -2  1  4  1 -5 -4 -2   17   7   3   2   1   3   2   3   0   0   2   4   1   1   6  37   8   0   0   3  0.42 0.31
+    4 P    1  0 -5 -3 -6 -2 -3 -5 -3 -4 -3 -1 -5 -3  7 -1  0 -3 -6 -4   11   6   0   1   0   2   2   0   0   1   3   4   0   1  58   3   5   0   0   1  1.42 0.62
+    5 P   -1 -3 -2 -2 -7 -2 -2 -6 -3 -4 -2  0 -2 -4  7 -2 -2 -7 -6 -1    4   1   2   3   0   2   3   0   1   1   4   5   1   1  63   3   2   0   0   4  1.56 0.71
+
+                      K         Lambda
+Standard Ungapped    0.1342     0.3155
+Standard Gapped      0.0410     0.2670
+PSI Ungapped         0.1607     0.3175
+PSI Gapped           0.0492     0.2670
diff --git a/modules/seq/base/pymod/CMakeLists.txt b/modules/seq/base/pymod/CMakeLists.txt
index c3d565c25..4f304ceeb 100644
--- a/modules/seq/base/pymod/CMakeLists.txt
+++ b/modules/seq/base/pymod/CMakeLists.txt
@@ -1,6 +1,7 @@
 set(OST_SEQ_PYMOD_SOURCES
   export_sequence.cc
   export_hmm.cc
+  export_profile_handle.cc
   wrap_seq.cc
 )
 
diff --git a/modules/seq/base/pymod/export_profile_handle.cc b/modules/seq/base/pymod/export_profile_handle.cc
new file mode 100644
index 000000000..77479ac31
--- /dev/null
+++ b/modules/seq/base/pymod/export_profile_handle.cc
@@ -0,0 +1,80 @@
+//------------------------------------------------------------------------------
+// This file is part of the OpenStructure project <www.openstructure.org>
+//
+// Copyright (C) 2008-2015 by the OpenStructure authors
+//
+// This library is free software; you can redistribute it and/or modify it under
+// the terms of the GNU Lesser General Public License as published by the Free
+// Software Foundation; either version 3.0 of the License, or (at your option)
+// any later version.
+// This library is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with this library; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+//------------------------------------------------------------------------------
+
+/*
+  Author: Gerardo Tauriello, Gabriel Studer
+ */
+
+#include <boost/python/suite/indexing/vector_indexing_suite.hpp>
+#include <boost/python.hpp>
+#include <ost/seq/profile_handle.hh>
+
+using namespace ost::seq;
+using namespace boost::python;
+
+namespace {
+
+boost::python::list wrap_get_names(ProfileDBPtr db) {
+  std::vector<String> v_names = db->GetNames();
+  boost::python::list names;
+  for (std::vector<String>::iterator i = v_names.begin();
+       i != v_names.end(); ++i) {
+    names.append(*i);
+  }
+  return names;
+}
+
+} // anon ns
+
+void export_profile_handle() 
+{
+  class_<ProfileColumn>("ProfileColumn", init<>())
+    .add_property("entropy", &ProfileColumn::GetEntropy)
+    .def("GetFreq", &ProfileColumn::GetFreq)
+    .def("SetFreq", &ProfileColumn::SetFreq)
+    .def("BLOSUMNullModel", 
+         &ProfileColumn::BLOSUMNullModel).staticmethod("BLOSUMNullModel")
+  ;
+
+  class_<ProfileColumnList>("ProfileColumnList", init<>())
+    .def(vector_indexing_suite<ProfileColumnList>())
+  ;
+
+  class_<ProfileHandle, ProfileHandlePtr>("ProfileHandle", init<>())
+    .def("__len__",&ProfileHandle::size)
+    .def("AddColumn", &ProfileHandle::push_back)
+    .def("Extract", &ProfileHandle::Extract)
+    .add_property("null_model", make_function(&ProfileHandle::GetNullModel,
+                  return_value_policy<copy_const_reference>()))
+    .add_property("columns", 
+                  make_function(&ProfileHandle::GetColumns,
+                  return_value_policy<copy_const_reference>()))
+    .add_property("avg_entropy", &ProfileHandle::GetAverageEntropy)
+    .add_property("sequence", &ProfileHandle::GetSequence)
+  ;
+
+  class_<ProfileDB, ProfileDBPtr>("ProfileDB", init<>())
+    .def("Load", &ProfileDB::Load).staticmethod("Load")
+    .def("Save", &ProfileDB::Save)
+    .def("AddProfile", &ProfileDB::AddProfile)
+    .def("GetProfile", &ProfileDB::GetProfile)
+    .def("Size", &ProfileDB::size)
+    .def("GetNames",&wrap_get_names)
+  ;
+}
diff --git a/modules/seq/base/pymod/wrap_seq.cc b/modules/seq/base/pymod/wrap_seq.cc
index d676f8757..f937590d5 100644
--- a/modules/seq/base/pymod/wrap_seq.cc
+++ b/modules/seq/base/pymod/wrap_seq.cc
@@ -21,10 +21,11 @@ using namespace boost::python;
 
 void export_sequence();
 void export_hmm();
+void export_profile_handle();
 
 BOOST_PYTHON_MODULE(_ost_seq)
 {
   export_sequence();
   export_hmm();
-
+  export_profile_handle();
 }
diff --git a/modules/seq/base/src/CMakeLists.txt b/modules/seq/base/src/CMakeLists.txt
index 3ba4fcf21..80247afad 100644
--- a/modules/seq/base/src/CMakeLists.txt
+++ b/modules/seq/base/src/CMakeLists.txt
@@ -17,6 +17,7 @@ aligned_column_iterator.hh
 invalid_sequence.hh
 views_from_sequences.hh
 hmm.hh
+profile_handle.hh
 )
 
 set(OST_SEQ_SOURCES
@@ -30,6 +31,7 @@ alignment_handle.cc
 sequence_op.cc
 views_from_sequences.cc
 hmm.cc
+profile_handle.cc
 )
 
 if (ENABLE_INFO)
diff --git a/modules/seq/base/src/hmm.hh b/modules/seq/base/src/hmm.hh
index 408dc5bc6..f706911a6 100644
--- a/modules/seq/base/src/hmm.hh
+++ b/modules/seq/base/src/hmm.hh
@@ -3,6 +3,7 @@
 
 #include <stdint.h>
 #include <string>
+#include <string.h> // for memcpy, etc
 #include <vector>
 #include <map>
 #include <fstream>
diff --git a/modules/seq/base/src/profile_handle.cc b/modules/seq/base/src/profile_handle.cc
new file mode 100644
index 000000000..0feebe958
--- /dev/null
+++ b/modules/seq/base/src/profile_handle.cc
@@ -0,0 +1,220 @@
+//------------------------------------------------------------------------------
+// This file is part of the OpenStructure project <www.openstructure.org>
+//
+// Copyright (C) 2008-2015 by the OpenStructure authors
+//
+// This library is free software; you can redistribute it and/or modify it under
+// the terms of the GNU Lesser General Public License as published by the Free
+// Software Foundation; either version 3.0 of the License, or (at your option)
+// any later version.
+// This library is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with this library; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+//------------------------------------------------------------------------------
+
+/*
+  Author: Gerardo Tauriello, Gabriel Studer
+ */
+
+#include <fstream>
+#include <iostream>
+#include <ost/seq/profile_handle.hh>
+
+namespace ost { namespace seq{ 
+
+//////////////////////////////////////////////////////////////////////////////
+// ProfileColumn
+//////////////////////////////////////////////////////////////////////////////
+
+ProfileColumn ProfileColumn::BLOSUMNullModel() {
+  ProfileColumn col;
+
+  col.freq_[0] = 0.0732;
+  col.freq_[1] = 0.0250;
+  col.freq_[2] = 0.0542;
+  col.freq_[3] = 0.0547;
+  col.freq_[4] = 0.0447;
+  col.freq_[5] = 0.0751;
+  col.freq_[6] = 0.0261;
+  col.freq_[7] = 0.0700;
+  col.freq_[8] = 0.1011;
+  col.freq_[9] = 0.0584;
+  col.freq_[10] = 0.0246;
+  col.freq_[11] = 0.0463;
+  col.freq_[12] = 0.0351;
+  col.freq_[13] = 0.0326;
+  col.freq_[14] = 0.0484;
+  col.freq_[15] = 0.0573;
+  col.freq_[16] = 0.0505;
+  col.freq_[17] = 0.0758;
+  col.freq_[18] = 0.0124;
+  col.freq_[19] = 0.0345;
+
+  return col;
+}
+
+int ProfileColumn::GetIndex(char olc) {
+  if (olc == 'A') return 0;
+  if (olc >= 'C' && olc <= 'I') return (olc-'A') - 1;
+  if (olc >= 'K' && olc <= 'N') return (olc-'A') - 2;
+  if (olc >= 'P' && olc <= 'T') return (olc-'A') - 3;
+  if (olc == 'V' ) return 17;
+  if (olc == 'W' ) return 18;
+  if (olc == 'Y' ) return 19;
+  return -1;
+}
+
+Real ProfileColumn::GetFreq(char ch) const{
+  int idx = ProfileColumn::GetIndex(ch);
+  if (idx == -1) {
+    throw Error("Invalid One Letter Code observed when getting frequency in "
+                "ProfileColumn!");
+  }
+  return freq_[idx];
+}
+
+void ProfileColumn::SetFreq(char ch, Real freq){
+  int idx = ProfileColumn::GetIndex(ch);
+  if (idx == -1) {
+    throw Error("Invalid One Letter Code observed when setting frequency in "
+                "ProfileColumn!");
+  }
+  freq_[idx] = freq;
+}
+
+Real ProfileColumn::GetEntropy() const {
+  Real entropy = 0.0;
+  for (uint i = 0; i < 20; ++i) {
+    if (freq_[i] > 0.0) {
+      entropy -= log(freq_[i]) * freq_[i];
+    }
+  }
+  return entropy;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// ProfileHandle
+//////////////////////////////////////////////////////////////////////////////
+
+ProfileHandlePtr ProfileHandle::Extract(uint from, uint to){
+
+  if (to <= from) {
+    throw Error("Second index must be bigger than first one!");
+  }
+
+  if (to > this->size()) {
+    throw Error("Invalid index!");
+  }
+
+  ProfileHandlePtr return_prof(new ProfileHandle);
+  return_prof->SetNullModel(null_model_);
+  for(uint i = from; i < to; ++i){
+    return_prof->push_back(columns_[i]);
+  }
+  return_prof->SetSequence(seq_.substr(from, to-from));
+
+  return return_prof;
+}
+
+Real ProfileHandle::GetAverageEntropy() const {
+  Real n_eff = 0.0;
+  int n = 0;
+  for (std::vector<ProfileColumn>::const_iterator
+       i = this->columns_begin(), e = this->columns_end(); i != e; ++i) {
+    n += 1;
+    n_eff += i->GetEntropy();
+  }
+  return (n > 0) ? n_eff/n : 0.0;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// ProfileDB
+//////////////////////////////////////////////////////////////////////////////
+
+void ProfileDB::Save(const String& filename) const{
+
+  std::ofstream out_stream(filename.c_str(), std::ios::binary);
+
+  //write out total size
+  uint32_t total_size = data_.size();
+  out_stream.write(reinterpret_cast<char*>(&total_size),4);
+
+  //write out the data elements
+  char string_size;
+  for (std::map<String, ProfileHandlePtr>::const_iterator i = data_.begin();
+       i != data_.end(); ++i){
+    string_size = static_cast<char>(i->first.size());
+    out_stream.write(reinterpret_cast<char*>(&string_size),1);
+    out_stream.write(i->first.c_str(),string_size);
+    out_stream << *i->second;
+  }
+  out_stream.close();
+}
+
+ProfileDBPtr ProfileDB::Load(const String& filename){
+
+  std::ifstream in_stream(filename.c_str(), std::ios::binary);
+  if (!in_stream){
+    std::stringstream ss;
+    ss << "the file '" << filename << "' does not exist.";
+    throw Error(ss.str());
+  }
+
+  ProfileDBPtr db(new ProfileDB);
+
+  //read in the total size
+  uint32_t total_size;
+  in_stream.read(reinterpret_cast<char*>(&total_size),4);
+
+  //read in the single profiles
+  char string_size;
+  for(uint i = 0; i < total_size; ++i){
+    in_stream.read(&string_size,1);
+    String name(string_size,'X');
+    in_stream.read(&name[0],string_size);
+    ProfileHandlePtr prof(new ProfileHandle);
+    in_stream >> *prof;
+    db->AddProfile(name, prof);
+  }
+
+  return db;
+}
+
+void ProfileDB::AddProfile(const String& name, ProfileHandlePtr prof) {
+  if (name.size() > 255) {
+    throw Error("Name of Profile must be smaller than 256!");
+  }
+  if (name.empty()) {
+    throw Error("Name must not be empty!");
+  }
+  data_[name] = prof;
+}
+
+ProfileHandlePtr ProfileDB::GetProfile(const String& name) const{
+  std::map<String,ProfileHandlePtr>::const_iterator i = data_.find(name);
+  if (i == data_.end()) {
+    std::stringstream ss;
+    ss << "Profile database does not contain an entry with name ";
+    ss << name << "!";
+    throw Error(ss.str());
+  }
+  return i->second;
+}
+
+std::vector<String> ProfileDB::GetNames() const{
+  std::vector<String> return_vec;
+  return_vec.reserve(this->size());
+  for (std::map<String, ProfileHandlePtr>::const_iterator i = data_.begin(); 
+       i != data_.end(); ++i){
+    return_vec.push_back(i->first);
+  }
+  return return_vec;
+}
+
+
+}} //ns
diff --git a/modules/seq/base/src/profile_handle.hh b/modules/seq/base/src/profile_handle.hh
new file mode 100644
index 000000000..e3642b6e1
--- /dev/null
+++ b/modules/seq/base/src/profile_handle.hh
@@ -0,0 +1,250 @@
+//------------------------------------------------------------------------------
+// This file is part of the OpenStructure project <www.openstructure.org>
+//
+// Copyright (C) 2008-2015 by the OpenStructure authors
+//
+// This library is free software; you can redistribute it and/or modify it under
+// the terms of the GNU Lesser General Public License as published by the Free
+// Software Foundation; either version 3.0 of the License, or (at your option)
+// any later version.
+// This library is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with this library; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+//------------------------------------------------------------------------------
+
+/*
+  Author: Gerardo Tauriello, Gabriel Studer
+ */
+
+#ifndef OST_SEQ_PROFILE_HANDLE_HH
+#define OST_SEQ_PROFILE_HANDLE_HH
+
+#include <ost/base.hh>
+#include <ost/stdint.hh>
+#include <ost/message.hh>
+#include <ost/seq/module_config.hh>
+
+#include <string.h> // for memcpy, etc
+#include <vector>
+#include <map>
+#include <fstream>
+#include <boost/shared_ptr.hpp>
+
+namespace ost { namespace seq { 
+
+class ProfileHandle;
+class ProfileColumn;
+class ProfileDB;
+typedef boost::shared_ptr<ProfileHandle> ProfileHandlePtr;
+typedef boost::shared_ptr<ProfileDB> ProfileDBPtr;
+typedef std::vector<ProfileColumn> ProfileColumnList;
+
+/// \brief Defines profile of  20 frequencies for one residue.
+///
+/// Frequencies are identified by the one-letter-code for that amino acid.
+/// (possible codes: ACDEFGHIKLMNPQRSTVWY)
+class DLLEXPORT_OST_SEQ ProfileColumn {
+public:
+
+  /// \brief Construct a profile with all frequencies set to 0.
+  ProfileColumn() {
+    memset(freq_, 0, sizeof(freq_));
+  }
+
+  ProfileColumn(const ProfileColumn& rhs) {
+    memcpy(freq_, rhs.freq_, sizeof(freq_));
+  }
+  ProfileColumn& operator= (const ProfileColumn& rhs) {
+    memcpy(freq_, rhs.freq_, sizeof(freq_));
+    return *this;
+  }
+
+  static ProfileColumn BLOSUMNullModel();
+
+  /// \brief Translate one-letter-code to index (0-indexing).
+  static int GetIndex(char ch);
+
+  Real GetFreq(char ch) const;
+
+  void SetFreq(char ch, Real freq);
+
+  bool operator==(const ProfileColumn& rhs) const {
+    return !memcmp(freq_, rhs.freq_, sizeof(freq_));
+  }
+  bool operator!=(const ProfileColumn& rhs) const { return !(rhs == (*this)); }
+
+  Real* freqs_begin() { return freq_; }
+  Real* freqs_end() { return freq_ + 20; }
+  const Real* freqs_begin() const { return freq_; }
+  const Real* freqs_end() const { return freq_ + 20; }
+
+  /// \brief Get entropy for this column.
+  Real GetEntropy() const;
+
+  // functions to feed streams with limited accuracy of internal data
+  // not intended for python export
+
+  friend std::ofstream& operator<<(std::ofstream& os, ProfileColumn& col) {
+    int16_t data[20];
+    //transform aa_freq
+    for (uint i = 0; i < 20; ++i) {
+      data[i] = static_cast<int16_t>(col.freq_[i]*10000);
+    }
+    os.write(reinterpret_cast<char*>(data), sizeof(data));
+    return os;
+  }
+
+  friend std::ifstream& operator>>(std::ifstream& is, ProfileColumn& col) {
+    int16_t data[20];
+    is.read(reinterpret_cast<char*>(data), sizeof(data));
+    //transform aa_freq
+    for (uint i = 0; i < 20; ++i) {
+      col.freq_[i] = data[i] * 0.0001;
+    }
+    return is;
+  }
+
+private:
+  Real freq_[20];
+};
+
+/// \brief Provides a profile for a sequence.
+///
+/// Properties:
+/// - consists of N ProfileColumn (N = #residues in sequence)
+/// - contains sequence (String) of length N
+/// - user must enforce consistency between sequence length and #columns
+/// - contains a null_model to use for this sequence
+class DLLEXPORT_OST_SEQ ProfileHandle { 
+public:
+  /// \brief Constructs an empty profile handle (sequence = '', 0 columns).
+  ProfileHandle() {}
+
+  // uses compiler-generated copy- and assignment operators (work here!)
+
+  const std::vector<ProfileColumn>& GetColumns() const { return columns_; }
+
+  const ProfileColumn& GetNullModel() const { return null_model_; } 
+
+  void SetNullModel(const ProfileColumn& null_model) { null_model_ = null_model; }
+
+  String GetSequence() const { return seq_; }
+
+  void SetSequence(const String& seq) { seq_ = seq; }
+
+  /// \brief Extract subset of profile for columns from until to-1 (0-indexing).
+  /// Null model is copied from this profile.
+  /// \throw Error if to <= from or to > size().
+  ProfileHandlePtr Extract(uint from, uint to);
+
+  /// \brief Compute average entropy over all columns.
+  Real GetAverageEntropy() const;
+
+  // some functions to make it behave like a vector
+
+  void clear() { seq_ = ""; columns_.clear(); }
+
+  size_t size() const { return columns_.size(); }
+
+  bool empty() const { return columns_.empty(); }
+
+  void push_back(const ProfileColumn& c) { columns_.push_back(c); }  
+
+  ProfileColumn& operator[](size_t index) { return columns_[index]; }
+
+  const ProfileColumn& operator[](size_t index) const { return columns_[index]; }
+
+  ProfileColumn& at(size_t index) { return columns_.at(index); }
+
+  const ProfileColumn& at(size_t index) const { return columns_.at(index); }
+
+  bool operator==(const ProfileHandle& other) const {
+    return seq_ == other.seq_ &&
+           columns_ == other.columns_ &&
+           null_model_ == other.null_model_;
+  }
+  
+  bool operator!=(const ProfileHandle& other) const { return !(other == (*this)); }
+
+  ProfileColumnList::const_iterator columns_end() const { return columns_.end(); }
+  ProfileColumnList::iterator columns_end() { return columns_.end(); }
+  ProfileColumnList::const_iterator columns_begin() const { return columns_.begin(); }
+  ProfileColumnList::iterator columns_begin() { return columns_.begin(); }
+
+  // functions to feed streams with limited accuracy of internal data
+  // not intended for python export
+
+  friend std::ofstream& operator<<(std::ofstream& os, ProfileHandle& prof) {
+    // null model
+    os << prof.null_model_;
+    // num. columns/residues
+    uint32_t size = prof.size();
+    os.write(reinterpret_cast<char*>(&size), sizeof(uint32_t));
+    // sequence
+    if (prof.seq_.length() != size) {
+      throw Error("ProfileHandle - Inconsistency between number of columns and "
+                  " seq. length.");
+    }
+    os.write(prof.seq_.c_str(), size);
+    // columns
+    for(uint i = 0; i < size; ++i){
+      os << prof.columns_[i];
+    }
+
+    return os;
+  }
+
+  friend std::ifstream& operator>>(std::ifstream& is, ProfileHandle& prof) {
+    // null model
+    is >> prof.null_model_;
+    // num. columns/residues
+    uint32_t size;
+    is.read(reinterpret_cast<char*>(&size), sizeof(uint32_t));
+    // sequence
+    std::vector<char> tmp_buf(size);
+    is.read(&tmp_buf[0], size);
+    prof.seq_.assign(&tmp_buf[0], size);
+    // columns
+    prof.columns_.resize(size);
+    for(uint i = 0; i < size; ++i){
+      is >> prof.columns_[i];
+    }
+
+    return is;
+  }
+
+private:
+  String             seq_;
+  ProfileColumn      null_model_; 
+  ProfileColumnList  columns_;
+};
+
+/// \brief Contains a DB of profiles (identified by a unique name (String)).
+class DLLEXPORT_OST_SEQ ProfileDB {
+public:
+  /// \brief Saves all profiles in DB with limited accuracy of internal data.
+  /// Binary format with fixed-width integers (should be portable).
+  void Save(const String& filename) const;
+
+  static ProfileDBPtr Load(const String& filename);
+
+  void AddProfile(const String& name, ProfileHandlePtr prof);
+
+  ProfileHandlePtr GetProfile(const String& name) const;
+
+  size_t size() const { return data_.size(); }
+
+  std::vector<String> GetNames() const;
+
+private:
+  std::map<String, ProfileHandlePtr> data_;
+};
+
+}}
+
+#endif
diff --git a/modules/seq/base/tests/CMakeLists.txt b/modules/seq/base/tests/CMakeLists.txt
index 5fdf6e37c..3bdf19575 100644
--- a/modules/seq/base/tests/CMakeLists.txt
+++ b/modules/seq/base/tests/CMakeLists.txt
@@ -5,8 +5,8 @@ set(OST_SEQ_UNIT_TESTS
   test_aligned_region.cc
   test_alignment.cc
   test_hmm.cc
+  test_profile.cc
   tests.cc
 )
 
 ost_unittest(MODULE seq SOURCES "${OST_SEQ_UNIT_TESTS}")
-
diff --git a/modules/seq/base/tests/test_profile.cc b/modules/seq/base/tests/test_profile.cc
new file mode 100644
index 000000000..ef2292bc7
--- /dev/null
+++ b/modules/seq/base/tests/test_profile.cc
@@ -0,0 +1,144 @@
+//------------------------------------------------------------------------------
+// This file is part of the OpenStructure project <www.openstructure.org>
+//
+// Copyright (C) 2008-2015 by the OpenStructure authors
+//
+// This library is free software; you can redistribute it and/or modify it under
+// the terms of the GNU Lesser General Public License as published by the Free
+// Software Foundation; either version 3.0 of the License, or (at your option)
+// any later version.
+// This library is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with this library; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+//------------------------------------------------------------------------------
+
+/*
+  Author: Gerardo Tauriello, Gabriel Studer
+ */
+
+#define BOOST_TEST_DYN_LINK
+#include <boost/test/unit_test.hpp>
+#include <boost/test/auto_unit_test.hpp>
+#include <iostream>
+
+#include <ost/seq/profile_handle.hh>
+#include <ost/io/seq/hhm_io_handler.hh>
+#include <ost/io/seq/pssm_io_handler.hh>
+
+using namespace ost::seq;
+
+
+BOOST_AUTO_TEST_SUITE( profile );
+
+
+BOOST_AUTO_TEST_CASE(profile_column)
+{
+  const char * olc = "ACDEFGHIKLMNPQRSTVWY";
+  
+  // check empty column
+  ProfileColumn pc;
+  BOOST_CHECK_EQUAL(pc.GetEntropy(), Real(0));
+  Real* freq_ptr = pc.freqs_begin();
+  for (uint i = 0; i < 20; ++i) {
+    BOOST_CHECK_EQUAL(freq_ptr[i], Real(0));
+    BOOST_CHECK_EQUAL(pc.GetFreq(olc[i]), Real(0));
+    // check overwrite
+    const Real tst = Real(i)/Real(20);
+    pc.SetFreq(olc[i], tst);
+    BOOST_CHECK_EQUAL(pc.GetFreq(olc[i]), tst);
+  }
+
+  // check null model
+  ProfileColumn nm = ProfileColumn::BLOSUMNullModel();
+  BOOST_CHECK_GE(pc.GetEntropy(), Real(0));
+  Real sum_freq = 0;
+  freq_ptr = nm.freqs_begin();
+  for (uint i = 0; i < 20; ++i) {
+    BOOST_CHECK_GE(freq_ptr[i], Real(0));
+    sum_freq += freq_ptr[i];
+  }
+  BOOST_CHECK_CLOSE(sum_freq, Real(1), Real(1e-3));
+
+  // check assign/copy
+  ProfileColumn nm2(nm);
+  BOOST_CHECK(nm == nm2);
+  Real* freq_ptr2 = nm2.freqs_begin();
+  for (uint i = 0; i < 20; ++i) {
+    BOOST_CHECK_EQUAL(freq_ptr[i], freq_ptr2[i]);
+  }
+  pc = nm;
+  BOOST_CHECK(nm == pc);
+}
+
+BOOST_AUTO_TEST_CASE(profile_handle)
+{
+  // generate dummy
+  ProfileHandle prof;
+  BOOST_CHECK_EQUAL(prof.size(), size_t(0));
+  BOOST_CHECK_EQUAL(prof.GetSequence(), "");
+  prof.SetSequence("RRSPP");
+  ProfileColumn pc0 = ProfileColumn::BLOSUMNullModel();
+  prof.SetNullModel(pc0);
+  ProfileColumn pc1, pc2, pc3, pc4;
+  pc1.SetFreq('H', 0.74);
+  pc1.SetFreq('E', 0.01);
+  pc1.SetFreq('L', 0.25);
+  pc2.SetFreq('L', 1);
+  pc3.SetFreq('Y', 0.5);
+  pc3.SetFreq('E', 0.5);
+  pc4.SetFreq('A', 0.5);
+  pc4.SetFreq('H', 0.5);
+  prof.push_back(pc0);
+  prof.push_back(pc1);
+  prof.push_back(pc2);
+  prof.push_back(pc3);
+  prof.push_back(pc4);
+
+  // check components
+  BOOST_CHECK_EQUAL(prof.size(), size_t(5));
+  BOOST_CHECK(prof.GetNullModel() == pc0);
+  BOOST_CHECK_EQUAL(prof.GetSequence(), "RRSPP");
+
+  // check columns with different methods of getting the columns
+  BOOST_CHECK(prof[0] == pc0);
+  BOOST_CHECK(*(prof.columns_begin()+1) == pc1);
+  BOOST_CHECK(prof.GetColumns()[2] == pc2);
+  BOOST_CHECK(prof.at(3) == pc3);
+  BOOST_CHECK(*(prof.columns_end()-1) == pc4);
+
+  // check entropies
+  Real entropy1 = -0.74*log(0.74) - 0.01*log(0.01) - 0.25*log(0.25);
+  Real entropy2 = 0;
+  Real entropy3 = -log(0.5);
+  Real avg_entropy = (pc0.GetEntropy() + entropy1 + entropy2 + 2*entropy3)/5;
+  BOOST_CHECK_CLOSE(prof[1].GetEntropy(), entropy1, Real(1e-3));
+  BOOST_CHECK_CLOSE(prof[2].GetEntropy(), entropy2, Real(1e-3));
+  BOOST_CHECK_CLOSE(prof[3].GetEntropy(), entropy3, Real(1e-3));
+  BOOST_CHECK_CLOSE(prof.GetAverageEntropy(), avg_entropy, Real(1e-3));
+
+  // extract parts
+  ProfileHandlePtr pp = prof.Extract(1, 4);
+  avg_entropy = (entropy1 + entropy2 + entropy3)/3;
+  BOOST_CHECK_EQUAL(pp->size(), size_t(3));
+  BOOST_CHECK_EQUAL(pp->GetSequence(), "RSP");
+  BOOST_CHECK_CLOSE(pp->GetAverageEntropy(), avg_entropy, Real(1e-3));
+
+  pp = prof.Extract(0, 3);
+  avg_entropy = (pc0.GetEntropy() + entropy1 + entropy2)/3;
+  BOOST_CHECK_EQUAL(pp->size(), size_t(3));
+  BOOST_CHECK_EQUAL(pp->GetSequence(), "RRS");
+  BOOST_CHECK_CLOSE(pp->GetAverageEntropy(), avg_entropy, Real(1e-3));
+
+  pp = prof.Extract(3, 5);
+  BOOST_CHECK_EQUAL(pp->size(), size_t(2));
+  BOOST_CHECK_EQUAL(pp->GetSequence(), "PP");
+  BOOST_CHECK_CLOSE(pp->GetAverageEntropy(), entropy3, Real(1e-3));
+}
+
+
+BOOST_AUTO_TEST_SUITE_END();
-- 
GitLab