diff --git a/modelling/pymod/export_afdb.cc b/modelling/pymod/export_afdb.cc
index b4d4eb08172622358e26d2782e2547e70b8e2086..7a9741d7552909274fd0eaa84c667376996a76be 100644
--- a/modelling/pymod/export_afdb.cc
+++ b/modelling/pymod/export_afdb.cc
@@ -39,4 +39,8 @@ void export_afdb()
                                                           arg("result_list")));
   def("CreateAFDBIdx", &CreateAFDBIdx, (arg("uniprot_ac"), arg("fragment"),
                                         arg("version")));
+
+  def("CreatePentaMatch", &CreatePentaMatch, (arg("seq_list"),
+                                              arg("db_path"),
+                                              arg("entries_from_seqnames")));
 }
diff --git a/modelling/src/afdb.cc b/modelling/src/afdb.cc
index d41564c84f2ded3caa41117aa8c69f5bb25756bb..210146ac2b5fbcec97e2dc029e5fb3ef5a81245f 100644
--- a/modelling/src/afdb.cc
+++ b/modelling/src/afdb.cc
@@ -14,8 +14,10 @@
 // limitations under the License.
 
 #include <algorithm>
+#include <fstream>
 
 #include <ost/log.hh>
+#include <ost/string_ref.hh>
 
 #include <promod3/modelling/afdb.hh>
 
@@ -48,6 +50,15 @@ namespace{
     throw ost::Error(ss.str());
   }
 
+  inline int PentamerToIdx(const char* ptr) {
+    return CharToIdx(ptr[0])*160000 + CharToIdx(ptr[1])*8000 +
+           CharToIdx(ptr[2])*400 + CharToIdx(ptr[3])*20 + CharToIdx(ptr[4]);
+  }
+  
+  inline int PentamerToIdx(const int* ptr) {
+    return ptr[0]*160000 + ptr[1]*8000 + ptr[2]*400 + ptr[3]*20 + ptr[4];
+  }
+
   inline uint64_t AlphaToIdx(char ch) {
     if(ch == ' ') {
       return 0;
@@ -90,17 +101,22 @@ namespace{
 
 namespace promod3 { namespace modelling {
 
-int PentamerToIdx(const char* ptr) {
-  return CharToIdx(ptr[0])*160000 + CharToIdx(ptr[1])*8000 +
-         CharToIdx(ptr[2])*400 + CharToIdx(ptr[3])*20 + CharToIdx(ptr[4]);
-}
 
 void SeqToPentamerIndices(const String& seq, bool unique, std::vector<int>& indices) {
-  int N = seq.size() - 4;
+  int N = seq.size();
+  if(N < 5) {
+    std::stringstream ss;
+    ss << "Sequence must have at least length 5, got: " << seq;
+    throw ost::Error(ss.str());
+  }
   indices.resize(N);
   for(int i = 0; i < N; ++i) {
-    indices[i] = PentamerToIdx(&seq[i]);
+    indices[i] = CharToIdx(seq[i]);
+  }
+  for(int i = 0; i < N-4; ++i) {
+    indices[i] = PentamerToIdx(&indices[i]);
   }
+  indices.resize(N-4);
   if(unique) {
     auto last = std::unique(indices.begin(), indices.end());
     indices.erase(last, indices.end());
@@ -123,40 +139,40 @@ uint64_t CreateAFDBIdx(const String& uniprot_ac, int fragment, int version) {
   }
 
   if(!CheckNumeric(uniprot_ac[1], false)) {
-    throw ost::Error("Exp capital alphabetic character at idx 0 of uniprot AC");
+    throw ost::Error("Exp capital alphabetic character at idx 1 of uniprot AC");
   }
 
   if(!CheckAlphaNumeric(uniprot_ac[2], false)) {
-    throw ost::Error("Exp capital alphabetic character at idx 0 of uniprot AC");
+    throw ost::Error("Exp capital alphabetic character at idx 2 of uniprot AC");
   }
 
   if(!CheckAlphaNumeric(uniprot_ac[3], false)) {
-    throw ost::Error("Exp capital alphabetic character at idx 0 of uniprot AC");
+    throw ost::Error("Exp capital alphabetic character at idx 3 of uniprot AC");
   }
 
   if(!CheckAlphaNumeric(uniprot_ac[4], false)) {
-    throw ost::Error("Exp capital alphabetic character at idx 0 of uniprot AC");
+    throw ost::Error("Exp capital alphabetic character at idx 4 of uniprot AC");
   }
 
   if(!CheckNumeric(uniprot_ac[5], false)) {
-    throw ost::Error("Exp capital alphabetic character at idx 0 of uniprot AC");
+    throw ost::Error("Exp capital alphabetic character at idx 5 of uniprot AC");
   }
 
   if(ac_size > 6) {
     if(!CheckAlpha(uniprot_ac[6], true)) {
-      throw ost::Error("Exp capital alphabetic character at idx 0 of uniprot AC");
+      throw ost::Error("Exp capital alphabetic character at idx 6 of uniprot AC");
     }
   
     if(!CheckAlphaNumeric(uniprot_ac[7], true)) {
-      throw ost::Error("Exp capital alphabetic character at idx 0 of uniprot AC");
+      throw ost::Error("Exp capital alphabetic character at idx 7 of uniprot AC");
     }
   
     if(!CheckAlphaNumeric(uniprot_ac[8], true)) {
-      throw ost::Error("Exp capital alphabetic character at idx 0 of uniprot AC");
+      throw ost::Error("Exp capital alphabetic character at idx 8 of uniprot AC");
     }
   
     if(!CheckNumeric(uniprot_ac[9], true)) {
-      throw ost::Error("Exp capital alphabetic character at idx 0 of uniprot AC");
+      throw ost::Error("Exp capital alphabetic character at idx 9 of uniprot AC");
     }
   }
 
@@ -187,6 +203,92 @@ uint64_t CreateAFDBIdx(const String& uniprot_ac, int fragment, int version) {
   return idx;
 }
 
+void CreatePentaMatch(const ost::seq::SequenceList& seq_list,
+                      const String& db_path,
+                      bool entries_from_seqnames) {
+
+  String indexer_path = db_path + "/" + "indexer.dat";
+  String pos_path = db_path + "/" + "pos.dat";
+  String length_path = db_path + "/" + "length.dat";
+  String meta_path = db_path + "/" + "meta.dat";
+
+  std::vector<int> entry_indices;
+  entry_indices.reserve(seq_list.GetCount());
+  if(entries_from_seqnames) {
+    for(int i = 0; i < seq_list.GetCount(); ++i) {
+      const String& sname = seq_list[i].GetName();
+      ost::StringRef s(&sname[0], sname.size());
+      std::pair<bool, int> idx = s.trim().to_int();
+      if(idx.first == false) {
+        std::stringstream ss;
+        ss << "Cannot cast seq name to integer: " << sname;
+        throw ost::Error(ss.str());
+      }
+      entry_indices.push_back(idx.second);
+    }
+  } else {
+    for(int i = 0; i < seq_list.GetCount(); ++i) {
+      entry_indices.push_back(i);
+    }
+  }
+
+  std::vector<std::vector<int32_t> > indexer(3200000);
+  for(int i = 0; i < seq_list.GetCount(); ++i) {
+    int entry_idx = entry_indices[i];
+    std::vector<int> penta_indices;
+    SeqToPentamerIndices(seq_list[i].GetString(), true, penta_indices);
+    for(auto j = penta_indices.begin(); j != penta_indices.end(); ++j) {
+      indexer[*j].push_back(entry_idx);
+    }
+  }
+
+  for(auto i = indexer.begin(); i != indexer.end(); ++i) {
+    std::sort(i->begin(), i->end());
+  }
+
+  std::ofstream indexer_stream(indexer_path, std::ios::binary);
+  if(!indexer_stream) {
+    throw ost::Error("Could not open indexer file: " + indexer_path);
+  }
+  std::vector<int64_t> positions;
+  std::vector<int32_t> lengths;
+  int64_t current_pos = 0;
+  for(auto i = indexer.begin(); i != indexer.end(); ++i) {
+    int size = i->size();
+    if(size > 0) {
+      indexer_stream.write(reinterpret_cast<char*>(&(*i)[0]),
+                           size*sizeof(int32_t));
+    }
+    positions.push_back(current_pos);
+    lengths.push_back(size);
+    current_pos += size;
+  }
+  indexer_stream.close();
+
+  std::ofstream pos_stream(pos_path, std::ios::binary);
+  std::ofstream length_stream(length_path, std::ios::binary);
+  std::ofstream meta_stream(meta_path, std::ofstream::out);
+  if(!pos_stream) {
+    throw ost::Error("Could not open pos file: " + pos_path);
+  }
+  if(!length_stream) {
+    throw ost::Error("Could not open length file: " + length_path);
+  }
+  if(!meta_stream) {
+    throw ost::Error("Could not open meta file: " + meta_path);
+  }
+  pos_stream.write(reinterpret_cast<char*>(&positions[0]),
+                   positions.size()*sizeof(int64_t));
+  length_stream.write(reinterpret_cast<char*>(&lengths[0]),
+                      lengths.size()*sizeof(int32_t));
+  int32_t max_entry_idx = *std::max_element(entry_indices.begin(),
+                                            entry_indices.end());
+  meta_stream << max_entry_idx;
+  pos_stream.close();
+  length_stream.close();
+  meta_stream.close();
+}
+
 
 
 }} //ns
diff --git a/modelling/src/afdb.hh b/modelling/src/afdb.hh
index e48f792b0fbcb75323a47953ca06ccc6dae19e20..551e51d97285707825851f19fdb721fbb0b1687f 100644
--- a/modelling/src/afdb.hh
+++ b/modelling/src/afdb.hh
@@ -17,17 +17,20 @@
 #define PM3_MODELLING_AFDB_HH
 
 #include <ost/mol/mol.hh>
+#include <ost/seq/sequence_list.hh>
 #include <promod3/core/message.hh>
 
-namespace promod3 { namespace modelling {
 
-int PentamerToIdx(const char* ptr);
+namespace promod3 { namespace modelling {
 
 void SeqToPentamerIndices(const String& seq, bool unique,
                           std::vector<int>& indices);
 
-uint64_t CreateAFDBIdx(const String& uniprot_ac, int fragment, int version);
+void CreatePentaMatch(const ost::seq::SequenceList& seq_list,
+                      const String& db_path,
+                      bool entries_from_seqnames);
 
+uint64_t CreateAFDBIdx(const String& uniprot_ac, int fragment, int version);
 }} //ns
 
 #endif