From f8fc94c762bc9771bcf5dfef2447b9fe1b100df2 Mon Sep 17 00:00:00 2001
From: Gabriel Studer <gabriel.studer@unibas.ch>
Date: Mon, 30 Oct 2023 23:05:52 +0100
Subject: [PATCH] FSStructureServer: alternative access functionality

Access OMF object by pos, length and chunk data - expert use only.
Background for this development is a request by Andrew. He likes the idea
of keeping AFDB outside Mongo in big data blobs. However, for each uniprot
AC it's OK to store a couple of numbers => the numbers that are stored in
the indexing mechanism of FSStructureServer (pos, length, chunk). These can
be quickly changed in case of an update without massive data shuffling.
Accessing an OMF object from a cold data blob then requires opening the
memory mapped data and asking the OS to jump to a certain location in that
data blob. This avoids loading the whole indexing data if only a single
structure needs to be loaded.
---
 modelling/pymod/_afdb_modelling.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/modelling/pymod/_afdb_modelling.py b/modelling/pymod/_afdb_modelling.py
index a273d8d8..ffecf687 100644
--- a/modelling/pymod/_afdb_modelling.py
+++ b/modelling/pymod/_afdb_modelling.py
@@ -226,6 +226,27 @@ class FSStructureServer:
         omf_data = self.data[chunk][pos:pos+length]
         return io.OMF.FromBytes(gzip.decompress(omf_data))
 
+    def GetOMFByPLC(self, pos, length, chunk):
+        """ Get stored OMF data structure
+
+        Get data by explicitly specifying PLC (pos, length, chunk). For expert
+        use only, no range checks are performed.
+        Instead of a uniprot AC or an index, this function directly takes the
+        internal pos, length and chunk parameters that are stored for that
+        particular index. Use case: avoid loading the indexing data and only
+        open the memory mapped data files.
+
+        :param pos: Byte position of entry in specified chunk
+        :type pos: :class:`int`
+        :param length: Number of bytes of the (gzip compressed) entry
+        :type length: :class:`int`
+        :param chunk: Chunk in which entry resides
+        :type chunk: :class:`int`
+        :returns: OMF data structure of type :class:`ost.io.OMF`
+        """
+        omf_data = self.data[chunk][pos:pos+length]
+        return io.OMF.FromBytes(gzip.decompress(omf_data))
+
     def GetOMF(self, uniprot_ac, fragment="F1", version="v4"):
         """ Get stored OMF data structure
 
-- 
GitLab