Skip to content
Snippets Groups Projects
Commit 4d9ba7a7 authored by Studer Gabriel's avatar Studer Gabriel
Browse files

Document and enhance info that is encoded in tpls returned by AFDBTPLSearch

parent 6e39a68e
No related branches found
No related tags found
No related merge requests found
......@@ -321,7 +321,7 @@ class FSStructureServer:
class PentaMatch:
""" Pentamer matching for fast sequence searches
:class:`PentamerMatch` has fast sequence searches with low sensitivity as
:class:`PentaMatch` has fast sequence searches with low sensitivity as
use case. Specifically searching the full AFDB. Stores all unique pentamers
for each search sequence. Given a query sequence, it computes the number of
matching pentamers with respect to each search sequence and returns the top
......@@ -422,9 +422,10 @@ class PentaMatch:
But the latter is required if there are
duplicates.
:type unique_pentamers: :class:`bool`
:returns: :class:`list` of :class:`int` with length *N*. If
*return_counts* is true, the :class:`list` contains
:class:`tuple` with two elements: 1) count 2) index.
:returns: :class:`list` of :class:`int` with length *N* specifying
entry indices. If *return_counts* is true, the
:class:`list` contains :class:`tuple` with two elements:
1) count 2) index.
:raises: :class:`RuntimeError` if N is invalid or sequence is shorter
than 5 characters
"""
......@@ -513,7 +514,13 @@ def AFDBTPLSearch(fs_server, pentamatch, trg_seq, pentamatch_n = 100,
:returns: :class:`list` of pairs with first element being the tpl score,
the second element being a :class:`ost.seq.AlignmentHandle` with
first sequence being *trg_seq* and second sequence the hit found
in *fs_server* with structure attached.
in *fs_server* with structure attached. If *fs_server* has been
generated with the default procedure described in the docs,
additional info is available in the name of the attached
structure. It is accessible with
aln.GetSequence(1).GetAttachedView().GetName(). The name is
structured as "<UniprotAC> <Fragment> <AFDB version> <Idx>" where
<Idx> refers to the raw index of the template in *fs_server*.
"""
top_n = pentamatch.TopN(pentamatch_n, str(trg_seq))
if isinstance(trg_seq, str):
......@@ -535,7 +542,7 @@ def AFDBTPLSearch(fs_server, pentamatch, trg_seq, pentamatch_n = 100,
if col[1] != '-':
current_pos += 1
score = summed_bfac / len(trg_seq)
tmp.append((score, aln, omf))
tmp.append((score, aln, omf, idx))
tmp.sort(reverse=True, key=lambda x: x[0])
return_list = list()
for item in tmp[:tpl_n]:
......@@ -551,7 +558,9 @@ def AFDBTPLSearch(fs_server, pentamatch, trg_seq, pentamatch_n = 100,
new_s2 = seq.CreateSequence(s2.name, s2_prefix + str(s2))
new_s2.SetOffset(s2.offset)
aln = seq.CreateAlignment(new_s1, new_s2)
aln.AttachView(1, item[2].GetAUChain("A").CreateFullView())
ent = item[2].GetAUChain("A").CreateFullView()
ent.SetName(ent.GetName() + ' ' + str(item[3]))
aln.AttachView(1, ent)
return_list.append((item[0], aln))
return return_list
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment