Skip to content
Snippets Groups Projects
Commit 0db722a9 authored by Bienchen's avatar Bienchen
Browse files

Added new option "try_resnum_first" to AlignToSEQRES. Mainly for the...

Added new option "try_resnum_first" to AlignToSEQRES. Mainly for the MMCifParser to align seqres entries easier to the chains.
parent 84b8cdfa
No related branches found
No related tags found
No related merge requests found
from _ost_seq_alg import *
from ost.seq.alg.mat import *
def AlignToSEQRES(chain, seqres):
def AlignToSEQRES(chain, seqres, try_resnum_first=False):
"""
Aligns the residues of chain to the SEQRES sequence, inserting gaps where
needed. The function uses the connectivity of the protein backbone to find
......@@ -9,35 +9,52 @@ def AlignToSEQRES(chain, seqres):
sequence.
All the non-ligand, peptide-linking residues of the chain must be listed in
SEQRES. If there are any additional residues in the chain, the function raises
a ValueError.
SEQRES. If there are any additional residues in the chain, the function
raises a ValueError.
If 'try_resnum_first' is set, building the alignment following residue numbers
is tried first.
:returns: The alignment of the residues in the chain and the SEQRES entries.
:rtype: :class:`~ost.seq.AlignmentHandle`
"""
from ost import seq
from ost import mol
from ost import LogWarning
view=chain.Select('ligand=false and peptide=true')
residues=view.residues
if len(residues)==0:
return seq.CreateAlignment()
fragments=[residues[0].one_letter_code]
for r1, r2 in zip(residues[:-2], residues[1:]):
if not mol.InSequence(r1.handle, r2.handle):
fragments.append('')
fragments[-1]+=r2.one_letter_code
ss=str(seqres)
pos=0
aln_seq=''
for frag in fragments:
new_pos=ss.find(frag, pos)
if new_pos==-1:
raise ValueError('"%s" is not a substring of "%s"' % (frag, ss))
aln_seq+='-'*(new_pos-pos)+frag
pos=new_pos+len(frag)
aln_seq+='-'*(len(seqres)-len(aln_seq))
if try_resnum_first:
aln_seq = seq.CreateSequence('atoms', '-'*len(seqres))
for r1 in residues:
if r1.number.num <= len(seqres) and r1.number.num > 0:
if seqres[r1.number.num - 1] == r1.one_letter_code:
aln_seq[r1.number.num - 1] = r1.one_letter_code
else:
LogWarning('Sequence mismatch: chain has "' + r1.one_letter_code +
'", while SEQRES is "' + seqres[r1.number.num - 1] +
'" at the corresponding position.')
try_resnum_first = False
if not try_resnum_first:
fragments=[residues[0].one_letter_code]
for r1, r2 in zip(residues[:-1], residues[1:]):
if not mol.InSequence(r1.handle, r2.handle):
fragments.append('')
fragments[-1]+=r2.one_letter_code
ss=str(seqres)
pos=0
aln_seq=''
for frag in fragments:
new_pos=ss.find(frag, pos)
if new_pos==-1:
raise ValueError('"%s" is not a substring of "%s"' % (frag, ss))
aln_seq+='-'*(new_pos-pos)+frag
pos=new_pos+len(frag)
aln_seq = seq.CreateSequence('atoms',
aln_seq+('-'*(len(seqres)-len(aln_seq))))
return seq.CreateAlignment(seq.CreateSequence('SEQRES', str(seqres)),
seq.CreateSequence('atoms', aln_seq))
aln_seq)
def AlignmentFromChainView(chain, handle_seq_name='handle',
......
......@@ -6,6 +6,7 @@ set(OST_SEQ_ALG_UNIT_TESTS
test_local_align.py
test_global_align.py
test_weight_matrix.py
test_aligntoseqres.py
)
ost_unittest(MODULE seq_alg SOURCES "${OST_SEQ_ALG_UNIT_TESTS}")
......
import unittest
from ost import *
from ost import settings
from ost import seq
from ost import io
class TestAlignToSeqRes(unittest.TestCase):
def testAlignWorking(self):
ent, seqres = io.LoadMMCIF("testfiles/align_to_seqres.mmcif")
chain = ent.FindChain("A")
sequence = seqres.FindSequence(chain.GetName());
seqres_aln = seq.alg.AlignToSEQRES(chain, sequence, try_resnum_first=False)
self.assertEqual(str(sequence), "MYTNSDFVVIKALEDGVNVIGLTRGADTRFHHSEKLDKGEV"+
"LIAQFTEHTSAIKVRGKAYIQTRHGVIESEGKK")
self.assertEqual(str(seqres_aln.sequences[1]), "----SDFVVIKALEDGVNVIGLTR--"+
"-TRFHHSEKLDKGEVLIAQFTEHTSAIKVRGKAYIQTRHGVIESEGK-")
seqres_aln = seq.alg.AlignToSEQRES(chain, sequence, try_resnum_first=True)
self.assertEqual(str(seqres_aln.sequences[1]), "----SDFVVIKALEDGVNVIGLTR--"+
"-TRFHHSEKLDKGEVLIAQFTEHTSAIKVRGKAYIQTRHGVIESEGK-")
def testAlignFail(self):
ent, seqres = io.LoadMMCIF("testfiles/align_to_seqres_valueerror.mmcif")
chain = ent.FindChain("A")
sequence = seqres.FindSequence(chain.GetName());
ost.PushVerbosityLevel(0)
self.assertRaises(ValueError, seq.alg.AlignToSEQRES, chain, sequence, True)
ost.PopVerbosityLevel()
if __name__ == "__main__":
try:
unittest.main()
except Exception, e:
print e
This diff is collapsed.
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment