Skip to content
Snippets Groups Projects
Commit 0db722a9 authored by Bienchen's avatar Bienchen
Browse files

Added new option "try_resnum_first" to AlignToSEQRES. Mainly for the...

Added new option "try_resnum_first" to AlignToSEQRES. Mainly for the MMCifParser to align seqres entries easier to the chains.
parent 84b8cdfa
No related branches found
No related tags found
No related merge requests found
from _ost_seq_alg import * from _ost_seq_alg import *
from ost.seq.alg.mat import * from ost.seq.alg.mat import *
def AlignToSEQRES(chain, seqres): def AlignToSEQRES(chain, seqres, try_resnum_first=False):
""" """
Aligns the residues of chain to the SEQRES sequence, inserting gaps where Aligns the residues of chain to the SEQRES sequence, inserting gaps where
needed. The function uses the connectivity of the protein backbone to find needed. The function uses the connectivity of the protein backbone to find
...@@ -9,35 +9,52 @@ def AlignToSEQRES(chain, seqres): ...@@ -9,35 +9,52 @@ def AlignToSEQRES(chain, seqres):
sequence. sequence.
All the non-ligand, peptide-linking residues of the chain must be listed in All the non-ligand, peptide-linking residues of the chain must be listed in
SEQRES. If there are any additional residues in the chain, the function raises SEQRES. If there are any additional residues in the chain, the function
a ValueError. raises a ValueError.
If 'try_resnum_first' is set, building the alignment following residue numbers
is tried first.
:returns: The alignment of the residues in the chain and the SEQRES entries. :returns: The alignment of the residues in the chain and the SEQRES entries.
:rtype: :class:`~ost.seq.AlignmentHandle` :rtype: :class:`~ost.seq.AlignmentHandle`
""" """
from ost import seq from ost import seq
from ost import mol from ost import mol
from ost import LogWarning
view=chain.Select('ligand=false and peptide=true') view=chain.Select('ligand=false and peptide=true')
residues=view.residues residues=view.residues
if len(residues)==0: if len(residues)==0:
return seq.CreateAlignment() return seq.CreateAlignment()
fragments=[residues[0].one_letter_code] if try_resnum_first:
for r1, r2 in zip(residues[:-2], residues[1:]): aln_seq = seq.CreateSequence('atoms', '-'*len(seqres))
if not mol.InSequence(r1.handle, r2.handle): for r1 in residues:
fragments.append('') if r1.number.num <= len(seqres) and r1.number.num > 0:
fragments[-1]+=r2.one_letter_code if seqres[r1.number.num - 1] == r1.one_letter_code:
ss=str(seqres) aln_seq[r1.number.num - 1] = r1.one_letter_code
pos=0 else:
aln_seq='' LogWarning('Sequence mismatch: chain has "' + r1.one_letter_code +
for frag in fragments: '", while SEQRES is "' + seqres[r1.number.num - 1] +
new_pos=ss.find(frag, pos) '" at the corresponding position.')
if new_pos==-1: try_resnum_first = False
raise ValueError('"%s" is not a substring of "%s"' % (frag, ss)) if not try_resnum_first:
aln_seq+='-'*(new_pos-pos)+frag fragments=[residues[0].one_letter_code]
pos=new_pos+len(frag) for r1, r2 in zip(residues[:-1], residues[1:]):
aln_seq+='-'*(len(seqres)-len(aln_seq)) if not mol.InSequence(r1.handle, r2.handle):
fragments.append('')
fragments[-1]+=r2.one_letter_code
ss=str(seqres)
pos=0
aln_seq=''
for frag in fragments:
new_pos=ss.find(frag, pos)
if new_pos==-1:
raise ValueError('"%s" is not a substring of "%s"' % (frag, ss))
aln_seq+='-'*(new_pos-pos)+frag
pos=new_pos+len(frag)
aln_seq = seq.CreateSequence('atoms',
aln_seq+('-'*(len(seqres)-len(aln_seq))))
return seq.CreateAlignment(seq.CreateSequence('SEQRES', str(seqres)), return seq.CreateAlignment(seq.CreateSequence('SEQRES', str(seqres)),
seq.CreateSequence('atoms', aln_seq)) aln_seq)
def AlignmentFromChainView(chain, handle_seq_name='handle', def AlignmentFromChainView(chain, handle_seq_name='handle',
......
...@@ -6,6 +6,7 @@ set(OST_SEQ_ALG_UNIT_TESTS ...@@ -6,6 +6,7 @@ set(OST_SEQ_ALG_UNIT_TESTS
test_local_align.py test_local_align.py
test_global_align.py test_global_align.py
test_weight_matrix.py test_weight_matrix.py
test_aligntoseqres.py
) )
ost_unittest(MODULE seq_alg SOURCES "${OST_SEQ_ALG_UNIT_TESTS}") ost_unittest(MODULE seq_alg SOURCES "${OST_SEQ_ALG_UNIT_TESTS}")
......
import unittest
from ost import *
from ost import settings
from ost import seq
from ost import io
class TestAlignToSeqRes(unittest.TestCase):
def testAlignWorking(self):
ent, seqres = io.LoadMMCIF("testfiles/align_to_seqres.mmcif")
chain = ent.FindChain("A")
sequence = seqres.FindSequence(chain.GetName());
seqres_aln = seq.alg.AlignToSEQRES(chain, sequence, try_resnum_first=False)
self.assertEqual(str(sequence), "MYTNSDFVVIKALEDGVNVIGLTRGADTRFHHSEKLDKGEV"+
"LIAQFTEHTSAIKVRGKAYIQTRHGVIESEGKK")
self.assertEqual(str(seqres_aln.sequences[1]), "----SDFVVIKALEDGVNVIGLTR--"+
"-TRFHHSEKLDKGEVLIAQFTEHTSAIKVRGKAYIQTRHGVIESEGK-")
seqres_aln = seq.alg.AlignToSEQRES(chain, sequence, try_resnum_first=True)
self.assertEqual(str(seqres_aln.sequences[1]), "----SDFVVIKALEDGVNVIGLTR--"+
"-TRFHHSEKLDKGEVLIAQFTEHTSAIKVRGKAYIQTRHGVIESEGK-")
def testAlignFail(self):
ent, seqres = io.LoadMMCIF("testfiles/align_to_seqres_valueerror.mmcif")
chain = ent.FindChain("A")
sequence = seqres.FindSequence(chain.GetName());
ost.PushVerbosityLevel(0)
self.assertRaises(ValueError, seq.alg.AlignToSEQRES, chain, sequence, True)
ost.PopVerbosityLevel()
if __name__ == "__main__":
try:
unittest.main()
except Exception, e:
print e
This diff is collapsed.
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment