Skip to content
Snippets Groups Projects
Commit 14ddc961 authored by Bienchen's avatar Bienchen
Browse files

ParseA3M

parent 9b1ac0e6
No related branches found
No related tags found
No related merge requests found
...@@ -197,6 +197,81 @@ def ParseHHblitsOutput(output): ...@@ -197,6 +197,81 @@ def ParseHHblitsOutput(output):
hits = _ParseTableOfContents(lines) hits = _ParseTableOfContents(lines)
return header, _ParseResultBody(header.query, hits, lines) return header, _ParseResultBody(header.query, hits, lines)
def ParseA3M(a3m_file):
'''
Parse secondary structure information and the multiple sequence alignment
out of an A3M file.
:param a3m_file: Iterable containing the lines of the A3M file
:type a3m_file: iterable, e.g. an openend file
:return: Dictionary containing "ss_pred" (:class:`list`), "ss_conf"
(:class:`list`) and "msa" (:class:`~ost.seq.AlignmentHandle`).
'''
profile_dict = dict()
state = 'NONE'
pred_seq_txt = ''
conf_seq_txt = ''
msa_seq = list()
msa_head = list()
for line in a3m_file:
if len(line.rstrip()) == 0:
continue
elif line.startswith('>ss_pred'):
state = 'sspred'
continue
elif line.startswith('>ss_conf'):
state = 'ssconf'
continue
elif line[0] == '>':
if state == 'ssconf' or state == 'msa':
msa_seq.append('')
msa_head.append(line[1:].rstrip())
else:
raise IOError('The A3M file is missing the "ss_conf" section')
state = 'msa'
continue
if state == 'sspred':
pred_seq_txt += line.rstrip()
elif state == 'ssconf':
conf_seq_txt += line.rstrip()
elif state == 'msa':
msa_seq[len(msa_seq)-1] += line.rstrip()
profile_dict['ss_pred'] = list()
profile_dict['ss_conf'] = list()
for i in range(0, len(pred_seq_txt)):
profile_dict['ss_pred'].append(pred_seq_txt[i])
profile_dict['ss_conf'].append(int(conf_seq_txt[i]))
# post processing
# MSA
profile_dict['msa'] = None
if len(msa_seq) > 1:
t = msa_seq[0]
al = seq.AlignmentList()
for i in range(1, len(msa_seq)):
qs = ''
ts = ''
k = 0
for c in msa_seq[i]:
if c.islower():
qs += '-'
ts += c.upper()
else:
qs += t[k]
ts += c
k += 1
nl = seq.CreateAlignment(seq.CreateSequence(msa_head[0], qs),
seq.CreateSequence(msa_head[i], ts))
al.append(nl)
profile_dict['msa'] = seq.alg.MergePairwiseAlignments(\
al,
seq.CreateSequence(msa_head[0],
t))
return profile_dict
def ParseHHM(profile): def ParseHHM(profile):
'''Parse secondary structure information and the MSA out of an HHM profile. '''Parse secondary structure information and the MSA out of an HHM profile.
......
...@@ -291,6 +291,20 @@ class TestHHblitsBindings(unittest.TestCase): ...@@ -291,6 +291,20 @@ class TestHHblitsBindings(unittest.TestCase):
'Profile file "testfiles/testali.a3m" is missing '+ 'Profile file "testfiles/testali.a3m" is missing '+
'the "Consensus" section') 'the "Consensus" section')
def testParseA3MWorking(self):
# get info from an HHM file
with open("testfiles/testali.a3m") as a3m_fh:
prof = hhblits.ParseA3M(a3m_fh)
self.assertEqual(''.join([str(x) for x in prof['ss_conf']]),
'999999999999998873391557999999998639441123987788888'+
'856788999999999998735477789999999887650299989899889'+
'999999997536679989999999999999999984329')
self.assertEqual(''.join(prof['ss_pred']), 'CCCHHHHHHHHHHHHHHHCCCHHHH'+
'HHHHHHHHHHCCCCCCCCCCCCCCCCCHHHHHHHHHHHHHHHHHHHCCCCH'+
'HHHHHHHHHHHHHHCCCCHHHHHHHHHHHHHHHHHHCCCCCCHHHHHHHHH'+
'HHHHHHHHHHHHCC')
self.assertEqual(prof['msa'].GetCount(), 253)
def testParseHHblitsOutput(self): def testParseHHblitsOutput(self):
header, hits = hhblits.ParseHHblitsOutput(open("testfiles/test.hhr")) header, hits = hhblits.ParseHHblitsOutput(open("testfiles/test.hhr"))
self.assertEqual(header.query, 'Test') self.assertEqual(header.query, 'Test')
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment