Skip to content
Snippets Groups Projects
Commit d8fe10ea authored by Bienchen's avatar Bienchen
Browse files

Tested ParseHHM

parent 9dc803ab
Branches
Tags
No related merge requests found
...@@ -26,11 +26,11 @@ class HHblitsHit: ...@@ -26,11 +26,11 @@ class HHblitsHit:
.. attribute:: aln .. attribute:: aln
Pairwise alignment containing the aligned part between the query and the Pairwise alignment containing the aligned part between the query and the
target. First sequence is the query, the second sequence the target. target. First sequence is the query, the second sequence the target.
:type: :class:`ost.seq.AlignmentHandle` :type: :class:`ost.seq.AlignmentHandle`
.. attribute:: evalue .. attribute:: evalue
The E-value of the alignment The E-value of the alignment
...@@ -208,95 +208,91 @@ def ParseHHblitsOutput(output): ...@@ -208,95 +208,91 @@ def ParseHHblitsOutput(output):
return header, _ParseResultBody(header.query, hits, lines) return header, _ParseResultBody(header.query, hits, lines)
def ParseHHblitsProfile(profile): def ParseHHM(profile):
''' '''Parse secondary structure information and the MSA out of an HHM profile.
HHblits does not need profiles, it works on the MSA (a3m) file!
Do they contain all information? :param profile: Opened file handle holding the profile.
:type profile: :class:`file`
Parse secondary structure information and the MSA out of an HHM profile.
:return: Dictionary containing "ss_pred" (:class:`list`), "ss_conf"
:param profile: Opened file handle holding the profile. (:class:`list`), "msa" (:class:`~ost.seq.AlignmentHandle`) and
:type profile: :class:`file` "consensus" (~ost.seq.SequenceHandle).
'''
:return: Dictionary containing "ss_pred" (:class:`list`), "ss_conf" profile_dict = dict()
(:class:`list`), "msa" (:class:`~ost.seq.AlignmentHandle`) and state = 'NONE'
"consensus" (~ost.seq.SequenceHandle). pred_seq_txt = ''
''' conf_seq_txt = ''
profile_dict = dict() consensus_txt = ''
state = 'NONE' msa_seq = list()
pred_seq_txt = '' msa_head = list()
conf_seq_txt = '' for line in profile:
consensus_txt = '' if len(line.rstrip()) == 0:
msa_seq = list() continue
msa_head = list() if line.rstrip() == '>ss_pred PSIPRED predicted secondary structure':
for line in profile: state = 'sspred'
if len(line.rstrip()) == 0: continue
continue elif line.rstrip() == '>ss_conf PSIPRED confidence values':
if line.rstrip() == '>ss_pred PSIPRED predicted secondary structure': state = 'ssconf'
state = 'sspred' continue
continue elif line.rstrip() == '>Consensus':
elif line.rstrip() == '>ss_conf PSIPRED confidence values': state = 'consensus'
state = 'ssconf' continue
continue elif line[0] == '>':
elif line.rstrip() == '>Consensus': if state == 'consensus' or state == 'msa':
state = 'consensus' msa_seq.append('')
continue msa_head.append(line[1:].rstrip())
elif line[0] == '>': else:
if state == 'consensus' or state == 'msa': raise IOError('Profile file "%s" is missing ' % profile.name+
msa_seq.append('') 'the "Consensus" section')
msa_head.append(line[1:].rstrip()) state = 'msa'
else: continue
raise IOError('Profile file "%s" is missing the "Consensus" section' \ elif line[0] == '#':
% profile.name) state = 'NONE'
state = 'msa' continue
continue
elif line[0] == '#': if state == 'sspred':
state = 'NONE' pred_seq_txt += line.rstrip()
continue elif state == 'ssconf':
conf_seq_txt += line.rstrip()
if state == 'sspred': elif state == 'msa':
pred_seq_txt += line.rstrip() msa_seq[len(msa_seq)-1] += line.rstrip()
elif state == 'ssconf': elif state == 'consensus':
conf_seq_txt += line.rstrip() consensus_txt += line.rstrip()
elif state == 'msa':
msa_seq[len(msa_seq)-1] += line.rstrip() profile_dict['ss_pred'] = list()
elif state == 'consensus': profile_dict['ss_conf'] = list()
consensus_txt += line.rstrip() for i in range(0, len(pred_seq_txt)):
profile_dict['ss_pred'].append(pred_seq_txt[i])
profile_dict['ss_pred'] = list() profile_dict['ss_conf'].append(int(conf_seq_txt[i]))
profile_dict['ss_conf'] = list()
for i in range(0, len(pred_seq_txt)): # post processing
profile_dict['ss_pred'].append(pred_seq_txt[i]) # MSA
profile_dict['ss_conf'].append(int(conf_seq_txt[i])) profile_dict['msa'] = None
if len(msa_seq):
# post processing t = msa_seq[0]
# MSA al=seq.AlignmentList()
profile_dict['msa'] = None for i in range(1, len(msa_seq)):
if len(msa_seq): qs = ''
t = msa_seq[0] ts = ''
al=seq.AlignmentList() k = 0
for i in range(1, len(msa_seq)): for c in msa_seq[i]:
qs = '' if c.islower():
ts = '' qs += '-'
k = 0 ts += c.upper()
for c in msa_seq[i]: else:
if c.islower(): qs += t[k]
qs += '-' ts += c
ts += c.upper() k += 1
else: nl=seq.CreateAlignment(seq.CreateSequence(msa_head[0], qs),
qs += t[k] seq.CreateSequence(msa_head[i], ts))
ts += c al.append(nl)
k += 1 profile_dict['msa'] = seq.alg.MergePairwiseAlignments(al,
nl=seq.CreateAlignment(seq.CreateSequence(msa_head[0], qs),
seq.CreateSequence(msa_head[i], ts))
al.append(nl)
profile_dict['msa'] = seq.alg.MergePairwiseAlignments(al,
seq.CreateSequence(msa_head[0], t)) seq.CreateSequence(msa_head[0], t))
#print profile_dict['msa'].ToString(80) #print profile_dict['msa'].ToString(80)
# Consensus # Consensus
profile_dict['consensus'] = seq.CreateSequence('Consensus', consensus_txt) profile_dict['consensus'] = seq.CreateSequence('Consensus', consensus_txt)
return profile_dict return profile_dict
def EstimateMemConsumption(): def EstimateMemConsumption():
""" """
...@@ -561,4 +557,4 @@ class HHblits: ...@@ -561,4 +557,4 @@ class HHblits:
__all__= (HHblits) __all__= (HHblits)
# LocalWords: HHblits MSA hhblits hhtools PSIPRED addss param nrdb str mact # LocalWords: HHblits MSA hhblits hhtools PSIPRED addss param nrdb str mact
# LocalWords: cpu hhm func ParseHHblitsOutput # LocalWords: cpu hhm func ParseHHblitsOutput ss pred conf msa
...@@ -264,8 +264,32 @@ class TestHHblitsBindings(unittest.TestCase): ...@@ -264,8 +264,32 @@ class TestHHblitsBindings(unittest.TestCase):
'testfiles/hhblitsdb/hhblitsdb') 'testfiles/hhblitsdb/hhblitsdb')
self.assertEqual(search_file, None) self.assertEqual(search_file, None)
# search: def testParseHHMWorking(self):
# search with non-existing a3m # get info from an HHM file
with open("testfiles/test.hmm") as hhm_fh:
prof = hhblits.ParseHHM(hhm_fh)
self.assertEqual(''.join([str(x) for x in prof['ss_conf']]),
'999999999999998873391557999999998639441123987788888'+
'856788999999999998735477789999999887650299989899889'+
'999999997536679989999999999999999984329')
self.assertEqual(''.join(prof['ss_pred']), 'CCCHHHHHHHHHHHHHHHCCCHHHH'+
'HHHHHHHHHHCCCCCCCCCCCCCCCCCHHHHHHHHHHHHHHHHHHHCCCCH'+
'HHHHHHHHHHHHHHCCCCHHHHHHHHHHHHHHHHHHCCCCCCHHHHHHHHH'+
'HHHHHHHHHHHHCC')
self.assertEqual(str(prof['consensus']), 'xltxxxxxxixxsWxxvxxxxxxxgxx'+
'xxxxlfxxxPxxxxxFxxxxxxxxxxxxxxhxxxvxxxlxxxixxldxxxx'+
'xlxxlxxxHxxxxgvxxxxxxxxxxxlxxxlxxxxgxxxxxxxxxAWxxxx'+
'xxixxxmxxxyx')
self.assertEqual(prof['msa'].GetCount(), 7)
def testParseHHMNotWorking(self):
# get info from an HHM file
with self.assertRaises(IOError) as ioe:
hhblits.ParseHHM(open('testfiles/testali.a3m'))
self.assertEqual(ioe.exception.message,
'Profile file "testfiles/testali.a3m" is missing '+
'the "Consensus" section')
if __name__ == "__main__": if __name__ == "__main__":
hhsuite_root_dir = os.getenv('EBROOTHHMINSUITE') hhsuite_root_dir = os.getenv('EBROOTHHMINSUITE')
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment