Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
O
openstructure
Manage
Activity
Members
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Releases
Container Registry
Model registry
Analyze
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
schwede
openstructure
Commits
4d65fea6
Commit
4d65fea6
authored
6 years ago
by
Gerardo Tauriello
Browse files
Options
Downloads
Patches
Plain Diff
Doc and style fixes in HHblits binding.
parent
1f86bfe5
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
modules/bindings/doc/hhblits.rst
+1
-14
1 addition, 14 deletions
modules/bindings/doc/hhblits.rst
modules/bindings/pymod/hhblits.py
+72
-68
72 additions, 68 deletions
modules/bindings/pymod/hhblits.py
with
73 additions
and
82 deletions
modules/bindings/doc/hhblits.rst
+
1
−
14
View file @
4d65fea6
...
...
@@ -109,20 +109,7 @@ so one may want to extract them:
Binding API
--------------------------------------------------------------------------------
.. auto
class
:: ost.bindings.hhblits
.HHblits
.. auto
module
:: ost.bindings.hhblits
:members:
.. autoclass:: ost.bindings.hhblits.HHblitsHit
.. autoclass:: ost.bindings.hhblits.HHblitsHeader
.. autofunction:: ost.bindings.hhblits.ParseHHblitsOutput
.. autofunction:: ost.bindings.hhblits.ParseA3M
.. autofunction:: ost.bindings.hhblits.ParseHeaderLine
.. autofunction:: ost.bindings.hhblits.ParseHHM
.. autofunction:: ost.bindings.hhblits.EstimateMemConsumption
.. LocalWords: HHblits homologs
This diff is collapsed.
Click to expand it.
modules/bindings/pymod/hhblits.py
+
72
−
68
View file @
4d65fea6
'''
HHblits wrapper.
'''
HHblits wrapper
classes and functions
.
'''
import
subprocess
...
...
@@ -124,8 +124,8 @@ def ParseHeaderLine(line):
:param line: Line from the output header.
:type line: :class:`str`
:return: Hit information
:rtype: :class:`HHblitsHit`
:return: Hit information
and query/template offsets
:rtype:
(
:class:`HHblitsHit`
, (:class:`int`, :class:`int`))
'''
for
i
in
range
(
0
,
len
(
line
)):
if
line
[
i
].
isdigit
():
...
...
@@ -147,15 +147,14 @@ def ParseHeaderLine(line):
def
ParseHHblitsOutput
(
output
):
"""
Parses the HHblits output a
nd returns a tuple of :class:`HHblitsHeader` and
a list of :class:`HHblitsHit` instance
s.
Parses the HHblits output a
s produced by :meth:`HHblits.Search` and returns
the header of the search results and a list of hit
s.
:param output: output of a :meth:`HHblits.Search`, needs to be iteratable,
e.g. an open file handle
:type output: :class:`file`/ iteratable
:param output: Iterable containing the lines of the HHblits output file
:type output: iterable (e.g. an open file handle)
:return: a tuple of the header of the search results and the hits
:rtype: (:class:`HHblitsHeader`, :class:`HHblitsHit`)
:rtype: (:class:`HHblitsHeader`,
:class:`list` of
:class:`HHblitsHit`)
"""
lines
=
iter
(
output
)
def
_ParseHeaderSection
(
lines
):
...
...
@@ -271,10 +270,10 @@ def ParseHHblitsOutput(output):
def
ParseA3M
(
a3m_file
):
'''
Parse secondary structure information and the multiple sequence alignment
out of an A3M file.
out of an A3M file
as produced by :meth:`HHblits.BuildQueryMSA`
.
:param a3m_file: Itera
ta
ble containing the lines of the A3M file
:type a3m_file: itera
ta
ble
,
e.g. an open
ed
file
:param a3m_file: Iterable containing the lines of the A3M file
:type a3m_file: iterable
(
e.g. an open file
handle)
:return: Dictionary containing
"
ss_pred
"
(:class:`list`),
"
ss_conf
"
(:class:`list`) and
"
msa
"
(:class:`~ost.seq.AlignmentHandle`).
...
...
@@ -323,36 +322,36 @@ def ParseA3M(a3m_file):
t
=
msa_seq
[
0
]
al
=
seq
.
AlignmentList
()
for
i
in
range
(
1
,
len
(
msa_seq
)):
qs
=
''
ts
=
''
k
=
0
for
c
in
msa_seq
[
i
]:
if
c
.
islower
():
qs
+=
'
-
'
ts
+=
c
.
upper
()
else
:
qs
+=
t
[
k
]
ts
+=
c
k
+=
1
nl
=
seq
.
CreateAlignment
(
seq
.
CreateSequence
(
msa_head
[
0
],
qs
),
seq
.
CreateSequence
(
msa_head
[
i
],
ts
))
al
.
append
(
nl
)
qs
=
''
ts
=
''
k
=
0
for
c
in
msa_seq
[
i
]:
if
c
.
islower
():
qs
+=
'
-
'
ts
+=
c
.
upper
()
else
:
qs
+=
t
[
k
]
ts
+=
c
k
+=
1
nl
=
seq
.
CreateAlignment
(
seq
.
CreateSequence
(
msa_head
[
0
],
qs
),
seq
.
CreateSequence
(
msa_head
[
i
],
ts
))
al
.
append
(
nl
)
profile_dict
[
'
msa
'
]
=
seq
.
alg
.
MergePairwiseAlignments
(
\
al
,
seq
.
CreateSequence
(
msa_head
[
0
],
t
))
al
,
seq
.
CreateSequence
(
msa_head
[
0
],
t
))
return
profile_dict
def
ParseHHM
(
profile
):
'''
Parse secondary structure information and the MSA out of an HHM profile.
'''
Parse secondary structure information and the MSA out of an HHM profile as
produced by :meth:`HHblits.A3MToProfile`.
:param profile: Opened file handle holding the profile.
:type profile: :class:`file`
:return: Dictionary containing
"
ss_pred
"
(:class:`list`),
"
ss_conf
"
(:class:`list`),
"
msa
"
(:class:`~ost.seq.AlignmentHandle`) and
"
consensus
"
(~ost.seq.SequenceHandle).
"
consensus
"
(
:class:`
~ost.seq.SequenceHandle
`
).
'''
profile_dict
=
dict
()
state
=
'
NONE
'
...
...
@@ -423,9 +422,8 @@ def ParseHHM(profile):
seq
.
CreateSequence
(
msa_head
[
i
],
ts
))
al
.
append
(
nl
)
profile_dict
[
'
msa
'
]
=
seq
.
alg
.
MergePairwiseAlignments
(
\
al
,
seq
.
CreateSequence
(
msa_head
[
0
],
t
))
#print profile_dict['msa'].ToString(80)
al
,
seq
.
CreateSequence
(
msa_head
[
0
],
t
))
#print profile_dict['msa'].ToString(80)
# Consensus
profile_dict
[
'
consensus
'
]
=
seq
.
CreateSequence
(
'
Consensus
'
,
consensus_txt
)
...
...
@@ -501,28 +499,23 @@ class HHblits:
self
.
working_dir
=
tmp_dir
.
dirname
self
.
filename
=
tmp_dir
.
files
[
0
]
def
Cleanup
(
self
):
"""
Delete temporary data
.
def
BuildQueryMSA
(
self
,
nrdb
,
iterations
=
1
,
mact
=
None
,
cpu
=
1
):
"""
Builds the MSA for the query sequence
.
Delete temporary data if no working dir was given. Controlled by
:attr:`needs_cleanup`.
"""
if
self
.
needs_cleanup
and
os
.
path
.
exists
(
self
.
working_dir
):
shutil
.
rmtree
(
self
.
working_dir
)
This function directly uses hhblits of hhtools. While in theory it would
be possible to do this by PSI-blasting on our own, hhblits is supposed
to be faster. Also it is supposed to prevent alignment corruption. The
alignment corruption is caused by low-scoring terminal alignments that
draw the sequences found by PSI-blast away from the optimum. By removing
these low scoring ends, part of the alignment corruption can be
suppressed.
def
BuildQueryMSA
(
self
,
nrdb
,
iterations
=
1
,
mact
=
None
,
cpu
=
1
):
"""
Builds the MSA for the query sequence
This function directly uses hhblits of hhtools. While in theory it
would be possible to do this by PSI-blasting on our own, hhblits is
supposed to be faster. Also it is supposed to prevent alignment
corruption. The alignment corruption is caused by low-scoring terminal
alignments that draw the sequences found by PSI-blast away from the
optimum. By removing these low scoring ends, part of the alignment
corruption can be suppressed. hhblits does **not** call PSIPRED on the
MSA to predict the secondary structure of the query sequence. This is
done by addss.pl of hhtools. The predicted secondary structure is
stored together with the sequences identified by hhblits.
hhblits does **not** call PSIPRED on the MSA to predict the secondary
structure of the query sequence. This is done by addss.pl of hhtools.
The predicted secondary structure is stored together with the sequences
identified by hhblits.
The produced A3M file can be parsed by :func:`ParseA3M`.
:param nrdb: Database to be align against; has to be an hhblits database
:type nrdb: :class:`str`
...
...
@@ -536,7 +529,7 @@ class HHblits:
:param cpu: ``-cpu`` of hhblits
:type cpu: :class:`int`
:return:
t
he path to the
MSA
file
:return:
T
he path to the
A3M
file
containing the MSA
:rtype: :class:`str`
"""
a3m_file
=
'
%s.a3m
'
%
os
.
path
.
splitext
(
self
.
filename
)[
0
]
...
...
@@ -588,13 +581,15 @@ class HHblits:
Converts the A3M alignment file to a hhm profile. If hhm_file is not
given, the output file will be set to <:attr:`a3m_file`-basename>.hhm.
:param a3m_file: input MSA
The produced A3M file can be parsed by :func:`ParseA3M`.
:param a3m_file: Path to input MSA as produced by :meth:`BuildQueryMSA`
:type a3m_file: :class:`str`
:param hhm_file: output file name
:param hhm_file:
Desired
output file name
:type hhm_file: :class:`str`
:return:
the p
ath to the profile
:return:
P
ath to the profile
file
:rtype: :class:`str`
"""
hhmake
=
os
.
path
.
join
(
self
.
bin_dir
,
'
hhmake
'
)
...
...
@@ -609,7 +604,6 @@ class HHblits:
raise
IOError
(
'
could not convert a3m to hhm file
'
)
return
hhm_file
def
A3MToCS
(
self
,
a3m_file
,
cs_file
=
None
,
options
=
{}):
"""
Converts the A3M alignment file to a column state sequence file. If
...
...
@@ -657,8 +651,19 @@ class HHblits:
ost
.
LogWarning
(
'
Creating column state sequence file (%s) failed
'
%
\
cs_file
)
def
Cleanup
(
self
):
"""
Delete temporary data.
Delete temporary data if no working dir was given. Controlled by
:attr:`needs_cleanup`.
"""
if
self
.
needs_cleanup
and
os
.
path
.
exists
(
self
.
working_dir
):
shutil
.
rmtree
(
self
.
working_dir
)
def
CleanupFailed
(
self
):
'''
In case something went wrong, call to make sure everything is clean.
This will delete the working dir independently of :attr:`needs_cleanup`.
'''
store_needs_cleanup
=
self
.
needs_cleanup
self
.
needs_cleanup
=
True
...
...
@@ -667,11 +672,10 @@ class HHblits:
def
Search
(
self
,
a3m_file
,
database
,
options
=
{},
prefix
=
''
):
"""
Searches for templates in the given database. Before running the
search, the hhm file is copied. This makes it possible to launch
several hhblits instances at once. Upon success, the filename of the
result file is returned. This file may be parsed with
:func:`ParseHHblitsOutput`.
Searches for templates in the given database. Before running the search,
the hhm file is copied. This makes it possible to launch several hhblits
instances at once. Upon success, the filename of the result file is
returned. This file may be parsed with :func:`ParseHHblitsOutput`.
:param a3m_file: input MSA file
:type a3m_file: :class:`str`
...
...
@@ -708,7 +712,7 @@ class HHblits:
base
=
os
.
path
.
basename
(
os
.
path
.
splitext
(
a3m_file
)[
0
])
hhr_file
=
'
%s%s_%s.hhr
'
%
(
prefix
,
base
,
opt_str
)
hhr_file
=
os
.
path
.
join
(
self
.
working_dir
,
hhr_file
)
search_cmd
=
'
%s %s -e 0.001 -Z 10000 -B 10000 -i %s -o %s -d %s
'
%
(
search_cmd
=
'
%s %s -e 0.001 -Z 10000 -B 10000 -i %s -o %s -d %s
'
%
(
self
.
hhblits_bin
,
opt_cmd
,
os
.
path
.
abspath
(
a3m_file
),
hhr_file
,
...
...
@@ -730,12 +734,12 @@ class HHblits:
return
hhr_file
__all__
=
[
'
HHblits
'
,
'
HHblitsHit
'
,
'
HHblitsHeader
'
,
'
ParseHeaderLine
'
,
__all__
=
[
'
HHblits
'
,
'
HHblitsHit
'
,
'
HHblitsHeader
'
,
'
ParseHHblitsOutput
'
,
'
ParseA3M
'
,
'
ParseHHM
'
,
'
EstimateMemConsumption
'
]
'
ParseHeaderLine
'
,
'
EstimateMemConsumption
'
]
# LocalWords: HHblits MSA hhblits hhtools PSIPRED addss param nrdb str mact
# LocalWords: cpu hhm func ParseHHblitsOutput ss pred conf msa hhsuite dir
# LocalWords: attr basename rtype cstranslate tuple HHblitsHeader meth aln
# LocalWords: HHblitsHit itera
ta
ble evalue pvalue neff hmms datetime
# LocalWords: HHblitsHit iterable evalue pvalue neff hmms datetime
# LocalWords: whitespace whitespaces
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment