Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
O
openstructure
Manage
Activity
Members
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Releases
Container Registry
Model registry
Analyze
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
schwede
openstructure
Commits
75f24bfe
Commit
75f24bfe
authored
6 years ago
by
Gerardo Tauriello
Browse files
Options
Downloads
Patches
Plain Diff
SCHWED-3472: Merged HHblits changed from SM into OST.
parent
b7dc72a6
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
modules/bindings/doc/hhblits.rst
+2
-4
2 additions, 4 deletions
modules/bindings/doc/hhblits.rst
modules/bindings/pymod/hhblits.py
+51
-30
51 additions, 30 deletions
modules/bindings/pymod/hhblits.py
with
53 additions
and
34 deletions
modules/bindings/doc/hhblits.rst
+
2
−
4
View file @
75f24bfe
:mod:`~ost.bindings.hhblits` - Search related sequences in databases
================================================================================
.. module:: ost.bindings.hhblits
:synopsis: Search related sequences in databases
Introduction
--------------------------------------------------------------------------------
...
...
@@ -15,7 +12,7 @@ one is provided, queried with a sequence profile. The latter one needs to be
calculated before the actual search. In very simple words, HHblits is using
per-sequence scoring functions to be more sensitive, in this particular case
Hidden Markov models. The software suite needed for HHblits can be found
`here <http://
toolkit.tuebingen.mpg.de/hhblits
>`_.
`here <http://
wwwuser.gwdg.de/~compbiol/data/hhsuite/releases/all/
>`_.
Examples
...
...
@@ -110,6 +107,7 @@ Binding API
--------------------------------------------------------------------------------
.. automodule:: ost.bindings.hhblits
:synopsis: Search related sequences in databases
:members:
.. LocalWords: HHblits homologs
This diff is collapsed.
Click to expand it.
modules/bindings/pymod/hhblits.py
+
51
−
30
View file @
75f24bfe
...
...
@@ -460,6 +460,8 @@ class HHblits:
self
.
hhblits_bin
=
settings
.
Locate
(
'
hhblits
'
,
explicit_file_name
=
hhblits_bin
)
self
.
bin_dir
=
os
.
path
.
dirname
(
self
.
hhblits_bin
)
# guess root folder (note: this may fail in future)
self
.
hhsuite_root
=
os
.
path
.
dirname
(
self
.
bin_dir
)
self
.
hhlib_dir
=
os
.
path
.
join
(
self
.
hhsuite_root
,
'
lib
'
,
'
hh
'
)
if
working_dir
:
self
.
needs_cleanup
=
False
...
...
@@ -487,7 +489,8 @@ class HHblits:
self
.
working_dir
=
tmp_dir
.
dirname
self
.
filename
=
tmp_dir
.
files
[
0
]
def
BuildQueryMSA
(
self
,
nrdb
,
iterations
=
1
,
mact
=
None
,
cpu
=
1
):
def
BuildQueryMSA
(
self
,
nrdb
,
iterations
=
1
,
mact
=
None
,
cpu
=
1
,
cov
=
None
,
show_all
=
False
,
a3m_file
=
None
):
"""
Builds the MSA for the query sequence.
This function directly uses hhblits of hhtools. While in theory it would
...
...
@@ -503,24 +506,33 @@ class HHblits:
The predicted secondary structure is stored together with the sequences
identified by hhblits.
The produced A3M file can be parsed by :func:`ParseA3M`.
The produced A3M file can be parsed by :func:`ParseA3M`. If the file was
already produced, hhblits is not called again and the existing file path
is returned.
:param nrdb: Database to be align against; has to be an hhblits database
:type nrdb: :class:`str`
:param iterations: Number of hhblits iterations
:type iterations: :class:`int`
:param mact: ``-mact`` of hhblits
:type mact: :class:`float`
:param cpu: ``-cpu`` of hhblits
:type cpu: :class:`int`
:param cov:
'
-cov
'
of hhblits
:type cov: :class:`int`
:param show_all:
'
-all
'
of hhblits
:type show_all: :class:`bool`
:param a3m_file: a path of a3m_file to be used, optional
:type a3m_file: :class:`str`
:return: The path to the A3M file containing the MSA
:rtype: :class:`str`
"""
a3m_file
=
'
%s.a3m
'
%
os
.
path
.
splitext
(
self
.
filename
)[
0
]
if
a3m_file
is
None
:
a3m_file
=
'
%s.a3m
'
%
os
.
path
.
splitext
(
self
.
filename
)[
0
]
if
os
.
path
.
exists
(
a3m_file
):
ost
.
LogInfo
(
'
Reusing already existing query alignment (%s)
'
%
a3m_file
)
return
a3m_file
ost
.
LogInfo
(
'
Using hhblits from
"
%s
"'
%
self
.
hhsuite_root
)
full_nrdb
=
os
.
path
.
join
(
os
.
path
.
abspath
(
os
.
path
.
split
(
nrdb
)[
0
]),
os
.
path
.
split
(
nrdb
)[
1
])
...
...
@@ -530,15 +542,16 @@ class HHblits:
full_nrdb
,
iterations
)
if
mact
:
hhblits_cmd
+=
'
-mact %f
'
%
mact
if
cov
is
not
None
:
hhblits_cmd
+=
'
-cov %i
'
%
cov
if
show_all
:
hhblits_cmd
+=
'
-all
'
job
=
subprocess
.
Popen
(
hhblits_cmd
,
shell
=
True
,
cwd
=
self
.
working_dir
,
stdout
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
PIPE
)
sout
,
_
=
job
.
communicate
()
#lines = sout.splitlines()
#for l in lines:
# print l.strip()
#lines = serr.splitlines()
#for l in lines:
# print l.strip()
lines
=
sout
.
splitlines
()
for
l
in
lines
:
print
l
.
strip
()
if
not
os
.
path
.
exists
(
a3m_file
):
ost
.
LogWarning
(
'
Building query profile failed, no output
'
)
return
a3m_file
...
...
@@ -561,7 +574,7 @@ class HHblits:
if
'
error
'
in
line
.
lower
():
ost
.
LogWarning
(
'
Predicting secondary structure for MSA
'
+
'
(%s) failed, on command: %s
'
%
(
a3m_file
,
line
))
return
a3m_file
return
a3m_file
return
a3m_file
def
A3MToProfile
(
self
,
a3m_file
,
hhm_file
=
None
):
...
...
@@ -569,7 +582,10 @@ class HHblits:
Converts the A3M alignment file to a hhm profile. If hhm_file is not
given, the output file will be set to <:attr:`a3m_file`-basename>.hhm.
The produced A3M file can be parsed by :func:`ParseA3M`.
The produced A3M file can be parsed by :func:`ParseHHM`.
If the file was already produced, the existing file path is returned
without recomputing it.
:param a3m_file: Path to input MSA as produced by :meth:`BuildQueryMSA`
:type a3m_file: :class:`str`
...
...
@@ -598,17 +614,20 @@ class HHblits:
cs_file is not given, the output file will be set to
<:attr:`a3m_file`-basename>.seq219.
:param a3m_file: A3M file to be converted
If the file was already produced, the existing file path is returned
without recomputing it.
:param a3m_file: Path to input MSA as produced by :meth:`BuildQueryMSA`
:type a3m_file: :class:`str`
:param cs_file:
o
utput file name (may be omitted)
:param cs_file:
O
utput file name (may be omitted)
:type cs_file: :class:`str`
:param options:
d
ictionary of options to *cstranslate*, must come with
:param options:
D
ictionary of options to *cstranslate*, must come with
the right amount of
'
-
'
in front.
:type options: :class:`dict`
:return:
the p
ath to the column state sequence file
:return:
P
ath to the column state sequence file
:rtype: :class:`str`
"""
cstranslate
=
os
.
path
.
join
(
self
.
hhlib_dir
,
'
bin
'
,
'
cstranslate
'
)
...
...
@@ -624,17 +643,18 @@ class HHblits:
else
:
opt_cmd
.
append
(
'
%s %s
'
%
(
str
(
k
),
str
(
val
)))
opt_cmd
=
'
'
.
join
(
opt_cmd
)
cs_cmd
=
'
%s -i %s -o %s %s
'
%
(
cstranslate
,
a3m_file
,
cs_file
,
opt_cmd
)
cs_cmd
=
'
%s -i %s -o %s %s
'
%
(
cstranslate
,
os
.
path
.
abspath
(
a3m_file
),
os
.
path
.
abspath
(
cs_file
),
opt_cmd
)
ost
.
LogVerbose
(
'
converting %s to %s
'
%
(
a3m_file
,
cs_file
))
job
=
subprocess
.
Popen
(
cs_cmd
,
shell
=
True
,
job
=
subprocess
.
Popen
(
cs_cmd
,
shell
=
True
,
cwd
=
self
.
working_dir
,
stdout
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
PIPE
)
sout
,
_
=
job
.
communicate
()
#lines = serr.splitlines()
#for l in lines:
# print l
lines
=
sout
.
splitlines
()
for
line
in
lines
:
if
line
in
'
Wrote abstract state sequence to
%s
'
%
cs_fil
e
:
if
'
Wrote abstract state sequence to
'
in
lin
e
:
return
cs_file
ost
.
LogWarning
(
'
Creating column state sequence file (%s) failed
'
%
\
cs_file
)
...
...
@@ -665,21 +685,21 @@ class HHblits:
instances at once. Upon success, the filename of the result file is
returned. This file may be parsed with :func:`ParseHHblitsOutput`.
:param a3m_file: input MSA
file
:param a3m_file:
Path to
input MSA
as produced by :meth:`BuildQueryMSA`
:type a3m_file: :class:`str`
:param database:
s
earch database, needs to be the common prefix of the
:param database:
S
earch database, needs to be the common prefix of the
database files
:type database: :class:`str`
:param options:
d
ictionary of options, must come with the right amount
:param options:
D
ictionary of options, must come with the right amount
of
'
-
'
in front.
:type options: :class:`dict`
:param prefix:
p
refix to the result file
:param prefix:
P
refix to the result file
:type prefix: :class:`str`
:return:
t
he path to the result file
:return:
T
he path to the result file
:rtype: :class:`str`
"""
opts
=
{
'
cpu
'
:
1
,
# no. of cpus used
...
...
@@ -702,7 +722,8 @@ class HHblits:
hhr_file
=
os
.
path
.
join
(
self
.
working_dir
,
hhr_file
)
search_cmd
=
'
%s %s -e 0.001 -Z 10000 -B 10000 -i %s -o %s -d %s
'
%
(
self
.
hhblits_bin
,
opt_cmd
,
os
.
path
.
abspath
(
a3m_file
),
opt_cmd
,
os
.
path
.
abspath
(
a3m_file
),
hhr_file
,
os
.
path
.
join
(
os
.
path
.
abspath
(
os
.
path
.
split
(
database
)[
0
]),
os
.
path
.
split
(
database
)[
1
]))
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment