Skip to content
Snippets Groups Projects
Commit 9bb56462 authored by Gerardo Tauriello's avatar Gerardo Tauriello
Browse files

SCHWED-3472: Standardize options-passing for HHblits bindings.

parent d751f00c
No related branches found
No related tags found
No related merge requests found
......@@ -12,6 +12,7 @@ Changes in Release <RELEASE NUMBER>
* Extended lDDT API in ost.mol.alg module to reproduce functionality of lddt
binary.
* Added `actions` interface including one action to compare structures.
* Updated HHblits binding (minor changes for optional arguments).
Changes in Release 1.7.1
--------------------------------------------------------------------------------
......
......@@ -56,6 +56,9 @@ First query by sequence:
for hit in hits:
print hit.aln
# cleanup
hh.Cleanup()
Very similar going by file:
.. code-block:: python
......
......@@ -489,8 +489,7 @@ class HHblits:
self.working_dir = tmp_dir.dirname
self.filename = tmp_dir.files[0]
def BuildQueryMSA(self, nrdb, iterations=1, mact=None, cpu=1, cov=None,
show_all=False, a3m_file=None):
def BuildQueryMSA(self, nrdb, options={}, a3m_file=None):
"""Builds the MSA for the query sequence.
This function directly uses hhblits of hhtools. While in theory it would
......@@ -512,16 +511,13 @@ class HHblits:
:param nrdb: Database to be align against; has to be an hhblits database
:type nrdb: :class:`str`
:param iterations: Number of hhblits iterations
:type iterations: :class:`int`
:param mact: ``-mact`` of hhblits
:type mact: :class:`float`
:param cpu: ``-cpu`` of hhblits
:type cpu: :class:`int`
:param cov: '-cov' of hhblits
:type cov: :class:`int`
:param show_all: '-all' of hhblits
:type show_all: :class:`bool`
:param options: Dictionary of options to *hhblits*, one "-" is added in
front of every key. Boolean True values add flag without
value. Merged with default options {'cpu': 1, 'n': 1},
where 'n' defines the number of iterations.
:type options: :class:`dict`
:param a3m_file: a path of a3m_file to be used, optional
:type a3m_file: :class:`str`
......@@ -537,15 +533,13 @@ class HHblits:
full_nrdb = os.path.join(os.path.abspath(os.path.split(nrdb)[0]),
os.path.split(nrdb)[1])
# create MSA
hhblits_cmd = '%s -e 0.001 -cpu %d -i %s -oa3m %s -d %s -n %d' % \
(self.hhblits_bin, cpu, self.filename, a3m_file,
full_nrdb, iterations)
if mact:
hhblits_cmd += '-mact %f' % mact
if cov is not None:
hhblits_cmd += ' -cov %i' % cov
if show_all:
hhblits_cmd += ' -all'
opts = {'cpu' : 1, # no. of cpus used
'n' : 1} # no. of iterations
opts.update(options)
opt_cmd, _ = _ParseOptions(opts)
hhblits_cmd = '%s -e 0.001 -i %s -oa3m %s -d %s %s' % \
(self.hhblits_bin, self.filename, a3m_file, full_nrdb,
opt_cmd)
job = subprocess.Popen(hhblits_cmd, shell=True, cwd=self.working_dir,
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
sout, _ = job.communicate()
......@@ -623,8 +617,9 @@ class HHblits:
:param cs_file: Output file name (may be omitted)
:type cs_file: :class:`str`
:param options: Dictionary of options to *cstranslate*, must come with
the right amount of '-' in front.
:param options: Dictionary of options to *cstranslate*, one "-" is added
in front of every key. Boolean True values add flag
without value.
:type options: :class:`dict`
:return: Path to the column state sequence file
......@@ -635,14 +630,7 @@ class HHblits:
cs_file = '%s.seq219' % os.path.splitext(a3m_file)[0]
if os.path.exists(cs_file):
return cs_file
opt_cmd = list()
for k, val in options.iteritems():
if type(val) == type(True):
if val == True:
opt_cmd.append('%s' % str(k))
else:
opt_cmd.append('%s %s' % (str(k), str(val)))
opt_cmd = ' '.join(opt_cmd)
opt_cmd, _ = _ParseOptions(options)
cs_cmd = '%s -i %s -o %s %s' % (
cstranslate,
os.path.abspath(a3m_file),
......@@ -692,8 +680,10 @@ class HHblits:
database files
:type database: :class:`str`
:param options: Dictionary of options, must come with the right amount
of '-' in front.
:param options: Dictionary of options to *hhblits*, one "-" is added in
front of every key. Boolean True values add flag without
value. Merged with default options {'cpu': 1, 'n': 1},
where 'n' defines the number of iterations.
:type options: :class:`dict`
:param prefix: Prefix to the result file
......@@ -705,18 +695,7 @@ class HHblits:
opts = {'cpu' : 1, # no. of cpus used
'n' : 1} # no. of iterations
opts.update(options)
opt_cmd = []
opt_str = []
for k, val in opts.iteritems():
if type(val) == type(True):
if val == True:
opt_cmd.append('-%s' % str(k))
opt_str.append(str(k))
else:
opt_cmd.append('-%s %s' % (str(k), str(val)))
opt_str.append('%s%s' % (str(k), str(val)))
opt_cmd = ' '.join(opt_cmd)
opt_str = '_'.join(opt_str)
opt_cmd, opt_str = _ParseOptions(opts)
base = os.path.basename(os.path.splitext(a3m_file)[0])
hhr_file = '%s%s_%s.hhr' % (prefix, base, opt_str)
hhr_file = os.path.join(self.working_dir, hhr_file)
......@@ -743,11 +722,35 @@ class HHblits:
return hhr_file
def _ParseOptions(opts):
"""
:return: Tuple of strings (opt_cmd, opt_str), where opt_cmd can be
passed to command ("-" added in front of keys, options
separated by space) and opt_str (options separated by "_")
can be used for filenames.
:param opts: Dictionary of options, one "-" is added in front of every
key. Boolean True values add flag without value.
"""
opt_cmd = list()
opt_str = list()
for k, val in opts.iteritems():
if type(val) == type(True):
if val == True:
opt_cmd.append('-%s' % str(k))
opt_str.append(str(k))
else:
opt_cmd.append('-%s %s' % (str(k), str(val)))
opt_str.append('%s%s' % (str(k), str(val)))
opt_cmd = ' '.join(opt_cmd)
opt_str = '_'.join(opt_str)
return opt_cmd, opt_str
__all__ = ['HHblits', 'HHblitsHit', 'HHblitsHeader',
'ParseHHblitsOutput', 'ParseA3M', 'ParseHHM',
'ParseHeaderLine']
# LocalWords: HHblits MSA hhblits hhtools PSIPRED addss param nrdb str mact
# LocalWords: HHblits MSA hhblits hhtools PSIPRED addss param nrdb str
# LocalWords: cpu hhm func ParseHHblitsOutput ss pred conf msa hhsuite dir
# LocalWords: attr basename rtype cstranslate tuple HHblitsHeader meth aln
# LocalWords: HHblitsHit iterable evalue pvalue neff hmms datetime
......
......@@ -196,7 +196,7 @@ class TestHHblitsBindings(unittest.TestCase):
_, self.tmpfile = tempfile.mkstemp(suffix='.seq219')
os.remove(self.tmpfile)
csfile = self.hh.A3MToCS("testfiles/testali.a3m",
cs_file=self.tmpfile, options={'--alphabet' :
cs_file=self.tmpfile, options={'-alphabet' :
os.path.join(self.hh.hhlib_dir,
'data',
'cs219.lib')})
......@@ -211,7 +211,7 @@ class TestHHblitsBindings(unittest.TestCase):
'TSKYR')
self.hh = hhblits.HHblits(query_seq, self.hhroot)
csfile = self.hh.A3MToCS("testfiles/testali.a3m",
options={'--alphabet' :
options={'-alphabet' :
os.path.join(self.hh.hhlib_dir,
'data',
'cs219.lib')})
......@@ -228,7 +228,7 @@ class TestHHblitsBindings(unittest.TestCase):
self.hh = hhblits.HHblits(query_seq, self.hhroot)
csfile = self.hh.A3MToCS("testfiles/testali.a3m",
cs_file='testfiles/test.seq219',
options={'--alphabet' :
options={'-alphabet' :
os.path.join(self.hh.hhlib_dir,
'data',
'cs219.lib')})
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment