Skip to content
Snippets Groups Projects
Commit 9f2d6e21 authored by Bienchen's avatar Bienchen
Browse files

Alignments via JSON

parent ca821918
Branches
Tags
No related merge requests found
...@@ -7,12 +7,159 @@ import sys ...@@ -7,12 +7,159 @@ import sys
import os import os
import gzip import gzip
import tempfile import tempfile
#try:
# import ujson as json
#except ImportError:
import json
import ost import ost
from ost import io, seq from ost import io, seq
from promod3.core import helper from promod3.core import helper
def _TmpForGZip(filename, suffix, msg_prefix):
    """Unpack a gzipped file into a named temporary file.

    The existence of ``filename`` is checked with ``helper.FileExists`` (using
    exit code 12) before the file is opened via :mod:`gzip`. The whole
    decompressed content is read into memory and written to the temporary
    file.

    :param filename: Path to the gzipped file.
    :param suffix: Suffix for the name of the temporary file, e.g. ``'.json'``.
    :param msg_prefix: Handed to ``helper.FileExists`` as first argument,
                       presumably a prefix for its error message.

    :returns: The still open temporary file (use its ``name`` attribute to
              read it). The caller has to keep a reference as long as the
              file is needed, it is removed from disk once closed.
    """
    helper.FileExists(msg_prefix, 12, filename)
    zip_fh = gzip.open(filename)
    try:
        unzip_str = zip_fh.read()
    finally:
        # do not leak the handle if decompression fails half way
        zip_fh.close()
    # gzip delivers raw bytes, so write them back untouched in binary mode
    unzip_file = tempfile.NamedTemporaryFile(mode='wb', suffix=suffix)
    try:
        unzip_file.write(unzip_str)
        # make sure the data is on disk before somebody opens the file by name
        unzip_file.flush()
    except Exception:
        # closing removes the half written temporary file
        unzip_file.close()
        raise
    return unzip_file
def _CheckJSONAlnSeqKeyType(key_name, val_type, json_aln, seqtype, json_source):
'''Check a key/ value in a sequence exists and is of certain type.
'''
if key_name not in json_aln[seqtype].keys():
helper.MsgErrorAndExit("JSON 'alignmentlist' '%s' " % seqtype+
"from '%s' is " % json_source+
"missing the '%s' key" % key_name, 27)
altype = type(json_aln[seqtype][key_name])
if val_type is str or val_type is unicode:
if not (altype is unicode or altype is str):
helper.MsgErrorAndExit("JSON 'alignmentlist' '%s' " % seqtype+
"'%s' from" % key_name+
"'%s' is not a " % json_source+
"%s" % str(val_type), 28)
elif not altype is val_type:
helper.MsgErrorAndExit("JSON 'alignmentlist' '%s' " % seqtype+
"'%s' from" % key_name+
"'%s' is not a " % json_source+
"%s" % str(val_type), 28)
def _CreateNewAln(trg_name, trg_seq, trg_start, trg_end, tpl_name, tpl_seq,
                  tpl_offset):
    '''Assemble a target-template alignment from names and sequence strings.

    The target sequence is cut down to ``trg_seq[trg_start:trg_end]``, the
    template sequence is used as is. In the resulting alignment, sequence 0
    gets the role ``TARGET``, sequence 1 the role ``TEMPLATE`` and
    ``tpl_offset`` is stored as sequence offset of the template.

    :returns: The alignment created by ``seq.CreateAlignment``.
    '''
    # internal function to make things easier in other places, pylint ignored
    #pylint: disable=too-many-arguments
    target = seq.CreateSequence(trg_name, trg_seq[trg_start:trg_end])
    template = seq.CreateSequence(tpl_name, tpl_seq)
    aln = seq.CreateAlignment(target, template)
    for idx, role in enumerate(('TARGET', 'TEMPLATE')):
        aln.SetSequenceRole(idx, role)
    aln.SetSequenceOffset(1, tpl_offset)
    return aln
def _GetAlnFromJSON(json_object, json_source):
"""Create alignments from a JSON object.
Iterate the alignments in a JSON object and deliver OST alignments via the
yield operator.
"""
# alignments are stored via the 'alignmentlist' key
if 'alignmentlist' not in json_object.keys():
helper.MsgErrorAndExit("JSON object from '%s' does not " % json_source+
"provide an 'alignmentlist' key.", 21)
# alignments come as lists, to enable hetero oligos
if not type(json_object['alignmentlist']) is list:
helper.MsgErrorAndExit("JSON object from '%s' does not" % json_source+
"provide a list behind 'alignmentlist'.", 24)
# take the alignments apart, each alignment is a dictionary
for json_aln in json_object['alignmentlist']:
# json_aln needs to be a dictionary
if not type(json_aln) is dict:
helper.MsgErrorAndExit("JSON 'alignmentlist' member from "+
"'%s' is not a ' " % json_source+
" dictionary: %s" % json_aln, 25)
# an alignment has a 'target' and a 'template' dictionary
# each of them has a 'name' and a 'seqres' pair
for flav in ['target', 'template']:
if flav not in json_aln.keys():
helper.MsgErrorAndExit("JSON 'alignmentlist' from "+
"'%s' does not " % json_source+
"provide a '%s' key." % flav, 22)
# check sequence to be dictionary
if not type(json_aln[flav]) is dict:
helper.MsgErrorAndExit("JSON 'alignmentlist' '%s' from" % flav+
"'%s' is not a " % json_source+
"dictionary: %s" % json_aln[flav], 26)
# check for keys needed by both sequences:
for aln_key in ['name', 'seqres']:
_CheckJSONAlnSeqKeyType(aln_key, str, json_aln, flav,
json_source)
_CheckJSONAlnSeqKeyType('offset', int, json_aln, 'template',
json_source)
yield _CreateNewAln(str(json_aln['target']['name']).strip(),
str(json_aln['target']['seqres']),
0,
len(json_aln['target']['seqres']),
str(json_aln['template']['name']).strip(),
str(json_aln['template']['seqres']),
json_aln['template']['offset'])
def _GetJSONOBject(json_input):
"""Get a JSON object out of a string which may be an object or a path.
If the input string starts with '{', we assume its a JSON object. File names
starting with '{' would be a bit weird.
If we are looking at a file, check and load it.
For a JSON object, check that everything is there. No checks for
superfluous stuff.
As returnvalue we only use JSON objects.
"""
if json_input[0] != '{':
is_gz = helper.FileGzip("JSON alignment", 13, json_input)
readfile = json_input
if is_gz:
unzip_file = _TmpForGZip(json_input, '.json', "JSON alignment")
readfile = unzip_file.name
try:
jfh = open(readfile)
except IOError, ioe:
helper.MsgErrorAndExit("'--json' file '%s' " % json_input+
"can not be processed: %s" % ioe.strerror,
19)
except:
raise
try:
json_object = json.load(jfh)
except ValueError, vae:
if vae.message == 'No JSON object could be decoded':
helper.MsgErrorAndExit("'--json' file '%s' could " % json_input+
"not be processed into a JSON object, "+
"probably it's empty.", 20)
else:
raise
except:
raise
jfh.close()
else:
try:
json_object = json.loads(json_input)
except ValueError, vae:
helper.MsgErrorAndExit("'--json' string '%s' " % json_input+\
"could not be decoded: %s" % vae.message, 23)
return json_object
def _GetTrgNameSeqFile(argstr): def _GetTrgNameSeqFile(argstr):
"""Sort out what is target name and what is the sequence file name. """Sort out what is target name and what is the sequence file name.
...@@ -50,13 +197,7 @@ def _FetchAlnFromFastaOpt(argstr): ...@@ -50,13 +197,7 @@ def _FetchAlnFromFastaOpt(argstr):
# loading the alignment, switch for gzip # loading the alignment, switch for gzip
readfile = seqfile readfile = seqfile
if is_gz: if is_gz:
zip_fh = gzip.open(seqfile) unzip_file = _TmpForGZip(seqfile, '.fas', "Alignment")
unzip_str = zip_fh.read()
zip_fh.close()
unzip_file = tempfile.NamedTemporaryFile(mode='w',
suffix='.fas')
unzip_file.write(unzip_str)
unzip_file.flush()
readfile = unzip_file.name readfile = unzip_file.name
try: try:
aln = io.LoadAlignment(readfile, format="fasta") aln = io.LoadAlignment(readfile, format="fasta")
...@@ -114,12 +255,8 @@ def _AssembleTrgTplAln(target, template): ...@@ -114,12 +255,8 @@ def _AssembleTrgTplAln(target, template):
tpl_str = '' tpl_str = ''
for i in range(start, end): for i in range(start, end):
tpl_str += template[i] tpl_str += template[i]
new_aln = seq.CreateAlignment(seq.CreateSequence(target.name.strip(), return _CreateNewAln(target.name.strip(), str(target), start, end,
str(target)[start:end]), template.name.strip(), tpl_str, start)
seq.CreateSequence(template.name.strip(),
tpl_str))
new_aln.SetSequenceOffset(1, start)
return new_aln
class PM3StoreOnceAction(argparse.Action): class PM3StoreOnceAction(argparse.Action):
...@@ -207,7 +344,7 @@ class PM3ArgumentParser(argparse.ArgumentParser): ...@@ -207,7 +344,7 @@ class PM3ArgumentParser(argparse.ArgumentParser):
:param args: The argument string. As default |sysargv|_ is used. :param args: The argument string. As default |sysargv|_ is used.
:type args: :class:`list` :type args: :class:`list`
:returns: :class:`promod3.cor.pm3argparse.PM3OptionsNamespace`. :returns: :class:`promod3.core.pm3argparse.PM3OptionsNamespace`.
""" """
opts = PM3OptionsNamespace() opts = PM3OptionsNamespace()
self.parse_args(args=args, namespace=opts) self.parse_args(args=args, namespace=opts)
...@@ -227,7 +364,8 @@ class PM3ArgumentParser(argparse.ArgumentParser): ...@@ -227,7 +364,8 @@ class PM3ArgumentParser(argparse.ArgumentParser):
self._AssembleAlignment() self._AssembleAlignment()
def AddAlignment(self): def AddAlignment(self):
""" """Commandline options for alignments.
Add everything needed to load alignments to the argument parser. Creates Add everything needed to load alignments to the argument parser. Creates
several options/ arguments and adds some checks for post processing. several options/ arguments and adds some checks for post processing.
This method only adds a flag to the parser to add alignment options on This method only adds a flag to the parser to add alignment options on
...@@ -263,6 +401,29 @@ class PM3ArgumentParser(argparse.ArgumentParser): ...@@ -263,6 +401,29 @@ class PM3ArgumentParser(argparse.ArgumentParser):
* 18 - sequences in the alignment have different length * 18 - sequences in the alignment have different length
* 19 - problem with a JSON formatted file handed over to ``--json``
* 20 - JSON file could not be decoded into a JSON object
* 21 - JSON object has no 'alignmentlist' key
* 22 - JSON object has no 'target'/ 'template' in the 'alignmentlist'
* 23 - JSON string could not be decoded
* 24 - JSON object 'alignmentlist' does not point to a list
* 25 - JSON object 'alignmentlist' member is not a dictionary
* 26 - JSON object 'alignmentlist' 'target'/ 'template' does not point
to a dictionary
* 27 - JSON object 'alignmentlist' 'target'/ 'template' does not have
a needed key
* 28 - JSON object 'alignmentlist' 'target'/ 'template' has a value of
wrong type
Attributes added to the namespace returned by Attributes added to the namespace returned by
:meth:`Parse`: :meth:`Parse`:
...@@ -273,7 +434,10 @@ class PM3ArgumentParser(argparse.ArgumentParser): ...@@ -273,7 +434,10 @@ class PM3ArgumentParser(argparse.ArgumentParser):
be a filename of a JSON object string. be a filename of a JSON object string.
* :attr:`alignments` - :class:`ost.AlignmentList`, same order as * :attr:`alignments` - :class:`ost.AlignmentList`, same order as
:attr:`fasta` :attr:`fasta`, likely to **not** follow the order
of JSON input; first sequence of the alignment is
the target sequence, if in doubt, check for
sequence roles ``TARGET`` or ``TEMPLATE``
* :attr:`aln_sources` - the original source of the alignment, may be * :attr:`aln_sources` - the original source of the alignment, may be
filename(s) or a string in JSON format, filename(s) or a string in JSON format,
...@@ -307,6 +471,8 @@ class PM3ArgumentParser(argparse.ArgumentParser): ...@@ -307,6 +471,8 @@ class PM3ArgumentParser(argparse.ArgumentParser):
action=PM3StoreOnceAction) action=PM3StoreOnceAction)
class PM3OptionsNamespace(object): class PM3OptionsNamespace(object):
# class will grow, so for the moment pylint is ignored
#pylint: disable=too-few-public-methods
""" """
This one is mainly for internal use. You can use it like everything that This one is mainly for internal use. You can use it like everything that
comes out of :meth:`argparse.ArgumentParser.parse_args`. Attributes are comes out of :meth:`argparse.ArgumentParser.parse_args`. Attributes are
...@@ -338,6 +504,11 @@ class PM3OptionsNamespace(object): ...@@ -338,6 +504,11 @@ class PM3OptionsNamespace(object):
return return
# Now for JSON input. Since one of the options needs to be given and # Now for JSON input. Since one of the options needs to be given and
# we already checked for FastA, no need to open a new branch, here. # we already checked for FastA, no need to open a new branch, here.
# decide if file or object
json_obj = _GetJSONOBject(self.json)
for aln in _GetAlnFromJSON(json_obj, self.json):
self.alignments.append(aln)
self.aln_sources.append(self.json)
# LocalWords: param attr prog argparse ArgumentParser bool sys os init str # LocalWords: param attr prog argparse ArgumentParser bool sys os init str
# LocalWords: progattr descattr argpinit argv formatter meth args namespace # LocalWords: progattr descattr argpinit argv formatter meth args namespace
......
This diff is collapsed.
...@@ -173,10 +173,10 @@ module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ ...@@ -173,10 +173,10 @@ module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$
module-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ module-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$
# Regular expression matching correct method names # Regular expression matching correct method names
method-rgx=(?:setUp|(?:[_A-Z]|test[A-Z])[a-zA-Z0-9_]{2,30})$ method-rgx=(?:setUp|(?:[_A-Z]|test[A-Z])[a-zA-Z0-9_]{2,40})$
# Naming hint for method names # Naming hint for method names
method-name-hint=(?:setUp|(?:[A-Z]|test[A-Z])[a-zA-Z0-9_]{2,30})$ method-name-hint=(?:setUp|(?:[A-Z]|test[A-Z])[a-zA-Z0-9_]{2,40})$
# Regular expression which should only match function or class names that do # Regular expression which should only match function or class names that do
# not require a docstring. # not require a docstring.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment