# core/pymod/core/pm3argparse.py -- helpers for JSON alignment input,
# written out as plain Python from the whitespace-collapsed patch text.
import sys
import os
import gzip
import tempfile
#try:
#    import ujson as json
#except ImportError:
import json

# The patch context additionally shows these project imports, which the
# helpers below use as ``helper`` and ``seq``:
#   import ost
#   from ost import io, seq
#   from promod3.core import helper

try:
    # Python 2: JSON text may come back as either str or unicode
    _STRING_TYPES = (str, unicode)
except NameError:
    # Python 3: there is no separate unicode type
    _STRING_TYPES = (str,)

# Messages of the ValueError raised by the json module when there is nothing
# to decode at the very beginning of the input: first entry is the Python 2
# wording, second entry the Python 3 wording.
_EMPTY_JSON_MESSAGES = ('No JSON object could be decoded',
                        'Expecting value: line 1 column 1 (char 0)')


def _TmpForGZip(filename, suffix, msg_prefix):
    """Unpack a gzipped file into a named temporary file.

    :param filename: Path of the gzipped file.
    :param suffix: Suffix for the name of the temporary file.
    :param msg_prefix: Passed on to :func:`helper.FileExists` together with
                       code 12 for the existence check on *filename*.

    :returns: The still open temporary file object holding the unpacked
              data; use its ``name`` attribute for reading. The caller has to
              keep the object alive as long as the file is needed.
    """
    helper.FileExists(msg_prefix, 12, filename)
    # context manager closes the gzip handle even if reading fails
    with gzip.open(filename) as zip_fh:
        unzip_str = zip_fh.read()
    # gzip delivers bytes, so the temporary file has to be opened binary
    unzip_file = tempfile.NamedTemporaryFile(mode='wb', suffix=suffix)
    try:
        unzip_file.write(unzip_str)
        unzip_file.flush()
    except Exception:
        # do not leave a half written temporary file behind
        unzip_file.close()
        raise
    return unzip_file


def _CheckJSONAlnSeqKeyType(key_name, val_type, json_aln, seqtype, json_source):
    """Check that a key of a sequence exists and its value has a given type.

    :param key_name: Key to look up in ``json_aln[seqtype]``.
    :param val_type: Expected type of the value. For string types any string
                     type of the running interpreter is accepted, every other
                     type has to match exactly.
    :param json_aln: Alignment dictionary from the 'alignmentlist'.
    :param seqtype: Sequence to check, e.g. 'target' or 'template'.
    :param json_source: Origin of the JSON data, only used in messages.

    Reports a missing key with code 27 and a wrong type with code 28 via
    :func:`helper.MsgErrorAndExit`.
    """
    seq_dict = json_aln[seqtype]
    if key_name not in seq_dict:
        helper.MsgErrorAndExit("JSON 'alignmentlist' '%s' " % seqtype+
                               "from '%s' is " % json_source+
                               "missing the '%s' key" % key_name, 27)
    value = seq_dict[key_name]

    if val_type in _STRING_TYPES:
        type_ok = isinstance(value, _STRING_TYPES)
    else:
        # exact match on purpose: bool is a subclass of int, a JSON 'true'
        # must not pass as an integer
        type_ok = type(value) is val_type
    if not type_ok:
        # wording (incl. the missing blank after 'from') is asserted verbatim
        # by the unit tests, do not touch
        helper.MsgErrorAndExit("JSON 'alignmentlist' '%s' " % seqtype+
                               "'%s' from" % key_name+
                               "'%s' is not a " % json_source+
                               "%s" % str(val_type), 28)


def _CreateNewAln(trg_name, trg_seq, trg_start, trg_end, tpl_name, tpl_seq,
                  tpl_offset):
    """Produce a new target-template alignment.

    :param trg_name: Name of the target sequence.
    :param trg_seq: Target sequence string, only ``[trg_start:trg_end]`` goes
                    into the alignment.
    :param trg_start: Start index of the slice of *trg_seq*.
    :param trg_end: End index (exclusive) of the slice of *trg_seq*.
    :param tpl_name: Name of the template sequence.
    :param tpl_seq: Template sequence string, used as is.
    :param tpl_offset: Offset set on the template sequence (index 1).

    :returns: Alignment with the target as sequence 0 (role ``TARGET``) and
              the template as sequence 1 (role ``TEMPLATE``).
    """
    # internal function to make things easier in other places, pylint ignored
    #pylint: disable=too-many-arguments
    trg = seq.CreateSequence(trg_name, trg_seq[trg_start:trg_end])
    tpl = seq.CreateSequence(tpl_name, tpl_seq)
    new_aln = seq.CreateAlignment(trg, tpl)
    new_aln.SetSequenceRole(0, 'TARGET')
    new_aln.SetSequenceRole(1, 'TEMPLATE')
    new_aln.SetSequenceOffset(1, tpl_offset)
    return new_aln


def _GetAlnFromJSON(json_object, json_source):
    """Create alignments from a JSON object.

    Iterate the alignments in a JSON object and deliver them one by one via
    the yield operator.

    :param json_object: Decoded JSON data, needs an 'alignmentlist' key
                        pointing to a list of dictionaries. Each of them
                        needs a 'target' and a 'template' dictionary with
                        'name' and 'seqres' strings, the template also needs
                        an integer 'offset'.
    :param json_source: Origin of the JSON data, only used in messages.

    :returns: Generator over alignments built by :func:`_CreateNewAln`.

    Problems are reported via :func:`helper.MsgErrorAndExit` with codes 21,
    22 and 24 to 28. The wording of the messages is asserted verbatim by the
    unit tests and therefore kept as is.
    """
    # alignments are stored via the 'alignmentlist' key; anything which is
    # not a dictionary can not provide that key
    if not isinstance(json_object, dict) or \
       'alignmentlist' not in json_object:
        helper.MsgErrorAndExit("JSON object from '%s' does not " % json_source+
                               "provide an 'alignmentlist' key.", 21)
    # alignments come as lists, to enable hetero oligos
    if not isinstance(json_object['alignmentlist'], list):
        helper.MsgErrorAndExit("JSON object from '%s' does not" % json_source+
                               "provide a list behind 'alignmentlist'.", 24)
    # take the alignments apart, each alignment is a dictionary
    for json_aln in json_object['alignmentlist']:
        # json_aln needs to be a dictionary
        if not isinstance(json_aln, dict):
            helper.MsgErrorAndExit("JSON 'alignmentlist' member from "+
                                   "'%s' is not a ' " % json_source+
                                   " dictionary: %s" % json_aln, 25)
        # an alignment has a 'target' and a 'template' dictionary
        # each of them has a 'name' and a 'seqres' pair
        for flav in ['target', 'template']:
            if flav not in json_aln:
                helper.MsgErrorAndExit("JSON 'alignmentlist' from "+
                                       "'%s' does not " % json_source+
                                       "provide a '%s' key." % flav, 22)
            # check sequence to be dictionary
            if not isinstance(json_aln[flav], dict):
                helper.MsgErrorAndExit("JSON 'alignmentlist' '%s' from" % flav+
                                       "'%s' is not a " % json_source+
                                       "dictionary: %s" % json_aln[flav], 26)
            # check for keys needed by both sequences:
            for aln_key in ['name', 'seqres']:
                _CheckJSONAlnSeqKeyType(aln_key, str, json_aln, flav,
                                        json_source)
        # only the template carries an offset; checked after both sequences
        # are known to be dictionaries
        _CheckJSONAlnSeqKeyType('offset', int, json_aln, 'template',
                                json_source)

        target = json_aln['target']
        template = json_aln['template']
        yield _CreateNewAln(str(target['name']).strip(),
                            str(target['seqres']),
                            0,
                            len(target['seqres']),
                            str(template['name']).strip(),
                            str(template['seqres']),
                            template['offset'])


def _GetJSONOBject(json_input):
    """Get a JSON object out of a string which may be an object or a path.

    If the input string starts with '{', we assume it is a JSON object. File
    names starting with '{' would be a bit weird. Everything else, including
    an empty string, is treated as a path.

    If we are looking at a file, check and load it; gzipped files are
    unpacked into a temporary file first.

    No checks on the content are done here, superfluous stuff is ignored.

    :param json_input: JSON formatted string or path to a JSON file.

    :returns: The decoded JSON data.

    Problems are reported via :func:`helper.MsgErrorAndExit`: code 19 for
    files which can not be opened, 20 for files without decodable content
    and 23 for strings which can not be decoded. Other decoding errors of
    files are passed on as :class:`ValueError`.
    """
    if json_input.startswith('{'):
        try:
            return json.loads(json_input)
        except ValueError as vae:
            helper.MsgErrorAndExit("'--json' string '%s' " % json_input+
                                   "could not be decoded: %s" % str(vae), 23)

    is_gz = helper.FileGzip("JSON alignment", 13, json_input)
    readfile = json_input
    unzip_file = None
    if is_gz:
        unzip_file = _TmpForGZip(json_input, '.json', "JSON alignment")
        readfile = unzip_file.name
    try:
        try:
            jfh = open(readfile)
        except IOError as ioe:
            helper.MsgErrorAndExit("'--json' file '%s' " % json_input+
                                   "can not be processed: %s" % ioe.strerror,
                                   19)
        try:
            json_object = json.load(jfh)
        except ValueError as vae:
            if str(vae) in _EMPTY_JSON_MESSAGES:
                helper.MsgErrorAndExit("'--json' file '%s' could " % json_input+
                                       "not be processed into a JSON object, "+
                                       "probably it's empty.", 20)
            else:
                raise
        finally:
            # never leave the file handle open, not even on errors
            jfh.close()
    finally:
        # closing the temporary file removes the unpacked copy
        if unzip_file is not None:
            unzip_file.close()
    return json_object

# def _GetTrgNameSeqFile(argstr) follows unchanged in the module, the patch
# only shows its first lines as context.
@@ -50,13 +197,7 @@ def _FetchAlnFromFastaOpt(argstr): # loading the alignment, switch for gzip readfile = seqfile if is_gz: - zip_fh = gzip.open(seqfile) - unzip_str = zip_fh.read() - zip_fh.close() - unzip_file = tempfile.NamedTemporaryFile(mode='w', - suffix='.fas') - unzip_file.write(unzip_str) - unzip_file.flush() + unzip_file = _TmpForGZip(seqfile, '.fas', "Alignment") readfile = unzip_file.name try: aln = io.LoadAlignment(readfile, format="fasta") @@ -114,12 +255,8 @@ def _AssembleTrgTplAln(target, template): tpl_str = '' for i in range(start, end): tpl_str += template[i] - new_aln = seq.CreateAlignment(seq.CreateSequence(target.name.strip(), - str(target)[start:end]), - seq.CreateSequence(template.name.strip(), - tpl_str)) - new_aln.SetSequenceOffset(1, start) - return new_aln + return _CreateNewAln(target.name.strip(), str(target), start, end, + template.name.strip(), tpl_str, start) class PM3StoreOnceAction(argparse.Action): @@ -207,7 +344,7 @@ class PM3ArgumentParser(argparse.ArgumentParser): :param args: The argument string. As default |sysargv|_ is used. :type args: :class:`list` - :returns: :class:`promod3.cor.pm3argparse.PM3OptionsNamespace`. + :returns: :class:`promod3.core.pm3argparse.PM3OptionsNamespace`. """ opts = PM3OptionsNamespace() self.parse_args(args=args, namespace=opts) @@ -227,7 +364,8 @@ class PM3ArgumentParser(argparse.ArgumentParser): self._AssembleAlignment() def AddAlignment(self): - """ + """Commandline options for alignments. + Add everything needed to load alignments to the argument parser. Creates several options/ arguments and adds some checks for post processing. 
This method only adds a flag to the parser to add alignment options on @@ -263,6 +401,29 @@ class PM3ArgumentParser(argparse.ArgumentParser): * 18 - sequences in the alignment have different length + * 19 - problem with a JSON formatted file handed over to ``--json`` + + * 20 - JSON file could not be decoded into a JSON object + + * 21 - JSON object has no 'alignmentlist' key + + * 22 - JSON object has no 'target'/ 'template' in the 'alignmentlist' + + * 23 - JSON string could not be decoded + + * 24 - JSON object 'alignmentlist' does not point to a list + + * 25 - JSON object 'alignmentlist' member is not a dictionary + + * 26 - JSON object 'alignmentlist' 'target'/ 'template' does not point + to a dictionary + + * 27 - JSON object 'alignmentlist' 'target'/ 'template' does not have + a needed key + + * 28 - JSON object 'alignmentlist' 'target'/ 'template' has a value of + wrong type + Attributes added to the namespace returned by :meth:`Parse`: @@ -273,7 +434,10 @@ class PM3ArgumentParser(argparse.ArgumentParser): be a filename of a JSON object string. * :attr:`alignments` - :class:`ost.AlignmentList`, same order as - :attr:`fasta` + :attr:`fasta`, likely to **not** follow the order + of JSON input; first sequence of the alignment is + the target sequence, if in doubt, check for + sequence roles ``TARGET`` or ``TEMPLATE`` * :attr:`aln_sources` - the original source of the alignment, may be filename(s) or a string in JSON format, @@ -307,6 +471,8 @@ class PM3ArgumentParser(argparse.ArgumentParser): action=PM3StoreOnceAction) class PM3OptionsNamespace(object): + # class will grow, so for the moment pylint is ignored + #pylint: disable=too-few-public-methods """ This one is mainly for internal use. You can use it like everything that comes out of :meth:`argparse.ArgumentParser.parse_args`. Attributes are @@ -338,6 +504,11 @@ class PM3OptionsNamespace(object): return # Now for JSON input. 
Since one of the options needs to be given and # we already checked for FastA, no need to open a new branch, here. + # decide if file or object + json_obj = _GetJSONOBject(self.json) + for aln in _GetAlnFromJSON(json_obj, self.json): + self.alignments.append(aln) + self.aln_sources.append(self.json) # LocalWords: param attr prog argparse ArgumentParser bool sys os init str # LocalWords: progattr descattr argpinit argv formatter meth args namespace diff --git a/core/tests/test_pm3argparse.py b/core/tests/test_pm3argparse.py index d2101615d3955a6937fffcaa46a599ce67b0e07b..350a37a86c7d83a15ab544cf419a26e190f36c30 100644 --- a/core/tests/test_pm3argparse.py +++ b/core/tests/test_pm3argparse.py @@ -3,6 +3,12 @@ Testing our own little argument parser. """ import unittest +import tempfile +import gzip +try: + import ujson as json +except ImportError: + import json import ost from promod3.core import pm3argparse @@ -23,6 +29,27 @@ class _FetchLog(ost.LogSink): self.messages[level] = list() self.messages[level].append(message.strip()) +def _GetJsonTestObj(): + '''Create a JSOn object for testing + ''' + obj = dict() + ali = dict() + ali['target'] = {'name': 'TARGET', + 'seqres': + 'VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHF-DLS-----'+ + 'HGSAQVKGHGKKVADALTNAVAHVDDMPNALSALSDLHAHK-LRVDPVNFKLLSH'+ + 'CLLVTLAAHLPAEFTPAVHASLDKFLASVSTVLTSKYR'} + ali['template'] = {'name': 'TEMPLATE', + 'seqres': + 'HLTPEEKSAVTALWGKVN--VDEVGGEALGRLLVVYPWTQRFFESFGDLSTPD'+ + 'AVMGNPKVKAHGKKVLGAFSDGLAHLDNLKGTFATLSELHC-DKLHVDPENFR'+ + 'LLGNVLVCVLAHHFGKEFTPPVQAAYQKVVAGVANALAHKYH', + 'offset': 0} + obj['alignmentlist'] = list() + obj['alignmentlist'].append(ali) + + return obj + class PM3ArgParseTests(unittest.TestCase): def setUp(self): self.log = _FetchLog() @@ -240,6 +267,21 @@ class PM3ArgParseTests(unittest.TestCase): 'target ELTTRKDDQEETVRKRLVEYHQMTAPLL--YYYYKEAEAGNTK'+ 'YAKVDGTKPVAEVRADLEKILG\n1AKE.B ELTTRKDDQEETVRKRLVE'+ 'YHQMTAPLIGYYYYSKEAEAGNTKYAKVDGTKPV---AEVRADLEK\n') + 
self.assertEqual(opts.json, None) + + def testAddAlignmentGzipNoExist(self): + # using a gzip FastA file which does not exist + parser = pm3argparse.PM3ArgumentParser(__doc__, action=False) + parser.AddAlignment() + parser.AssembleParser() + with self.assertRaises(SystemExit) as ecd: + parser.Parse(['--fasta', 'trg:target', + 'data/fasta/notthere.fas.gz']) + self.assertEqual(ecd.exception.code, 12) + self.assertEqual(len(self.log.messages['ERROR']), 1) + self.assertEqual(self.log.messages['ERROR'][0], + "Alignment file does not exist: "+ + "data/fasta/notthere.fas.gz") def testAddAlignmentSwitchSeqs(self): parser = pm3argparse.PM3ArgumentParser(__doc__, action=False) @@ -348,15 +390,508 @@ class PM3ArgParseTests(unittest.TestCase): 'test_pm3argparse.py: error: argument -j/--json: '+ 'expected one argument']) - def testAddAlignmentJsonWorkingFile(self): + def testAddAlignmentJsonFileNoExist(self): + # fail when a non-existing file is handed over to --json + parser = pm3argparse.PM3ArgumentParser(__doc__, action=False) + parser.AddAlignment() + parser.AssembleParser() + with self.assertRaises(SystemExit) as ecd: + parser.Parse(['--json', 'fileshouldnotexist']) + self.assertEqual(ecd.exception.code, 19) + self.assertEqual(len(self.log.messages['ERROR']), 1) + self.assertEqual(self.log.messages['ERROR'], + ["'--json' file 'fileshouldnotexist' can not be "+ + "processed: No such file or directory"]) + + def testAddAlignmentJsonGzipNoExist(self): + # lets see what happens on json with a missing gzip file + parser = pm3argparse.PM3ArgumentParser(__doc__, action=False) + parser.AddAlignment() + parser.AssembleParser() + with self.assertRaises(SystemExit) as ecd: + parser.Parse(['--json', 'fileshouldnotexist.gzip']) + self.assertEqual(ecd.exception.code, 19) + self.assertEqual(len(self.log.messages['ERROR']), 1) + self.assertEqual(self.log.messages['ERROR'], + ["'--json' file 'fileshouldnotexist.gzip' can not be "+ + "processed: No such file or directory"]) + + def 
testAddAlignmentJsonEmptyFile(self): + # we want to fail on empty JSON files + parser = pm3argparse.PM3ArgumentParser(__doc__, action=False) + parser.AddAlignment() + parser.AssembleParser() + tmp_ali = tempfile.NamedTemporaryFile(suffix='.json') + with self.assertRaises(SystemExit) as ecd: + parser.Parse(['--json', tmp_ali.name]) + tmp_ali.close() + self.assertEqual(ecd.exception.code, 20) + self.assertEqual(len(self.log.messages['ERROR']), 1) + self.assertEqual(self.log.messages['ERROR'], + ["'--json' file '%s' could not be " % tmp_ali.name+ + "processed into a JSON object, probably it's empty."]) + + def testAddAlignmentJsonNoAlnLstLstKey(self): + # check that 'alignmentlist' features a list + parser = pm3argparse.PM3ArgumentParser(__doc__, action=False) + parser.AddAlignment() + parser.AssembleParser() + json_str = json.dumps({'alignmentlist': "I'm not a list!"}) + with self.assertRaises(SystemExit) as ecd: + parser.Parse(['--json', json_str]) + self.assertEqual(ecd.exception.code, 24) + self.assertEqual(len(self.log.messages['ERROR']), 1) + self.assertEqual(self.log.messages['ERROR'], + ['JSON object from \'{"alignmentlist": "I\'m not a '+ + 'list!"}\' does notprovide a list behind '+ + '\'alignmentlist\'.']) + + def testAddAlignmentJsonMalString(self): + # fail on improper JSON string + parser = pm3argparse.PM3ArgumentParser(__doc__, action=False) + parser.AddAlignment() + parser.AssembleParser() + json_obj = "{'Wrong': 'wrong'}" + with self.assertRaises(SystemExit) as ecd: + parser.Parse(['--json', json_obj]) + self.assertEqual(ecd.exception.code, 23) + self.assertEqual(len(self.log.messages['ERROR']), 1) + self.assertEqual(self.log.messages['ERROR'], + ["'--json' string '{'Wrong': 'wrong'}' could not be "+ + "decoded: Expecting property name: line 1 column 2 "+ + "(char 1)"]) + + def testAddAlignmentJsonNoAlnLstKey(self): + # detect missing key 'alignmentlist + parser = pm3argparse.PM3ArgumentParser(__doc__, action=False) + parser.AddAlignment() + 
parser.AssembleParser() + json_obj = json.dumps({"Sth different": "Foo"}) + with self.assertRaises(SystemExit) as ecd: + parser.Parse(['--json', json_obj]) + self.assertEqual(ecd.exception.code, 21) + self.assertEqual(len(self.log.messages['ERROR']), 1) + self.assertEqual(self.log.messages['ERROR'], + ['JSON object from \'{"Sth different": "Foo"}\' '+ + 'does not provide an \'alignmentlist\' key.']) + + def testAddAlignmentJsonNoTargetKey(self): + # check that 'alignmentlist'target' is required + parser = pm3argparse.PM3ArgumentParser(__doc__, action=False) + parser.AddAlignment() + parser.AssembleParser() + json_str = json.dumps({'alignmentlist': [{'Foo': 'BAR'}]}) + with self.assertRaises(SystemExit) as ecd: + parser.Parse(['--json', json_str]) + self.assertEqual(ecd.exception.code, 22) + self.assertEqual(len(self.log.messages['ERROR']), 1) + self.assertEqual(self.log.messages['ERROR'], + ['JSON \'alignmentlist\' from \'{"alignmentlist": '+ + '[{"Foo": "BAR"}]}\' does not provide a \'target\' '+ + 'key.']) + + def testAddAlignmentJsonNoTemplateKey(self): + # check that 'alignmentlist'template' is required + parser = pm3argparse.PM3ArgumentParser(__doc__, action=False) + parser.AddAlignment() + parser.AssembleParser() + json_str = json.dumps({'alignmentlist': [{'target': {'name' : 'AAA', + 'seqres': 'AA'}}]}) + with self.assertRaises(SystemExit) as ecd: + parser.Parse(['--json', json_str]) + self.assertEqual(ecd.exception.code, 22) + self.assertEqual(len(self.log.messages['ERROR']), 1) + self.assertEqual(self.log.messages['ERROR'], + ['JSON \'alignmentlist\' from \'{"alignmentlist": '+ + '[{"target": {"seqres": "AA", "name": "AAA"}}]}\' '+ + 'does not provide a \'template\' key.']) + + def testAddAlignmentJsonAlnTrgNoDict(self): + # entries of the alignmentlist need to be dict's + parser = pm3argparse.PM3ArgumentParser(__doc__, action=False) + parser.AddAlignment() + parser.AssembleParser() + json_str = json.dumps({'alignmentlist': ['Seq1', 'Seq2']}) + with 
self.assertRaises(SystemExit) as ecd: + parser.Parse(['--json', json_str]) + self.assertEqual(ecd.exception.code, 25) + self.assertEqual(len(self.log.messages['ERROR']), 1) + self.assertEqual(self.log.messages['ERROR'], + ['JSON \'alignmentlist\' member from '+ + '\'{"alignmentlist": ["Seq1", "Seq2"]}\' is not '+ + 'a \' dictionary: Seq1']) + + def testAddAlignmentJsonAlnTrgNotDict(self): + # entries of the alignmentlist need to be dict's of dict's + parser = pm3argparse.PM3ArgumentParser(__doc__, action=False) + parser.AddAlignment() + parser.AssembleParser() + json_str = json.dumps({'alignmentlist': [{'target': 'AAA', + 'template': 'BBB'}]}) + with self.assertRaises(SystemExit) as ecd: + parser.Parse(['--json', json_str]) + self.assertEqual(ecd.exception.code, 26) + self.assertEqual(len(self.log.messages['ERROR']), 1) + self.assertEqual(self.log.messages['ERROR'], + ['JSON \'alignmentlist\' \'target\' '+ + 'from\'{"alignmentlist": [{"target": "AAA", '+ + '"template": "BBB"}]}\' is not a dictionary: AAA']) + + def testAddAlignmentJsonAlnTrgNoNameNoSeqres(self): + # entries of the alignmentlist need to be dict's + parser = pm3argparse.PM3ArgumentParser(__doc__, action=False) + parser.AddAlignment() + parser.AssembleParser() + json_str = json.dumps({'alignmentlist': [{'target': {'AAA': 1}, + 'template': {'BBB': 2}}]}) + with self.assertRaises(SystemExit) as ecd: + parser.Parse(['--json', json_str]) + self.assertEqual(ecd.exception.code, 27) + self.assertEqual(len(self.log.messages['ERROR']), 1) + self.assertEqual(self.log.messages['ERROR'], + ['JSON \'alignmentlist\' \'target\' from '+ + '\'{"alignmentlist": [{"target": {"AAA": 1}, '+ + '"template": {"BBB": 2}}]}\' is missing the '+ + '\'name\' key']) + + def testAddAlignmentJsonAlnTrgTplNoString(self): + # entries of the sequence dict in an aln need to be str + parser = pm3argparse.PM3ArgumentParser(__doc__, action=False) + parser.AddAlignment() + parser.AssembleParser() + json_str = json.dumps({'alignmentlist': 
[{'target': {'name': 1, + 'seqres': 2}, + 'template': {'name': 2, + 'seqres': 2}}]}) + with self.assertRaises(SystemExit) as ecd: + parser.Parse(['--json', json_str]) + self.assertEqual(ecd.exception.code, 28) + self.assertEqual(len(self.log.messages['ERROR']), 1) + self.assertEqual(self.log.messages['ERROR'], + ['JSON \'alignmentlist\' \'target\' \'name\' '+ + 'from\'{"alignmentlist": [{"target": {"seqres": 2, '+ + '"name": 1}, "template": {"seqres": 2, "name": '+ + '2}}]}\' is not a <type \'str\'>']) + + def testAddAlignmentJsonAlnTplNoOffset(self): + # no offset for template sequence + parser = pm3argparse.PM3ArgumentParser(__doc__, action=False) + parser.AddAlignment() + parser.AssembleParser() + json_str = json.dumps({'alignmentlist': [{'target': {'name': 'A', + 'seqres': 'AA'}, + 'template': {'name': 'A', + 'seqres': 'AA'}} + ]}) + with self.assertRaises(SystemExit) as ecd: + parser.Parse(['--json', json_str]) + self.assertEqual(ecd.exception.code, 27) + self.assertEqual(len(self.log.messages['ERROR']), 1) + self.assertEqual(self.log.messages['ERROR'], + ['JSON \'alignmentlist\' \'template\' from '+ + '\'{"alignmentlist": [{"target": {"seqres": "AA", '+ + '"name": "A"}, "template": {"seqres": "AA", '+ + '"name": "A"}}]}\' is missing the \'offset\' key']) + + def testAddAlignmentJsonAlnTplOffsetStr(self): + # entries of the alignmentlist need to be dict's + parser = pm3argparse.PM3ArgumentParser(__doc__, action=False) + parser.AddAlignment() + parser.AssembleParser() + json_str = json.dumps({'alignmentlist': [{'target': {'name': 'A', + 'seqres': 'AA'}, + 'template': {'name': 'A', + 'seqres': 'AA', + 'offset': '0'}} + ]}) + with self.assertRaises(SystemExit) as ecd: + parser.Parse(['--json', json_str]) + self.assertEqual(ecd.exception.code, 28) + self.assertEqual(len(self.log.messages['ERROR']), 1) + self.assertEqual(self.log.messages['ERROR'], + ['JSON \'alignmentlist\' \'template\' \'offset\' '+ + 'from\'{"alignmentlist": [{"target": {"seqres": '+ + '"AA", 
"name": "A"}, "template": {"seqres": "AA", '+ + '"name": "A", "offset": "0"}}]}\' is not a <type '+ + '\'int\'>']) + + def testAddAlignmentJsonAlnString(self): + # entries of the alignmentlist need to be dict's + parser = pm3argparse.PM3ArgumentParser(__doc__, action=False) + parser.AddAlignment() + parser.AssembleParser() + json_str = json.dumps({'alignmentlist': + [{'target': + {'name': 'target', + 'seqres': + 'APGAGKGTQAQFIMEKYGIPQISTGGGLRAAVKS---LGKQA'+ + 'KDIMDAGKLVTDELVIALVKERIAQEDCRNGFLLDGFPRTIP'+ + 'QADAMKEAGINVDYVLEF----ELIVDRIVGRRVHAPSGRVY'+ + 'HVKFNPPKVEGKDDVTGEELTTRKDDQEETVRKRLVEYHQMT'+ + 'APLL--YYYYKEAEAGNTKYAKVDGTKPVAEVRADLEKILG'}, + 'template': + {'name': '1AKE.B', + 'seqres': + 'APGAGKGTQAQFIMEKYGIPQISTGDMLRAAVKSGSELGKQA'+ + 'KDIMDAGKLVTDELVIALVKERIAQEDCRNGFLLDGFPRTIP'+ + 'QADAMKEAGINVDYVLEFDVPDELIVDRIVGRRVHAPSGRVY'+ + 'HVKFNPPKVEGKDDVTGEELTTRKDDQEETVRKRLVEYHQMT'+ + 'APLIGYYYYSKEAEAGNTKYAKVDGTKPV---AEVRADLEK', + 'offset': 7}}]}) + opts = parser.Parse(['--json', json_str]) + self.assertEqual(len(opts.alignments), 1) + self.assertEqual(opts.alignments[0].GetCount(), 2) + self.assertEqual(opts.alignments[0].GetLength(), 209) + self.assertEqual(opts.alignments[0].GetSequenceOffset(0), 0) + self.assertEqual(opts.alignments[0].GetSequenceOffset(1), 7) + self.assertEqual(opts.alignments[0].GetSequence(0).gapless_string, + 'APGAGKGTQAQFIMEKYGIPQISTGGGLRAAVKSLGKQAKDIMDAGKLVT'+ + 'DELVIALVKERIAQEDCRNGFLLDGFPRTIPQADAMKEAGINVDYVLEFE'+ + 'LIVDRIVGRRVHAPSGRVYHVKFNPPKVEGKDDVTGEELTTRKDDQEETV'+ + 'RKRLVEYHQMTAPLLYYYYKEAEAGNTKYAKVDGTKPVAEVRADLEKILG') + self.assertEqual(opts.alignments[0].GetSequenceRole(0), 'TARGET') + self.assertEqual(opts.alignments[0].GetSequenceRole(1), 'TEMPLATE') + self.assertEqual(opts.alignments[0].GetSequence(0).name, 'target') + self.assertEqual(opts.alignments[0].GetSequence(1).name, '1AKE.B') + self.assertEqual(str(opts.alignments[0]), + 'target APGAGKGTQAQFIMEKYGIPQISTGGGLRAAVKS---LGKQAK'+ + 'DIMDAGKLVTDELVIALVKERIAQEDCRN\n1AKE.B 
APGAGKGTQAQF'+ + 'IMEKYGIPQISTGDMLRAAVKSGSELGKQAKDIMDAGKLVTDELVIALVKE'+ + 'RIAQEDCRN\n\ntarget GFLLDGFPRTIPQADAMKEAGINVDYVLEF'+ + '----ELIVDRIVGRRVHAPSGRVYHVKFNPPKVEGKDDVTGE\n1AKE.B '+ + ' GFLLDGFPRTIPQADAMKEAGINVDYVLEFDVPDELIVDRIVGRRVHAPS'+ + 'GRVYHVKFNPPKVEGKDDVTGE\n\ntarget ELTTRKDDQEETVRKRL'+ + 'VEYHQMTAPLL--YYYYKEAEAGNTKYAKVDGTKPVAEVRADLEKILG\n1'+ + 'AKE.B ELTTRKDDQEETVRKRLVEYHQMTAPLIGYYYYSKEAEAGNTKY'+ + 'AKVDGTKPV---AEVRADLEK\n') + self.assertEqual(len(opts.aln_sources), 1) + self.assertEqual(opts.aln_sources[0], + '{"alignmentlist": [{"target": {"seqres": "APGAGKGTQ'+ + 'AQFIMEKYGIPQISTGGGLRAAVKS---LGKQAKDIMDAGKLVTDELVIAL'+ + 'VKERIAQEDCRNGFLLDGFPRTIPQADAMKEAGINVDYVLEF----ELIVD'+ + 'RIVGRRVHAPSGRVYHVKFNPPKVEGKDDVTGEELTTRKDDQEETVRKRLV'+ + 'EYHQMTAPLL--YYYYKEAEAGNTKYAKVDGTKPVAEVRADLEKILG", "'+ + 'name": "target"}, "template": {"seqres": "APGAGKGTQ'+ + 'AQFIMEKYGIPQISTGDMLRAAVKSGSELGKQAKDIMDAGKLVTDELVIAL'+ + 'VKERIAQEDCRNGFLLDGFPRTIPQADAMKEAGINVDYVLEFDVPDELIVD'+ + 'RIVGRRVHAPSGRVYHVKFNPPKVEGKDDVTGEELTTRKDDQEETVRKRLV'+ + 'EYHQMTAPLIGYYYYSKEAEAGNTKYAKVDGTKPV---AEVRADLEK", "'+ + 'name": "1AKE.B", "offset": 7}}]}') + + def testAddAlignmentJsonAlnMultiString(self): + # test multiple alignments + parser = pm3argparse.PM3ArgumentParser(__doc__, action=False) + parser.AddAlignment() + parser.AssembleParser() + json_str = json.dumps({'alignmentlist': + [{'target': + {'name': ' target 1', + 'seqres': + 'VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTY'+ + 'FPHF-DL-S----HGSAQVKGHGKKVADALTNAVAHVDDMPN'+ + 'ALSALSDLHAHK-LRVDPVNFKLLSHCLLVTLAAHLPAEFTP'+ + 'AVHASLDKFLASVSTVLTSKYR'}, + 'template': + {'name': '3e7b90809bd446a5', + 'seqres': + 'VLSEGEWQLVLHVWAKVEADVAGHGQDILIRLFKSHPETLEK'+ + 'FDRFKHLKTEAEMKASEDLKKHGVTVLTALGAILKKKGHHEA'+ + 'ELKPLAQSHA-TKHKIPIKYLEFISEAIIHVLHSRHPGDFGA'+ + 'DAQGAMNKALELFRKDIAAKYK', + 'offset': 1}}, + {'target': + {'name': 'target 2', + 'seqres': + 'VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTY'+ + 'FPHFDLSHGSAQVKGHGKKVADALTNAVAHVDDMPNALSALS'+ + 
'DLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTPAVHASLD'+ + 'KFLASVSTVLTSKYR'}, + 'template': + {'name': 'af828e69a5f2d0fd', + 'seqres': + 'VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTY'+ + 'FPHFDLSHGSAQVKGHGKKVADALTNAVAHVDDMPNALSALS'+ + 'DLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTPAVHASLD'+ + 'KFLASVSTVLTSKYR', + 'offset': 2}}, + {'target': + {'name': 'target 3', + 'seqres': + 'VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTY'+ + 'FPHF-DLS-----HGSAQVKGHGKKVADALTNAVAHVDDMPN'+ + 'ALSALSDLHAHK-LRVDPVNFKLLSHCLLVTLAAHLPAEFTP'+ + 'AVHASLDKFLASVSTVLTSKYR'}, + 'template': + {'name': '9287755aa6aa2758', + 'seqres': + 'HLTPEEKSAVTALWGKVN--VDEVGGEALGRLLVVYPWTQRF'+ + 'FESFGDLSTPDAVMGNPKVKAHGKKVLGAFSDGLAHLDNLKG'+ + 'TFATLSELHC-DKLHVDPENFRLLGNVLVCVLAHHFGKEFTP'+ + 'PVQAAYQKVVAGVANALAHKYH', + 'offset': 3}}, + {'target': {'name': 'target 4', + 'seqres': 'VDPVNFKLLSHCLLVTLAAHL'}, + 'template': {'name': 'e69e1ac0a4b2554d', + 'seqres': 'ATPEQAQLVHKEIRKIVKDTC', + 'offset': 4}}]}) + opts = parser.Parse(['--json', json_str]) + + self.assertEqual(len(opts.aln_sources), 1) + self.assertEqual(opts.aln_sources[0], + '{"alignmentlist": [{"target": {"seqres": "VLSPADKTN'+ + 'VKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHF-DL-S----HGSAQ'+ + 'VKGHGKKVADALTNAVAHVDDMPNALSALSDLHAHK-LRVDPVNFKLLSHC'+ + 'LLVTLAAHLPAEFTPAVHASLDKFLASVSTVLTSKYR", "name": " ta'+ + 'rget 1"}, "template": {"seqres": "VLSEGEWQLVLHVWAKV'+ + 'EADVAGHGQDILIRLFKSHPETLEKFDRFKHLKTEAEMKASEDLKKHGVTV'+ + 'LTALGAILKKKGHHEAELKPLAQSHA-TKHKIPIKYLEFISEAIIHVLHSR'+ + 'HPGDFGADAQGAMNKALELFRKDIAAKYK", "name": "3e7b90809b'+ + 'd446a5", "offset": 1}}, {"target": {"seqres": "VLSP'+ + 'ADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQV'+ + 'KGHGKKVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLL'+ + 'VTLAAHLPAEFTPAVHASLDKFLASVSTVLTSKYR", "name": "targ'+ + 'et 2"}, "template": {"seqres": "VLSPADKTNVKAAWGKVGA'+ + 'HAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGKKVADALTNAV'+ + 'AHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTPAV'+ + 'HASLDKFLASVSTVLTSKYR", "name": "af828e69a5f2d0fd", '+ + 
'"offset": 2}}, {"target": {"seqres": "VLSPADKTNVKAA'+ + 'WGKVGAHAGEYGAEALERMFLSFPTTKTYFPHF-DLS-----HGSAQVKGH'+ + 'GKKVADALTNAVAHVDDMPNALSALSDLHAHK-LRVDPVNFKLLSHCLLVT'+ + 'LAAHLPAEFTPAVHASLDKFLASVSTVLTSKYR", "name": "target'+ + ' 3"}, "template": {"seqres": "HLTPEEKSAVTALWGKVN--V'+ + 'DEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMGNPKVKAHGKKVLGAF'+ + 'SDGLAHLDNLKGTFATLSELHC-DKLHVDPENFRLLGNVLVCVLAHHFGKE'+ + 'FTPPVQAAYQKVVAGVANALAHKYH", "name": "9287755aa6aa27'+ + '58", "offset": 3}}, {"target": {"seqres": "VDPVNFKL'+ + 'LSHCLLVTLAAHL", "name": "target 4"}, "template": {"'+ + 'seqres": "ATPEQAQLVHKEIRKIVKDTC", "name": "e69e1ac0'+ + 'a4b2554d", "offset": 4}}]}') + self.assertEqual(len(opts.alignments), 4) + # aln 1 + self.assertEqual(opts.alignments[0].GetCount(), 2) + self.assertEqual(opts.alignments[0].GetLength(), 148) + self.assertEqual(opts.alignments[0].GetSequenceOffset(0), 0) + self.assertEqual(opts.alignments[0].GetSequenceOffset(1), 1) + self.assertEqual(opts.alignments[0].GetSequenceRole(0), 'TARGET') + self.assertEqual(opts.alignments[0].GetSequenceRole(1), 'TEMPLATE') + self.assertEqual(opts.alignments[0].GetSequence(0).name, 'target 1') + self.assertEqual(opts.alignments[0].GetSequence(1).name, + '3e7b90809bd446a5') + self.assertEqual(str(opts.alignments[0]), + 'target 1 VLSPADKTNVKAAWGKVGAHAGEYGAEALERMF'+ + 'LSFPTTKTYFPHF-DL-S----HGSAQVK\n3e7b90809bd446a5 VL'+ + 'SEGEWQLVLHVWAKVEADVAGHGQDILIRLFKSHPETLEKFDRFKHLKTEA'+ + 'EMKASEDLK\n\ntarget 1 GHGKKVADALTNAVAHVDDM'+ + 'PNALSALSDLHAHK-LRVDPVNFKLLSHCLLVTLAAHLPAEF\n3e7b908'+ + '09bd446a5 KHGVTVLTALGAILKKKGHHEAELKPLAQSHA-TKHKIPI'+ + 'KYLEFISEAIIHVLHSRHPGDF\n\ntarget 1 TPAVHAS'+ + 'LDKFLASVSTVLTSKYR\n3e7b90809bd446a5 GADAQGAMNKALEL'+ + 'FRKDIAAKYK\n') + + # aln 2 + self.assertEqual(opts.alignments[1].GetCount(), 2) + self.assertEqual(opts.alignments[1].GetLength(), 141) + self.assertEqual(opts.alignments[1].GetSequenceOffset(0), 0) + self.assertEqual(opts.alignments[1].GetSequenceOffset(1), 2) + 
self.assertEqual(opts.alignments[1].GetSequenceRole(0), 'TARGET') + self.assertEqual(opts.alignments[1].GetSequenceRole(1), 'TEMPLATE') + self.assertEqual(opts.alignments[1].GetSequence(0).name, 'target 2') + self.assertEqual(opts.alignments[1].GetSequence(1).name, + 'af828e69a5f2d0fd') + self.assertEqual(str(opts.alignments[1]), + 'target 2 VLSPADKTNVKAAWGKVGAHAGEYGAEALERMF'+ + 'LSFPTTKTYFPHFDLSHGSAQVKGHGKKV\naf828e69a5f2d0fd VL'+ + 'SPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSA'+ + 'QVKGHGKKV\n\ntarget 2 ADALTNAVAHVDDMPNALSA'+ + 'LSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTPAVHAS\naf828e6'+ + '9a5f2d0fd ADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLS'+ + 'HCLLVTLAAHLPAEFTPAVHAS\n\ntarget 2 LDKFLAS'+ + 'VSTVLTSKYR\naf828e69a5f2d0fd LDKFLASVSTVLTSKYR\n') + # aln 3 + self.assertEqual(opts.alignments[2].GetCount(), 2) + self.assertEqual(opts.alignments[2].GetLength(), 148) + self.assertEqual(opts.alignments[2].GetSequenceOffset(0), 0) + self.assertEqual(opts.alignments[2].GetSequenceOffset(1), 3) + self.assertEqual(opts.alignments[2].GetSequenceRole(0), 'TARGET') + self.assertEqual(opts.alignments[2].GetSequenceRole(1), 'TEMPLATE') + self.assertEqual(opts.alignments[2].GetSequence(0).name, 'target 3') + self.assertEqual(opts.alignments[2].GetSequence(1).name, + '9287755aa6aa2758') + self.assertEqual(str(opts.alignments[2]), + 'target 3 VLSPADKTNVKAAWGKVGAHAGEYGAEALERMF'+ + 'LSFPTTKTYFPHF-DLS-----HGSAQVK\n9287755aa6aa2758 HL'+ + 'TPEEKSAVTALWGKVN--VDEVGGEALGRLLVVYPWTQRFFESFGDLSTPD'+ + 'AVMGNPKVK\n\ntarget 3 GHGKKVADALTNAVAHVDDM'+ + 'PNALSALSDLHAHK-LRVDPVNFKLLSHCLLVTLAAHLPAEF\n9287755'+ + 'aa6aa2758 AHGKKVLGAFSDGLAHLDNLKGTFATLSELHC-DKLHVDP'+ + 'ENFRLLGNVLVCVLAHHFGKEF\n\ntarget 3 TPAVHAS'+ + 'LDKFLASVSTVLTSKYR\n9287755aa6aa2758 TPPVQAAYQKVVAG'+ + 'VANALAHKYH\n') + # aln 4 + self.assertEqual(opts.alignments[3].GetCount(), 2) + self.assertEqual(opts.alignments[3].GetLength(), 21) + self.assertEqual(opts.alignments[3].GetSequenceOffset(0), 0) + 
self.assertEqual(opts.alignments[3].GetSequenceOffset(1), 4) + self.assertEqual(opts.alignments[3].GetSequenceRole(0), 'TARGET') + self.assertEqual(opts.alignments[3].GetSequenceRole(1), 'TEMPLATE') + self.assertEqual(opts.alignments[3].GetSequence(0).name, 'target 4') + self.assertEqual(opts.alignments[3].GetSequence(1).name, + 'e69e1ac0a4b2554d') + self.assertEqual(str(opts.alignments[3]), + 'target 4 VDPVNFKLLSHCLLVTLAAHL\ne69e1ac0a4'+ + 'b2554d ATPEQAQLVHKEIRKIVKDTC\n') + + def testAddAlignmentJsonWorkingGzipFile(self): # positive test: everything works! parser = pm3argparse.PM3ArgumentParser(__doc__, action=False) parser.AddAlignment() parser.AssembleParser() - parser.Parse(['--json', 'foo bar']) + json_obj = _GetJsonTestObj() + tmp_json = tempfile.NamedTemporaryFile(suffix='.json.gz') + with gzip.open(tmp_json.name, 'wb') as gfh: + json.dump(json_obj, gfh) + tmp_json.flush() + opts = parser.Parse(['--json', tmp_json.name]) + self.assertEqual(len(opts.aln_sources), 1) + self.assertEqual(opts.aln_sources[0], tmp_json.name) + self.assertEqual(len(opts.alignments), 1) + self.assertEqual(str(opts.alignments[0]), + 'TARGET VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKT'+ + 'YFPHF-DLS-----HGSAQVKGHGKKVAD\nTEMPLATE HLTPEEKSAV'+ + 'TALWGKVN--VDEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMGNPKV'+ + 'KAHGKKVLG\n\nTARGET ALTNAVAHVDDMPNALSALSDLHAHK-L'+ + 'RVDPVNFKLLSHCLLVTLAAHLPAEFTPAVHASLDKFLASVS\nTEMPLAT'+ + 'E AFSDGLAHLDNLKGTFATLSELHC-DKLHVDPENFRLLGNVLVCVLAH'+ + 'HFGKEFTPPVQAAYQKVVAGVA\n\nTARGET TVLTSKYR\nTEMPL'+ + 'ATE NALAHKYH\n') -# test that json always needs an argument -# test json with sth that is not a file and not a JSON object + def testAddAlignmentJsonWorkingFile(self): + # positive test: everything works! 
+ parser = pm3argparse.PM3ArgumentParser(__doc__, action=False) + parser.AddAlignment() + parser.AssembleParser() + json_obj = _GetJsonTestObj() + tmp_json = tempfile.NamedTemporaryFile(suffix='.json') + json.dump(json_obj, tmp_json) + tmp_json.flush() + opts = parser.Parse(['--json', tmp_json.name]) + self.assertEqual(len(opts.aln_sources), 1) + self.assertEqual(opts.aln_sources[0], tmp_json.name) + self.assertEqual(len(opts.alignments), 1) + self.assertEqual(str(opts.alignments[0]), + 'TARGET VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKT'+ + 'YFPHF-DLS-----HGSAQVKGHGKKVAD\nTEMPLATE HLTPEEKSAV'+ + 'TALWGKVN--VDEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMGNPKV'+ + 'KAHGKKVLG\n\nTARGET ALTNAVAHVDDMPNALSALSDLHAHK-L'+ + 'RVDPVNFKLLSHCLLVTLAAHLPAEFTPAVHASLDKFLASVS\nTEMPLAT'+ + 'E AFSDGLAHLDNLKGTFATLSELHC-DKLHVDPENFRLLGNVLVCVLAH'+ + 'HFGKEFTPPVQAAYQKVVAGVA\n\nTARGET TVLTSKYR\nTEMPL'+ + 'ATE NALAHKYH\n') + self.assertEqual(opts.fasta, None) + +# test options: --disable-aln check (for amino acids) +# test options: --disable-input-checks (for all) +# test option: --disable-mm-check (macromolecule) if __name__ == "__main__": from ost import testutils diff --git a/extras/pre_commit/pm3_csc/filecheck/pylint-unittest-rc b/extras/pre_commit/pm3_csc/filecheck/pylint-unittest-rc index 9fbc13a8ad1c5f942c0f520d56acd811db87a803..3f21e8ffcac9d7d939390395fa465894955efb22 100644 --- a/extras/pre_commit/pm3_csc/filecheck/pylint-unittest-rc +++ b/extras/pre_commit/pm3_csc/filecheck/pylint-unittest-rc @@ -173,10 +173,10 @@ module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ module-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ # Regular expression matching correct method names -method-rgx=(?:setUp|(?:[_A-Z]|test[A-Z])[a-zA-Z0-9_]{2,30})$ +method-rgx=(?:setUp|(?:[_A-Z]|test[A-Z])[a-zA-Z0-9_]{2,40})$ # Naming hint for method names -method-name-hint=(?:setUp|(?:[A-Z]|test[A-Z])[a-zA-Z0-9_]{2,30})$ +method-name-hint=(?:setUp|(?:[A-Z]|test[A-Z])[a-zA-Z0-9_]{2,40})$ # Regular expression which should 
only match function or class names that do # not require a docstring.