diff --git a/core/pymod/pm3argparse.py b/core/pymod/pm3argparse.py index b8b6a640bf92ae736bfdfe6a3bddb827919f68ea..33aa1fac81338e5bc137701951cbd1bdef9ec64c 100644 --- a/core/pymod/pm3argparse.py +++ b/core/pymod/pm3argparse.py @@ -232,6 +232,17 @@ def _LoadEntity(filename): str(exc), 33) return ent +def _FetchProfileFromFile(filename): + """Load generic profile file from filename and return it.""" + argstr = "'--seqprof " + filename + "'" + helper.FileExists("Profile", 51, filename) + try: + prof = io.LoadSequenceProfile(filename) + except Exception, exc: + helper.MsgErrorAndExit(argstr + ": failure to parse profile file: " + + str(exc), 52) + return prof + def _GetChains(structures, structure_sources): """Get chain id to entity view (single chain) mapping (dict).""" # IDs: (file_base = base file name with no extensions) @@ -398,6 +409,8 @@ class PM3ArgumentParser(argparse.ArgumentParser): self._AssembleAlignment() if 'STRUCTURE' in self.activate: self._AssembleStructure() + if 'PROFILE' in self.activate: + self._AssembleProfile() def AddAlignment(self, allow_multitemplate=False): """Commandline options for alignments. @@ -474,7 +487,7 @@ class PM3ArgumentParser(argparse.ArgumentParser): def AddStructure(self, attach_views=False): """Commandline options for structures. - Activate everything needed to load alignments to the argument parser. + Activate everything needed to load structures to the argument parser. Command line arguments are then added in :meth:`AssembleParser` and the input is post processed and checked in :meth:`Parse`. @@ -532,6 +545,46 @@ class PM3ArgumentParser(argparse.ArgumentParser): if attach_views: self.activate.add('ATTACH_VIEWS') + def AddProfile(self): + """Commandline options for profiles + + Activate everything needed to load profiles to the argument parser. + Command line arguments are then added in :meth:`AssembleParser` and the + input is post processed and checked in :meth:`Parse`. + + Options/arguments added: + + * ``-s/--seqprof <FILE>`` - Sequence profile in any format readable + by the :meth:`ost.io.LoadSequenceProfile` method. Format is chosen by + file ending. Recognized file extensions: .hhm, .hhm.gz, .pssm, + .pssm.gz. + + Notes: + + * the profiles are mapped based on exact matches towards the gapless + target sequences, i.e. one profile is mapped to several chains in + case of homo-oligomers + + * every profile must have a unique sequence to avoid ambiguities + + * all or nothing - you cannot provide profiles for only a subset of + target sequences + + Attributes added to the namespace returned by :meth:`Parse`: + + * :attr:`profiles` - :class:`list` of :class:`ost.seq.ProfileHandle`, + ordered to match the target sequences. + + Exit codes related to profile input: + + * 51 - a given profile file does not exist + * 52 - failure to read a given profile file + * 53 - a profile cannot be mapped to any target sequence + * 54 - profile sequences are not unique + * 55 - only subset of target sequences is covered by profile + """ + self.activate.add('PROFILE') + def _AssembleAlignment(self): """Actually add alignment arguments/options.""" aln_grp = self.add_mutually_exclusive_group(required=True) @@ -571,6 +624,14 @@ class PM3ArgumentParser(argparse.ArgumentParser): ".ent.gz, .pdb.gz, .cif, .cif.gz.", action='append', default=list()) + def _AssembleProfile(self): + self.add_argument('-s', '--seqprof', metavar=('<FILE>'), + help="Sequence profile in any format readable by "+ + "OST's io.LoadSequenceProfile method. Format is "+ + "chosen by file ending. Recognized File Extensions: "+ + ".hhm, .hhm.gz, .pssm, .pssm.gz", action='append', + default=list()) + class PM3OptionsNamespace(object): # class will grow, so for the moment pylint is ignored #pylint: disable=too-few-public-methods @@ -591,6 +652,8 @@ class PM3OptionsNamespace(object): self._PostProcessStructure() if 'ATTACH_VIEWS' in activated: self._AttachViews() + if 'PROFILE' in activated: + self._PostProcessProfile() def _PostProcessAlignment(self): #pylint: disable=no-member @@ -640,6 +703,42 @@ class PM3OptionsNamespace(object): for aln in self.alignments: _AttachViewsToAln(aln, chain_entities) + def _PostProcessProfile(self): + """Get Profiles from command line input.""" + self.profiles = list() + + if len(self.seqprof) == 0: + # no profiles provided, remember the all or nothing principle + # so not having any profile is fine + return + + loaded_profiles = list() + for src in self.seqprof: + loaded_profiles.append(_FetchProfileFromFile(src)) + + prof_sequences = [p.sequence for p in loaded_profiles] + + # check uniqueness of loaded profiles + if len(set(prof_sequences)) != len(prof_sequences): + helper.MsgErrorAndExit("All sequence profiles must have unique " + + "sequence.", 54) + + # map onto alignment target sequences + trg_sequences = [aln.GetSequence(0).GetGaplessString() \ + for aln in self.alignments] + for s in trg_sequences: + try: + self.profiles.append(loaded_profiles[prof_sequences.index(s)]) + except Exception, exc: + helper.MsgErrorAndExit("Could not find profile with sequence " + + "that exactly matches trg seq: " + s, 55) + + # We found a profile for every target sequence. So if the size of unique + # target sequences is not the same as for unique profile sequences, + # we know that we have additional profiles that never got mapped + if len(set(trg_sequences)) != len(set(prof_sequences)): + helper.MsgErrorAndExit("Could not map every profile to a target " + + "sequence", 53) # LocalWords: param attr prog argparse ArgumentParser bool sys os init str # LocalWords: progattr descattr argpinit argv formatter meth args namespace diff --git a/core/tests/CMakeLists.txt b/core/tests/CMakeLists.txt index b7eebec0e86b04939ab02ac83281e74ac17ab1ef..829a9961c6ff7488d0b5ea3761f75bbe7b40987a 100644 --- a/core/tests/CMakeLists.txt +++ b/core/tests/CMakeLists.txt @@ -23,6 +23,10 @@ set(CORE_TEST_DATA data/aln_tpl/5d52-1_cut.pdb data/aln_tpl/5d52-1_cut_A.fasta data/aln_tpl/5d52-1_cut_B.fasta + data/aln_tpl/1crn.hhm + data/aln_tpl/1crn_corrupted.hhm + data/aln_tpl/5ua4_B.fasta + data/aln_tpl/5ua4_B.hhm data/fasta/alignment.fas data/fasta/1ake.fas data/fasta/1ake_1.fas @@ -31,6 +35,7 @@ set(CORE_TEST_DATA data/fasta/1ake.fas.gz data/fasta/1ake_nel.fas data/fasta/1ake_sw.fas + ) promod3_unittest(MODULE core diff --git a/core/tests/data/aln_tpl/1crn.hhm b/core/tests/data/aln_tpl/1crn.hhm new file mode 100644 index 0000000000000000000000000000000000000000..eb35d07151bbc66ea6f979f155f3f1fef9cf2d40 --- /dev/null +++ b/core/tests/data/aln_tpl/1crn.hhm @@ -0,0 +1,180 @@ +HHsearch 1.5 +NAME c6a0deb50c4f69619d193f0fde0517c2 +FAM +FILE seq01 +COM /scicore/soft/apps/HH-suite/2.0.16-goolf-1.4.10/bin/hhmake -i /scratch/14369431.1.short.q/tmpqOaEJI/seq01.a3m -o /scratch/14369431.1.short.q/tmpqOaEJI/seq01.hhm +DATE Mon Mar 7 14:19:45 2016 +LENG 46 match states, 46 columns in multiple alignment +FILT 55 out of 57 sequences passed filter (-id 90 -cov 0 -qid 0 -qsc -20.00 -diff 100) +NEFF 3.8 +SEQ +>ss_pred PSIPRED predicted secondary structure +CCCCCCHHHHHHHHHCCCCCCCHHHHHHHCCCEEECCCCCCCCCCC +>ss_conf PSIPRED confidence values +9877783554421111168998455542048366208989999999 +>Consensus +xsCCpstxaRnxYnxCrxxgxsxxxCaxxsgCkixsgxxCPxxyxx +>c6a0deb50c4f69619d193f0fde0517c2 +TTCCPSIVARSNFNVCRLPGTPEAICATYTGCIIIPGATCPGDYAN +>gi|115605364|gb|ABJ15789.1| putative thionin precursor [Polygonum sibiricum] +-SCCQTTTARNIYNSCRLAGGSRERCASLSGCKHVTGNTCSPGWEK +>gi|4007745|emb|CAA65316.1| purothionin [Secale cereale] +-SCCKSTLGRNCYNLCRTRGAQK-LCANFCRCKLISSTSCPKEFPK +>gi|17381172|gb|AAL36398.1| putative thionin protein [Arabidopsis thaliana]gi|62320644|dbj|BAD95310.1| putative thionin [Arabidopsis thaliana] +-TCCPSQSTRKGFEDCISEGNLQILCSAESGCRDTYVGYCPSGFPY +>gi|802170|gb|AAB33011.1| crambin precursor=thionin variant Thi2Ca12 [Crambe abyssinica, seeds, Peptide Partial, 135 aa] +-SCCPTKSARNTFDVCRLTGTSMGLCAAISECKILSVTKCPSNLPY +>gi|21553588|gb|AAM62681.1| thionin Thi2.2 [Arabidopsis thaliana] +-ICCPTKDDRSVYFVCMLSVSSQFYCLLKSKCKNTSQTICPPGYTN +>gi|15218931|ref|NP_176784.1| thionin [Arabidopsis thaliana]gi|44888531|sp|Q9C8D6.1|THN24_ARATH RecName: Full=Probable thionin-2.4; Contains: RecName: Full=Probable thionin-2.4; Contains: RecName: Full=Acidic protein; Flags: Precursorgi|12322605|gb|AAG51299.1|AC026480_6 thionin, putative [Arabidopsis thaliana]gi|14190505|gb|AAK55733.1|AF380652_1 At1g66100/F15E12_20 [Arabidopsis thaliana]gi|15809774|gb|AAL06815.1| At1g66100/F15E12_20 [Arabidopsis thaliana]gi|332196341|gb|AEE34462.1| thionin [Arabidopsis thaliana] +-ICCPSIQARTFYNACLFAVGSPSSCIRNSSCLDISESTCPRGYTN +>gi|1729954|sp|Q05806.1|THN5_WHEAT RecName: Full=Type-5 thionin; Contains: RecName: Full=Type-5 thionin; AltName: Full=Type V thionin; Contains: RecName: Full=Acidic protein; Flags: Precursorgi|21885|emb|CAA43844.1| wheat type V thionin [Triticum aestivum]gi|21887|emb|CAA43845.1| wheat type V thionin [Triticum aestivum] +-DCGANPFKVACFNSCLLGPSTVFQCADFCACRLPAG--------- +>gi|120564556|gb|ABM30200.1| thionin [Brassica juncea] +-SCCPSTAARWAYYLCTNSWPLTPLCISHTGC-IESETTCPPGYPY +>gi|545031|gb|AAB29760.1| thionin precursor {clone Thi1Va1} [Viscum album=mistletoe, Peptide, 115 aa] +-ICCRAPAGKKCYNLCTA-lLSSE-TCANTCYCKDVSGETCPAD--- +# +NULL 3706 5728 4211 4064 4839 3729 4763 4308 4069 3323 5509 4640 4464 4937 4285 4423 3815 3783 6325 4665 +HMM A C D E F G H I K L M N P Q R S T V W Y + M->M M->I M->D I->M I->I D->M D->D Neff Neff_I Neff_D + 0 * * 0 * 0 * * * * +T 1 * * * * * * * * * * * * * * * * 0 * * * 1 + 0 * * * * * * 1000 0 0 + +T 2 * * 4186 * * 5283 * 2677 * 5264 * * * * * 833 3153 3986 * * 2 + 0 * * * * * * 3941 0 0 + +C 3 * 0 * * * * * * * * * * * * * * * * * * 3 + 0 * * * * * * 3941 0 0 + +C 4 * 122 * * 5259 4186 * * * * * * * * * * * * * * 4 + 0 * * * * * * 3941 0 0 + +P 5 4186 * * * * * * * 3250 * * * 549 5753 2856 * * * * * 5 + 0 * * * * * * 3941 0 0 + +S 6 5271 * 4429 * * * * * * * * 2329 * * 5941 866 2612 * * * 6 + 0 * * * * * * 3941 0 0 + +I 7 * * 5440 4820 * * * 3810 4212 * 5935 4983 3272 3386 4684 5283 1068 5174 * * 7 + 0 * * * * * * 3941 0 0 + +V 8 2614 * 4805 * 4186 * * 3036 * 3320 * * * 4094 * 2750 1838 5660 5873 * 8 + 0 * * * * * * 3941 0 0 + +A 9 646 * 4264 * * 2749 * * 4186 * * * * * * 3803 4903 * * * 9 + 0 * * * * * * 3941 0 0 + +R 10 * * * * * * * 5975 4265 * * * * * 189 * * 4186 * * 10 + 0 * * * * * * 3941 0 0 + +S 11 4186 * * 3803 * * * 5935 3608 * * 836 * * * 2953 5214 * 4935 5259 11 + 0 * * * * * * 3941 0 0 + +N 12 4935 1544 * 4787 5214 3942 * 2698 5264 * 4684 3736 * 5164 5174 * 3934 3625 * * 12 + 0 * * * * * * 3941 0 0 + +F 13 * * * * 2199 * * * * * * * * * * * * * * 354 13 + 0 * * * * * * 3941 0 0 + +N 14 * * 4222 3844 4805 * * * * 5264 * 510 * * * 4983 5283 5464 * 4935 14 + 0 * * * * * * 3941 0 0 + +V 15 2800 * 3844 * * * * 5271 * 2403 5174 * * * * 2663 3074 1902 * * 15 + 0 * * * * * * 3941 0 0 + +C 16 * 0 * * * * * * * * * * * * * * * * * * 16 + 0 * * * * * * 3941 0 0 + +R 17 * * * * * * 6200 2932 * 3611 4805 * * * 558 * 4093 * * * 17 + 0 * * * * * * 3941 0 0 + +L 18 3969 * * * 2171 * * 3956 * 1318 5174 4935 * * 5264 3115 6073 4979 * * 18 + 38 * 5271 * * * * 3941 0 0 + +P 19 3003 * * 4863 * 3441 * * * 5901 * * 2369 4784 2418 3812 2907 3735 * 4814 19 + 41 * 5155 0 * 0 * 3898 1025 1025 + +G 20 * * * * 4949 454 6123 * * 4542 * * 3604 * * * * 3948 4909 * 20 + 72 * 4354 * * * 0 3901 0 1033 + +T 21 2209 5224 * * * 2278 * * * * * 3752 4867 * * 2344 2028 * * * 21 + 39 5224 * 0 * 669 1430 3845 1023 1191 + +P 22 5624 * * * * * * 5870 * 3138 * * 1935 4054 * 1200 3439 * * * 22 + 0 * * * * 0 * 3903 0 1033 + +E 23 5952 6850 * 3099 * * 4802 5475 3316 * 4903 * 4232 2872 1627 5880 4176 4193 * 5287 23 + 184 * 3059 * * * * 3943 0 0 + +A 24 4442 * 3710 2779 2989 4157 * 4702 * * * * 1565 * 5707 4382 3234 * * * 24 + 28 5707 * 0 * 0 * 3843 1000 1329 + +I 25 4358 * * * 6658 * 5279 3244 4611 2579 * 5958 * 4193 3566 3476 2812 2484 * 4816 25 + 0 * * * * * * 3943 0 0 + +C 26 * 31 * * * * * * * * * * * * * 5573 * * * * 26 + 0 * * * * * * 3943 0 0 + +A 27 560 6231 * * * 4233 * 3636 * 4029 4985 * * * * 3609 * * * * 27 + 0 * * * * * * 3943 0 0 + +T 28 2460 * 3819 * * 4273 * 5850 2738 4816 * 3203 * 4985 3239 2607 3596 * * * 28 + 0 * * * * * * 3943 0 0 + +Y 29 4929 * 5658 4919 2879 4802 4950 3737 4252 1699 5444 5287 5995 * 5262 * 3899 3734 * 4566 29 + 0 * * * * * * 3943 0 0 + +T 30 3911 1554 * * * * * * * * * * * * * 1040 3227 * * * 30 + 0 * * * * * * 3943 0 0 + +G 31 4193 * 3877 5850 * 811 * * 4816 * * * * * 2699 4815 4659 * * 5301 31 + 0 * * * * * * 3943 0 0 + +C 32 * 0 * * * * * * * * * * * * * * * * * * 32 + 47 * 4950 * * * * 3943 0 0 + +I 33 * * * 4672 * * * 2865 809 3414 * * * 4933 3475 * * 4789 * * 33 + 0 * * * * 0 * 3904 0 1044 + +I 34 * * 3573 4802 * * 3933 921 * 2779 * 3898 * * 4839 * 5475 5952 * * 34 + 0 * * * * * * 3943 0 0 + +I 35 * * 5287 4950 4358 * * 1603 * 3869 * * 4193 6130 * 3498 3006 2227 * * 35 + 46 * 4985 * * * * 3943 0 0 + +P 36 4173 * 2874 * 5250 * * 5139 * 4792 * * 4633 4645 5394 1055 4050 * * 3805 36 + 201 * 2941 * * * 0 3881 0 1045 + +G 37 5480 * 5485 3849 * 869 * * * * * * * 4548 5245 3819 4686 2648 * * 37 + 22 * 6064 * * 0 * 3796 0 1453 + +A 38 4564 * 4944 5167 * 1910 * * 4653 4470 * 3572 4946 * 5111 2927 1892 * * * 38 + 0 * * * * 0 * 3775 0 1000 + +T 39 * 4660 * 5366 4744 6406 * 3919 2193 * * 4099 * 5382 5207 3976 1348 * * 4782 39 + 0 * * * * * * 3764 0 0 + +C 40 * 58 * * * * * * 4660 * * * * * * * * * * * 40 + 0 * * * * * * 3764 0 0 + +P 41 * * 4306 * * * * * 3731 * * 6351 322 4587 * 5643 * * * * 41 + 0 * * * * * * 3764 0 0 + +G 42 4398 * 6402 * * 4592 * * 4079 * * 4374 1200 * 3395 2109 5063 * * * 42 + 0 * * * * * * 3764 0 0 + +D 43 * * 2024 6014 4769 1231 4660 * * * * 4386 3096 4744 * 4862 * * * * 43 + 0 * * * * * * 3764 0 0 + +Y 44 * * * * 2991 * 4830 * * 3705 * * * * * * * * 2806 691 44 + 0 * * * * * * 3718 0 0 + +A 45 4566 * 3231 5056 * * * 3467 * * * 5405 1034 * 5100 6384 2896 4532 * * 45 + 35 5366 * 4087 87 * * 3718 1002 0 + +N 46 * * 4370 * * * 2588 * 1723 * * 2200 * * 5953 * * * * 2008 46 + 0 * * 0 * * * 3699 0 0 + +// diff --git a/core/tests/data/aln_tpl/1crn_corrupted.hhm b/core/tests/data/aln_tpl/1crn_corrupted.hhm new file mode 100644 index 0000000000000000000000000000000000000000..01765421ff452e8094d103970ff78277a80e1a5e --- /dev/null +++ b/core/tests/data/aln_tpl/1crn_corrupted.hhm @@ -0,0 +1,185 @@ +####################################################################### +# THIS FILE IS CORRUPTED!!!!!!!!! # +# First three amino acids do not have the right number of frequencies # +####################################################################### + +HHsearch 1.5 +NAME c6a0deb50c4f69619d193f0fde0517c2 +FAM +FILE seq01 +COM /scicore/soft/apps/HH-suite/2.0.16-goolf-1.4.10/bin/hhmake -i /scratch/14369431.1.short.q/tmpqOaEJI/seq01.a3m -o /scratch/14369431.1.short.q/tmpqOaEJI/seq01.hhm +DATE Mon Mar 7 14:19:45 2016 +LENG 46 match states, 46 columns in multiple alignment +FILT 55 out of 57 sequences passed filter (-id 90 -cov 0 -qid 0 -qsc -20.00 -diff 100) +NEFF 3.8 +SEQ +>ss_pred PSIPRED predicted secondary structure +CCCCCCHHHHHHHHHCCCCCCCHHHHHHHCCCEEECCCCCCCCCCC +>ss_conf PSIPRED confidence values +9877783554421111168998455542048366208989999999 +>Consensus +xsCCpstxaRnxYnxCrxxgxsxxxCaxxsgCkixsgxxCPxxyxx +>c6a0deb50c4f69619d193f0fde0517c2 +TTCCPSIVARSNFNVCRLPGTPEAICATYTGCIIIPGATCPGDYAN +>gi|115605364|gb|ABJ15789.1| putative thionin precursor [Polygonum sibiricum] +-SCCQTTTARNIYNSCRLAGGSRERCASLSGCKHVTGNTCSPGWEK +>gi|4007745|emb|CAA65316.1| purothionin [Secale cereale] +-SCCKSTLGRNCYNLCRTRGAQK-LCANFCRCKLISSTSCPKEFPK +>gi|17381172|gb|AAL36398.1| putative thionin protein [Arabidopsis thaliana]gi|62320644|dbj|BAD95310.1| putative thionin [Arabidopsis thaliana] +-TCCPSQSTRKGFEDCISEGNLQILCSAESGCRDTYVGYCPSGFPY +>gi|802170|gb|AAB33011.1| crambin precursor=thionin variant Thi2Ca12 [Crambe abyssinica, seeds, Peptide Partial, 135 aa] +-SCCPTKSARNTFDVCRLTGTSMGLCAAISECKILSVTKCPSNLPY +>gi|21553588|gb|AAM62681.1| thionin Thi2.2 [Arabidopsis thaliana] +-ICCPTKDDRSVYFVCMLSVSSQFYCLLKSKCKNTSQTICPPGYTN +>gi|15218931|ref|NP_176784.1| thionin [Arabidopsis thaliana]gi|44888531|sp|Q9C8D6.1|THN24_ARATH RecName: Full=Probable thionin-2.4; Contains: RecName: Full=Probable thionin-2.4; Contains: RecName: Full=Acidic protein; Flags: Precursorgi|12322605|gb|AAG51299.1|AC026480_6 thionin, putative [Arabidopsis thaliana]gi|14190505|gb|AAK55733.1|AF380652_1 At1g66100/F15E12_20 [Arabidopsis thaliana]gi|15809774|gb|AAL06815.1| At1g66100/F15E12_20 [Arabidopsis thaliana]gi|332196341|gb|AEE34462.1| thionin [Arabidopsis thaliana] +-ICCPSIQARTFYNACLFAVGSPSSCIRNSSCLDISESTCPRGYTN +>gi|1729954|sp|Q05806.1|THN5_WHEAT RecName: Full=Type-5 thionin; Contains: RecName: Full=Type-5 thionin; AltName: Full=Type V thionin; Contains: RecName: Full=Acidic protein; Flags: Precursorgi|21885|emb|CAA43844.1| wheat type V thionin [Triticum aestivum]gi|21887|emb|CAA43845.1| wheat type V thionin [Triticum aestivum] +-DCGANPFKVACFNSCLLGPSTVFQCADFCACRLPAG--------- +>gi|120564556|gb|ABM30200.1| thionin [Brassica juncea] +-SCCPSTAARWAYYLCTNSWPLTPLCISHTGC-IESETTCPPGYPY +>gi|545031|gb|AAB29760.1| thionin precursor {clone Thi1Va1} [Viscum album=mistletoe, Peptide, 115 aa] +-ICCRAPAGKKCYNLCTA-lLSSE-TCANTCYCKDVSGETCPAD--- +# +NULL 3706 5728 4211 4064 4839 3729 4763 4308 4069 3323 5509 4640 4464 4937 4285 4423 3815 3783 6325 4665 +HMM A C D E F G H I K L M N P Q R S T V W Y + M->M M->I M->D I->M I->I D->M D->D Neff Neff_I Neff_D + 0 * * 0 * 0 * * * * +T 1 * * * * * * * * * * * * * * * * 0 * * + 0 * * * * * * 1000 0 0 + +T 2 * * 4186 * * 5283 * 2677 * 5264 * * * * * 833 3153 3986 * + 0 * * * * * * 3941 0 0 + +C 3 * 0 * * * * * * * * * * * * * * * * * + 0 * * * * * * 3941 0 0 + +C 4 * 122 * * 5259 4186 * * * * * * * * * * * * * * 4 + 0 * * * * * * 3941 0 0 + +P 5 4186 * * * * * * * 3250 * * * 549 5753 2856 * * * * * 5 + 0 * * * * * * 3941 0 0 + +S 6 5271 * 4429 * * * * * * * * 2329 * * 5941 866 2612 * * * 6 + 0 * * * * * * 3941 0 0 + +I 7 * * 5440 4820 * * * 3810 4212 * 5935 4983 3272 3386 4684 5283 1068 5174 * * 7 + 0 * * * * * * 3941 0 0 + +V 8 2614 * 4805 * 4186 * * 3036 * 3320 * * * 4094 * 2750 1838 5660 5873 * 8 + 0 * * * * * * 3941 0 0 + +A 9 646 * 4264 * * 2749 * * 4186 * * * * * * 3803 4903 * * * 9 + 0 * * * * * * 3941 0 0 + +R 10 * * * * * * * 5975 4265 * * * * * 189 * * 4186 * * 10 + 0 * * * * * * 3941 0 0 + +S 11 4186 * * 3803 * * * 5935 3608 * * 836 * * * 2953 5214 * 4935 5259 11 + 0 * * * * * * 3941 0 0 + +N 12 4935 1544 * 4787 5214 3942 * 2698 5264 * 4684 3736 * 5164 5174 * 3934 3625 * * 12 + 0 * * * * * * 3941 0 0 + +F 13 * * * * 2199 * * * * * * * * * * * * * * 354 13 + 0 * * * * * * 3941 0 0 + +N 14 * * 4222 3844 4805 * * * * 5264 * 510 * * * 4983 5283 5464 * 4935 14 + 0 * * * * * * 3941 0 0 + +V 15 2800 * 3844 * * * * 5271 * 2403 5174 * * * * 2663 3074 1902 * * 15 + 0 * * * * * * 3941 0 0 + +C 16 * 0 * * * * * * * * * * * * * * * * * * 16 + 0 * * * * * * 3941 0 0 + +R 17 * * * * * * 6200 2932 * 3611 4805 * * * 558 * 4093 * * * 17 + 0 * * * * * * 3941 0 0 + +L 18 3969 * * * 2171 * * 3956 * 1318 5174 4935 * * 5264 3115 6073 4979 * * 18 + 38 * 5271 * * * * 3941 0 0 + +P 19 3003 * * 4863 * 3441 * * * 5901 * * 2369 4784 2418 3812 2907 3735 * 4814 19 + 41 * 5155 0 * 0 * 3898 1025 1025 + +G 20 * * * * 4949 454 6123 * * 4542 * * 3604 * * * * 3948 4909 * 20 + 72 * 4354 * * * 0 3901 0 1033 + +T 21 2209 5224 * * * 2278 * * * * * 3752 4867 * * 2344 2028 * * * 21 + 39 5224 * 0 * 669 1430 3845 1023 1191 + +P 22 5624 * * * * * * 5870 * 3138 * * 1935 4054 * 1200 3439 * * * 22 + 0 * * * * 0 * 3903 0 1033 + +E 23 5952 6850 * 3099 * * 4802 5475 3316 * 4903 * 4232 2872 1627 5880 4176 4193 * 5287 23 + 184 * 3059 * * * * 3943 0 0 + +A 24 4442 * 3710 2779 2989 4157 * 4702 * * * * 1565 * 5707 4382 3234 * * * 24 + 28 5707 * 0 * 0 * 3843 1000 1329 + +I 25 4358 * * * 6658 * 5279 3244 4611 2579 * 5958 * 4193 3566 3476 2812 2484 * 4816 25 + 0 * * * * * * 3943 0 0 + +C 26 * 31 * * * * * * * * * * * * * 5573 * * * * 26 + 0 * * * * * * 3943 0 0 + +A 27 560 6231 * * * 4233 * 3636 * 4029 4985 * * * * 3609 * * * * 27 + 0 * * * * * * 3943 0 0 + +T 28 2460 * 3819 * * 4273 * 5850 2738 4816 * 3203 * 4985 3239 2607 3596 * * * 28 + 0 * * * * * * 3943 0 0 + +Y 29 4929 * 5658 4919 2879 4802 4950 3737 4252 1699 5444 5287 5995 * 5262 * 3899 3734 * 4566 29 + 0 * * * * * * 3943 0 0 + +T 30 3911 1554 * * * * * * * * * * * * * 1040 3227 * * * 30 + 0 * * * * * * 3943 0 0 + +G 31 4193 * 3877 5850 * 811 * * 4816 * * * * * 2699 4815 4659 * * 5301 31 + 0 * * * * * * 3943 0 0 + +C 32 * 0 * * * * * * * * * * * * * * * * * * 32 + 47 * 4950 * * * * 3943 0 0 + +I 33 * * * 4672 * * * 2865 809 3414 * * * 4933 3475 * * 4789 * * 33 + 0 * * * * 0 * 3904 0 1044 + +I 34 * * 3573 4802 * * 3933 921 * 2779 * 3898 * * 4839 * 5475 5952 * * 34 + 0 * * * * * * 3943 0 0 + +I 35 * * 5287 4950 4358 * * 1603 * 3869 * * 4193 6130 * 3498 3006 2227 * * 35 + 46 * 4985 * * * * 3943 0 0 + +P 36 4173 * 2874 * 5250 * * 5139 * 4792 * * 4633 4645 5394 1055 4050 * * 3805 36 + 201 * 2941 * * * 0 3881 0 1045 + +G 37 5480 * 5485 3849 * 869 * * * * * * * 4548 5245 3819 4686 2648 * * 37 + 22 * 6064 * * 0 * 3796 0 1453 + +A 38 4564 * 4944 5167 * 1910 * * 4653 4470 * 3572 4946 * 5111 2927 1892 * * * 38 + 0 * * * * 0 * 3775 0 1000 + +T 39 * 4660 * 5366 4744 6406 * 3919 2193 * * 4099 * 5382 5207 3976 1348 * * 4782 39 + 0 * * * * * * 3764 0 0 + +C 40 * 58 * * * * * * 4660 * * * * * * * * * * * 40 + 0 * * * * * * 3764 0 0 + +P 41 * * 4306 * * * * * 3731 * * 6351 322 4587 * 5643 * * * * 41 + 0 * * * * * * 3764 0 0 + +G 42 4398 * 6402 * * 4592 * * 4079 * * 4374 1200 * 3395 2109 5063 * * * 42 + 0 * * * * * * 3764 0 0 + +D 43 * * 2024 6014 4769 1231 4660 * * * * 4386 3096 4744 * 4862 * * * * 43 + 0 * * * * * * 3764 0 0 + +Y 44 * * * * 2991 * 4830 * * 3705 * * * * * * * * 2806 691 44 + 0 * * * * * * 3718 0 0 + +A 45 4566 * 3231 5056 * * * 3467 * * * 5405 1034 * 5100 6384 2896 4532 * * 45 + 35 5366 * 4087 87 * * 3718 1002 0 + +N 46 * * 4370 * * * 2588 * 1723 * * 2200 * * 5953 * * * * 2008 46 + 0 * * 0 * * * 3699 0 0 + +// diff --git a/core/tests/data/aln_tpl/5ua4_B.fasta b/core/tests/data/aln_tpl/5ua4_B.fasta new file mode 100644 index 0000000000000000000000000000000000000000..92c2692ce0377b23b6f6fc861ee165c3e39eb344 --- /dev/null +++ b/core/tests/data/aln_tpl/5ua4_B.fasta @@ -0,0 +1,4 @@ +>target +SESQEAVIRDIARHLARIGDRMEYGIRPGLVDSL +>pdb_id=4bd2, chain=B, assembly_id=1, offset=0 atoms +SESQEDIIRNIARHLAQVGDSMDRSIPPGL---- diff --git a/core/tests/data/aln_tpl/5ua4_B.hhm b/core/tests/data/aln_tpl/5ua4_B.hhm new file mode 100644 index 0000000000000000000000000000000000000000..a038b72184271d8c02f22a4f5f40cb28efa2fbe9 --- /dev/null +++ b/core/tests/data/aln_tpl/5ua4_B.hhm @@ -0,0 +1,144 @@ +HHsearch 1.5 +NAME target +FAM +FILE query_hhblits +COM /scicore/soft/apps/HH-suite/2.0.16-goolf-1.4.10-Boost-1.53.0/bin/hhmake -i <101 characters> -o <101 characters> +DATE Sat Dec 31 05:49:23 2016 +LENG 34 match states, 34 columns in multiple alignment +FILT 11 out of 13 sequences passed filter (-id 90 -cov 0 -qid 0 -qsc -20.00 -diff 100) +NEFF 2.0 +SEQ +>ss_pred PSIPRED predicted secondary structure +CHHHHHHHHHHHHHHHHHHHHHHCCCCCHHHCCC +>ss_conf PSIPRED confidence values +9159999999999999975443036780111159 +>Consensus +SEsQEeiIxxiArxLAqiGDxmdxsIxPxlvxxL +>target +SESQEAVIRDIARHLARIGDRMEYGIRPGLVDSL +>gi|12083631|ref|NP_073175.1| BH3-interacting domain death agonist [Rattus norvegicus]gi|88909592|sp|Q9JLT6.2|BID_RAT RecName: Full=BH3-interacting domain death agonist; AltName: Full=p22 BID; Short=BID; Contains: RecName: Full=BH3-interacting domain death agonist p15; AltName: Full=p15 BID; Contains: RecName: Full=BH3-interacting domain death agonist p13; AltName: Full=p13 BID; Contains: RecName: Full=BH3-interacting domain death agonist p11; AltName: Full=p11 BIDgi|8050830|gb|AAF71759.1|AF259503_1 BID protein [Rattus norvegicus]gi|149049571|gb|EDM02025.1| BH3 interacting domain death agonist [Rattus norvegicus]gi|157041211|dbj|BAF79674.1| desmocollin type 4 [Rattus norvegicus] +SESQDEVIHNIARHLAQAGDELDHSIQPTLVRQL +>gi|115495761|ref|NP_001068914.1| BH3 interacting domain death agonist [Bos taurus]gi|109658413|gb|AAI18360.1| BH3 interacting domain death agonist [Bos taurus]gi|296486995|gb|DAA29108.1| BH3 interacting domain death agonist [Bos taurus] +SERQEEAVREVARQLAQIGDRLEGSIHPGMVAGL +>gi|281341033|gb|EFB16617.1| hypothetical protein PANDA_016465 [Ailuropoda melanoleuca] +SQGQEEIIQDIARQLAQIGDDMDHSIHPGLMNNL +>gi|307335680|gb|ADN43410.1| BH3 interacting domain death agonist [Lemur catta] +SESEEDLIQTLARQLAQIGDSMERSIPPRLVDHL +>gi|307335678|gb|ADN43409.1| BH3 interacting domain death agonist [Felis catus] +SESQEEIIQDIARQLAQIGDRMDHSIHPRVVNNL +>gi|307335682|gb|ADN43411.1| BH3 interacting domain death agonist [Saimiri boliviensis] +SESQEDIIRNIARQLAQVGDSMDRSIPPGLVNRL +>gi|291412643|ref|XP_002722593.1| PREDICTED: BH3 interacting domain death agonist-like [Oryctolagus cuniculus] +SESQEEVIRNIARRLAQIGDRMDHHIQPELLNNL +>gi|296191314|ref|XP_002743575.1| PREDICTED: hypothetical protein LOC100403496 [Callithrix jacchus] +SESQEDVIRSIARHLAHIGDSMDRSIPLDLVDRL +>gi|334348150|ref|XP_001373913.2| PREDICTED: envelope glycoprotein-like [Monodelphis domestica] +SESQEEIIQRIAMQLAKIGDNIENSIQPRVVDEL +# +NULL 3706 5728 4211 4064 4839 3729 4763 4308 4069 3323 5509 4640 4464 4937 4285 4423 3815 3783 6325 4665 +HMM A C D E F G H I K L M N P Q R S T V W Y + M->M M->I M->D I->M I->I D->M D->D Neff Neff_I Neff_D + 0 * * 0 * 0 * * * * +S 1 * * * * * * * * * * * * * * * 0 * * * * 1 + 0 * * * * * * 1973 0 0 + +E 2 * * * 143 * * * * * * * * * 3405 * * * * * * 2 + 0 * * * * * * 1973 0 0 + +S 3 * * * * * 3405 * * * * * * * * 2994 358 * * * * 3 + 0 * * * * * * 1973 0 0 + +Q 4 * * * 3344 * * * * * * * * * 150 * * * * * * 4 + 0 * * * * * * 1973 0 0 + +E 5 * * 3425 141 * * * * * * * * * * * * * * * * 5 + 0 * * * * * * 1973 0 0 + +A 6 3391 * 1939 635 * * * * * * * * * * * * * * * * 6 + 0 * * * * * * 1973 0 0 + +V 7 2994 * * * * * * 1285 * 3344 * * * * * * * 1452 * * 7 + 0 * * * * * * 1973 0 0 + +I 8 * * * * * * * 194 * * * * * * * * * 2994 * * 8 + 0 * * * * * * 1973 0 0 + +R 9 * * * * * * 2586 * * * * * * 1457 1092 * * * * * 9 + 0 * * * * * * 1973 0 0 + +D 10 * * 1981 2994 * * * * * * * 1628 * * 3212 3454 3344 * * * 10 + 0 * * * * * * 1973 0 0 + +I 11 * * * * * * * 366 * 3344 * * * * * * * 2994 * * 11 + 0 * * * * * * 1973 0 0 + +A 12 0 * * * * * * * * * * * * * * * * * * * 12 + 0 * * * * * * 1973 0 0 + +R 13 * * * * * * * * * * 3212 * * * 165 * * * * * 13 + 0 * * * * * * 1973 0 0 + +H 14 * * * * * * 1502 * * * * * * 834 3540 * * * * * 14 + 0 * * * * * * 1973 0 0 + +L 15 * * * * * * * * * 0 * * * * * * * * * * 15 + 0 * * * * * * 1973 0 0 + +A 16 0 * * * * * * * * * * * * * * * * * * * 16 + 0 * * * * * * 1973 0 0 + +R 17 * * * * * * 3454 * 3212 * * * * 503 3391 * * * * * 17 + 0 * * * * * * 1973 0 0 + +I 18 3425 * * * * * * 259 * * * * * * * * * 3816 * * 18 + 0 * * * * * * 1973 0 0 + +G 19 * * * * * 0 * * * * * * * * * * * * * * 19 + 0 * * * * * * 1973 0 0 + +D 20 * * 0 * * * * * * * * * * * * * * * * * 20 + 0 * * * * * * 1973 0 0 + +R 21 * * 3405 2586 * * * * * * * 3212 * * 1433 1939 * * * * 21 + 0 * * * * * * 1973 0 0 + +M 22 * * * * * * * 3212 * 2194 570 * * * * * * * * * 22 + 0 * * * * * * 1973 0 0 + +E 23 * * 804 1227 * * * * * * * * * * * * * * * * 23 + 0 * * * * * * 1973 0 0 + +Y 24 * * * * * 2994 1285 * * * * 3212 * * 1939 * * * * 3391 24 + 0 * * * * * * 1973 0 0 + +G 25 * * * * * 3391 3540 * * * * 3766 * * * 424 * * * * 25 + 0 * * * * * * 1973 0 0 + +I 26 * * * * * * * 0 * * * * * * * * * * * * 26 + 0 * * * * * * 1973 0 0 + +R 27 * * * * * * 1819 * * * * * 1939 1472 3391 * * * * * 27 + 0 * * * * * * 1973 0 0 + +P 28 * * * * * * * * * 3454 * * 138 * * * * * * * 28 + 0 * * * * * * 1973 0 0 + +G 29 * * 3454 3540 * 1372 * * * * * * * * 1889 * 2586 * * * 29 + 0 * * * * * * 1973 0 0 + +L 30 * * * * * * * * * 508 2994 * * * * * * 2544 * * 30 + 0 * * * * * * 1973 0 0 + +V 31 * * * * * * * * * 3540 3405 * * * * * * 287 * * 31 + 0 * * * * * * 1973 0 0 + +D 32 2994 * 1348 * * * * * * * * 1667 * * 2586 * * * * * 32 + 0 * * * * * * 1973 0 0 + +S 33 * * * 3212 * 2994 3344 * * * * 2036 * 2586 2624 3391 * * * * 33 + 0 * * * * * * 1973 0 0 + +L 34 * * * * * * * * * 0 * * * * * * * * * * 34 + 0 * * 0 * * * 1973 0 0 + +// diff --git a/core/tests/test_pm3argparse.py b/core/tests/test_pm3argparse.py index 0e0d617c98734c9348cae38e24015a779077871e..0b2310e8cf0db4494185ac68bf521e6a7b7fb681 100644 --- a/core/tests/test_pm3argparse.py +++ b/core/tests/test_pm3argparse.py @@ -1101,7 +1101,113 @@ class PM3ArgParseTests(unittest.TestCase): self.assertEqual(len(self.log.messages['ERROR']), 1) self.assertEqual(self.log.messages['ERROR'], ['Could not find chain with ID tpl (should be '+ - '<FILE>.<CHAIN>) to attach to sequence named tpl']) + '<FILE>.<CHAIN>) to attach to sequence named tpl']) + + def testProfileDoesNotExist(self): + parser = pm3argparse.PM3ArgumentParser(__doc__, action=False) + parser.AddAlignment() + parser.AddProfile() + parser.AssembleParser() + with self.assertRaises(SystemExit) as ecd: + parser.Parse(['-f', 'data/aln_tpl/1crn.fasta', + '-s', 'data/aln_tpl/IDoNotExist.hhm']) + self.assertEqual(ecd.exception.code, 51) + self.assertEqual(len(self.log.messages['ERROR']), 1) + + def testProfileIsFuckedUp(self): + parser = pm3argparse.PM3ArgumentParser(__doc__, action=False) + parser.AddAlignment() + parser.AddProfile() + parser.AssembleParser() + with self.assertRaises(SystemExit) as ecd: + parser.Parse(['-f', 'data/aln_tpl/1crn.fasta', + '-s', 'data/aln_tpl/1crn_corrupted.hhm']) + self.assertEqual(ecd.exception.code, 52) + self.assertEqual(len(self.log.messages['ERROR']), 1) + + def testAdditionalProfile(self): + parser = pm3argparse.PM3ArgumentParser(__doc__, action=False) + parser.AddAlignment() + parser.AddProfile() + parser.AssembleParser() + with self.assertRaises(SystemExit) as ecd: + parser.Parse(['-f', 'data/aln_tpl/1crn.fasta', + '-s', 'data/aln_tpl/1crn.hhm', + '-s', 'data/aln_tpl/5ua4_B.hhm']) + self.assertEqual(ecd.exception.code, 53) + self.assertEqual(len(self.log.messages['ERROR']), 1) + + def testUniqueProfiles(self): + parser = pm3argparse.PM3ArgumentParser(__doc__, action=False) + parser.AddAlignment() + parser.AddProfile() + parser.AssembleParser() + with self.assertRaises(SystemExit) as ecd: + parser.Parse(['-f', 'data/aln_tpl/1crn.fasta', + '-s', 'data/aln_tpl/1crn.hhm', + '-s', 'data/aln_tpl/1crn.hhm']) + self.assertEqual(ecd.exception.code, 54) + self.assertEqual(len(self.log.messages['ERROR']), 1) + + def testAllCovered(self): + parser = pm3argparse.PM3ArgumentParser(__doc__, action=False) + parser.AddAlignment() + parser.AddProfile() + parser.AssembleParser() + with self.assertRaises(SystemExit) as ecd: + parser.Parse(['-f', 'data/aln_tpl/1crn.fasta', + '-f', 'data/aln_tpl/5d52-1_cut_A.fasta', + '-s', 'data/aln_tpl/1crn.hhm']) + self.assertEqual(ecd.exception.code, 55) + self.assertEqual(len(self.log.messages['ERROR']), 1) + + def testAllGoodOneChain(self): + parser = pm3argparse.PM3ArgumentParser(__doc__, action=False) + parser.AddAlignment() + parser.AddProfile() + parser.AssembleParser() + + opts = parser.Parse(['-f', 'data/aln_tpl/1crn.fasta', + '-s', 'data/aln_tpl/1crn.hhm']) + + self.assertEqual(len(opts.alignments), 1) + self.assertEqual(len(opts.profiles), 1) + self.assertEqual(opts.alignments[0].GetSequence(0).GetGaplessString(), + opts.profiles[0].sequence) + + def testAllGoodHomoOligo(self): + parser = pm3argparse.PM3ArgumentParser(__doc__, action=False) + parser.AddAlignment() + parser.AddProfile() + parser.AssembleParser() + + opts = parser.Parse(['-f', 'data/aln_tpl/1crn.fasta', + '-f', 'data/aln_tpl/1crn.fasta', + '-s', 'data/aln_tpl/1crn.hhm']) + self.assertEqual(len(opts.alignments), 2) + self.assertEqual(len(opts.profiles), 2) + self.assertEqual(opts.alignments[0].GetSequence(0).GetGaplessString(), + opts.profiles[0].sequence) + self.assertEqual(opts.alignments[1].GetSequence(0).GetGaplessString(), + opts.profiles[1].sequence) + + def testAllGoodHeteroOligo(self): + parser = pm3argparse.PM3ArgumentParser(__doc__, action=False) + parser.AddAlignment() + parser.AddProfile() + parser.AssembleParser() + + opts = parser.Parse(['-f', 'data/aln_tpl/1crn.fasta', + '-f', 'data/aln_tpl/5ua4_B.fasta', + '-s', 'data/aln_tpl/1crn.hhm', + '-s', 'data/aln_tpl/5ua4_B.hhm']) + self.assertEqual(len(opts.alignments), 2) + self.assertEqual(len(opts.profiles), 2) + self.assertEqual(opts.alignments[0].GetSequence(0).GetGaplessString(), + opts.profiles[0].sequence) + self.assertEqual(opts.alignments[1].GetSequence(0).GetGaplessString(), + opts.profiles[1].sequence) + # test options: --disable-aln check (for amino acids) # test options: --disable-input-checks (for all)