diff --git a/modules/bindings/pymod/clustalw.py b/modules/bindings/pymod/clustalw.py index fdf024ebc0c2acaeb7825371e53af56ff65ecf74..c8221fb622bb00dd0730b9d029c06c28c8f38b59 100644 --- a/modules/bindings/pymod/clustalw.py +++ b/modules/bindings/pymod/clustalw.py @@ -1,14 +1,26 @@ from ost.bindings import utils -from ost import settings, io, seq +from ost import settings, io, seq, LogError import os import subprocess -def ClustalW(seq1, seq2, clustalw=None): +def ClustalW(seq1, seq2=None, clustalw=None, keep_files=False): clustalw_path=settings.Locate(('clustalw', 'clustalw2'), explicit_file_name=clustalw) - seq_list=seq.CreateSequenceList() - seq_list.AddSequence(seq1) - seq_list.AddSequence(seq2) + + if seq2!=None: + if isinstance(seq1, seq.SequenceHandle) and isinstance(seq2, seq.SequenceHandle): + seq_list=seq.CreateSequenceList() + seq_list.AddSequence(seq1) + seq_list.AddSequence(seq2) + else: + LogError("WARNING: Specify at least two Sequences") + return + elif isinstance(seq1, seq.SequenceList): + seq_list=seq1 + else: + LogError("WARNING: Specify either two SequenceHandles or one SequenceList") + return + temp_dir=utils.TempDirWithFiles((seq_list,)) out=os.path.join(temp_dir.dirname, 'out.fasta') command='%s -infile="%s" -output=fasta -outfile="%s"' % (clustalw_path, @@ -17,15 +29,17 @@ def ClustalW(seq1, seq2, clustalw=None): ps=subprocess.Popen(command, shell=True, stdout=subprocess.PIPE) ps.stdout.readlines() aln=io.LoadAlignment(out) - aln.SetSequenceOffset(0,seq1.GetSequenceOffset()) - aln.SetSequenceOffset(1,seq2.GetSequenceOffset()) - if seq1.HasAttachedView(): - aln.AttachView(0,seq1.GetAttachedView().Copy()) - if seq2.HasAttachedView(): - aln.AttachView(1,seq2.GetAttachedView().Copy()) + + for sequence in seq_list: + for seq_num,aln_seq in enumerate(aln.sequences): + if aln_seq.GetName()==sequence.GetName(): + break + aln.SetSequenceOffset(seq_num,sequence.GetSequenceOffset()) + if sequence.HasAttachedView(): + aln.AttachView(seq_num,sequence.GetAttachedView().Copy()) - #~ temp_dir.Cleanup() + if not keep_files: + temp_dir.Cleanup() return aln - diff --git a/modules/bindings/tests/CMakeLists.txt b/modules/bindings/tests/CMakeLists.txt index 0c89630581708b409e4ba1d428ef4630554fe001..5a3f44e3d827f7571b5aaed58fde891abc8543bb 100644 --- a/modules/bindings/tests/CMakeLists.txt +++ b/modules/bindings/tests/CMakeLists.txt @@ -1,5 +1,6 @@ set(OST_BINDINGS_UNIT_TESTS test_msms.py + test_clustalw.py ) ost_unittest(bindings "${OST_BINDINGS_UNIT_TESTS}") diff --git a/modules/bindings/tests/test_clustalw.py b/modules/bindings/tests/test_clustalw.py new file mode 100644 index 0000000000000000000000000000000000000000..fc93a19b6d4c270d46f1c79a3554077e8295f908 --- /dev/null +++ b/modules/bindings/tests/test_clustalw.py @@ -0,0 +1,44 @@ +import unittest +from ost import * +from ost import settings +from ost.bindings import clustalw + +class TestClustalWBindings(unittest.TestCase): + + def setUp(self): + self.protein = io.LoadEntity("testfiles/testprotein.pdb").Select("cname=A") + + self.targetseq = io.LoadSequence("testfiles/test.fasta") + self.targetseq.AttachView(self.protein) + self.templseq = io.LoadSequence("testfiles/similar.fasta") + self.multseq = io.LoadSequenceList("testfiles/multiple.fasta") + self.pw_alignment = io.LoadAlignment("testfiles/pairwise_aln.fasta") + self.mult_alignment = io.LoadAlignment("testfiles/multiple_aln.fasta") + + def testPairwiseClustalW(self): + aln=clustalw.ClustalW(self.targetseq, self.templseq) + assert self.pw_alignment.ToString(80) == aln.ToString(80), \ + "Pairwise alignment differs from precomputed one" + + def testAttachedViewClustalW(self): + aln=clustalw.ClustalW(self.targetseq, self.templseq) + assert aln.FindSequence("testseq").HasAttachedView(), \ + "Aligned sequence doesn't have an attached view" + + def testMultipleClustalW(self): + aln=clustalw.ClustalW(self.multseq) + assert self.mult_alignment.ToString(80) == aln.ToString(80), \ + "Multiple alignment differs from precomputed one" + + +if __name__ == "__main__": + # test if clustalw package is available on system, otherwise ignore tests + try: + clustalw_path=settings.Locate(('clustalw', 'clustalw2')) + except(settings.FileNotFound): + print "Could not find clustalw executable: ignoring unit tests" + exit(0) + try: + unittest.main() + except Exception, e: + print e \ No newline at end of file diff --git a/modules/bindings/tests/testfiles/multiple.fasta b/modules/bindings/tests/testfiles/multiple.fasta new file mode 100644 index 0000000000000000000000000000000000000000..56895f50475694418d7f7c4dab0cb5d0a0dc737f --- /dev/null +++ b/modules/bindings/tests/testfiles/multiple.fasta @@ -0,0 +1,58 @@ +>P50198.1 +MANRLAGKVALITGGASGLGAAQAKRFAEEGAKVVIGDLNEEMAKGVVAEIRAAGGDALFIRLDVTDAASWNNAIAAAVDGFGGLTTLSNTAGIIHPGGFEEESIEGWNKMVAVNQTAIFLGIKAAIPELVKSGNGSIINISSLIGMFPTAGNASYCATKAAVRIMSKAAALEFVDRGVRVNTIVPGGMNTPITANVPPDVLKQQTSQIPMGKLGDPIDIANGALFLASDEAKYITGVDLPIDGGWSVGV +>Q9WYG0.1 +MNFQGKVVLITGAGSGIGKKAAVMFAERGAKVAINDISEEKGKETVELIKSMGGEAAFIFGDVAKDAEQIVKKTVETFGRLDILVNNAGIVPYGNIEETSEEDFDKTMAVNVKGPFLLSKYAVEQMKKQGGGVIVNVSSEAGLIGIPRRCVYSVSKAALLGLTRSLAVDYVDYGIRVNAVCPGTTQSEGLMARVKASPNPEELLKKMTSRIPMKRLGKEEEIAFAILFAACDEAGFMTGSIINIDGGSTAV +>Q8KWT4.1 +MNLTDKTVLITGGASGIGYAAVQAFLNQQANVVVADIDEAQGEAMIRKENNDRLHFVHTDITDEPACQNAIRSAVDKFGGLDVLINNAGIEIVAPIHEMELSNWNKVLNVNLTGMFLMSKHALKYMLKSGKGNIINTCSVGGVVAWPDIPAYNASKGGVLQLTRSMAVDYAKHNIRVNCVCPGIIDTPLNEKSFLENNEGTLEEIKKEKAKVNPLLRLGKPEEIANVMLFLASDLSSYMTGSAITADGGYTAQ +>P39640.2 +MNLTDKTVLITGGASGIGYAAVQAFLGQQANVVVADIDEAQGEAMVRKENNDRLHFVQTDITDEAACQHAVESAVHTFGGLDVLINNAGIEIVAPIHEMELSDWNKVLQVNLTGMFLMSKHALKHMLAAGKGNIINTCSVGGLVAWPDIPAYNASKGGVLQLTKSMAVDYAKHQIRVNCVCPGIIDTPLNEKSFLENNEGTLEEIKKEKAKVNPLLRLGKPEEIANVMLFLASDLSSYMTGSAITADGGYTAQ +>P46331.2 +MGRLENKTAVITGAATGIGQATAEVFANEGARVIIGDINKDQMEETVDAIRKNGGQAESFHLDVSDENSVKAFADQIKDACGTIDILFNNAGVDQEGGKVHEYPVDLFDRIIAVDLRGTFLCSKYLIPLMLENGGSIINTSSMSGRAADLDRSGYNAAKGGITNLTKAMAIDYARNGIRVNSISPGTIETPLIDKLAGTKEQEMGEQFREANKWITPLGRLGQPKEMATVALFLASDDSSYVTGEDITADGGIMAYTWPGKMLIEEKWKEETK +>P50197.1 +MSDLSGKTIIVTGGGSGIGRATVELLVASGANVPVADINDEAGEAVVATSGGKAAYFRCDIAQEEDVKALVAQTLAAFGGLDGSFNNAAIPQAGLPLAEVSLERFRQSMDINVTGTFLCMKYQILAMIERGTKGSIVNTASAAGVVGVPMHGEYVGAKHAVVGLTRVAAADYGKHGIRVNALVPGAVRTPMLQRAMDNDAGLEPYLNSIHPIGRFSEPHEQAQAAVWLLSDAASFVTGSCLAADGGFTAI +>P42317.2 +MRKQVALVTGAAGGIGFEIAREFAREGASVIVSDLRPEACEKAASKLAEEGFDAAAIPYDVTKEAQVADTVNVIQKQYGRLDILVNNAGIQHVAPIEEFPTDTFEQLIKVMLTAPFIAMKHVFPIMKKQQFGRIINIASVNGLVGFAGKSAYNSAKHGVIGLTKVGALEGAPHGITVNALCPGYVDTQLVRNQLSDLSKTRNVPYDSVLEQVIFPLVPQKRLLSVKEIADYAVFLASEKAKGVTGQAVVLDGGYTAQ +>target +MRLDGKTALITGSARGIGRAFAEAYVREGARVAIADINLEAARATAAEIGPAACAIALDVTDQASIDRCVAELLDRWGSIDILVNNAALFDLAPIVEITRESYDRLFAINVSGTLFMMQAVARAMIAGGRGGKIINMASQAGRRGEALVGVYCATKAAVISLTQSAGLNLIRHGINVNAIAPGVVDGEHWDGVDAKFADYENLPRGEKKRQVGAAVPFGRMGRAEDLTGMAIFLATPEADYIVAQTYNVDGGNWMS +>Q59787.1 +MRLDGKTALITGSARGIGRAFAEAYVREGARVAIADINLEAARATAAEIGPAACAIALDVTDQASIDRCVAELLDRWGSIDILVNNAALFDLAPIVEITRESYDRLFAINVSGTLFMMQAVARAMIAGGRGGKIINMASQAGRRGEALVGVYCATKAAVISLTQSAGLNLIRHGINVNAIAPGVVDGEHWDGVDAKFADYENLPRGEKKRQVGAAVPFGRMGRAEDLTGMAIFLATPEADYIVAQTYNVDGGNWMS +>Q48436.2 +MKKVALVTGAGQGIGKAIALRLVKDGFAVAIADYNDATAKAVASEINQAGGRAMAVKVDVSDRDQVFAAVEQARKTLGGFDVIVNNAGVAPSTPIESITPEIVDKVYNINVKGVIWGIQAAVEAFKKEGHGGKIINACSQAGHVGNPELAVYSSSKFAVRGLTQTAARDLAPLGITVNGYCPGIVKTPMWAEIDRQVSEAAGKPLGYGTAEFAKRITLGRLSEPEDVAACVSYLASPDSDYMTGQSLLIDGGMVFN +>Q04520.1 +MQKVALVTGAGQGIGKAIALRLVKDGFAVAIADYNDATATAVAAEINQAGGRAVAIKVDVSRRDQVFAAVEQARKALGGFNVIVNNAGIAPSTPIESITEEIVDRVYNINVKGVIWGMQAAVEAFKKEGHGGKIVNACSQAGHVGNPELAVYSSSKFAVRGLTQTAARDLAPLGITVNGFCPGIVKTPMWAEIDRQCRKRRANRWATARLNLPNASPLAACRSLKTSPPACRSSPARIPTI +>P66776.1 +MTNNKVALVTGGAQGIGFKIAERLVEDGFKVAVVDFNEEGAKAAALKLSSDGTKAIAIKADVSNRDDVFNAVRQTAAQFGDFHVMVNNAGLGPTTPIDTITEEQFKTVYGVNVAGVLWGIQAAHEQFKKFNHGGKIINATSQAGVEGNPGLSLYCSTKFAVRGLTQVAAQDLASEGITVNAFAPGIVQTPMMESIAVATAEEAGKPEAWGWEQFTSQIALGRVSQPEDVSNVVSFLAGKDSDYITGQTIIVDGGMRFR +>Q5HKG6.1 +MSKTAIITGSAGGLGKGIAERLANDGFNIVLQDINEALLLETEKEFKEKGYQAVAFKSDVSKKKEQEELVQFAVTEFGQLDVMVNNAGVDAVTPILEIGEEELSKLFNINVFGTLFGIQAAANQFIKQKSKGKIINACSIAGHESYEVLGTYSATKHSVRSFTQTAAKELADKGITVNAYCPGVAKTEMWDRIDEEMVKLDDSLEIGDAFEAFSSEIKLGRYQEPSDVANLVSFLASNDSDYITGQSILTDGGLVYR +>Q8CQD2.1 +MSKTAIITGAAGGLGKGIAERLANDGFNIVLQDINEALLLETEKEFKEKGYQAVAYKSDVSKKKEQEELVQFAVTEFGQLDVMVNNAGVDAVTPILEIGEEELSKLFNINVFGTLFGIQAAANQFIKQKSKGKIINACSIAGHESYEVLGTYSATKHSVRSFTQTAAKELADKGITVNAYCPGVAKTEMWDRIDEEMVKLDDSLEIGDAFEAFSSEIKLGRYQEPSDVANLVSFLASNDSDYITGQSILTDGGLVYR +>P50199.1 +MSHPDLFSLSGARALVTGASRGIGLTLAKGLARYGAEVVLNGRNAESLDSAQSGFEAEGLKASTAVFDVTDQDAVIDGVAAIERDMGPIDILINNAGIQRRAPLEEFSRKDWDDLMSTNVNAVFFVGQAVARHMIPRGRGKIVNICSVQSELARPGIAPYTATKGAVKNLTKGMATDWGRHGLQINGLAPGYFATEMTERLVADEEFTDWLCKRTPAGRWGQVEELVGAAVFLSSRASSFVNGQVLMVDGGITVSL +>P0A9Q0.1 +MNDLFSLAGKNILITGSAQGIGFLLATGLGKYGAQIIINDITAERAELAVEKLHQEGIQAVAAPFNVTHKHEIDAAVEHIEKDIGPIDVLVNNAGIQRRHPFTEFPEQEWNDVIAVNQTAVFLVSQAVTRHMVERKAGKVINICSMQSELGRDTITPYAASKGAVKMLTRGMCVELARHNIQVNGIAPGYFKTEMTKALVEDEAFTAWLCKRTPAARWGDPQELIGAAVFLSSKASDFVNGHLLFVDGGMLVAV +>Q05528.1 +MILNTFNLQGKVALITGCDTGLGQGMAVGLAEAGCDIVGVNIVEPKETIEKVTAVGRRFLSLTADMSDISGHAALVEKAVAEFGKVDILVNNAGIIRREDAIEFSEKNWDDVMNLNIKSVFFMSQTVARQFIKQGHGGKIINIASMLSFQGGIPVPSYTASKKRVMGITRIVANEWAKHNINVNAIAPGYMATNNTQHVRADQDRSKEILDRIPAGRWGLPQDLQGPAVFLASSASDYVNGYTIAVDGGWLAR +>P37769.2 +MILSAFSLEGKVAVVTGCDTGLGQGMALGLAQAGCDIVGINIVEPTETIEQVTALGRRFLSLTADLRKIDGIPALLDRAVAEFGHIDILVNNAGLIRREDALEFSEKDWDDVMNLNIKSVFFMSQAAAKHFIAQGNGGKIINIASMLSFQGGIRVPSYTASKSGVMGVTRLMANEWAKHNINVNAIAPGYMATNNTQQLRADEQRSAEILDRIPAGRWGLPSDLMGPIVFLASSASDYVNGYTIAVDGGWLAR +>P50842.1 +MGYLHDAFSLKGKTALVTGPGTGIGQGIAKALAGAGADIIGTSHTSSLSETQQLVEQEGRIFTSFTLDMSKPEAIKDSAAELFENRQIDILVNNAGIIHREKAEDFPEENWQHVLNVNLNSLFILTQLAGRHMLKRGHGKIINIASLLSFQGGILVPAYTASKHAVAGLTKSFANEWAASGIQVNAIAPGYISTANTKPIRDDEKRNEDILKRIPAGRWGQADDIGGTAVFLASRASDYVNGHILAVDGGWLSR +>Q1JP75.1 +MDLRLAGRRALVTGAGKGIGRSIVKALHAAGARVVAVSRTQADLDSLVRECPGVETVCVDLADWEATEQALGGVGPVDLLVNNAAVAFLQPFLEVTKEAYDMSFSVNLRAVIQVSQIVARGLIARGAPGVIVNVSSQASQRGLTNHSVYCSTKGALDTLTKVMAVELGPHKIRVNAVNPTVVMTPMGQAAWSDPQKAKAMLDRIPLGRFAEVENVVDTILFLLSDRSSMTTGSTVPVDGGFLAT +>Q91XV4.1 +MDLGLAGRRALVTGAGKGIGRSTVLALQAAGAHVVAVSRTQADLDSLVSECPGVETVCVDLADWEATEQALSSVGPVDLLVNNAAVALLQPFLEVTKEAFDMSFNVNLRAVIQVSQIVARGMIARGAPGAIVNVSSQASQRALANHSVYCSTKGALDMLTKMMALELGPHKIRVNAVNPTVVMTSMGRTNWSDPHKAKVMLDRIPLGKFAEVENVVDAILFLLSHRSNMTTGSTLPVDGGFLVT +>Q7Z4W1.2 +MELFLAGRRVLVTGAGKGIGRGTVQALHATGARVVAVSRTQADLDSLVRECPGIEPVCVDLGDWEATERALGSVGPVDLLVNNAAVALLQPFLEVTKEAFDRSFEVNLRAVIQVSQIVARGLIARGVPGAIVNVSSQCSQRAVTNHSVYCSTKGALDMLTKVMALELGPHKIRVNAVNPTVVMTSMGQATWSDPHKAKTMLNRIPLGKFAEVEHVVNAILFLLSDRSGMTTGSTLPVEGGFWAC +>P40397.2 +MANQKKKTLPPQHQNQQPGFEYLMDPRPVFDKPKKAKKLEGKTAIITGGDSGIGRAVSVLFAKEGANVVIVYLNEHQDAEETKQYVEKEGVKCLLIAGDVGDEAFCNDVVGQASQVFPSIDILVNNAAEQHVQPSIEKITSHQLIRTFQTNIFSMFYLTKAVLPHLKKGSSIINTASITAYKGNKTLIDYSATKGAIVTFTRSLSQSLVQQGIRVNAVAPGPIWTPLIPASFAAKDVEVFGSDVPMERPGQPVEVAPSYLYLASDDSTYVTGQTIHVNGGTIVNG +>O07575.1 +MNPMDRQTEGQEPQHQDRQPGIESKMNPLPLSEDEDYRGSGKLKGKVAIITGGDSGIGRAAAIAFAKEGADISILYLDEHSDAEETRKRIEKENVRCLLIPGDVGDENHCEQAVQQTVDHFGKLDILVNNAAEQHPQDSILNISTEQLEKTFRTNIFSMFHMTKKALPHLQEGCAIINTTSITAYEGDTALIDYSSTKGAIVSFTRSMAKSLADKGIRVNAVAPGPIWTPLIPATFPEEKVKQHGLDTPMGRPGQPVEHAGAYVLLASDESSYMTGQTIHVNGGRFIST +>Q5KTS5.1 +MASGGQFPPQKQESQPGKEHLMDPSPQHASPHYKPANKLQGKVALVTGGDSGIGRSVCYHFALEGATVAFTFVKGHEDKDANETLELLRKAKSSDAKDPIAIAADLGFDDNCKKVVDQVVNAFGSIDVLVNNAAEQYKASTVEDIDEERLERVFRTNIFAYFFMARHALKHMREGSTIINTTSINAYKGNAKLLDYTATKGAIVAFTRGLSLQLISKGIRVNGVAPGPVWTPLIPSSFDEEEVKQFGSEVPMKRAGQPYEIATAYVFLASCDSSYYSGQVLHPNGGAIVNG +>Q49117.2 +MSKLEGKVAVVTGASKGIGAAIAKALAKDGAAVVVNYASSKAGADAVVEAITAAGGKAIAVQADVSQAVQARGLVEAAVQQFGRLDVLVNNSGVYEFAAIEEVTEEHYRRIFDVNVLGVLLATQAASKHLGEGGSIINISSVVTDVLMPTSAVYSGTKGALNAISGVLANELAPRKIRVNVVSPGYVVTEGTHTAGIAGSEMEAGLVAQTPLGRSGQPDDIAGVVAFLASDDARWVTGEVINASGGVR +>Q9KQH7.2 +MNLEGKVALVTGASRGIGKAIAELLAERGAKVIGTATSESGAQAISDYLGDNGKGMALNVTNPESIEAVLKAITDEFGGVDILVNNAGITRDNLLMRMKEEEWSDIMETNLTSIFRLSKAVLRGMMKKRQGRIINVGSVVGTMGNAGQANYAAAKAGVIGFTKSMAREVASRGVTVNTVAPGFIETDMTKALNDEQRTATLAQVPAGRLGDPREIASAVAFLASPEAAYITGETLHVNGGMYMI +>P55336.1 +MNLEGKIALVTGASRGIGRAIAELLVERGATVIGTATSEGGAAAISEYLGENGKGLALNVTDVESIEATLKTINDECGAIDILVNNAGITRDNLLMRMKDDEWNDIINTNLTPIYRMSKAVLRGMMKKRAGRIINVGSVVGTMGNAGQTNYAAAKAGVIGFTKSMAREVASRGVTVNTVAPGFIETDMTKALNDDQRAATLSNVPAGRLGDPREIASAVVFLASPEAAYITGETLHVNGGMYMV +>P0A2C9.1 +MSFEGKIALVTGASRGIGRAIAETLVARGAKVIGTATSENGAKNISDYLGANGKGLMLNVTDPASIESVLENIRAEFGEVDILVNNAGITRDNLLMRMKDDEWNDIIETNLSSVFRLSKAVMRAMMKKRCGRIITIGSVVGTMGNAGQANYAAAKAGLIGFSKSLAREVASRGITVNVVAPGFIETDMTRALSDDQRAGILAQVPAGRLGGAQEIASAVAFLASDEASYITGETLHVNGGMYMV diff --git a/modules/bindings/tests/testfiles/multiple_aln.fasta b/modules/bindings/tests/testfiles/multiple_aln.fasta new file mode 100644 index 0000000000000000000000000000000000000000..45aff7ddb88fb44399b02de678e51844b550c6a4 --- /dev/null +++ b/modules/bindings/tests/testfiles/multiple_aln.fasta @@ -0,0 +1,58 @@ +>P50198.1 +--------------------------------------MANRLAGKVALITGGASGLGAAQAKRFAEEGAKVVIGDLNEEMAKGVVAEIRAAGGD-------ALFIRLDVTDAASWNNAIAAAVDGFG--GLTTLSNTAGIIHPGG-FEEESIEGWNKMVAVNQTAIFLGIKAAIPELVKSGN-GSIINISSLIGMFPTAGNASYCATKAAVRIMSKAAALEFVDRGVRVNTIVPG-----GMNTP--ITAN-----VPPDVLKQQTSQIPMGKLGDPIDIANGALFLASDEAKYITGVDLPIDGGWSVGV---------------- +>Q9WYG0.1 +--------------------------------------MN--FQGKVVLITGAGSGIGKKAAVMFAERGAKVAINDISEEKGKETVELIKSMGGE-------AAFIFGDVAKDA--EQIVKKTVETFG--RLDILVNNAGIVPYGN-IEETSEEDFDKTMAVNVKGPFLLSKYAVEQMKKQGG-GVIVNVSSEAGLIGIPRRCVYSVSKAALLGLTRSLAVDYVDYGIRVNAVCPGTTQSEGLMAR--VKASP----NPEELLKKMTSRIPMKRLGKEEEIAFAILFAACDEAGFMTGSIINIDGG-STAV---------------- +>Q8KWT4.1 +--------------------------------------MN--LTDKTVLITGGASGIGYAAVQAFLNQQANVVVADIDEAQGEAMIR--KENNDR-------LHFVHTDITDEPACQNAIRSAVDKFG--GLDVLINNAGIEIVAP-IHEMELSNWNKVLNVNLTGMFLMSKHALKYMLKSGK-GNIINTCSVGGVVAWPDIPAYNASKGGVLQLTRSMAVDYAKHNIRVNCVCPGIIDTPLNEKSFLENNEG----TLEEIKKEKAKVNPLLRLGKPEEIANVMLFLASDLSSYMTGSAITADGGYTAQ----------------- +>P39640.2 +--------------------------------------MN--LTDKTVLITGGASGIGYAAVQAFLGQQANVVVADIDEAQGEAMVR--KENNDR-------LHFVQTDITDEAACQHAVESAVHTFG--GLDVLINNAGIEIVAP-IHEMELSDWNKVLQVNLTGMFLMSKHALKHMLAAGK-GNIINTCSVGGLVAWPDIPAYNASKGGVLQLTKSMAVDYAKHQIRVNCVCPGIIDTPLNEKSFLENNEG----TLEEIKKEKAKVNPLLRLGKPEEIANVMLFLASDLSSYMTGSAITADGGYTAQ----------------- +>P46331.2 +--------------------------------------MGR-LENKTAVITGAATGIGQATAEVFANEGARVIIGDINKDQMEETVDAIRKNGGQ-------AESFHLDVSDENSVKAFADQIKDACG--TIDILFNNAGVDQEGGKVHEYPVDLFDRIIAVDLRGTFLCSKYLIPLMLENG--GSIINTSSMSGRAADLDRSGYNAAKGGITNLTKAMAIDYARNGIRVNSISPGTIETPLIDKLAGTKEQE----MGEQFREANKWITPLGRLGQPKEMATVALFLASDDSSYVTGEDITADGGIMAYTWPGKMLIEEKWKEETK +>P50197.1 +--------------------------------------MSD-LSGKTIIVTGGGSGIGRATVELLVASGANVPVADINDEAGEAVVA---TSGGK-------AAYFRCDIAQEEDVKALVAQTLAAFG--GLDGSFNNAAIPQAGLPLAEVSLERFRQSMDINVTGTFLCMKYQILAMIERGTKGSIVNTASAAGVVGVPMHGEYVGAKHAVVGLTRVAAADYGKHGIRVNALVPGAVRTPMLQR--AMDNDA----GLEPYLNS---IHPIGRFSEPHEQAQAAVWLLSDAASFVTGSCLAADGGFTAI----------------- +>P50199.1 +----------------------------------MSHPDLFSLSGARALVTGASRGIGLTLAKGLARYGAEVVLNGRNAESLDSAQSGFEAEGLK-------ASTAVFDVTDQDAVIDGVAAIERDMG--PIDILINNAGIQRRAP-LEEFSRKDWDDLMSTNVNAVFFVGQAVARHMIPRGRG-KIVNICSVQSELARPGIAPYTATKGAVKNLTKGMATDWGRHGLQINGLAPGYFATEMTERLVADEEFT----------DWLCKRTPAGRWGQVEELVGAAVFLSSRASSFVNGQVLMVDGGITVSL---------------- +>P0A9Q0.1 +----------------------------------MN--DLFSLAGKNILITGSAQGIGFLLATGLGKYGAQIIINDITAERAELAVEKLHQEGIQ-------AVAAPFNVTHKHEIDAAVEHIEKDIG--PIDVLVNNAGIQRRHP-FTEFPEQEWNDVIAVNQTAVFLVSQAVTRHMVERKAG-KVINICSMQSELGRDTITPYAASKGAVKMLTRGMCVELARHNIQVNGIAPGYFKTEMTKALVEDEAFT----------AWLCKRTPAARWGDPQELIGAAVFLSSKASDFVNGHLLFVDGGMLVAV---------------- +>Q05528.1 +-----------------------------------MILNTFNLQGKVALITGCDTGLGQGMAVGLAEAGCDIVGVN-IVE-PKETIEKVTAVGRR-------FLSLTADMSDISGHAALVEKAVAEFG--KVDILVNNAGIIRRED-AIEFSEKNWDDVMNLNIKSVFFMSQTVARQFIKQGHGGKIINIASMLSFQGGIPVPSYTASKKRVMGITRIVANEWAKHNINVNAIAPGYMATNNTQHVRADQDRS----------KEILDRIPAGRWGLPQDLQGPAVFLASSASDYVNGYTIAVDGGWLAR----------------- +>P37769.2 +-----------------------------------MILSAFSLEGKVAVVTGCDTGLGQGMALGLAQAGCDIVGIN-IVE-PTETIEQVTALGRR-------FLSLTADLRKIDGIPALLDRAVAEFG--HIDILVNNAGLIRRED-ALEFSEKDWDDVMNLNIKSVFFMSQAAAKHFIAQGNGGKIINIASMLSFQGGIRVPSYTASKSGVMGVTRLMANEWAKHNINVNAIAPGYMATNNTQQLRADEQRS----------AEILDRIPAGRWGLPSDLMGPIVFLASSASDYVNGYTIAVDGGWLAR----------------- +>P50842.1 +---------------------------------MGYLHDAFSLKGKTALVTGPGTGIGQGIAKALAGAGADIIGTS-HTSSLSETQQLVEQEGRI-------FTSFTLDMSKPE---AIKDSAAELFENRQIDILVNNAGIIHREK-AEDFPEENWQHVLNVNLNSLFILTQLAGRHMLKRGHG-KIINIASLLSFQGGILVPAYTASKHAVAGLTKSFANEWAASGIQVNAIAPGYISTANTKPIRDDEKRN----------EDILKRIPAGRWGQADDIGGTAVFLASRASDYVNGHILAVDGGWLSR----------------- +>Q9KQH7.2 +----------------------------------------MNLEGKVALVTGASRGIGKAIAELLAERGAKVIGTATSESGAQAISDYLGDNG----------KGMALNVTNPESIEAVLKAITDEFG--GVDILVNNAGITRDNL-LMRMKEEEWSDIMETNLTSIFRLSKAVLRGMMKKRQG-RIINVGSVVGTMGNAGQANYAAAKAGVIGFTKSMAREVASRGVTVNTVAPGFIETDMTKALNDEQR------------TATLAQVPAGRLGDPREIASAVAFLASPEAAYITGETLHVNGGMYMI----------------- +>P55336.1 +----------------------------------------MNLEGKIALVTGASRGIGRAIAELLVERGATVIGTATSEGGAAAISEYLGENG----------KGLALNVTDVESIEATLKTINDECG--AIDILVNNAGITRDNL-LMRMKDDEWNDIINTNLTPIYRMSKAVLRGMMKKRAG-RIINVGSVVGTMGNAGQTNYAAAKAGVIGFTKSMAREVASRGVTVNTVAPGFIETDMTKALNDDQR------------AATLSNVPAGRLGDPREIASAVVFLASPEAAYITGETLHVNGGMYMV----------------- +>P0A2C9.1 +----------------------------------------MSFEGKIALVTGASRGIGRAIAETLVARGAKVIGTATSENGAKNISDYLGANG----------KGLMLNVTDPASIESVLENIRAEFG--EVDILVNNAGITRDNL-LMRMKDDEWNDIIETNLSSVFRLSKAVMRAMMKKRCG-RIITIGSVVGTMGNAGQANYAAAKAGLIGFSKSLAREVASRGITVNVVAPGFIETDMTRALSDDQR------------AGILAQVPAGRLGGAQEIASAVAFLASDEASYITGETLHVNGGMYMV----------------- +>P42317.2 +------------------------------------------MRKQVALVTGAAGGIGFEIAREFAREGASVIVSDLRPEACEKAASKLAEEGFD-------AAAIPYDVTKEAQVADTVNVIQKQYG--RLDILVNNAGIQHVAP-IEEFPTDTFEQLIKVMLTAPFIAMKHVFPIMKKQQFG-RIINIASVNGLVGFAGKSAYNSAKHGVIGLTKVGALEGAPHGITVNALCPGYVDTQLVRNQLSDLSKTRNVPYDSVLEQVIFPLVPQKRLLSVKEIADYAVFLASEKAKGVTGQAVVLDGGYTAQ----------------- +>P40397.2 +MANQKKKT--LPPQHQNQQPGFEYLMDPRPVFDKP--KKAKKLEGKTAIITGGDSGIGRAVSVLFAKEGANVVIVYLN--EHQDAEETKQYVEKE----GVKCLLIAGDVGDEAFCNDVVGQASQVFP--SIDILVNNAAEQHVQPSIEKITSHQLIRTFQTNIFSMFYLTKAVLPHLKKGSS---IINTASITAYKGNKTLIDYSATKGAIVTFTRSLSQSLVQQGIRVNAVAPGPIWTP--LIPASFAAKD---------VEVFGSDVPMERPGQPVEVAPSYLYLASDDSTYVTGQTIHVNGGTIVNG---------------- +>O07575.1 +MNPMDRQTEGQEPQHQDRQPGIESKMNPLPLSEDEDYRGSGKLKGKVAIITGGDSGIGRAAAIAFAKEGADISILYLD--EHSDAEETRKRIEKE----NVRCLLIPGDVGDENHCEQAVQQTVDHFG--KLDILVNNAAEQHPQDSILNISTEQLEKTFRTNIFSMFHMTKKALPHLQEGCA---IINTTSITAYEGDTALIDYSSTKGAIVSFTRSMAKSLADKGIRVNAVAPGPIWTP--LIPATFPEEK---------VKQHGLDTPMGRPGQPVEHAGAYVLLASDESSYMTGQTIHVNGGRFIST---------------- +>Q5KTS5.1 +MASGGQFP----PQKQESQPGKEHLMDPSPQHASPHYKPANKLQGKVALVTGGDSGIGRSVCYHFALEGATVAFTFVKGHEDKDANETLELLRKAKSSDAKDPIAIAADLGFDDNCKKVVDQVVNAFG--SIDVLVNNAAEQYKASTVEDIDEERLERVFRTNIFAYFFMARHALKHMREGST---IINTTSINAYKGNAKLLDYTATKGAIVAFTRGLSLQLISKGIRVNGVAPGPVWTP--LIPSSFDEEE---------VKQFGSEVPMKRAGQPYEIATAYVFLASCDSSYYSGQVLHPNGGAIVNG---------------- +>Q49117.2 +---------------------------------------MSKLEGKVAVVTGASKGIGAAIAKALAKDGAAVVVNYAS--SKAGADAVVEAITAA----GGKAIAVQADVSQAVQARGLVEAAVQQFG--RLDVLVN-NSGVYEFAAIEEVTEEHYRRIFDVNVLGVLLATQAASKHLGEGGS---IINISSVVTDVLMPTSAVYSGTKGALNAISGVLANELAPRKIRVNVVSPGYVVTEGTHTAGIAGSEM---------EAGLVAQTPLGRSGQPDDIAGVVAFLASDDARWVTGEVINASGGVR------------------- +>target +----------------------------------------MRLDGKTALITGSARGIGRAFAEAYVREGARVAIADINLEAARATAAEI---GP-------AACAIALDVTDQASIDRCVAELLDRWG--SIDILVNNAALFDLAP-IVEITRESYDRLFAINVSGTLFMMQAVARAMIAGGRGGKIINMASQAGRRGEALVGVYCATKAAVISLTQSAGLNLIRHGINVNAIAPGVVDGEHWDGVDAKFAD-YENLPRGEKKRQVGAAVPFGRMGRAEDLTGMAIFLATPEADYIVAQTYNVDGGNWMS----------------- +>Q59787.1 +----------------------------------------MRLDGKTALITGSARGIGRAFAEAYVREGARVAIADINLEAARATAAEI---GP-------AACAIALDVTDQASIDRCVAELLDRWG--SIDILVNNAALFDLAP-IVEITRESYDRLFAINVSGTLFMMQAVARAMIAGGRGGKIINMASQAGRRGEALVGVYCATKAAVISLTQSAGLNLIRHGINVNAIAPGVVDGEHWDGVDAKFAD-YENLPRGEKKRQVGAAVPFGRMGRAEDLTGMAIFLATPEADYIVAQTYNVDGGNWMS----------------- +>Q48436.2 +-------------------------------------------MKKVALVTGAGQGIGKAIALRLVKDGFAVAIADYNDATAKAVASEINQAGG-------RAMAVKVDVSDRDQVFAAVEQARKTLG--GFDVIVNNAGVAPSTP-IESITPEIVDKVYNINVKGVIWGIQAAVEAFKKEGHGGKIINACSQAGHVGNPELAVYSSSKFAVRGLTQTAARDLAPLGITVNGYCPGIVKTPMWAEIDRQVSE-AAGKPLGYGTAEFAKRITLGRLSEPEDVAACVSYLASPDSDYMTGQSLLIDGGMVFN----------------- +>Q04520.1 +-------------------------------------------MQKVALVTGAGQGIGKAIALRLVKDGFAVAIADYNDATATAVAAEINQAGG-------RAVAIKVDVSRRDQVFAAVEQARKALG--GFNVIVNNAGIAPSTP-IESITEEIVDRVYNINVKGVIWGMQAAVEAFKKEGHGGKIVNACSQAGHVGNPELAVYSSSKFAVRGLTQTAARDLAPLGITVNGFCPGIVKTPMWAEIDRQCRK-RRAN--RWATARLN-------LPNASPLAACRSLKTSPPACRSSPARIPTI----------------------- +>P66776.1 +-----------------------------------------MTNNKVALVTGGAQGIGFKIAERLVEDGFKVAVVDFNEEGAKAAALKLSSDGT-------KAIAIKADVSNRDDVFNAVRQTAAQFG--DFHVMVNNAGLGPTTP-IDTITEEQFKTVYGVNVAGVLWGIQAAHEQFKKFNHGGKIINATSQAGVEGNPGLSLYCSTKFAVRGLTQVAAQDLASEGITVNAFAPGIVQTPMMESIAVATAE-EAGKPEAWGWEQFTSQIALGRVSQPEDVSNVVSFLAGKDSDYITGQTIIVDGGMRFR----------------- +>Q5HKG6.1 +-------------------------------------------MSKTAIITGSAGGLGKGIAERLANDGFNIVLQDINEALLLETEKEFKEKGY-------QAVAFKSDVSKKKEQEELVQFAVTEFG--QLDVMVNNAGVDAVTP-ILEIGEEELSKLFNINVFGTLFGIQAAANQFIKQKSKGKIINACSIAGHESYEVLGTYSATKHSVRSFTQTAAKELADKGITVNAYCPGVAKTEMWDRIDEEMVKLDDSLEIGDAFEAFSSEIKLGRYQEPSDVANLVSFLASNDSDYITGQSILTDGGLVYR----------------- +>Q8CQD2.1 +-------------------------------------------MSKTAIITGAAGGLGKGIAERLANDGFNIVLQDINEALLLETEKEFKEKGY-------QAVAYKSDVSKKKEQEELVQFAVTEFG--QLDVMVNNAGVDAVTP-ILEIGEEELSKLFNINVFGTLFGIQAAANQFIKQKSKGKIINACSIAGHESYEVLGTYSATKHSVRSFTQTAAKELADKGITVNAYCPGVAKTEMWDRIDEEMVKLDDSLEIGDAFEAFSSEIKLGRYQEPSDVANLVSFLASNDSDYITGQSILTDGGLVYR----------------- +>Q1JP75.1 +--------------------------------------MDLRLAGRRALVTGAGKGIGRSIVKALHAAGARVVAVSRTQADLDSLVRECP-----------GVETVCVDLAD----WEATEQALGGVG--PVDLLVNNAAVAFLQP-FLEVTKEAYDMSFSVNLRAVIQVSQIVARGLIARGAPGVIVNVSSQASQRGLTNHSVYCSTKGALDTLTKVMAVELGPHKIRVNAVNPTVVMTPMGQAAWSDPQK----------AKAMLDRIPLGRFAEVENVVDTILFLLSDRSSMTTGSTVPVDGGFLAT----------------- +>Q91XV4.1 +--------------------------------------MDLGLAGRRALVTGAGKGIGRSTVLALQAAGAHVVAVSRTQADLDSLVSECP-----------GVETVCVDLAD----WEATEQALSSVG--PVDLLVNNAAVALLQP-FLEVTKEAFDMSFNVNLRAVIQVSQIVARGMIARGAPGAIVNVSSQASQRALANHSVYCSTKGALDMLTKMMALELGPHKIRVNAVNPTVVMTSMGRTNWSDPHK----------AKVMLDRIPLGKFAEVENVVDAILFLLSHRSNMTTGSTLPVDGGFLVT----------------- +>Q7Z4W1.2 +--------------------------------------MELFLAGRRVLVTGAGKGIGRGTVQALHATGARVVAVSRTQADLDSLVRECP-----------GIEPVCVDLGD----WEATERALGSVG--PVDLLVNNAAVALLQP-FLEVTKEAFDRSFEVNLRAVIQVSQIVARGLIARGVPGAIVNVSSQCSQRAVTNHSVYCSTKGALDMLTKVMALELGPHKIRVNAVNPTVVMTSMGQATWSDPHK----------AKTMLNRIPLGKFAEVEHVVNAILFLLSDRSGMTTGSTLPVEGGFWAC----------------- diff --git a/modules/bindings/tests/testfiles/pairwise_aln.fasta b/modules/bindings/tests/testfiles/pairwise_aln.fasta new file mode 100644 index 0000000000000000000000000000000000000000..b6fe766c1208cde1071a4f7144f7f4407c76fb63 --- /dev/null +++ b/modules/bindings/tests/testfiles/pairwise_aln.fasta @@ -0,0 +1,4 @@ +>testseq +------MRLDGKTALITGSARGIGRAFAEAYVREGARVAIADIN---LEAARATAAEIGPAACAIALDVTDQASIDRCVAELLDRWGSIDILVNNAALFDLAPIVEITRESYDRLFAINVSGTLFMMQAVARAMIAGGRGGKIINMASQAGRRGEALVGVYCATKAAVISLTQSAGLNLIRHGINVNAIAPGVVDGEHWDGVDAKFADYENLPRGEKKRQVGAAVPFGRMGRAEDLTGMAIFLATPEADYIVAQTYNVDGGNWMS- +>P50199.1 +MSHPDLFSLSGARALVTGASRGIGLTLAKGLARYGAEVVLNGRNAESLDSAQSGFEAEGLKASTAVFDVTDQDAVIDGVAAIERDMGPIDILINNAGIQRRAPLEEFSRKDWDDLMSTNVNAVFFVGQAVARHMIPRGRG-KIVNICSVQSELARPGIAPYTATKGAVKNLTKGMATDWGRHGLQINGLAPGYFATEMTE----RLVADEEFTDWLCKR-----TPAGRWGQVEELVGAAVFLSSRASSFVNGQVLMVDGGITVSL diff --git a/modules/bindings/tests/testfiles/similar.fasta b/modules/bindings/tests/testfiles/similar.fasta new file mode 100644 index 0000000000000000000000000000000000000000..af6487c81676327376422a7c17485c5bf99db926 --- /dev/null +++ b/modules/bindings/tests/testfiles/similar.fasta @@ -0,0 +1,2 @@ +>P50199.1 +MSHPDLFSLSGARALVTGASRGIGLTLAKGLARYGAEVVLNGRNAESLDSAQSGFEAEGLKASTAVFDVTDQDAVIDGVAAIERDMGPIDILINNAGIQRRAPLEEFSRKDWDDLMSTNVNAVFFVGQAVARHMIPRGRGKIVNICSVQSELARPGIAPYTATKGAVKNLTKGMATDWGRHGLQINGLAPGYFATEMTERLVADEEFTDWLCKRTPAGRWGQVEELVGAAVFLSSRASSFVNGQVLMVDGGITVSL diff --git a/modules/bindings/tests/testfiles/test.fasta b/modules/bindings/tests/testfiles/test.fasta new file mode 100644 index 0000000000000000000000000000000000000000..38da1518cab1d2c60452b96b401b8b251454574e --- /dev/null +++ b/modules/bindings/tests/testfiles/test.fasta @@ -0,0 +1,2 @@ +>testseq +MRLDGKTALITGSARGIGRAFAEAYVREGARVAIADINLEAARATAAEIGPAACAIALDVTDQASIDRCVAELLDRWGSIDILVNNAALFDLAPIVEITRESYDRLFAINVSGTLFMMQAVARAMIAGGRGGKIINMASQAGRRGEALVGVYCATKAAVISLTQSAGLNLIRHGINVNAIAPGVVDGEHWDGVDAKFADYENLPRGEKKRQVGAAVPFGRMGRAEDLTGMAIFLATPEADYIVAQTYNVDGGNWMS