diff --git a/modules/bindings/tests/CMakeLists.txt b/modules/bindings/tests/CMakeLists.txt index d1b344ff59ab2b6972b901c1c4f2387b4484dbe2..7b04104d9d230d61ab27b4c56dce4730bd42f49e 100644 --- a/modules/bindings/tests/CMakeLists.txt +++ b/modules/bindings/tests/CMakeLists.txt @@ -2,6 +2,7 @@ set(OST_BINDINGS_UNIT_TESTS test_msms.py test_clustalw.py test_blast.py + test_kclust.py ) ost_unittest(MODULE bindings diff --git a/modules/bindings/tests/test_kclust.py b/modules/bindings/tests/test_kclust.py new file mode 100644 index 0000000000000000000000000000000000000000..fe7251613484b55ad248f21edb2df12f61c0ca3e --- /dev/null +++ b/modules/bindings/tests/test_kclust.py @@ -0,0 +1,44 @@ +import sys +import unittest +from ost import * +from ost import settings +from ost.bindings import kclust + +class TestkClustBindings(unittest.TestCase): + + def testkClustExec(self): + + seq_list=io.LoadSequenceList('testfiles/multiple.fasta') + + clusters40=kclust.kClust(seq_list,clustering_thresh=40) + clusters90=kclust.kClust(seq_list,clustering_thresh=90) + + self.assertEqual(len(clusters40),16) + self.assertEqual(len(clusters90),27) + + def testkClustParseOutput(self): + + representative_ids=['Q5KTS5.1','P46331.2','P66776.1','P42317.2','P50199.1', + 'Q59787.1','Q9WYG0.1','P50197.1','P50198.1','Q7Z4W1.2', + 'Q04520.1'] + + cluster_sizes=[3,3,4,1,7,3,2,1,1,3,1] + + result=kclust._ParseOutput('testfiles') + + self.assertEqual(len(result),11) + + for c, r_id in zip(result, representative_ids): + self.assertEqual(c.representative_id, r_id) + for c, s in zip(result, cluster_sizes): + self.assertEqual(len(c.sequences),s) + +if __name__ == "__main__": + # test if kClust is available on system, otherwise ignore tests + try: + blastpath=settings.Locate(('kClust')) + except(settings.FileNotFound): + print "Could not find kClust executable: ignoring unit tests" + sys.exit(0) + from ost import testutils + testutils.RunTests() diff --git a/modules/bindings/tests/testfiles/clusters.dmp b/modules/bindings/tests/testfiles/clusters.dmp new file mode 100644 index 0000000000000000000000000000000000000000..f3974d2148114cc147a9b6ddb69e63a270a50e9c --- /dev/null +++ b/modules/bindings/tests/testfiles/clusters.dmp @@ -0,0 +1,30 @@ +# 29 +1 1 +2 1 +3 1 +4 4 +17 4 +18 4 +5 5 +6 5 +7 5 +10 5 +8 8 +9 9 +13 9 +14 9 +15 9 +16 9 +23 9 +25 9 +11 11 +12 11 +24 11 +19 19 +22 19 +20 20 +21 21 +26 26 +27 26 +28 26 +29 29 diff --git a/modules/bindings/tests/testfiles/fastadb.fasta b/modules/bindings/tests/testfiles/fastadb.fasta new file mode 100644 index 0000000000000000000000000000000000000000..56895f50475694418d7f7c4dab0cb5d0a0dc737f --- /dev/null +++ b/modules/bindings/tests/testfiles/fastadb.fasta @@ -0,0 +1,58 @@ +>P50198.1 +MANRLAGKVALITGGASGLGAAQAKRFAEEGAKVVIGDLNEEMAKGVVAEIRAAGGDALFIRLDVTDAASWNNAIAAAVDGFGGLTTLSNTAGIIHPGGFEEESIEGWNKMVAVNQTAIFLGIKAAIPELVKSGNGSIINISSLIGMFPTAGNASYCATKAAVRIMSKAAALEFVDRGVRVNTIVPGGMNTPITANVPPDVLKQQTSQIPMGKLGDPIDIANGALFLASDEAKYITGVDLPIDGGWSVGV +>Q9WYG0.1 +MNFQGKVVLITGAGSGIGKKAAVMFAERGAKVAINDISEEKGKETVELIKSMGGEAAFIFGDVAKDAEQIVKKTVETFGRLDILVNNAGIVPYGNIEETSEEDFDKTMAVNVKGPFLLSKYAVEQMKKQGGGVIVNVSSEAGLIGIPRRCVYSVSKAALLGLTRSLAVDYVDYGIRVNAVCPGTTQSEGLMARVKASPNPEELLKKMTSRIPMKRLGKEEEIAFAILFAACDEAGFMTGSIINIDGGSTAV +>Q8KWT4.1 +MNLTDKTVLITGGASGIGYAAVQAFLNQQANVVVADIDEAQGEAMIRKENNDRLHFVHTDITDEPACQNAIRSAVDKFGGLDVLINNAGIEIVAPIHEMELSNWNKVLNVNLTGMFLMSKHALKYMLKSGKGNIINTCSVGGVVAWPDIPAYNASKGGVLQLTRSMAVDYAKHNIRVNCVCPGIIDTPLNEKSFLENNEGTLEEIKKEKAKVNPLLRLGKPEEIANVMLFLASDLSSYMTGSAITADGGYTAQ +>P39640.2 +MNLTDKTVLITGGASGIGYAAVQAFLGQQANVVVADIDEAQGEAMVRKENNDRLHFVQTDITDEAACQHAVESAVHTFGGLDVLINNAGIEIVAPIHEMELSDWNKVLQVNLTGMFLMSKHALKHMLAAGKGNIINTCSVGGLVAWPDIPAYNASKGGVLQLTKSMAVDYAKHQIRVNCVCPGIIDTPLNEKSFLENNEGTLEEIKKEKAKVNPLLRLGKPEEIANVMLFLASDLSSYMTGSAITADGGYTAQ +>P46331.2 +MGRLENKTAVITGAATGIGQATAEVFANEGARVIIGDINKDQMEETVDAIRKNGGQAESFHLDVSDENSVKAFADQIKDACGTIDILFNNAGVDQEGGKVHEYPVDLFDRIIAVDLRGTFLCSKYLIPLMLENGGSIINTSSMSGRAADLDRSGYNAAKGGITNLTKAMAIDYARNGIRVNSISPGTIETPLIDKLAGTKEQEMGEQFREANKWITPLGRLGQPKEMATVALFLASDDSSYVTGEDITADGGIMAYTWPGKMLIEEKWKEETK +>P50197.1 +MSDLSGKTIIVTGGGSGIGRATVELLVASGANVPVADINDEAGEAVVATSGGKAAYFRCDIAQEEDVKALVAQTLAAFGGLDGSFNNAAIPQAGLPLAEVSLERFRQSMDINVTGTFLCMKYQILAMIERGTKGSIVNTASAAGVVGVPMHGEYVGAKHAVVGLTRVAAADYGKHGIRVNALVPGAVRTPMLQRAMDNDAGLEPYLNSIHPIGRFSEPHEQAQAAVWLLSDAASFVTGSCLAADGGFTAI +>P42317.2 +MRKQVALVTGAAGGIGFEIAREFAREGASVIVSDLRPEACEKAASKLAEEGFDAAAIPYDVTKEAQVADTVNVIQKQYGRLDILVNNAGIQHVAPIEEFPTDTFEQLIKVMLTAPFIAMKHVFPIMKKQQFGRIINIASVNGLVGFAGKSAYNSAKHGVIGLTKVGALEGAPHGITVNALCPGYVDTQLVRNQLSDLSKTRNVPYDSVLEQVIFPLVPQKRLLSVKEIADYAVFLASEKAKGVTGQAVVLDGGYTAQ +>target +MRLDGKTALITGSARGIGRAFAEAYVREGARVAIADINLEAARATAAEIGPAACAIALDVTDQASIDRCVAELLDRWGSIDILVNNAALFDLAPIVEITRESYDRLFAINVSGTLFMMQAVARAMIAGGRGGKIINMASQAGRRGEALVGVYCATKAAVISLTQSAGLNLIRHGINVNAIAPGVVDGEHWDGVDAKFADYENLPRGEKKRQVGAAVPFGRMGRAEDLTGMAIFLATPEADYIVAQTYNVDGGNWMS +>Q59787.1 +MRLDGKTALITGSARGIGRAFAEAYVREGARVAIADINLEAARATAAEIGPAACAIALDVTDQASIDRCVAELLDRWGSIDILVNNAALFDLAPIVEITRESYDRLFAINVSGTLFMMQAVARAMIAGGRGGKIINMASQAGRRGEALVGVYCATKAAVISLTQSAGLNLIRHGINVNAIAPGVVDGEHWDGVDAKFADYENLPRGEKKRQVGAAVPFGRMGRAEDLTGMAIFLATPEADYIVAQTYNVDGGNWMS +>Q48436.2 +MKKVALVTGAGQGIGKAIALRLVKDGFAVAIADYNDATAKAVASEINQAGGRAMAVKVDVSDRDQVFAAVEQARKTLGGFDVIVNNAGVAPSTPIESITPEIVDKVYNINVKGVIWGIQAAVEAFKKEGHGGKIINACSQAGHVGNPELAVYSSSKFAVRGLTQTAARDLAPLGITVNGYCPGIVKTPMWAEIDRQVSEAAGKPLGYGTAEFAKRITLGRLSEPEDVAACVSYLASPDSDYMTGQSLLIDGGMVFN +>Q04520.1 +MQKVALVTGAGQGIGKAIALRLVKDGFAVAIADYNDATATAVAAEINQAGGRAVAIKVDVSRRDQVFAAVEQARKALGGFNVIVNNAGIAPSTPIESITEEIVDRVYNINVKGVIWGMQAAVEAFKKEGHGGKIVNACSQAGHVGNPELAVYSSSKFAVRGLTQTAARDLAPLGITVNGFCPGIVKTPMWAEIDRQCRKRRANRWATARLNLPNASPLAACRSLKTSPPACRSSPARIPTI +>P66776.1 +MTNNKVALVTGGAQGIGFKIAERLVEDGFKVAVVDFNEEGAKAAALKLSSDGTKAIAIKADVSNRDDVFNAVRQTAAQFGDFHVMVNNAGLGPTTPIDTITEEQFKTVYGVNVAGVLWGIQAAHEQFKKFNHGGKIINATSQAGVEGNPGLSLYCSTKFAVRGLTQVAAQDLASEGITVNAFAPGIVQTPMMESIAVATAEEAGKPEAWGWEQFTSQIALGRVSQPEDVSNVVSFLAGKDSDYITGQTIIVDGGMRFR +>Q5HKG6.1 +MSKTAIITGSAGGLGKGIAERLANDGFNIVLQDINEALLLETEKEFKEKGYQAVAFKSDVSKKKEQEELVQFAVTEFGQLDVMVNNAGVDAVTPILEIGEEELSKLFNINVFGTLFGIQAAANQFIKQKSKGKIINACSIAGHESYEVLGTYSATKHSVRSFTQTAAKELADKGITVNAYCPGVAKTEMWDRIDEEMVKLDDSLEIGDAFEAFSSEIKLGRYQEPSDVANLVSFLASNDSDYITGQSILTDGGLVYR +>Q8CQD2.1 +MSKTAIITGAAGGLGKGIAERLANDGFNIVLQDINEALLLETEKEFKEKGYQAVAYKSDVSKKKEQEELVQFAVTEFGQLDVMVNNAGVDAVTPILEIGEEELSKLFNINVFGTLFGIQAAANQFIKQKSKGKIINACSIAGHESYEVLGTYSATKHSVRSFTQTAAKELADKGITVNAYCPGVAKTEMWDRIDEEMVKLDDSLEIGDAFEAFSSEIKLGRYQEPSDVANLVSFLASNDSDYITGQSILTDGGLVYR +>P50199.1 +MSHPDLFSLSGARALVTGASRGIGLTLAKGLARYGAEVVLNGRNAESLDSAQSGFEAEGLKASTAVFDVTDQDAVIDGVAAIERDMGPIDILINNAGIQRRAPLEEFSRKDWDDLMSTNVNAVFFVGQAVARHMIPRGRGKIVNICSVQSELARPGIAPYTATKGAVKNLTKGMATDWGRHGLQINGLAPGYFATEMTERLVADEEFTDWLCKRTPAGRWGQVEELVGAAVFLSSRASSFVNGQVLMVDGGITVSL +>P0A9Q0.1 +MNDLFSLAGKNILITGSAQGIGFLLATGLGKYGAQIIINDITAERAELAVEKLHQEGIQAVAAPFNVTHKHEIDAAVEHIEKDIGPIDVLVNNAGIQRRHPFTEFPEQEWNDVIAVNQTAVFLVSQAVTRHMVERKAGKVINICSMQSELGRDTITPYAASKGAVKMLTRGMCVELARHNIQVNGIAPGYFKTEMTKALVEDEAFTAWLCKRTPAARWGDPQELIGAAVFLSSKASDFVNGHLLFVDGGMLVAV +>Q05528.1 +MILNTFNLQGKVALITGCDTGLGQGMAVGLAEAGCDIVGVNIVEPKETIEKVTAVGRRFLSLTADMSDISGHAALVEKAVAEFGKVDILVNNAGIIRREDAIEFSEKNWDDVMNLNIKSVFFMSQTVARQFIKQGHGGKIINIASMLSFQGGIPVPSYTASKKRVMGITRIVANEWAKHNINVNAIAPGYMATNNTQHVRADQDRSKEILDRIPAGRWGLPQDLQGPAVFLASSASDYVNGYTIAVDGGWLAR +>P37769.2 +MILSAFSLEGKVAVVTGCDTGLGQGMALGLAQAGCDIVGINIVEPTETIEQVTALGRRFLSLTADLRKIDGIPALLDRAVAEFGHIDILVNNAGLIRREDALEFSEKDWDDVMNLNIKSVFFMSQAAAKHFIAQGNGGKIINIASMLSFQGGIRVPSYTASKSGVMGVTRLMANEWAKHNINVNAIAPGYMATNNTQQLRADEQRSAEILDRIPAGRWGLPSDLMGPIVFLASSASDYVNGYTIAVDGGWLAR +>P50842.1 +MGYLHDAFSLKGKTALVTGPGTGIGQGIAKALAGAGADIIGTSHTSSLSETQQLVEQEGRIFTSFTLDMSKPEAIKDSAAELFENRQIDILVNNAGIIHREKAEDFPEENWQHVLNVNLNSLFILTQLAGRHMLKRGHGKIINIASLLSFQGGILVPAYTASKHAVAGLTKSFANEWAASGIQVNAIAPGYISTANTKPIRDDEKRNEDILKRIPAGRWGQADDIGGTAVFLASRASDYVNGHILAVDGGWLSR +>Q1JP75.1 +MDLRLAGRRALVTGAGKGIGRSIVKALHAAGARVVAVSRTQADLDSLVRECPGVETVCVDLADWEATEQALGGVGPVDLLVNNAAVAFLQPFLEVTKEAYDMSFSVNLRAVIQVSQIVARGLIARGAPGVIVNVSSQASQRGLTNHSVYCSTKGALDTLTKVMAVELGPHKIRVNAVNPTVVMTPMGQAAWSDPQKAKAMLDRIPLGRFAEVENVVDTILFLLSDRSSMTTGSTVPVDGGFLAT +>Q91XV4.1 +MDLGLAGRRALVTGAGKGIGRSTVLALQAAGAHVVAVSRTQADLDSLVSECPGVETVCVDLADWEATEQALSSVGPVDLLVNNAAVALLQPFLEVTKEAFDMSFNVNLRAVIQVSQIVARGMIARGAPGAIVNVSSQASQRALANHSVYCSTKGALDMLTKMMALELGPHKIRVNAVNPTVVMTSMGRTNWSDPHKAKVMLDRIPLGKFAEVENVVDAILFLLSHRSNMTTGSTLPVDGGFLVT +>Q7Z4W1.2 +MELFLAGRRVLVTGAGKGIGRGTVQALHATGARVVAVSRTQADLDSLVRECPGIEPVCVDLGDWEATERALGSVGPVDLLVNNAAVALLQPFLEVTKEAFDRSFEVNLRAVIQVSQIVARGLIARGVPGAIVNVSSQCSQRAVTNHSVYCSTKGALDMLTKVMALELGPHKIRVNAVNPTVVMTSMGQATWSDPHKAKTMLNRIPLGKFAEVEHVVNAILFLLSDRSGMTTGSTLPVEGGFWAC +>P40397.2 +MANQKKKTLPPQHQNQQPGFEYLMDPRPVFDKPKKAKKLEGKTAIITGGDSGIGRAVSVLFAKEGANVVIVYLNEHQDAEETKQYVEKEGVKCLLIAGDVGDEAFCNDVVGQASQVFPSIDILVNNAAEQHVQPSIEKITSHQLIRTFQTNIFSMFYLTKAVLPHLKKGSSIINTASITAYKGNKTLIDYSATKGAIVTFTRSLSQSLVQQGIRVNAVAPGPIWTPLIPASFAAKDVEVFGSDVPMERPGQPVEVAPSYLYLASDDSTYVTGQTIHVNGGTIVNG +>O07575.1 +MNPMDRQTEGQEPQHQDRQPGIESKMNPLPLSEDEDYRGSGKLKGKVAIITGGDSGIGRAAAIAFAKEGADISILYLDEHSDAEETRKRIEKENVRCLLIPGDVGDENHCEQAVQQTVDHFGKLDILVNNAAEQHPQDSILNISTEQLEKTFRTNIFSMFHMTKKALPHLQEGCAIINTTSITAYEGDTALIDYSSTKGAIVSFTRSMAKSLADKGIRVNAVAPGPIWTPLIPATFPEEKVKQHGLDTPMGRPGQPVEHAGAYVLLASDESSYMTGQTIHVNGGRFIST +>Q5KTS5.1 +MASGGQFPPQKQESQPGKEHLMDPSPQHASPHYKPANKLQGKVALVTGGDSGIGRSVCYHFALEGATVAFTFVKGHEDKDANETLELLRKAKSSDAKDPIAIAADLGFDDNCKKVVDQVVNAFGSIDVLVNNAAEQYKASTVEDIDEERLERVFRTNIFAYFFMARHALKHMREGSTIINTTSINAYKGNAKLLDYTATKGAIVAFTRGLSLQLISKGIRVNGVAPGPVWTPLIPSSFDEEEVKQFGSEVPMKRAGQPYEIATAYVFLASCDSSYYSGQVLHPNGGAIVNG +>Q49117.2 +MSKLEGKVAVVTGASKGIGAAIAKALAKDGAAVVVNYASSKAGADAVVEAITAAGGKAIAVQADVSQAVQARGLVEAAVQQFGRLDVLVNNSGVYEFAAIEEVTEEHYRRIFDVNVLGVLLATQAASKHLGEGGSIINISSVVTDVLMPTSAVYSGTKGALNAISGVLANELAPRKIRVNVVSPGYVVTEGTHTAGIAGSEMEAGLVAQTPLGRSGQPDDIAGVVAFLASDDARWVTGEVINASGGVR +>Q9KQH7.2 +MNLEGKVALVTGASRGIGKAIAELLAERGAKVIGTATSESGAQAISDYLGDNGKGMALNVTNPESIEAVLKAITDEFGGVDILVNNAGITRDNLLMRMKEEEWSDIMETNLTSIFRLSKAVLRGMMKKRQGRIINVGSVVGTMGNAGQANYAAAKAGVIGFTKSMAREVASRGVTVNTVAPGFIETDMTKALNDEQRTATLAQVPAGRLGDPREIASAVAFLASPEAAYITGETLHVNGGMYMI +>P55336.1 +MNLEGKIALVTGASRGIGRAIAELLVERGATVIGTATSEGGAAAISEYLGENGKGLALNVTDVESIEATLKTINDECGAIDILVNNAGITRDNLLMRMKDDEWNDIINTNLTPIYRMSKAVLRGMMKKRAGRIINVGSVVGTMGNAGQTNYAAAKAGVIGFTKSMAREVASRGVTVNTVAPGFIETDMTKALNDDQRAATLSNVPAGRLGDPREIASAVVFLASPEAAYITGETLHVNGGMYMV +>P0A2C9.1 +MSFEGKIALVTGASRGIGRAIAETLVARGAKVIGTATSENGAKNISDYLGANGKGLMLNVTDPASIESVLENIRAEFGEVDILVNNAGITRDNLLMRMKDDEWNDIIETNLSSVFRLSKAVMRAMMKKRCGRIITIGSVVGTMGNAGQANYAAAKAGLIGFSKSLAREVASRGITVNVVAPGFIETDMTRALSDDQRAGILAQVPAGRLGGAQEIASAVAFLASDEASYITGETLHVNGGMYMV diff --git a/modules/bindings/tests/testfiles/headers.dmp b/modules/bindings/tests/testfiles/headers.dmp new file mode 100644 index 0000000000000000000000000000000000000000..7c188177f057ad5bad8d2ce8518f39e3b16450d3 --- /dev/null +++ b/modules/bindings/tests/testfiles/headers.dmp @@ -0,0 +1,29 @@ +1 >Q5KTS5.1 +2 >O07575.1 +3 >P40397.2 +4 >P46331.2 +17 >P39640.2 +18 >Q8KWT4.1 +5 >P66776.1 +6 >Q8CQD2.1 +7 >Q5HKG6.1 +10 >Q48436.2 +8 >P42317.2 +9 >P50199.1 +13 >P50842.1 +14 >P0A9Q0.1 +15 >P37769.2 +16 >Q05528.1 +23 >P0A2C9.1 +25 >Q9KQH7.2 +11 >Q59787.1 +12 >target +24 >P55336.1 +19 >Q9WYG0.1 +22 >Q49117.2 +20 >P50197.1 +21 >P50198.1 +26 >Q7Z4W1.2 +27 >Q91XV4.1 +28 >Q1JP75.1 +29 >Q04520.1