From 1e7f33d63be100c3124a822c8868c4661bcc5560 Mon Sep 17 00:00:00 2001
From: Gabriel Studer <gabriel.studer@stud.unibas.ch>
Date: Tue, 20 Nov 2012 16:11:49 +0100
Subject: [PATCH] added unit tests for kclust binding

---
 modules/bindings/tests/CMakeLists.txt         |  1 +
 modules/bindings/tests/test_kclust.py         | 44 ++++++++++++++
 modules/bindings/tests/testfiles/clusters.dmp | 30 ++++++++++
 .../bindings/tests/testfiles/fastadb.fasta    | 58 +++++++++++++++++++
 modules/bindings/tests/testfiles/headers.dmp  | 29 ++++++++++
 5 files changed, 162 insertions(+)
 create mode 100644 modules/bindings/tests/test_kclust.py
 create mode 100644 modules/bindings/tests/testfiles/clusters.dmp
 create mode 100644 modules/bindings/tests/testfiles/fastadb.fasta
 create mode 100644 modules/bindings/tests/testfiles/headers.dmp

diff --git a/modules/bindings/tests/CMakeLists.txt b/modules/bindings/tests/CMakeLists.txt
index d1b344ff5..7b04104d9 100644
--- a/modules/bindings/tests/CMakeLists.txt
+++ b/modules/bindings/tests/CMakeLists.txt
@@ -2,6 +2,7 @@ set(OST_BINDINGS_UNIT_TESTS
   test_msms.py
   test_clustalw.py
   test_blast.py
+  test_kclust.py
 )
 
 ost_unittest(MODULE bindings
diff --git a/modules/bindings/tests/test_kclust.py b/modules/bindings/tests/test_kclust.py
new file mode 100644
index 000000000..fe7251613
--- /dev/null
+++ b/modules/bindings/tests/test_kclust.py
@@ -0,0 +1,44 @@
+import sys
+import unittest
+from ost import *
+from ost import settings
+from ost.bindings import kclust
+
+class TestkClustBindings(unittest.TestCase):
+  """Unit tests for the kClust binding (ost.bindings.kclust)."""
+  def testkClustExec(self):
+    """Cluster testfiles/multiple.fasta at two thresholds, check the counts."""
+    seq_list=io.LoadSequenceList('testfiles/multiple.fasta')
+
+    clusters40=kclust.kClust(seq_list,clustering_thresh=40)
+    clusters90=kclust.kClust(seq_list,clustering_thresh=90)
+    # expected number of clusters for clustering_thresh 40 and 90
+    self.assertEqual(len(clusters40),16)
+    self.assertEqual(len(clusters90),27)
+
+  def testkClustParseOutput(self):
+    """Check representative ids and sizes from _ParseOutput('testfiles')."""
+    representative_ids=['Q5KTS5.1','P46331.2','P66776.1','P42317.2','P50199.1',
+                        'Q59787.1','Q9WYG0.1','P50197.1','P50198.1','Q7Z4W1.2',
+                        'Q04520.1']
+    # number of sequences per cluster, in the same order as representative_ids
+    cluster_sizes=[3,3,4,1,7,3,2,1,1,3,1]
+
+    result=kclust._ParseOutput('testfiles')
+
+    self.assertEqual(len(result),11)
+    # the length check above ensures zip() cannot silently drop a cluster
+    for c, r_id in zip(result, representative_ids):
+      self.assertEqual(c.representative_id, r_id)
+    for c, s in zip(result, cluster_sizes):
+      self.assertEqual(len(c.sequences),s)
+
+if __name__ == "__main__":
+  # test if kClust is available on system, otherwise ignore tests
+  try:
+    settings.Locate('kClust')
+  except settings.FileNotFound:
+    print("Could not find kClust executable: ignoring unit tests")
+    sys.exit(0)
+  from ost import testutils
+  testutils.RunTests()
diff --git a/modules/bindings/tests/testfiles/clusters.dmp b/modules/bindings/tests/testfiles/clusters.dmp
new file mode 100644
index 000000000..f3974d214
--- /dev/null
+++ b/modules/bindings/tests/testfiles/clusters.dmp
@@ -0,0 +1,30 @@
+# 29
+1 1
+2 1
+3 1
+4 4
+17 4
+18 4
+5 5
+6 5
+7 5
+10 5
+8 8
+9 9
+13 9
+14 9
+15 9
+16 9
+23 9
+25 9
+11 11
+12 11
+24 11
+19 19
+22 19
+20 20
+21 21
+26 26
+27 26
+28 26
+29 29
diff --git a/modules/bindings/tests/testfiles/fastadb.fasta b/modules/bindings/tests/testfiles/fastadb.fasta
new file mode 100644
index 000000000..56895f504
--- /dev/null
+++ b/modules/bindings/tests/testfiles/fastadb.fasta
@@ -0,0 +1,58 @@
+>P50198.1
+MANRLAGKVALITGGASGLGAAQAKRFAEEGAKVVIGDLNEEMAKGVVAEIRAAGGDALFIRLDVTDAASWNNAIAAAVDGFGGLTTLSNTAGIIHPGGFEEESIEGWNKMVAVNQTAIFLGIKAAIPELVKSGNGSIINISSLIGMFPTAGNASYCATKAAVRIMSKAAALEFVDRGVRVNTIVPGGMNTPITANVPPDVLKQQTSQIPMGKLGDPIDIANGALFLASDEAKYITGVDLPIDGGWSVGV
+>Q9WYG0.1
+MNFQGKVVLITGAGSGIGKKAAVMFAERGAKVAINDISEEKGKETVELIKSMGGEAAFIFGDVAKDAEQIVKKTVETFGRLDILVNNAGIVPYGNIEETSEEDFDKTMAVNVKGPFLLSKYAVEQMKKQGGGVIVNVSSEAGLIGIPRRCVYSVSKAALLGLTRSLAVDYVDYGIRVNAVCPGTTQSEGLMARVKASPNPEELLKKMTSRIPMKRLGKEEEIAFAILFAACDEAGFMTGSIINIDGGSTAV
+>Q8KWT4.1
+MNLTDKTVLITGGASGIGYAAVQAFLNQQANVVVADIDEAQGEAMIRKENNDRLHFVHTDITDEPACQNAIRSAVDKFGGLDVLINNAGIEIVAPIHEMELSNWNKVLNVNLTGMFLMSKHALKYMLKSGKGNIINTCSVGGVVAWPDIPAYNASKGGVLQLTRSMAVDYAKHNIRVNCVCPGIIDTPLNEKSFLENNEGTLEEIKKEKAKVNPLLRLGKPEEIANVMLFLASDLSSYMTGSAITADGGYTAQ +>P39640.2 +MNLTDKTVLITGGASGIGYAAVQAFLGQQANVVVADIDEAQGEAMVRKENNDRLHFVQTDITDEAACQHAVESAVHTFGGLDVLINNAGIEIVAPIHEMELSDWNKVLQVNLTGMFLMSKHALKHMLAAGKGNIINTCSVGGLVAWPDIPAYNASKGGVLQLTKSMAVDYAKHQIRVNCVCPGIIDTPLNEKSFLENNEGTLEEIKKEKAKVNPLLRLGKPEEIANVMLFLASDLSSYMTGSAITADGGYTAQ +>P46331.2 +MGRLENKTAVITGAATGIGQATAEVFANEGARVIIGDINKDQMEETVDAIRKNGGQAESFHLDVSDENSVKAFADQIKDACGTIDILFNNAGVDQEGGKVHEYPVDLFDRIIAVDLRGTFLCSKYLIPLMLENGGSIINTSSMSGRAADLDRSGYNAAKGGITNLTKAMAIDYARNGIRVNSISPGTIETPLIDKLAGTKEQEMGEQFREANKWITPLGRLGQPKEMATVALFLASDDSSYVTGEDITADGGIMAYTWPGKMLIEEKWKEETK +>P50197.1 +MSDLSGKTIIVTGGGSGIGRATVELLVASGANVPVADINDEAGEAVVATSGGKAAYFRCDIAQEEDVKALVAQTLAAFGGLDGSFNNAAIPQAGLPLAEVSLERFRQSMDINVTGTFLCMKYQILAMIERGTKGSIVNTASAAGVVGVPMHGEYVGAKHAVVGLTRVAAADYGKHGIRVNALVPGAVRTPMLQRAMDNDAGLEPYLNSIHPIGRFSEPHEQAQAAVWLLSDAASFVTGSCLAADGGFTAI +>P42317.2 +MRKQVALVTGAAGGIGFEIAREFAREGASVIVSDLRPEACEKAASKLAEEGFDAAAIPYDVTKEAQVADTVNVIQKQYGRLDILVNNAGIQHVAPIEEFPTDTFEQLIKVMLTAPFIAMKHVFPIMKKQQFGRIINIASVNGLVGFAGKSAYNSAKHGVIGLTKVGALEGAPHGITVNALCPGYVDTQLVRNQLSDLSKTRNVPYDSVLEQVIFPLVPQKRLLSVKEIADYAVFLASEKAKGVTGQAVVLDGGYTAQ +>target +MRLDGKTALITGSARGIGRAFAEAYVREGARVAIADINLEAARATAAEIGPAACAIALDVTDQASIDRCVAELLDRWGSIDILVNNAALFDLAPIVEITRESYDRLFAINVSGTLFMMQAVARAMIAGGRGGKIINMASQAGRRGEALVGVYCATKAAVISLTQSAGLNLIRHGINVNAIAPGVVDGEHWDGVDAKFADYENLPRGEKKRQVGAAVPFGRMGRAEDLTGMAIFLATPEADYIVAQTYNVDGGNWMS +>Q59787.1 +MRLDGKTALITGSARGIGRAFAEAYVREGARVAIADINLEAARATAAEIGPAACAIALDVTDQASIDRCVAELLDRWGSIDILVNNAALFDLAPIVEITRESYDRLFAINVSGTLFMMQAVARAMIAGGRGGKIINMASQAGRRGEALVGVYCATKAAVISLTQSAGLNLIRHGINVNAIAPGVVDGEHWDGVDAKFADYENLPRGEKKRQVGAAVPFGRMGRAEDLTGMAIFLATPEADYIVAQTYNVDGGNWMS +>Q48436.2 
+MKKVALVTGAGQGIGKAIALRLVKDGFAVAIADYNDATAKAVASEINQAGGRAMAVKVDVSDRDQVFAAVEQARKTLGGFDVIVNNAGVAPSTPIESITPEIVDKVYNINVKGVIWGIQAAVEAFKKEGHGGKIINACSQAGHVGNPELAVYSSSKFAVRGLTQTAARDLAPLGITVNGYCPGIVKTPMWAEIDRQVSEAAGKPLGYGTAEFAKRITLGRLSEPEDVAACVSYLASPDSDYMTGQSLLIDGGMVFN +>Q04520.1 +MQKVALVTGAGQGIGKAIALRLVKDGFAVAIADYNDATATAVAAEINQAGGRAVAIKVDVSRRDQVFAAVEQARKALGGFNVIVNNAGIAPSTPIESITEEIVDRVYNINVKGVIWGMQAAVEAFKKEGHGGKIVNACSQAGHVGNPELAVYSSSKFAVRGLTQTAARDLAPLGITVNGFCPGIVKTPMWAEIDRQCRKRRANRWATARLNLPNASPLAACRSLKTSPPACRSSPARIPTI +>P66776.1 +MTNNKVALVTGGAQGIGFKIAERLVEDGFKVAVVDFNEEGAKAAALKLSSDGTKAIAIKADVSNRDDVFNAVRQTAAQFGDFHVMVNNAGLGPTTPIDTITEEQFKTVYGVNVAGVLWGIQAAHEQFKKFNHGGKIINATSQAGVEGNPGLSLYCSTKFAVRGLTQVAAQDLASEGITVNAFAPGIVQTPMMESIAVATAEEAGKPEAWGWEQFTSQIALGRVSQPEDVSNVVSFLAGKDSDYITGQTIIVDGGMRFR +>Q5HKG6.1 +MSKTAIITGSAGGLGKGIAERLANDGFNIVLQDINEALLLETEKEFKEKGYQAVAFKSDVSKKKEQEELVQFAVTEFGQLDVMVNNAGVDAVTPILEIGEEELSKLFNINVFGTLFGIQAAANQFIKQKSKGKIINACSIAGHESYEVLGTYSATKHSVRSFTQTAAKELADKGITVNAYCPGVAKTEMWDRIDEEMVKLDDSLEIGDAFEAFSSEIKLGRYQEPSDVANLVSFLASNDSDYITGQSILTDGGLVYR +>Q8CQD2.1 +MSKTAIITGAAGGLGKGIAERLANDGFNIVLQDINEALLLETEKEFKEKGYQAVAYKSDVSKKKEQEELVQFAVTEFGQLDVMVNNAGVDAVTPILEIGEEELSKLFNINVFGTLFGIQAAANQFIKQKSKGKIINACSIAGHESYEVLGTYSATKHSVRSFTQTAAKELADKGITVNAYCPGVAKTEMWDRIDEEMVKLDDSLEIGDAFEAFSSEIKLGRYQEPSDVANLVSFLASNDSDYITGQSILTDGGLVYR +>P50199.1 +MSHPDLFSLSGARALVTGASRGIGLTLAKGLARYGAEVVLNGRNAESLDSAQSGFEAEGLKASTAVFDVTDQDAVIDGVAAIERDMGPIDILINNAGIQRRAPLEEFSRKDWDDLMSTNVNAVFFVGQAVARHMIPRGRGKIVNICSVQSELARPGIAPYTATKGAVKNLTKGMATDWGRHGLQINGLAPGYFATEMTERLVADEEFTDWLCKRTPAGRWGQVEELVGAAVFLSSRASSFVNGQVLMVDGGITVSL +>P0A9Q0.1 +MNDLFSLAGKNILITGSAQGIGFLLATGLGKYGAQIIINDITAERAELAVEKLHQEGIQAVAAPFNVTHKHEIDAAVEHIEKDIGPIDVLVNNAGIQRRHPFTEFPEQEWNDVIAVNQTAVFLVSQAVTRHMVERKAGKVINICSMQSELGRDTITPYAASKGAVKMLTRGMCVELARHNIQVNGIAPGYFKTEMTKALVEDEAFTAWLCKRTPAARWGDPQELIGAAVFLSSKASDFVNGHLLFVDGGMLVAV +>Q05528.1 
+MILNTFNLQGKVALITGCDTGLGQGMAVGLAEAGCDIVGVNIVEPKETIEKVTAVGRRFLSLTADMSDISGHAALVEKAVAEFGKVDILVNNAGIIRREDAIEFSEKNWDDVMNLNIKSVFFMSQTVARQFIKQGHGGKIINIASMLSFQGGIPVPSYTASKKRVMGITRIVANEWAKHNINVNAIAPGYMATNNTQHVRADQDRSKEILDRIPAGRWGLPQDLQGPAVFLASSASDYVNGYTIAVDGGWLAR +>P37769.2 +MILSAFSLEGKVAVVTGCDTGLGQGMALGLAQAGCDIVGINIVEPTETIEQVTALGRRFLSLTADLRKIDGIPALLDRAVAEFGHIDILVNNAGLIRREDALEFSEKDWDDVMNLNIKSVFFMSQAAAKHFIAQGNGGKIINIASMLSFQGGIRVPSYTASKSGVMGVTRLMANEWAKHNINVNAIAPGYMATNNTQQLRADEQRSAEILDRIPAGRWGLPSDLMGPIVFLASSASDYVNGYTIAVDGGWLAR +>P50842.1 +MGYLHDAFSLKGKTALVTGPGTGIGQGIAKALAGAGADIIGTSHTSSLSETQQLVEQEGRIFTSFTLDMSKPEAIKDSAAELFENRQIDILVNNAGIIHREKAEDFPEENWQHVLNVNLNSLFILTQLAGRHMLKRGHGKIINIASLLSFQGGILVPAYTASKHAVAGLTKSFANEWAASGIQVNAIAPGYISTANTKPIRDDEKRNEDILKRIPAGRWGQADDIGGTAVFLASRASDYVNGHILAVDGGWLSR +>Q1JP75.1 +MDLRLAGRRALVTGAGKGIGRSIVKALHAAGARVVAVSRTQADLDSLVRECPGVETVCVDLADWEATEQALGGVGPVDLLVNNAAVAFLQPFLEVTKEAYDMSFSVNLRAVIQVSQIVARGLIARGAPGVIVNVSSQASQRGLTNHSVYCSTKGALDTLTKVMAVELGPHKIRVNAVNPTVVMTPMGQAAWSDPQKAKAMLDRIPLGRFAEVENVVDTILFLLSDRSSMTTGSTVPVDGGFLAT +>Q91XV4.1 +MDLGLAGRRALVTGAGKGIGRSTVLALQAAGAHVVAVSRTQADLDSLVSECPGVETVCVDLADWEATEQALSSVGPVDLLVNNAAVALLQPFLEVTKEAFDMSFNVNLRAVIQVSQIVARGMIARGAPGAIVNVSSQASQRALANHSVYCSTKGALDMLTKMMALELGPHKIRVNAVNPTVVMTSMGRTNWSDPHKAKVMLDRIPLGKFAEVENVVDAILFLLSHRSNMTTGSTLPVDGGFLVT +>Q7Z4W1.2 +MELFLAGRRVLVTGAGKGIGRGTVQALHATGARVVAVSRTQADLDSLVRECPGIEPVCVDLGDWEATERALGSVGPVDLLVNNAAVALLQPFLEVTKEAFDRSFEVNLRAVIQVSQIVARGLIARGVPGAIVNVSSQCSQRAVTNHSVYCSTKGALDMLTKVMALELGPHKIRVNAVNPTVVMTSMGQATWSDPHKAKTMLNRIPLGKFAEVEHVVNAILFLLSDRSGMTTGSTLPVEGGFWAC +>P40397.2 +MANQKKKTLPPQHQNQQPGFEYLMDPRPVFDKPKKAKKLEGKTAIITGGDSGIGRAVSVLFAKEGANVVIVYLNEHQDAEETKQYVEKEGVKCLLIAGDVGDEAFCNDVVGQASQVFPSIDILVNNAAEQHVQPSIEKITSHQLIRTFQTNIFSMFYLTKAVLPHLKKGSSIINTASITAYKGNKTLIDYSATKGAIVTFTRSLSQSLVQQGIRVNAVAPGPIWTPLIPASFAAKDVEVFGSDVPMERPGQPVEVAPSYLYLASDDSTYVTGQTIHVNGGTIVNG +>O07575.1 
+MNPMDRQTEGQEPQHQDRQPGIESKMNPLPLSEDEDYRGSGKLKGKVAIITGGDSGIGRAAAIAFAKEGADISILYLDEHSDAEETRKRIEKENVRCLLIPGDVGDENHCEQAVQQTVDHFGKLDILVNNAAEQHPQDSILNISTEQLEKTFRTNIFSMFHMTKKALPHLQEGCAIINTTSITAYEGDTALIDYSSTKGAIVSFTRSMAKSLADKGIRVNAVAPGPIWTPLIPATFPEEKVKQHGLDTPMGRPGQPVEHAGAYVLLASDESSYMTGQTIHVNGGRFIST +>Q5KTS5.1 +MASGGQFPPQKQESQPGKEHLMDPSPQHASPHYKPANKLQGKVALVTGGDSGIGRSVCYHFALEGATVAFTFVKGHEDKDANETLELLRKAKSSDAKDPIAIAADLGFDDNCKKVVDQVVNAFGSIDVLVNNAAEQYKASTVEDIDEERLERVFRTNIFAYFFMARHALKHMREGSTIINTTSINAYKGNAKLLDYTATKGAIVAFTRGLSLQLISKGIRVNGVAPGPVWTPLIPSSFDEEEVKQFGSEVPMKRAGQPYEIATAYVFLASCDSSYYSGQVLHPNGGAIVNG +>Q49117.2 +MSKLEGKVAVVTGASKGIGAAIAKALAKDGAAVVVNYASSKAGADAVVEAITAAGGKAIAVQADVSQAVQARGLVEAAVQQFGRLDVLVNNSGVYEFAAIEEVTEEHYRRIFDVNVLGVLLATQAASKHLGEGGSIINISSVVTDVLMPTSAVYSGTKGALNAISGVLANELAPRKIRVNVVSPGYVVTEGTHTAGIAGSEMEAGLVAQTPLGRSGQPDDIAGVVAFLASDDARWVTGEVINASGGVR +>Q9KQH7.2 +MNLEGKVALVTGASRGIGKAIAELLAERGAKVIGTATSESGAQAISDYLGDNGKGMALNVTNPESIEAVLKAITDEFGGVDILVNNAGITRDNLLMRMKEEEWSDIMETNLTSIFRLSKAVLRGMMKKRQGRIINVGSVVGTMGNAGQANYAAAKAGVIGFTKSMAREVASRGVTVNTVAPGFIETDMTKALNDEQRTATLAQVPAGRLGDPREIASAVAFLASPEAAYITGETLHVNGGMYMI +>P55336.1 +MNLEGKIALVTGASRGIGRAIAELLVERGATVIGTATSEGGAAAISEYLGENGKGLALNVTDVESIEATLKTINDECGAIDILVNNAGITRDNLLMRMKDDEWNDIINTNLTPIYRMSKAVLRGMMKKRAGRIINVGSVVGTMGNAGQTNYAAAKAGVIGFTKSMAREVASRGVTVNTVAPGFIETDMTKALNDDQRAATLSNVPAGRLGDPREIASAVVFLASPEAAYITGETLHVNGGMYMV +>P0A2C9.1 +MSFEGKIALVTGASRGIGRAIAETLVARGAKVIGTATSENGAKNISDYLGANGKGLMLNVTDPASIESVLENIRAEFGEVDILVNNAGITRDNLLMRMKDDEWNDIIETNLSSVFRLSKAVMRAMMKKRCGRIITIGSVVGTMGNAGQANYAAAKAGLIGFSKSLAREVASRGITVNVVAPGFIETDMTRALSDDQRAGILAQVPAGRLGGAQEIASAVAFLASDEASYITGETLHVNGGMYMV diff --git a/modules/bindings/tests/testfiles/headers.dmp b/modules/bindings/tests/testfiles/headers.dmp new file mode 100644 index 000000000..7c188177f --- /dev/null +++ b/modules/bindings/tests/testfiles/headers.dmp @@ -0,0 +1,29 @@ +1 >Q5KTS5.1 +2 >O07575.1 +3 >P40397.2 +4 >P46331.2 +17 >P39640.2 +18 >Q8KWT4.1 +5 >P66776.1 +6 >Q8CQD2.1 +7 >Q5HKG6.1 +10 >Q48436.2 +8 
>P42317.2 +9 >P50199.1 +13 >P50842.1 +14 >P0A9Q0.1 +15 >P37769.2 +16 >Q05528.1 +23 >P0A2C9.1 +25 >Q9KQH7.2 +11 >Q59787.1 +12 >target +24 >P55336.1 +19 >Q9WYG0.1 +22 >Q49117.2 +20 >P50197.1 +21 >P50198.1 +26 >Q7Z4W1.2 +27 >Q91XV4.1 +28 >Q1JP75.1 +29 >Q04520.1 -- GitLab