Skip to content
Snippets Groups Projects
Commit 1e7f33d6 authored by Gabriel Studer
Browse files

added unit tests for kclust binding

parent 9f9408cc
Branches
Tags
No related merge requests found
......@@ -2,6 +2,7 @@ set(OST_BINDINGS_UNIT_TESTS
test_msms.py
test_clustalw.py
test_blast.py
test_kclust.py
)
ost_unittest(MODULE bindings
......
import sys
import unittest
from ost import *
from ost import settings
from ost.bindings import kclust
class TestkClustBindings(unittest.TestCase):
    """Unit tests for the kClust binding in ost.bindings.kclust."""

    def testkClustExec(self):
        """Cluster the sequences from the test FASTA file at two thresholds.

        Checks the number of clusters returned for clustering_thresh=40
        and clustering_thresh=90.
        """
        sequences = io.LoadSequenceList('testfiles/multiple.fasta')
        loose_clusters = kclust.kClust(sequences, clustering_thresh=40)
        strict_clusters = kclust.kClust(sequences, clustering_thresh=90)

        self.assertEqual(len(loose_clusters), 16)
        self.assertEqual(len(strict_clusters), 27)

    def testkClustParseOutput(self):
        """Parse the output stored under 'testfiles' and verify the clusters.

        Every parsed cluster is compared, in order, against the expected
        representative id and the expected number of member sequences.
        """
        # (representative id, cluster size), in the order the parser is
        # expected to return the clusters.
        expected = [
            ('Q5KTS5.1', 3),
            ('P46331.2', 3),
            ('P66776.1', 4),
            ('P42317.2', 1),
            ('P50199.1', 7),
            ('Q59787.1', 3),
            ('Q9WYG0.1', 2),
            ('P50197.1', 1),
            ('P50198.1', 1),
            ('Q7Z4W1.2', 3),
            ('Q04520.1', 1),
        ]

        clusters = kclust._ParseOutput('testfiles')
        self.assertEqual(len(clusters), 11)

        # All representative ids are checked before any cluster size.
        for cluster, (expected_id, _) in zip(clusters, expected):
            self.assertEqual(cluster.representative_id, expected_id)

        for cluster, (_, expected_size) in zip(clusters, expected):
            self.assertEqual(len(cluster.sequences), expected_size)
if __name__ == "__main__":
    # Only run the tests when the kClust executable can be located on this
    # system; otherwise report it and exit successfully so that a missing
    # optional tool does not count as a test failure.
    try:
        settings.Locate('kClust')
    except settings.FileNotFound:
        # Parenthesised single-argument form prints the same text under
        # Python 2 and Python 3.
        print("Could not find kClust executable: ignoring unit tests")
        sys.exit(0)
    from ost import testutils
    testutils.RunTests()
# 29
1 1
2 1
3 1
4 4
17 4
18 4
5 5
6 5
7 5
10 5
8 8
9 9
13 9
14 9
15 9
16 9
23 9
25 9
11 11
12 11
24 11
19 19
22 19
20 20
21 21
26 26
27 26
28 26
29 29
>P50198.1
MANRLAGKVALITGGASGLGAAQAKRFAEEGAKVVIGDLNEEMAKGVVAEIRAAGGDALFIRLDVTDAASWNNAIAAAVDGFGGLTTLSNTAGIIHPGGFEEESIEGWNKMVAVNQTAIFLGIKAAIPELVKSGNGSIINISSLIGMFPTAGNASYCATKAAVRIMSKAAALEFVDRGVRVNTIVPGGMNTPITANVPPDVLKQQTSQIPMGKLGDPIDIANGALFLASDEAKYITGVDLPIDGGWSVGV
>Q9WYG0.1
MNFQGKVVLITGAGSGIGKKAAVMFAERGAKVAINDISEEKGKETVELIKSMGGEAAFIFGDVAKDAEQIVKKTVETFGRLDILVNNAGIVPYGNIEETSEEDFDKTMAVNVKGPFLLSKYAVEQMKKQGGGVIVNVSSEAGLIGIPRRCVYSVSKAALLGLTRSLAVDYVDYGIRVNAVCPGTTQSEGLMARVKASPNPEELLKKMTSRIPMKRLGKEEEIAFAILFAACDEAGFMTGSIINIDGGSTAV
>Q8KWT4.1
MNLTDKTVLITGGASGIGYAAVQAFLNQQANVVVADIDEAQGEAMIRKENNDRLHFVHTDITDEPACQNAIRSAVDKFGGLDVLINNAGIEIVAPIHEMELSNWNKVLNVNLTGMFLMSKHALKYMLKSGKGNIINTCSVGGVVAWPDIPAYNASKGGVLQLTRSMAVDYAKHNIRVNCVCPGIIDTPLNEKSFLENNEGTLEEIKKEKAKVNPLLRLGKPEEIANVMLFLASDLSSYMTGSAITADGGYTAQ
>P39640.2
MNLTDKTVLITGGASGIGYAAVQAFLGQQANVVVADIDEAQGEAMVRKENNDRLHFVQTDITDEAACQHAVESAVHTFGGLDVLINNAGIEIVAPIHEMELSDWNKVLQVNLTGMFLMSKHALKHMLAAGKGNIINTCSVGGLVAWPDIPAYNASKGGVLQLTKSMAVDYAKHQIRVNCVCPGIIDTPLNEKSFLENNEGTLEEIKKEKAKVNPLLRLGKPEEIANVMLFLASDLSSYMTGSAITADGGYTAQ
>P46331.2
MGRLENKTAVITGAATGIGQATAEVFANEGARVIIGDINKDQMEETVDAIRKNGGQAESFHLDVSDENSVKAFADQIKDACGTIDILFNNAGVDQEGGKVHEYPVDLFDRIIAVDLRGTFLCSKYLIPLMLENGGSIINTSSMSGRAADLDRSGYNAAKGGITNLTKAMAIDYARNGIRVNSISPGTIETPLIDKLAGTKEQEMGEQFREANKWITPLGRLGQPKEMATVALFLASDDSSYVTGEDITADGGIMAYTWPGKMLIEEKWKEETK
>P50197.1
MSDLSGKTIIVTGGGSGIGRATVELLVASGANVPVADINDEAGEAVVATSGGKAAYFRCDIAQEEDVKALVAQTLAAFGGLDGSFNNAAIPQAGLPLAEVSLERFRQSMDINVTGTFLCMKYQILAMIERGTKGSIVNTASAAGVVGVPMHGEYVGAKHAVVGLTRVAAADYGKHGIRVNALVPGAVRTPMLQRAMDNDAGLEPYLNSIHPIGRFSEPHEQAQAAVWLLSDAASFVTGSCLAADGGFTAI
>P42317.2
MRKQVALVTGAAGGIGFEIAREFAREGASVIVSDLRPEACEKAASKLAEEGFDAAAIPYDVTKEAQVADTVNVIQKQYGRLDILVNNAGIQHVAPIEEFPTDTFEQLIKVMLTAPFIAMKHVFPIMKKQQFGRIINIASVNGLVGFAGKSAYNSAKHGVIGLTKVGALEGAPHGITVNALCPGYVDTQLVRNQLSDLSKTRNVPYDSVLEQVIFPLVPQKRLLSVKEIADYAVFLASEKAKGVTGQAVVLDGGYTAQ
>target
MRLDGKTALITGSARGIGRAFAEAYVREGARVAIADINLEAARATAAEIGPAACAIALDVTDQASIDRCVAELLDRWGSIDILVNNAALFDLAPIVEITRESYDRLFAINVSGTLFMMQAVARAMIAGGRGGKIINMASQAGRRGEALVGVYCATKAAVISLTQSAGLNLIRHGINVNAIAPGVVDGEHWDGVDAKFADYENLPRGEKKRQVGAAVPFGRMGRAEDLTGMAIFLATPEADYIVAQTYNVDGGNWMS
>Q59787.1
MRLDGKTALITGSARGIGRAFAEAYVREGARVAIADINLEAARATAAEIGPAACAIALDVTDQASIDRCVAELLDRWGSIDILVNNAALFDLAPIVEITRESYDRLFAINVSGTLFMMQAVARAMIAGGRGGKIINMASQAGRRGEALVGVYCATKAAVISLTQSAGLNLIRHGINVNAIAPGVVDGEHWDGVDAKFADYENLPRGEKKRQVGAAVPFGRMGRAEDLTGMAIFLATPEADYIVAQTYNVDGGNWMS
>Q48436.2
MKKVALVTGAGQGIGKAIALRLVKDGFAVAIADYNDATAKAVASEINQAGGRAMAVKVDVSDRDQVFAAVEQARKTLGGFDVIVNNAGVAPSTPIESITPEIVDKVYNINVKGVIWGIQAAVEAFKKEGHGGKIINACSQAGHVGNPELAVYSSSKFAVRGLTQTAARDLAPLGITVNGYCPGIVKTPMWAEIDRQVSEAAGKPLGYGTAEFAKRITLGRLSEPEDVAACVSYLASPDSDYMTGQSLLIDGGMVFN
>Q04520.1
MQKVALVTGAGQGIGKAIALRLVKDGFAVAIADYNDATATAVAAEINQAGGRAVAIKVDVSRRDQVFAAVEQARKALGGFNVIVNNAGIAPSTPIESITEEIVDRVYNINVKGVIWGMQAAVEAFKKEGHGGKIVNACSQAGHVGNPELAVYSSSKFAVRGLTQTAARDLAPLGITVNGFCPGIVKTPMWAEIDRQCRKRRANRWATARLNLPNASPLAACRSLKTSPPACRSSPARIPTI
>P66776.1
MTNNKVALVTGGAQGIGFKIAERLVEDGFKVAVVDFNEEGAKAAALKLSSDGTKAIAIKADVSNRDDVFNAVRQTAAQFGDFHVMVNNAGLGPTTPIDTITEEQFKTVYGVNVAGVLWGIQAAHEQFKKFNHGGKIINATSQAGVEGNPGLSLYCSTKFAVRGLTQVAAQDLASEGITVNAFAPGIVQTPMMESIAVATAEEAGKPEAWGWEQFTSQIALGRVSQPEDVSNVVSFLAGKDSDYITGQTIIVDGGMRFR
>Q5HKG6.1
MSKTAIITGSAGGLGKGIAERLANDGFNIVLQDINEALLLETEKEFKEKGYQAVAFKSDVSKKKEQEELVQFAVTEFGQLDVMVNNAGVDAVTPILEIGEEELSKLFNINVFGTLFGIQAAANQFIKQKSKGKIINACSIAGHESYEVLGTYSATKHSVRSFTQTAAKELADKGITVNAYCPGVAKTEMWDRIDEEMVKLDDSLEIGDAFEAFSSEIKLGRYQEPSDVANLVSFLASNDSDYITGQSILTDGGLVYR
>Q8CQD2.1
MSKTAIITGAAGGLGKGIAERLANDGFNIVLQDINEALLLETEKEFKEKGYQAVAYKSDVSKKKEQEELVQFAVTEFGQLDVMVNNAGVDAVTPILEIGEEELSKLFNINVFGTLFGIQAAANQFIKQKSKGKIINACSIAGHESYEVLGTYSATKHSVRSFTQTAAKELADKGITVNAYCPGVAKTEMWDRIDEEMVKLDDSLEIGDAFEAFSSEIKLGRYQEPSDVANLVSFLASNDSDYITGQSILTDGGLVYR
>P50199.1
MSHPDLFSLSGARALVTGASRGIGLTLAKGLARYGAEVVLNGRNAESLDSAQSGFEAEGLKASTAVFDVTDQDAVIDGVAAIERDMGPIDILINNAGIQRRAPLEEFSRKDWDDLMSTNVNAVFFVGQAVARHMIPRGRGKIVNICSVQSELARPGIAPYTATKGAVKNLTKGMATDWGRHGLQINGLAPGYFATEMTERLVADEEFTDWLCKRTPAGRWGQVEELVGAAVFLSSRASSFVNGQVLMVDGGITVSL
>P0A9Q0.1
MNDLFSLAGKNILITGSAQGIGFLLATGLGKYGAQIIINDITAERAELAVEKLHQEGIQAVAAPFNVTHKHEIDAAVEHIEKDIGPIDVLVNNAGIQRRHPFTEFPEQEWNDVIAVNQTAVFLVSQAVTRHMVERKAGKVINICSMQSELGRDTITPYAASKGAVKMLTRGMCVELARHNIQVNGIAPGYFKTEMTKALVEDEAFTAWLCKRTPAARWGDPQELIGAAVFLSSKASDFVNGHLLFVDGGMLVAV
>Q05528.1
MILNTFNLQGKVALITGCDTGLGQGMAVGLAEAGCDIVGVNIVEPKETIEKVTAVGRRFLSLTADMSDISGHAALVEKAVAEFGKVDILVNNAGIIRREDAIEFSEKNWDDVMNLNIKSVFFMSQTVARQFIKQGHGGKIINIASMLSFQGGIPVPSYTASKKRVMGITRIVANEWAKHNINVNAIAPGYMATNNTQHVRADQDRSKEILDRIPAGRWGLPQDLQGPAVFLASSASDYVNGYTIAVDGGWLAR
>P37769.2
MILSAFSLEGKVAVVTGCDTGLGQGMALGLAQAGCDIVGINIVEPTETIEQVTALGRRFLSLTADLRKIDGIPALLDRAVAEFGHIDILVNNAGLIRREDALEFSEKDWDDVMNLNIKSVFFMSQAAAKHFIAQGNGGKIINIASMLSFQGGIRVPSYTASKSGVMGVTRLMANEWAKHNINVNAIAPGYMATNNTQQLRADEQRSAEILDRIPAGRWGLPSDLMGPIVFLASSASDYVNGYTIAVDGGWLAR
>P50842.1
MGYLHDAFSLKGKTALVTGPGTGIGQGIAKALAGAGADIIGTSHTSSLSETQQLVEQEGRIFTSFTLDMSKPEAIKDSAAELFENRQIDILVNNAGIIHREKAEDFPEENWQHVLNVNLNSLFILTQLAGRHMLKRGHGKIINIASLLSFQGGILVPAYTASKHAVAGLTKSFANEWAASGIQVNAIAPGYISTANTKPIRDDEKRNEDILKRIPAGRWGQADDIGGTAVFLASRASDYVNGHILAVDGGWLSR
>Q1JP75.1
MDLRLAGRRALVTGAGKGIGRSIVKALHAAGARVVAVSRTQADLDSLVRECPGVETVCVDLADWEATEQALGGVGPVDLLVNNAAVAFLQPFLEVTKEAYDMSFSVNLRAVIQVSQIVARGLIARGAPGVIVNVSSQASQRGLTNHSVYCSTKGALDTLTKVMAVELGPHKIRVNAVNPTVVMTPMGQAAWSDPQKAKAMLDRIPLGRFAEVENVVDTILFLLSDRSSMTTGSTVPVDGGFLAT
>Q91XV4.1
MDLGLAGRRALVTGAGKGIGRSTVLALQAAGAHVVAVSRTQADLDSLVSECPGVETVCVDLADWEATEQALSSVGPVDLLVNNAAVALLQPFLEVTKEAFDMSFNVNLRAVIQVSQIVARGMIARGAPGAIVNVSSQASQRALANHSVYCSTKGALDMLTKMMALELGPHKIRVNAVNPTVVMTSMGRTNWSDPHKAKVMLDRIPLGKFAEVENVVDAILFLLSHRSNMTTGSTLPVDGGFLVT
>Q7Z4W1.2
MELFLAGRRVLVTGAGKGIGRGTVQALHATGARVVAVSRTQADLDSLVRECPGIEPVCVDLGDWEATERALGSVGPVDLLVNNAAVALLQPFLEVTKEAFDRSFEVNLRAVIQVSQIVARGLIARGVPGAIVNVSSQCSQRAVTNHSVYCSTKGALDMLTKVMALELGPHKIRVNAVNPTVVMTSMGQATWSDPHKAKTMLNRIPLGKFAEVEHVVNAILFLLSDRSGMTTGSTLPVEGGFWAC
>P40397.2
MANQKKKTLPPQHQNQQPGFEYLMDPRPVFDKPKKAKKLEGKTAIITGGDSGIGRAVSVLFAKEGANVVIVYLNEHQDAEETKQYVEKEGVKCLLIAGDVGDEAFCNDVVGQASQVFPSIDILVNNAAEQHVQPSIEKITSHQLIRTFQTNIFSMFYLTKAVLPHLKKGSSIINTASITAYKGNKTLIDYSATKGAIVTFTRSLSQSLVQQGIRVNAVAPGPIWTPLIPASFAAKDVEVFGSDVPMERPGQPVEVAPSYLYLASDDSTYVTGQTIHVNGGTIVNG
>O07575.1
MNPMDRQTEGQEPQHQDRQPGIESKMNPLPLSEDEDYRGSGKLKGKVAIITGGDSGIGRAAAIAFAKEGADISILYLDEHSDAEETRKRIEKENVRCLLIPGDVGDENHCEQAVQQTVDHFGKLDILVNNAAEQHPQDSILNISTEQLEKTFRTNIFSMFHMTKKALPHLQEGCAIINTTSITAYEGDTALIDYSSTKGAIVSFTRSMAKSLADKGIRVNAVAPGPIWTPLIPATFPEEKVKQHGLDTPMGRPGQPVEHAGAYVLLASDESSYMTGQTIHVNGGRFIST
>Q5KTS5.1
MASGGQFPPQKQESQPGKEHLMDPSPQHASPHYKPANKLQGKVALVTGGDSGIGRSVCYHFALEGATVAFTFVKGHEDKDANETLELLRKAKSSDAKDPIAIAADLGFDDNCKKVVDQVVNAFGSIDVLVNNAAEQYKASTVEDIDEERLERVFRTNIFAYFFMARHALKHMREGSTIINTTSINAYKGNAKLLDYTATKGAIVAFTRGLSLQLISKGIRVNGVAPGPVWTPLIPSSFDEEEVKQFGSEVPMKRAGQPYEIATAYVFLASCDSSYYSGQVLHPNGGAIVNG
>Q49117.2
MSKLEGKVAVVTGASKGIGAAIAKALAKDGAAVVVNYASSKAGADAVVEAITAAGGKAIAVQADVSQAVQARGLVEAAVQQFGRLDVLVNNSGVYEFAAIEEVTEEHYRRIFDVNVLGVLLATQAASKHLGEGGSIINISSVVTDVLMPTSAVYSGTKGALNAISGVLANELAPRKIRVNVVSPGYVVTEGTHTAGIAGSEMEAGLVAQTPLGRSGQPDDIAGVVAFLASDDARWVTGEVINASGGVR
>Q9KQH7.2
MNLEGKVALVTGASRGIGKAIAELLAERGAKVIGTATSESGAQAISDYLGDNGKGMALNVTNPESIEAVLKAITDEFGGVDILVNNAGITRDNLLMRMKEEEWSDIMETNLTSIFRLSKAVLRGMMKKRQGRIINVGSVVGTMGNAGQANYAAAKAGVIGFTKSMAREVASRGVTVNTVAPGFIETDMTKALNDEQRTATLAQVPAGRLGDPREIASAVAFLASPEAAYITGETLHVNGGMYMI
>P55336.1
MNLEGKIALVTGASRGIGRAIAELLVERGATVIGTATSEGGAAAISEYLGENGKGLALNVTDVESIEATLKTINDECGAIDILVNNAGITRDNLLMRMKDDEWNDIINTNLTPIYRMSKAVLRGMMKKRAGRIINVGSVVGTMGNAGQTNYAAAKAGVIGFTKSMAREVASRGVTVNTVAPGFIETDMTKALNDDQRAATLSNVPAGRLGDPREIASAVVFLASPEAAYITGETLHVNGGMYMV
>P0A2C9.1
MSFEGKIALVTGASRGIGRAIAETLVARGAKVIGTATSENGAKNISDYLGANGKGLMLNVTDPASIESVLENIRAEFGEVDILVNNAGITRDNLLMRMKDDEWNDIIETNLSSVFRLSKAVMRAMMKKRCGRIITIGSVVGTMGNAGQANYAAAKAGLIGFSKSLAREVASRGITVNVVAPGFIETDMTRALSDDQRAGILAQVPAGRLGGAQEIASAVAFLASDEASYITGETLHVNGGMYMV
1 >Q5KTS5.1
2 >O07575.1
3 >P40397.2
4 >P46331.2
17 >P39640.2
18 >Q8KWT4.1
5 >P66776.1
6 >Q8CQD2.1
7 >Q5HKG6.1
10 >Q48436.2
8 >P42317.2
9 >P50199.1
13 >P50842.1
14 >P0A9Q0.1
15 >P37769.2
16 >Q05528.1
23 >P0A2C9.1
25 >Q9KQH7.2
11 >Q59787.1
12 >target
24 >P55336.1
19 >Q9WYG0.1
22 >Q49117.2
20 >P50197.1
21 >P50198.1
26 >Q7Z4W1.2
27 >Q91XV4.1
28 >Q1JP75.1
29 >Q04520.1
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment