diff --git a/modules/bindings/tests/test_hhblits.py b/modules/bindings/tests/test_hhblits.py index 2c0b2d9c28649d6853178851ecec00174db2cba0..836a3a78d74a02f9fc3c2b91c98b7ed682961dc1 100644 --- a/modules/bindings/tests/test_hhblits.py +++ b/modules/bindings/tests/test_hhblits.py @@ -135,8 +135,9 @@ class TestHHblitsBindings(unittest.TestCase): 'LSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVL'+ 'TSKYR') self.hh = hhblits.HHblits(query_seq, self.hhroot) - a3m = self.hh.BuildQueryMSA('testfiles/hhblitsdb/unittestdb') - self.assertTrue(filecmp.cmp(a3m, "testfiles/testali_two.a3m")) + a3m = self.hh.BuildQueryMSA('testfiles/hhblitsdb/unittestdb', + assign_ss = False) + self.assertTrue(filecmp.cmp(a3m, "testfiles/testali_two_no_ss.a3m")) def testA3mToProfileFileName(self): # test A3mToProfile to work with a given hhmake_file name diff --git a/modules/bindings/tests/testfiles/testali_two_no_ss.a3m b/modules/bindings/tests/testfiles/testali_two_no_ss.a3m new file mode 100644 index 0000000000000000000000000000000000000000..ffa37f6270efe169102f7bdf17fdd57ba1922a7a --- /dev/null +++ b/modules/bindings/tests/testfiles/testali_two_no_ss.a3m @@ -0,0 +1,224 @@ +>Test +VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGKKVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVLTSKYR +>gi|118595805|sp|P02185.2|MYG_PHYCA RecName: Full=Myoglobingi|49259352|pdb|1UFP|A Chain A, Crystal Structure Of An Artificial Metalloprotein:fe(Iii)(3, 3'-Me2-Salophen)APO-Wild Type Myoglobingi|145579858|pdb|2JHO|A Chain A, Cyanomet Sperm Whale Myoglobin At 1.4a Resolutiongi|151567563|pdb|2EB8|A Chain A, Crystal Structure Of Cu(Ii)(Sal-Phe)APO-Myoglobingi|151567564|pdb|2EB9|A Chain A, Crystal Structure Of Cu(Ii)(Sal-Leu)APO-Myoglobingi|228311816|pdb|2W6W|A Chain A, Crystal Structure Of Recombinant Sperm Whale Myoglobin Under 1atm Of Xenongi|113374037|dbj|BAF03579.1| myoglobin [Physeter catodon] +VLSEGEWQLVLHVWAKVEADVAGHGQDILIRLFKSHPETLEKFDRFkHLKTeaemkASEDLKKHGVTVLTALGAILKKKGHHEAELKPLAQSHATKHKIPIKYLEFISEAIIHVLHSRHPGDFGADAQGAMNKALELFRKDIAAKYK +>gi|118595767|sp|P14398.2|MYG_GALJA RecName: Full=Myoglobin +-----NWDKVNSVWSAVEQNITAIGQNILLRLFEQYPESEDYFPKLkN-KSlgelkDTADIKAQADTVLRALGNIVKKKGDHSQPVKALAATHITTHKIPPHYFTKITTIAVGVLSEMYPSEMNAQAQAAFSGAFKNICSDIEKEYK +>gi|47226106|emb|CAG04480.1| unnamed protein product [Tetraodon nigroviridis] +VWTDQERAIIDNIFSNL--DYEDVGSKALIRCLIVYPWTQRYFSSFgNLYnaeaiRNNPNVAKHGVTVLHGLDRALKNMDNIKEEYKKLS--ERAALREAARRPRQLCSLTA----------------------------------- +>gi|5139299|gb|AAD40480.1|AF157494_1 hemoglobin [Myxine glutinosa] +-TTEGERAAVRASWAVLMKDYEHAGVQILDKFFKANPAAKPFFTKMKdLhtlEdlASSADARWHVERIIQAVNFAVINIEDrekLSNKFVKLSQDHIEEFHVtDPQYFMILSQTILDEVEKRN---------GGlsgegksgWHKVMTIICKMLKSKY- +>gi|74922327|sp|Q7SID0.1|GLBF1_EPTBU RecName: Full=Globin-F1gi|18655670|pdb|1IT2|A Chain A, Hagfish Deoxy Hemoglobingi|18655671|pdb|1IT2|B Chain B, Hagfish Deoxy Hemoglobingi|18655672|pdb|1IT3|A Chain A, Hagfish Co Ligand Hemoglobingi|18655673|pdb|1IT3|B Chain B, Hagfish Co Ligand Hemoglobingi|18655674|pdb|1IT3|C Chain C, Hagfish Co Ligand Hemoglobingi|18655675|pdb|1IT3|D Chain D, Hagfish Co Ligand Hemoglobin +-LTDGDKKAINKIWPKIYKEYEQYSLNILLRFLKCFPQAQASFPKFS-tkkSnlEQDPEVKHQAVVIFNKVNEIINSMDNqeeIIKSLKDLSQKHKTVFKVDSIWFKELSSIFVSTI-DGG---------AEFEKLFSIICILLRSAY- +>gi|157880060|pdb|1SCT|A Chain A, Scapharca Tetrameric Hemoglobin, Co-Stategi|157880061|pdb|1SCT|C Chain C, Scapharca Tetrameric Hemoglobin, Co-Stategi|157880062|pdb|1SCT|E Chain E, Scapharca Tetrameric Hemoglobin, Co-Stategi|157880063|pdb|1SCT|G Chain G, Scapharca Tetrameric Hemoglobin, Co-State +CGSEAIKANLRRSWGVLSADIEATGLMLMSNLFTLRPDTKTYFTRLGDvqkGKANSKLRGHAITLTYALNNFVDSLDDPSRLKCVVEKFAVNHInrKISGDAFGAIVEPMKETLKARMGNYYSDDVAGAWAALVGVVQA------- +>gi|153873469|ref|ZP_02002042.1| Globin [Beggiatoa sp. PS]gi|152070062|gb|EDN67955.1| Globin [Beggiatoa sp. PS] +-----NTELIGQSWDKLAGKHEEMVATFYDRFFDKFPHYRKFFPE--------SMEHQLKRMAETIALLARVTHETEVTHPHLVKVgsrHT-GYCLAREDLDNFKTIFVQVVGEYCGDDWNQEYQESWTEAFE--qhiIPYMMH--- +>gi|215764931|dbj|BAG86629.1| globin [Samia cynthia ricini] +-LTRREIYAVQKSWAPVFANSIPNGAELLRRLFQTFPETKEFFKMIrKLPdeeyIQNPQFRAHVINLMTSLNLAVNNLNQPEVVaamMNKLGESHK-RRQIKERHFGDLKQVIVTMFIEVL--HLDGDTLTAWDKTVTFWYKHI----- +>gi|15613621|ref|NP_241924.1| nitric oxide dioxygenase [Bacillus halodurans C-125]gi|52000645|sp|Q9RC40.1|HMP_BACHD RecName: Full=Flavohemoprotein; AltName: Full=Flavohemoglobin; AltName: Full=Hemoglobin-like protein; AltName: Full=Nitric oxide dioxygenase; Short=NO oxygenase; Short=NODgi|5822815|dbj|BAA83959.1| HMP [Bacillus halodurans]gi|10173673|dbj|BAB04777.1| flavohemoglobin [Bacillus halodurans C-125] +TLSQETKQIVKATVPILAEHGEAITKHFYKRMFSHHPELLNIFNQ-----THQKQGRQPQALANSIYAAAEHIDNLEAILPVVSRIahkHR-SLNIKPEQYPIVGENLLAAMREVLGDAASDDVLEAWREAYELIADVFIQ--- +>gi|183980669|ref|YP_001848960.1| monooxygenase [Mycobacterium marinum M]gi|183173995|gb|ACC39105.1| monooxygenase [Mycobacterium marinum M] +---RDALRVLRDAFAPQpdldhKTQSSELVRSFYTNWFSLDSSVRDLFP--------PEMSGQRAAFTRALHWVYSELvaQRAEEPIAFLAQLgrdHR-KYGVQPTQYETLRRALQTTLRSHLGSSWTDSVARAAEQSLNLIVGVMSG--- +>gi|119368204|sp|P15161.2|GLB1_PARCH RecName: Full=Globin-1; AltName: Full=Globin I +-LTLAQKKIVRKTWHQLMRNKTSFVTDLFIRIFAYDPAAQNKFPQMaGMSAsqlrSSRQMQAHAIRVSSIMSEYIEELDSdiLPELLATLARTHD-LNKVGPAHYDLFAKVLMEALQAELGSDFNQKTRDSWAKAFSIVQAVLLVK-- +>gi|340378768|ref|XP_003387899.1| PREDICTED: neuroglobin-like [Amphimedon queenslandica] +SLTSAQVALIESTWKVVKKDLQGAGNIMFLKLFQIDVSVRDKFPFRDVpyEelEDSESFLKHSLQVMETIDLAITLLlgGEMEKLVEALVDLgmaHA-MQGLKPEDFDHVGEALVHALGVALGKEFNDEAKKAWTLLYSVVTAKMKEGLK +>gi|255642839|gb|ACU21602.1| extracellular tetra-domain globin [Branchipolynoe seepensis] +-ISSEQKAAIKTSWAGA--NLQAAGTGFYVHLAADAPAAYAVFN-LGANPHGAKAQAQGLKVMQFVDDCVKSLDDMAAVRDKLEVLahrHT-GYPAKKEYFGPGKACFLAGLADALGAKFTAEAKAAWATFYDIIAISLCQ--- +>gi|327360034|emb|CBL51559.1| globin [Branchiostoma floridae] +SLSAADKKAVADSWAKMSkPSFQDAGERVFLKLLKK-DSTKAMFKKFkDIPrdrlAGNAALRDHGGKVVQALDDFIKGLDGsGHETVRNVGRIHK-AAGMTNDNINLMKPILLELLDEV-G--CG-DAKAAWDKLWNLFMTVHG---- +>gi|1170183|sp|P42511.1|LGB_CANLI RecName: Full=Leghemoglobingi|495289|gb|AAA18503.1| leghemoglobin [Canavalia lineata] +-FSEKQESLVKSSWEAFKQNVPHHSAVFYTLILEKAPAAQNMFSFLSNGvdPNNPKLKAHAEKVFKMTVDSAVQLRAKGEVvlaDPTLGSVHV-QKGVLDPHFLVVKEALLKTFKEAVGDKWNDELGNAWEVAYDELAAAIKKAM- +>gi|254474495|ref|ZP_05087881.1| Globin, putative [Ruegeria sp. R11]gi|214028738|gb|EEB69573.1| Globin, putative [Ruegeria sp. R11] +-VAPADRDLILASVESQKMELDQFVSLFYAKFFERCPDTRPMFPH--------DMSLQEEKLLMSLTHIIEALEHpakLRLILLDQGERHK-ALQINDDHFAGFIDSFTGALKDTLQEDWSEETRQAWLRFLQYVAYQMG---- +>gi|121228|sp|P15447.2|GLB4_GLYDI RecName: Full=Globin, monomeric component M-IV; AltName: Full=GMH4gi|159149|gb|AAA29162.1| hemoglobin IV [Glycera dibranchiata] +GLSAAQRQVVASTWKDIAgsDNGAGVGKECFTKFLSAHHDIAAVFG-F--SgASDPGVADLGAKVLAQIGVAVSHLGDEGKMVAEMKAVGVRhkGYgykHIKAEYFEPLGASLLSAMEHRIGGKMTAAAKDAWAAAYADISGALISG-- +>gi|62990081|emb|CAI56309.1| haemoglobin B2 chain [Arenicola marina] +--TTEDRKEVQTLWSEIWSAQftgrrVQVAQAVFEDLFRRDPESKNLFKRVNVdDMNSPEFHAHCIRVVNGLDTVIGLLDDPDTLKSQLEHLaqqHKERDGIHKTHFDEMSHAFGAVMPQVS-SCFNP---DAWNRCFGSIATKIASL-- +>gi|34447132|dbj|BAC84992.1| globin D2 precursor [Haemadipsa zeylanica] +--SIEDIRDIQHDWQFTWGDAsldarIVFGQAVFKKLIELDSSVVEPLKGVHVeDPNSLTFKNHVLRVLNGLDNLINLFDEQGVLVSQLNHLsqqHKERAGVNAAHFKAFARAFIDVLEVSG-NCPNL---DAWKGCLAALGHRISLQ-- +>gi|156405932|ref|XP_001640985.1| predicted protein [Nematostella vectensis]gi|156228122|gb|EDO48922.1| predicted protein [Nematostella vectensis] +PLSVAQKYLVRETWETIEQHSKAVGKKTFLrmfymssidfiysvvmeskgskdirvlglelafddvknsyrtwRFFEMNPDYQKLFPEFaTLDqvelEQANALHGHAKRVMKAVENAVSAMDDAESFAAYLENLGARhkARALKPAYLDAMQVAYTDTIQDLLKTQWTDGTAEAWNKLFRFIADTMKHG-- +>gi|259418086|ref|ZP_05742005.1| globin [Silicibacter sp. TrichCH4B]gi|259346992|gb|EEW58806.1| globin [Silicibacter sp. TrichCH4B] +-LHQIDAQLVKGSFRQVFARKAALTDKFYEYLFLEMPDTKAMFTG--------DFSHQKEMFASVLAAGVRSLGQDAEllaLIDRLLLRHR-HLGLTSGHMYMAQRALLLAFREVMGPHLTAAEVSAWGAAIRRLCQTLAA--- +>gi|86137140|ref|ZP_01055718.1| globin domain protein [Roseobacter sp. MED193]gi|85826464|gb|EAQ46661.1| globin domain protein [Roseobacter sp. MED193] +---------MRSSFPTIFARKAELADRFYSHLFVHLPEVESLFGD--------DFSKQKEMFAAMLTYCLKGVANSQSlpqARAGLVKVHA-RFNLGPREMELAGKAVMAALEDVLGNDLSTKQREIWEQAISGVMQLLLA--- +>gi|301122887|ref|XP_002909170.1| Nitric oxide dioxygenase (Pi-NOD1) [Phytophthora infestans T30-4]gi|262099932|gb|EEY57984.1| Nitric oxide dioxygenase (Pi-NOD1) [Phytophthora infestans T30-4] +PMSAKTIAILKATAPVVQEHGTEITSTMYGTMFSEFPEVQNLFNMShhRVAgatKggAPPGVSRQATALANAVIGFAANCDQLGNLgdaVPRMVHKHV-SLDIRAKHYPIVGGCLLRAIKTVLGDAATDEIIDAWKEAYWFLADLLI---- +>gi|8101728|gb|AAF72633.1|AF258616_1 hemoglobin P polymer precursor [Parartemia zietziana] +-ITDAEKQLVQESWELLKPDLMGLGQKVFGRIFTKNPEYQTLFTRvgFgDTplTqlMANPAYGAHLIKVMRSFDFVIQNLGKPKTLLAYLKNVGADhiARNVERRHLQAFSESLIPVMQNELKAKLKPEAVAAWRKGLDRIIGVID---- +>gi|291238614|ref|XP_002739227.1| PREDICTED: Globin D, coelomic, putative-like [Saccoglossus kowalevskii] +-LTKEQKDTLIQTWQNLHADLERIGMLMFMGLFEHNPEIKEFFVGADsrdMKteelRYNEKLQEHGIRVMGLVEKIISSMGfedeKIDQMVVDLGKRHL-GYDVHIPFIDLFGRQFVFAIKPTLHTHWTANVEEAWTQLFKYIGYLMRYGYH +>gi|254426005|ref|ZP_05039722.1| Nitric oxide synthase, oxygenase domain protein [Synechococcus sp. PCC 7335]gi|196188428|gb|EDX83393.1| Nitric oxide synthase, oxygenase domain protein [Synechococcus sp. PCC 7335] +-LPPALVQKMADSWQYFAPRKNEMGVEFYQTLFERYPQVLPIFGR-------ADMDYLSTHLFQSLEFIFLCLAEGSTerLmkeLRHLGRLHG-NAGVPSFAYGAISEVMISMFEKYVP-GFDEQLKEAWQVLIARVSNVIK---- +>gi|312385086|gb|EFR29666.1| hypothetical protein AND_01190 [Anopheles darlingi] +PLTAKQKYTMVASWKGISRAMETTGITMFIKLFEEHADLLNMFAKFrELKTkeeqaTSEELQEHANKVMNTLDEGIRGLDDLDTFFEFIHQVgasHRRIPGFKQEYFWRIEEPFLTAVSTTLGDRYTQNVEGIYKLTIKFIIETLVAGY- +>gi|50897135|dbj|BAD34601.1| hemoglobin chain I [Calyptogena kaikoi] +-VSASDIKNVQDTWTKLYDQWeAVHASKFYNKLFKDNEDISEAFVKAGT-GSGIAMKRQALVFGAILQEFVENLSDPTALSLKIKGLcatHK-TRGItNMELFAFALADLVAYMGTT--ISFTAAQKTSWTAVNDVILHQMSSY-- +>gi|156392484|ref|XP_001636078.1| predicted protein [Nematostella vectensis]gi|156223178|gb|EDO44015.1| predicted protein [Nematostella vectensis] +-MTYEQKYLIRETVDNRECVNEkdflawRYVCELAAIFLNMHPGLQTYFSEFKhIKIDNiNGSHGHPRRLLMAIDNAVTALGDSDSFSAYLVELGRrhhgMNFRPGPTHFNDLRKCFLSVIKEILATAslWDFQVEEAWNRLFDSITAMML---- +>gi|321471887|gb|EFX82859.1| hemoglobin [Daphnia pulex] +-LTRPQIRNVQRSWESMKSGRNSLVSAIFIKLFKETPRVQKHFAKFaNVPvdslRGNGDYIQQVALVADRLDTLISAMDDQLQLlgnINYLKYTHA-KRSIPRKTWEDFARLLVELLPTR---GVSASDVESWKGVTTVLVNGIA---- +>gi|321477133|gb|EFX88092.1| hemoglobin [Daphnia pulex] +-LTPQQIKEVQRTWASMRSDRNSIVSAIFIELFRENPRSQKYFAKFaSLPleslTSNTDFNQQVALVANRLDTIISAMGDKLQLlgnINYMRYSHEQriysPRNAVRDRFEDFGRLLLDTLIAK---GIAGDDLDSWKSVLKIFIDGIA---- +>gi|56753437|gb|AAW24922.1| SJCHGC09035 protein [Schistosoma japonicum] +-INDEQLLLLQSSWSIVKQHIEKIGVITFLGIFEQHSDFRDAFTEFRKrkfvdVKHDPAMQVHGLRVLSIVDKMITRLPktdDIELKLMTIGSKHC-RYVPTIGLISSVSDQLWGAIEPVLKeeGSWSDELAVTWKTVLDYLTKT------ +>gi|332532408|ref|ZP_08408286.1| hypothetical protein PH505_af00140 [Pseudoalteromonas haloplanktis ANT/505]gi|332038051|gb|EGI74498.1| hypothetical protein PH505_af00140 [Pseudoalteromonas haloplanktis ANT/505] +------QSALLNNLTIIKPNFHAFTAKFHSKLAQSSIEMNYP--------TALQFNEKSFTLFCVLERIVKHLDKPASVAPFLAHHlmYLKKSGASHSDIALLSNAFYETLEEHLGKHFTTESQLAWKKALRYF--------- +>gi|308480175|ref|XP_003102295.1| CRE-GLB-30 protein [Caenorhabditis remanei]gi|308262221|gb|EFP06174.1| CRE-GLB-30 protein [Caenorhabditis remanei] +HLTPIDREILNKSWAIVSKDMQQVAVNIFQMIFEQAPDAKLMFSFMmkDYkeDKKSNEFIFHAVRFLQVIESTMTHLDDPSqldAVFLNLGKIHAkheEQLGFSAHYWSVFKECVLFHFRKAMKahNKFSkhkemsfAEIDSaiiLWREVLRFIIDRMKVGY- +>gi|82655305|emb|CAI26300.1| hemoglobin [Biomphalaria glabrata] +-LSDNDREAIDSSWKKLRSGAggrRNAGIKLVQWMLRTIPNMRDRFNKFDAKqsdaalQRDPEFLAQVDRILGGVESLVNNVDDPVALKAAIDRLadaHLsFDPRVGLDYFGPLQTYIHDYIEEALGVGADSDEAKGWTDLFAAFNKVLKE--- +>gi|339250872|ref|XP_003374421.1| putative myoglobin [Trichinella spiralis]gi|164521930|gb|ABY60753.1| unknown [Trichinella spiralis]gi|316969271|gb|EFV53396.1| putative myoglobin [Trichinella spiralis] +------CDLIKEQWAKIE-INNENGGELYKWFFTEKPEFATYFQLDNVNPaeiaKTERFQALGKAFLERVKKLVNVCDDEKKLKSEVTILkneHD-PRNVGLDQLKQVRPILVKFLQSKTG--LTDQQTSAWDEMLKKFEA------- +>gi|91224749|ref|ZP_01260009.1| hypothetical protein V12G01_01110 [Vibrio alginolyticus 12G01]gi|91190295|gb|EAS76564.1| hypothetical protein V12G01_01110 [Vibrio alginolyticus 12G01] +--------VFNDSYERCT-SDQEFFDLFYKNLWSKSANFRQKFDGI-------DMHQQVRMLRGSIVFFMMADTstEAHKMVEKYGKKHAsADIGIEPQDFDVWLESLLETVRQCD-SGYDADVETAWRTCFKTGLEVMKQEC- +>gi|88813510|ref|ZP_01128744.1| hypothetical protein NB231_06775 [Nitrococcus mobilis Nb-231]gi|88789223|gb|EAR20356.1| hypothetical protein NB231_06775 [Nitrococcus mobilis Nb-231] +--------LFNDSYERCIDnpNPPGFLQRFYKVFLSSSEEVAEKFKNT-------DFEKQTRVLKASLYYLMLScNGspEAMAHLRRIACLHSrKQLDIRPELYDLWLASLLQAAREYD-PRFDQQTETAWRQVLSHGIDFMKSRY- +>gi|339234991|ref|XP_003379050.1| putative globin [Trichinella spiralis]gi|316978322|gb|EFV61323.1| putative globin [Trichinella spiralis] +-LTKSQRQNVVRSWEKV-PNKRALGEEIYIQIFMHKPMLKSLFPFRTVPvdqlRNNALFTRQAAIFADFIDCVVGYLaiDNG-NLIMELSERvgvnHAlmTSVNFDPEWWVLFANSVLDCIRQYCEPQficlpisrhITRKIMIAWRILLKEVVDRMSEAF- +>gi|325180197|emb|CCA14598.1| conserved hypothetical protein [Albugo laibachii Nc14] +----------------------------------NAPELKPVFKT----------SKHARNvvlqhIVGGLRTMLAHDVHIER-VRALTRTHL-QFGVKMEYFDLLGQAVIFSMRHCSGSHWSSEIEEAWRRLYGHCSVIL----- +>gi|29841203|gb|AAP06216.1| similar to trematode hemoglobin from Paramphistomum Epiclitum [Schistosoma japonicum] +-VTQSQVDHlITELEPHVDteAHKLELGLKVYECFLKDRPEYICKFSRLqGLDAsnvaQSEGIKYYARTFVAAFVPMIQAAANKcelDKLCLEEAILHR-TRPVDEKIFQDSLPIFIKIFNNLI---KDQQNKETMSKILTYTFTMIGSQ-- +>gi|339237741|ref|XP_003380425.1| putative globin [Trichinella spiralis]gi|316976730|gb|EFV59959.1| putative globin [Trichinella spiralis] +-LNPKEVILTRNVWAALKeKHQHLVGMEIFRQIFNRRPDLKSLFGVSALDtemaLNSTRLHRHTMIFQDVIDILMVNISNVDVnIADSLIDLgaqHWvlTKRGFDPAYWLIFGDVLFDLVENvtrKLPSR--KRSTNAWRKTIAFMLDCMQIGY- +>gi|319789164|ref|YP_004150797.1| hypothetical protein Theam_0183 [Thermovibrio ammonificans HB-1]gi|317113666|gb|ADU96156.1| hypothetical protein Theam_0183 [Thermovibrio ammonificans HB-1] +-IREKDVQNLRFLGNILLPYKKEFADAFYDNLM-KFEDIKEYIPE-------EKLQRLKTTIQEWYEKLFsGKYDSEYlLYLLRIAKVHV-EEGIPPHYIIVamnfVSRFCTRRIaefFAKKAREFfehyrkeqdvvcdparaeilegfvleevfeRmEDLTRSLRKILALNEDVLVSYYV +>gi|301061358|ref|ZP_07202138.1| conserved hypothetical protein [delta proteobacterium NaphS2]gi|300444535|gb|EFK08520.1| conserved hypothetical protein [delta proteobacterium NaphS2] +-FTDQDEVLLKKMAELFMPIHDRFADDFYRYLQ-EDDYTAGYFRT-------DAaVNRRKETILQWLNEILtSKYDNRLlIKLVRIGKIHV-KIGLDGHYVNAamgfIRRYFQEHLNQVVSEPAKrEIMIETLDKALDISLDIMTSSYR +>gi|336324532|ref|YP_004604499.1| hypothetical protein Flexsi_2317 [Flexistipes sinusarabici DSM 4947]gi|336108113|gb|AEI15931.1| hypothetical protein Flexsi_2317 [Flexistipes sinusarabici DSM 4947] +-VKESDLKELGSIWEQMSKYSDEFTTDMSAFVV-KNFKLPETYST-------EITDEYKAMLGKLYERVLsGRFNNDYvSFLIKFSEFNL-AYSINQEMVNSlisyARSWIHEKIFQNIPDDFQrKGILMMFHKIMDITGDIIISTYY +>gi|289549194|ref|YP_003474182.1| hypothetical protein Thal_1426 [Thermocrinis albus DSM 14484]gi|289182811|gb|ADC90055.1| conserved hypothetical protein [Thermocrinis albus DSM 14484] +-WTAEDEENLRSLSHLVPSWVEEFLES---------------IRK-------DEhDERCFQSIREWLIATFsGPHDERYvRKIHNMLQEHL-KAGCTLHHLQVllssVREFLLDKLTAQLGYSHQrDSLFRSVEKTLDLSLSIMLLSQK +>gi|238638045|gb|AAZ82851.2| Hypothetical protein B0361.4 [Caenorhabditis elegans] +--------RIQQCFKAA---KPSIGDAIMKRAAASRAEMRTMLSKMN----EKQIECLGKQMFELITDAVENADKSEKVLTHARQLggtYAslCPLGFRPDLFAPLADAAIAECVKLDGVHKRCETLSAWSQLFSALFTGVRDGY- +>gi|17509143|ref|NP_492188.1| GLoBin family member (glb-26) [Caenorhabditis elegans]gi|3880027|emb|CAA99921.1| C. elegans protein T22C1.2, confirmed by transcript evidence [Caenorhabditis elegans] +-LNSYQKSIVRNAWRHMSQKGPsNCGSTITRRMMARKSTIGDILDR-------STLDYHNLQIVEFLQKVMQSLDEPDKIsklCQEIGQKHA-KYrrskGMKIDYWDKLGEAITETIREYQGWKIHRESLRAATVLVSYVVDQLRFGY- +>gi|320164492|gb|EFW41391.1| predicted protein [Capsaspora owczarzaki ATCC 30864] +------HAVLHASWVKATegDNGDAVLTGFLVRLQSNNPQATLIYERAD-----PRMRK--VIIWTAVSKILDCMQNPRSLrkeLKPLGQSHA-KMGVTGPMLDSFGVALRAVLKDVLKARYTTETDMVWRRCYRLFSVQ------ +>gi|170573273|ref|XP_001892405.1| hypothetical protein Bm1_04635 [Brugia malayi]gi|158602064|gb|EDP38764.1| hypothetical protein Bm1_04635 [Brugia malayi] +-LSEIQQELIRQSWQTISAKlevnEQNFGFFVYRRVFEHNPLLKRAFHVeeYDLLDSIP--REHSifrqmRLFTNLIALAVRHDNELETeIAPAVFRYGQRHYKFAAEYFNegtvrLFCSQVVCAVADLLEVDIDPACMEAWIDMMRFIGCRL----- +>gi|118572332|sp|P0C227.1|GLB_NERAL RecName: Full=Globin; AltName: Full=Myoglobin +-LSADQKAAIKSSWAAFAADITGNGSNVLVQFFKDYPGDQSYFKKFdGKKpdelKGDAQLATHASQVFGSLNNMIDSMDDPDKmvgLLCKNASDHI-PRGVRQQQYKELFSTLMNYMQSLpG-ANVAGDTKAAWDKALNAMANIIDAEQK +>gi|154818214|gb|ABS87592.1| hemoglobin III [Phacoides pectinatus]gi|197320684|gb|ACH68470.1| hemoglobin III [Phacoides pectinatus] +-LTGPQKAALKSSWSRFMDNAVTNGTNFYMDLFKAYPDTLTPFKSlFeDVSfnqmTDHPTMKAQALVFCNGMSSFVDNLDDHEVlvvLLQKMAKLHF-NRGIRIKELRDGYGVLLRYLED-H-CHVEGSTKNAWEDFIAYICRVQGDFMK +>gi|121267|sp|P29287.1|GLB_BURLE RecName: Full=Globin; AltName: Full=Myoglobin +-LSGAEADLLAKSWAPVFANKDANGDNFLIALFEAFPDSANFFGDFkGKSiadiRASPKLRSVSSRIVNRLNDFVGNAADAGKmagMLDQFSKEHV-GFGVGSQQFENVRSMFPGFVSS-V-AAPPAGADAAWGKLFGLIIDALKKAGK +>gi|327360040|emb|CBL51562.1| globin [Branchiostoma floridae] +---------LQKSWKTVARKSDQAARTVFLRMLQDNPGLRQKWPRIsLLtEeeiPTSPYIKFLGERIFDCLDYIIDNLGDLDHVISELTKLGR-QhSDMNV---------------------------------------------- +>gi|198424683|ref|XP_002119119.1| PREDICTED: hypothetical protein [Ciona intestinalis] +PFTDEELKLLRNSWDEVKKLGmk-EVGLHIFTGLLNAAPSLRTLFYTIdLPdEeeltidvmrenkkvvahatRIAN-----------AISKFIKFLDQPEElekLLTSLGESHA-RRQVDPESFEYVAPVILSVIGGHLKLPSNSPTLQAWVKAYGVLRNGIVSAME +>gi|316933010|ref|YP_004107992.1| globin [Rhodopseudomonas palustris DX-1]gi|315600724|gb|ADU43259.1| globin [Rhodopseudomonas palustris DX-1] +SplatFSPADIHRVRTSFDLMWPRSTEMADQFYARLFEIAPDSRTLFRS--------DMTRMKDKFIQTLAVLVGSLDNLTGlyaVAGKLAVDHV-RYGVRPDHYAPVGEALLWSLGRQLAGFWDDDVEQSWRKVYAVISARMIGAAY +>gi|307192580|gb|EFN75768.1| Globin [Harpegnathos saltator] +-MSEKQKKLVQNTWAIVRKDDVSSGLAIMNAFFTRYPEYQDQFKSFkGIPfeelSKNKKFQAHCVSVIAGLSNVIDHIHNpelMEASLINLAERHK-NRGQTREHFQNLRYVLEDLIPSVFGKQYTQEVQEAWKKMFDYLFLILCQ--- +>gi|289741211|gb|ADD19353.1| hemoglobin-like flavoprotein [Glossina morsitans morsitans] +-MNSDEVYEIKRTWEIPATTPTESGVAILIRFFTKYPSNLQKFSTFkDMTldelKNNPRFKAHANRIMKVFDDSIKTLDDncshLEEIWTKIAQSHF-NRQIEKQSFNELKEVILEVLVAAC--NLNDQQTEIWLKLLDFVYEIIFKTID +>gi|83754459|pdb|2C0K|A Chain A, The Structure Of Hemoglobin From The Botfly Gasterophilus Intestinalisgi|83754460|pdb|2C0K|B Chain B, The Structure Of Hemoglobin From The Botfly Gasterophilus Intestinalisgi|3885490|gb|AAC80435.1| hemoglobin [Gasterophilus intestinalis] +-MNSEEVNDIKRTWEVVAAKMTEAGVEMLKRYFKKYPHNLNHFPWFkEIPfddlPENARFKTHGTRILRQVDEGVKALsvDFgdkkFDDVWKKLAQTHH-EKKVERRSYNELKDIIIEVVCSCV--KLNEKQVHAYHKFFDRAYDIAFAEMA +>gi|336310636|ref|ZP_08565608.1| globin [Shewanella sp. HN-41]gi|335866366|gb|EGM71357.1| globin [Shewanella sp. HN-41] +GLTEIEKEAITSSFSLINHQEQHFATIFYDCLFDMAPLIKPMFKRDrKLi----------EEHFYMIFCAAVDNIHHLDTirtILLELGARHR-NYGVKVLHFPIVKSALILAIQHELKGQSNASIENAWSHYYDVLAAIILEG-- +>gi|2155297|gb|AAB58934.1| globin XII [Chironomus thummi thummi] +QFVEDQTEIIRASWNQVKH----NEVDILYSIFAANPDIQARFPQFaGKDlktlKSSSSFASHAGRIVGFFSKITELNPNdsgvsaAKTLINEVAASHK-GRGVSKAQFNAFRVSLTAYLADHV--TWNDNVAQAWEKGLDNVYFVLFSAF- +>gi|1805725|emb|CAA71646.1| globin Cpa 3-2 [Chironomus pallidivittatus] +ALTADQISTVQASFDKVKG----DSVGILYAVFKADPSIQTKFTQFaGKDletiKGTTPFEAHANRIVGFFSKIISEL--pnIDADVDAFVATHK-PRSVTHDQLNNFRAGFVGYMKAHT--DYAGA-ESAWGATLDTFFGAIFAKM- +>gi|291224997|ref|XP_002732490.1| PREDICTED: neuroglobin-like protein-like [Saccoglossus kowalevskii] +-LTPSEAIAIQSTWLFVYEDKEENGVELFVKLFTEHPDYQALFGYLeGIvGieniKNVPFLRVHASHVLIYLNTMLESLNDgtiLVELLKTLGYTHV-GLNLTPEHFDALGPILISLLQEKGGDSFTPFAEKAWLKGWGVmksvIVGALENGYQ +>gi|321477133|gb|EFX88092.1| hemoglobin [Daphnia pulex] +-LSTQERAIIRTTWNKARKDG-DVAPKLLFKFLKAYPEYQKKFSKFaDVPqsnlLSNGNFLAQAYTILAGLNVIVQSLSSqelMANQLNALGGAHQ-PRGVTTTILElkEFGVILIQVLEEEIGSAMTIDARQAWKNGIHELIGGLSQTL- +>gi|162453361|ref|YP_001615728.1| flavohemoprotein [Sorangium cellulosum 'So ce 56']gi|161163943|emb|CAN95248.1| putative flavohemoprotein [Sorangium cellulosum 'So ce 56'] +------VGLLRESFELVIERAPNLTHRFYGILFSRYPQVKPLFGRNSQe--------QQEKMLTEALAAVIDRLEDASwleEKLMAMGAKHV-DYGVTDAMYPWVADALISAMAEVAAAEWSPAHQEAWTEALGAIASLMQRGAR +>gi|326383372|ref|ZP_08205059.1| oxidoreductase FAD/NAD(P)-binding domain-containing protein [Gordonia neofelifaecis NRRL B-59395]gi|326197778|gb|EGD54965.1| oxidoreductase FAD/NAD(P)-binding domain-containing protein [Gordonia neofelifaecis NRRL B-59395] +-MSEPEQPILA---RTRVLLAgdpDRFARNVFARMFAMRPSLREFFPA-E-------MGSLRSSFVDVLDHVLEAIEAPDghgelvEFLAQLGRDHR--kYGVVSEHYWLMYEALMSEFQFAFGRRWTPEVEETVGQAMLLTTGVMRG--- +>gi|170031919|ref|XP_001843831.1| globin 2 [Culex quinquefasciatus]gi|167871230|gb|EDS34613.1| globin 2 [Culex quinquefasciatus] +-LTGKQKITLLSAWGLIKQDLDLHGRNIMLLIFREHPHFIPYFD-FsADpNntslSENRALQAHSLNLIMALGALIEYgLKTpkmFECTLAKLVKNHK-TRRVTSQDVKMFGEVILMYFAQVLGRQSASSLPTAFNRLIEQIAEAFEAA-- +>gi|314933259|ref|ZP_07840624.1| putative flavohemoprotein [Staphylococcus caprae C87]gi|313653409|gb|EFS17166.1| putative flavohemoprotein [Staphylococcus caprae C87] +MLTEKEKSIIKETVPVLQDKGEIITSHFYKRMFKQHPELKNMFN---Q--TNQQKGLQSTALAQSVLAAAVNIEHLEnimPVVKEIAYKHC-ALQVPPAGYDIVGENLIEAIKEVVGLDDDHEIIKTWKKAYQDIADVFISveqdIYS +>gi|255722886|ref|XP_002546377.1| conserved hypothetical protein [Candida tropicalis MYA-3404]gi|240130894|gb|EER30456.1| conserved hypothetical protein [Candida tropicalis MYA-3404] +PLTARQIEIIHQSIPILESLDIRLGEKFYKRVVRRYDNLKPYFN---ETntklLRQPRAFAY------TILMYAKYIEDLSplqELLNRIISRHI-GLQVKPEQYPLLGEVFIETMADLFPPGvADDEFKEAWATAYGNLSNMLieaeRVEYA +>gi|221117935|ref|XP_002162062.1| PREDICTED: similar to neuroglobin [Hydra magnipapillata]gi|221123757|ref|XP_002164731.1| PREDICTED: similar to neuroglobin [Hydra magnipapillata] +PLSGKEIETLKKSWTTAKQFWNEICTCAFSRWFSTYPEIQSKFGVYgDNlTmnevLASESLCIHIRKSVELIEIIIKKVDErheLSEYLIELGKLHH-KFGAEQKYATALGSSFVFAISQIC-PNIDMITEGAWDSLFKYIVTHIKLGIR +>gi|198419488|ref|XP_002119301.1| PREDICTED: hypothetical protein [Ciona intestinalis] +PLTEIEIEGVQESWEKVSSgGPKTTGLILMEKLFNTYPASIAVFSHLgIPsKpdgaitvsdlASIGGVSNHAVSLASRIGKLVGLLNNeteLKESSTEVGRIHV-KYGVTSEHVDLLGSVLLSVISENQGLSNTSELIGWWSKTWNIIGNYVKTGL- +>gi|154252910|ref|YP_001413734.1| globin [Parvibaculum lavamentivorans DS-1]gi|154156860|gb|ABS64077.1| globin [Parvibaculum lavamentivorans DS-1] +--TEQEKQLIEKSIERVADVAGDPASHVYARLFAQQPEMEAMFV---LD-TDGNVRGHM--LSEALDCVFDFLGPRAYapvLIQSELTNHS-NLGVPPAVFATFFRVVMETFRELLGAEWTAETDAAWAKLLGEFDTTIAEHAE +>gi|67463861|pdb|1X3K|A Chain A, Crystal Structure Of A Hemoglobin Component (Ta-V) From Tokunagayusurika Akamusigi|220702318|pdb|2ZWJ|A Chain A, Crystal Structure Of A Hemoglobin Component V From Propsilocerus Akamusi (Ph4.6 Coordinates)gi|288562877|pdb|3A5A|A Chain A, Crystal Structure Of A Hemoglobin Component V From Propsilocerus Akamusi (Ph5.6 Coordinates)gi|288562878|pdb|3A5B|A Chain A, Crystal Structure Of A Hemoglobin Component V From Propsilocerus Akamusi (Ph6.5 Coordinates)gi|288562879|pdb|3A5G|A Chain A, Crystal Structure Of A Hemoglobin Component V From Propsilocerus Akamusi (Ph7.0 Coordinates)gi|291463432|pdb|3A9M|A Chain A, Crystal Structure Of A Hemoglobin Component V From Propsilocerus Akamusi (Ph9.0 Coordinates)gi|330689360|pdb|3ARJ|A Chain A, Cl- Binding Hemoglobin Component V Form Propsilocerus Akamusi Under 500 Mm Nacl At Ph 4.6gi|330689361|pdb|3ARK|A Chain A, Cl- Binding Hemoglobin Component V Form Propsilocerus Akamusi Under 1 M Nacl At Ph 4.6gi|330689362|pdb|3ARL|A Chain A, Cl- Binding Hemoglobin Component V Form Propsilocerus Akamusi Under 500 Mm Nacl At Ph 5.5gi|409322|gb|AAB27065.1| hemoglobin component V [Tokunagayusurika akamusi=midges, 4th-instar larva, Peptide, 152 aa] +-LSDSEEKLVRDAWAPIHGDLQGTANTVFYNYLKKYPSNQDKFETLkGHPldevKDTANFKLIAGRIFTIFDNCVKNVGNDKgfqKVIADMSGPHV-ARPITHGSYNDLRGVIYDSM--HL----DSTHGAAWNKMMDNFF-------- +>gi|290462305|gb|ADD24200.1| Non-symbiotic hemoglobin 2 [Lepeophtheirus salmonis] +----------------------------------------------------KKLKRHGGIVMKALGKLVGFLETgkiiaIVNTIKGIANSHS-KRGVLVQQFTPICDILLKYLGEAFGDQLSNEGTATWKKFLDIFVSVINEAYD +>gi|260950945|ref|XP_002619769.1| hypothetical protein CLUG_00928 [Clavispora lusitaniae ATCC 42720]gi|238847341|gb|EEQ36805.1| hypothetical protein CLUG_00928 [Clavispora lusitaniae ATCC 42720] +-FSPKEIALVRYTWNRMLVDDAEPKIslpGAFARpkkmaqssLHASSTFCTQLYSNLlSMDpeleQAFPSLRHQAVSMAGVMSLAVNSLDNLsslDAYLEQLGKRHS-RiLGIEPFQFEMMGEALVQTFVQRFGSKFTQQLEVLWIKFYMYLANTLL---- +>gi|51773697|emb|CAH23231.1| hemoglobin [Biomphalaria glabrata] +-VSDDDRRALQSSWSRLQsqaGNKQEAGIKLVTWLFDNVPNMRDQFSKFnAHsSdealRANNEFLRQVDVIVGGLDSLINNVDNSDNfqaAIERLVDVHL-HmsPSVGLEYFGPLQQNIRSYIQNALGVAADSAEARSWTNLFTAFNEFLA---- +>gi|196001585|ref|XP_002110660.1| predicted protein [Trichoplax adhaerens]gi|190586611|gb|EDV26664.1| predicted protein [Trichoplax adhaerens] +-------------------------------------------------dlIKDPLVRSHGLRFMKAIETMLEIEFDSNGcifLFSAIGNRHC-SYGIEADYLDYVPQAFRFMLTKALGNNYTDKIASVWDEILSHIIKAMQDKVR +>gi|50897143|dbj|BAD34605.1| hemoglobin III [Calyptogena nautilei] +-VTIADVKNVQTSWNSIKDKWEtDHGLNFYTTLFDDVPEIKSAFVKAgNK--TDYQVKGQAVRFGRMVTEWIDNLDNEEAlvaKINGMCATHR-TRGItNVDLFEVALGELVKYIAGK--TSFTKAQRESWAVVNGCIIQTMRNYF- +>gi|122733|sp|P06148.2|HBF1_URECA RecName: Full=Hemoglobin F-Igi|162545|gb|AAA30331.1| F-I globin precursor [Urechis caupo] +-LTTAQIKAIQDHWFLNIKGcLQAAADSIFFKYLTAYPGDLAFFHKFsSVPlyglRSNPAYKAQTLTVINYLDKVVDALGGNAGaLMKAKVPSHD-AMGITPKHFGQLLKLVGGVFQEEFSA--DPTTVAAWGDAAGVLVAAM----- +>gi|153876610|ref|ZP_02003843.1| bacterial hemoglobin [Beggiatoa sp. PS]gi|152066926|gb|EDN66157.1| bacterial hemoglobin [Beggiatoa sp. PS] +-------FEIQSTYEKILPHLDEFSRLFYQQLFEIKPAFKILFRQTDLrIq------KQMVIRMIEVVVQGINNLENFMSIIQRIHQRHY-ELHLKPEDYRLAGQALVLSLEKYFGDEFTPTLKKIWLDFYESIVATM----- +>gi|1707914|sp|P51536.1|GLBC_NIPBR RecName: Full=Globin, cuticular isoform; Flags: Precursorgi|309617|gb|AAA65540.1| globin [Nippostrongylus brasiliensis] +------KKHTVESMKAvpVGRDKAQNGIDFYKFFFTHHKDLRKFFKGAeNFGaddvQKSKRFEKQGTALLLAVHVLANVYDNqavFHGFVRELMNRHE-KRGVDPKLWKIFFdDVWVPFLESK-GAKLSGDAKAAWKELNKNF--------- +>gi|121251|sp|P27613.1|GLBH_TRICO RecName: Full=Globin-like host-protective antigen; Flags: Precursorgi|161675|gb|AAA30102.1| globin-like, host-protective antigen [Trichostrongylus colubriformis] +------RKDALSALDVvpLgsTPEKLENGREFYKYFFTNHQDLRKYFKGAeTFTaddiAKSDRFKKLGNQLLLSVHLAADTYDNemiFRAFVRDTIDRHV-DRGLDPKLWKEFWSIYQKFLESK-GKTLSADQKAAFDAIGTRF--------- +>gi|196001583|ref|XP_002110659.1| hypothetical protein TRIADDRAFT_54901 [Trichoplax adhaerens]gi|190586610|gb|EDV26663.1| hypothetical protein TRIADDRAFT_54901 [Trichoplax adhaerens] +-------------------------------LIKLSPATKIYFHGVDFeKrdsylAKNTFLRNHAARFMEAINVIIGQdMDIfsVESYFRVVGSKHH-SYNLKLEHVQDISDAFLEMARNALKKKFTKSTEAAWRSFFQMVTDAIKNG-- +>gi|170589934|ref|XP_001899728.1| hypothetical protein Bm1_41355 [Brugia malayi]gi|158592854|gb|EDP31450.1| hypothetical protein Bm1_41355 [Brugia malayi] +-LSLEDRKLLHETFNLFEKDLTTNGLRIFLqlalitkfiylfcmRTLSENPDYKYFWPQFrAIPdsslISSSVLRNFAHTYMNALKKIVESLNNEQmpyEVLQRISAKHA-RHNIQMHHMQKMIKPLLENVRRALG-RHDENAERAWETLFQTVG-AIVEHYK +>gi|320164492|gb|EFW41391.1| predicted protein [Capsaspora owczarzaki ATCC 30864] +---HETRDVIKSTWALAIQKQDEADvtpvatfvNVFFGKLFELCPETRLVFGQ-DLsL-QGKS----LSSVLTGMLEFVVHPKKLTTQVKSLAVKHV-GLGITPDMFDAFGAALVYTIKTRIGKVWSPQTERVWVDAYGGVNNIITQQMS +>gi|341887822|gb|EGT43757.1| CBN-GLB-25 protein [Caenorhabditis brenneri] +----RDFFTLKNWWKSVDRKRVEASTYMFSRYLNDFPENKAFYAKLkNVnAqtvdmnCSDPGFEAMAAQYLKVFDDVITAveekPGDVQSacdRLSAVGKMHR-AkvSGagMESSMFQNMEEPFIQMVKYILQDRFNEKAEMLYRKFFQFCLKYLLEGF- +>gi|242007866|ref|XP_002424739.1| hypothetical protein Phum_PHUM149070 [Pediculus humanus corporis]gi|212508232|gb|EEB12001.1| hypothetical protein Phum_PHUM149070 [Pediculus humanus corporis] +--------VVLNDWPKIRKNYKKIFIDSFINYFAENPNYKLLFPSFsNVSeddlPFNHCFRLHCFAVYKAINFLMSNwLGEyeedDSKILPVIGKTHF-DRGITLEMMNLYKHSIVYSCNNHLKPN--LKRKLSWQTVFDHIFDyYLGSAY- +>gi|17567933|ref|NP_509614.1| GLoBin family member (glb-17) [Caenorhabditis elegans]gi|3877381|emb|CAA86423.1| C. elegans protein F49E2.4a, partially confirmed by transcript evidence [Caenorhabditis elegans] +-ITDEEVTAIRDVWRRA--KTDNVGKKILQTLIEKRPKFAEYFGIQsESlDiralNQSKEFHLQAHRIQNFLDTAVGSLGFcpissVFDMAHRIGQIHF-YRGVNfgADNWLVFKKVTVDQVTtgttdsskekedtnsngtangkvdtdaSLIPiadinNVYSGencLARLGWNKLMTVIVREMKRGFL +>gi|339235297|ref|XP_003379203.1| putative WAP-type 'four-disulfide core' [Trichinella spiralis]gi|316978179|gb|EFV61192.1| putative WAP-type 'four-disulfide core' [Trichinella spiralis] +-LSVYDKRLIEESWCALR-DKDEVAKQIFLRVLTSNEKIRTIFDLH-TcPddelEENSAFQRHVKSLSLFLSICADSlsvsPDRLVSIARSIGEKHV-NfRWvsFDAEYWLLMKCAMVEVISSKQRPKISHLVNTAWNSLLSFVIFEIKHSFL +>gi|9664871|gb|AAF97245.1|AF284381_1 dehaloperoxidase A [Amphitrite ornata] +----------------IRGDLRTYAQDIFLAFLNKYPDERRYFKNYvGKSdqelKSMAKFGDHTEKVFNLMMEVADRATDCvplASdanTLVQM-KQHS---SLTTGNFEKLFVALVEYMRA-SGQSFDS---QSWDRFGKNLVSALSSA-- +>gi|308487955|ref|XP_003106172.1| CRE-GLB-28 protein [Caenorhabditis remanei]gi|308254162|gb|EFO98114.1| CRE-GLB-28 protein [Caenorhabditis remanei] +-LVPDHHKLIRKSWGRI--PKTQFGKAALEAFIRISEVNHSIFGD--KeT-----ENRHIKYFVDLVQSSVDNLEDLEAsvkpWLDLIGRGHS-DFKITGKHWENFAESLLNTATEWNGPGrRHKETVRAWMLMTSFLADRLAHASR +>gi|307179623|gb|EFN67896.1| hypothetical protein EAG_07959 [Camponotus floridanus] +-FSAEEYALVKKVWSGIEINPQFHGNACLRSFCEIYPQYVKFFTQEsKLh-lSFDTRITVKFSIIMETMGYLLldfnkrpKQLDRLVG---YIAMVHK-DMQLDEQDMRNFATSLFQYLSRTYPTQMTAQCQEAMSKFMDSVIKELFV--- +>gi|194745446|ref|XP_001955199.1| GF16359 [Drosophila ananassae]gi|190628236|gb|EDV43760.1| GF16359 [Drosophila ananassae] +-FTLSERLALRQAWNIVRPFTRRYGQEIFFNYLNDAYLRISKFKHTkGN--YLHVLHNHFRGFLCFIGSLIEEQDPvmFQLMLNDNNMTHS-RCKVGAAFILELAQAMTDYILKVFEKVSSASLESGFRKIVD----------- +>gi|312115464|ref|YP_004013060.1| globin [Rhodomicrobium vannielii ATCC 17100]gi|311220593|gb|ADP71961.1| globin [Rhodomicrobium vannielii ATCC 17100] +---PNQLAALREFFSNIESDLPEIVSLMYERFFEAVPEAASLFKG-DFAaqqmQFTSMLRST-----IALTRSSQLwPVDAEAgraslpEIEYLGLRHA-CVGVQPEHFAAMKWALARTLAEMYPRDFDRDIEDALSFIFDVVARA------ +>gi|298705122|emb|CBJ28565.1| hemoglobin [Ectocarpus siliculosus] +--------GVKRNWAMISEGPETQGSfngastslvalydTFFARLFLLEPSMRSMFGD-NM-iRQ--------SKFLVGLVNIIVKMEDmLKQGLSPLysfADRSV-AMGARPEHYEVIAEVLPIALEACAGEGvWTPDIAGAWRDVMSFAILAVV---- +>gi|241825972|ref|XP_002414692.1| cytoglobin-1, putative [Ixodes scapularis]gi|215508904|gb|EEC18357.1| cytoglobin-1, putative [Ixodes scapularis] +GLSKRDTKLIRNSWSMLCKQHPKADQLIFKALFTKHPDFMALFQHFkDKDlgvvLSDPQFALHSSAIIQAFGTIIRSLDDPAGVVAlirKNATDHT-TrKGVQPSHFEAMLNVVLEVLQDKLGSRFKPEAITAWEKFIEV---------- +>gi|291238620|ref|XP_002739222.1| PREDICTED: Globin D, coelomic, putative-like [Saccoglossus kowalevskii] +SLTDQHRVILLDSWKVIqeD--IAKVGVIMFMGLFETHPECKEVFMPFkELQgddlRWSSALKAHGLRVMAVIERVLARIDSdekIEEHLKALAKKHV-EYGANSDLVRLFGPQFIGSMKRQLHKSWSDEMQDAWTVLFDIIIYHMTT--- +>gi|196016934|ref|XP_002118316.1| hypothetical protein TRIADDRAFT_62364 [Trichoplax adhaerens]gi|190579092|gb|EDV19196.1| hypothetical protein TRIADDRAFT_62364 [Trichoplax adhaerens] +-LNYQERQAIIDSWNAISTEKQKYGTILFLKLFELEPRVKSLFTIFdFNEplediIQSPHFRSHAMRFMQSLETGVlMGFDKesCDFLFKSLGSRHH-FYDLKSEFLDVIPECILHTIKKGCGNNWSNETADAWKIATKVLCELFRE--- +>gi|47115693|sp|Q7M416.1|GLB1_LIOJA RecName: Full=Globin-1; AltName: Full=Myoglobin Igi|298242|gb|AAB25285.1| myoglobin I [Liolophura japonica, Peptide, 145 aa] +-ISADQAKALKDDIAVVaqN--PNGCGKALFIKMFEMNPGWVEKFPAWkGKSldeiKASDKITNHGGKVINELANWINNINSASGILKSQGTAHK-GRSIGIEYFENVLPVIDATFAQQMGGAYTAAMKDALKAAWtGVIVPGMKAGY- +>gi|326430027|gb|EGD75597.1| hypothetical protein PTSG_06664 [Salpingoeca sp. ATCC 50818] +RLDMEQLKIALGSWTAVVELVPTWHEVFFAELFQAHPETERLLYSSDKSk--S-WNERHMARVGKSVGDVIKSLsnyDDVIEHLTALGTRHA-RYGLHVDQLDLFINAFLWTLGAGLGDSWDHSVKKAWMHVLPFILSPLKS--- +>gi|303271567|ref|XP_003055145.1| predicted protein [Micromonas pusilla CCMP1545]gi|226463119|gb|EEH60397.1| predicted protein [Micromonas pusilla CCMP1545] +-WQKRKKRFVRQSWQAAceNApcGVDGVGDELLRRFFASHPGFVQLF-NFr---rarlLNSAAWRLHARSVARAIDDFVvvatagadpssSSFDNgawrrTKTALRELGGRHL-VaYKVTPAHYDAFGRALLATVEATArggggggdgggdggggdggGdGGFDRETLAAWTETWEGMRELM----- +>gi|241950916|ref|XP_002418180.1| conserved hypothetical protein [Candida dubliniensis CD36]gi|223641519|emb|CAX43480.1| conserved hypothetical protein [Candida dubliniensis CD36] +SLNKIELNQIKSSWSKIN--PKEFYPELYENLFELNPQLRSIFNND-----DSVINYHCDIFGDLFNFIINNIEDdllLNEFLYQFvNENQR-FSSMISQYLEPMGNALIQTFKNELSGQFTSVLELIWIKIFVFVANLLL---- +>gi|47230183|emb|CAG10597.1| unnamed protein product [Tetraodon nigroviridis] +KLSSKDKELIRGSWDSLGKNKVPHGVILFSRLFELDPELLNLFHYTtNCGstqdcLSSPEFLEHVTKVSETVPgeppprlhaeprsvipgDACDRRCSqppgrppLpgglfaqpgakasgsgsqttvvccmIHTVQLFRHLHI-FvHRkkkttcfftcllVCFGLLQMVGESLLYMLQCSLGQAYTASLRQAWLNMYSVVVASMSRGW- +>gi|339256778|ref|XP_003370265.1| putative globin [Trichinella spiralis]gi|316965561|gb|EFV50254.1| putative globin [Trichinella spiralis] +KFTDEEVELLARTWKKDD--FDwlyRIGTDIYTCVFQLAPELKVFFPYVtECEkknqswESSKGFRTQALRFVQILGMAVEKTESrmkdddshLHHRLYKLGETHR-RfalKGFTPTHWKGFVIAVRVAMRRAVeaMPNLTPAecetAIEAWDKLSRYVVHRMEEGY- +>gi|289208690|ref|YP_003460756.1| globin [Thioalkalivibrio sp. K90mix]gi|288944321|gb|ADC72020.1| globin [Thioalkalivibrio sp. K90mix] +-------D-VHQSYGRCrR--AGDFVTRFYEHFLQADPRVKAAFGSTDFSqq---KRALGQA--ISTAISYAEGE-SFVASTMERMGQVHS-RegrVPVEPDLYPIWLDCMVRTAAEID-PRWEPRLEERWRGAMQPAIDLFVRLY- +>gi|268557836|ref|XP_002636908.1| Hypothetical protein CBG09371 [Caenorhabditis briggsae] +-IVDDDFELARAHWIQLQK-SNKQGLAirgCFLTMLEKYPQVRPIWG-FgKRiegriDetwkpelVEDFYFRHHCASLQAALNMIIQNKDDrngMRRMLNEMGAHHF-FYDACEPHFEVFQDCLLESMRLVLngGDALDDEIEHSWICLLQTIRLHMGE--- +>gi|308466646|ref|XP_003095575.1| CRE-GLB-8 protein [Caenorhabditis remanei]gi|308245099|gb|EFO89051.1| CRE-GLB-8 protein [Caenorhabditis remanei] +PLTCAQIHLVRALWRQVYTtkGPTVIGASIYHRLCFKNLMVKEQMKQVELPpkfqNRDNFIKAHCKAVAELIDQVVENldhLDNVTNELMRIGRVHA-KvLRgeLTGKLWNTVAETIIDCTLE-WGDrrCRSETVRKAWALIVAFVIEKIKAG-- +>gi|308071704|emb|CBX25204.1| C. elegans protein Y17G7B.6a, partially confirmed by transcript evidence [Caenorhabditis elegans] +NLSVKQKKLLRQSFNAMN--SGgtflKLMEKIFRRLETKCPDMRSIFLTtaFvnSLSrerQTPPLVkteYDHCKCMVGIFERLIENLENINEQLTMirhYGEKHA-QmaeSGFTGAMIEQFGEISVFVIGSQDVVKFNHETVKAWRLLLACVTDEMKVGF- +>gi|322493086|emb|CBZ28370.1| adenylate cyclase-like protein [Leishmania mexicana MHOM/GT/2001/U1103] +--------TVEGTWRILEDegMVEQFGQQLYDELLTRNRRLRVHFYGV-------DIEEQSKSLLRMVGTAVHFYQKpqlTVDMFTKAGARHR-GYGVNAEVFVEMRNAFMRVFSKFVGTDVFQAAEEEWQKFWKYVLDLLVH--- +>gi|341888892|gb|EGT44827.1| CBN-GLB-12 protein [Caenorhabditis brenneri] +-LNKKDRTLLRETWQRLDE-PkDIVGLIFLDIVNDIEPDLKKVFGV-DRApraamLKMPKFGGHILRfyeFMEQLTSMLGTSENLTGAwqlVRKTGRSHV-KqgfleqnqNQMEKNYFEVVINVFIERLIPYLtgeqelppaegkeqkkvrfAQNYtTSQITDVWKKFLNTVISQMTDSF- +>gi|206994324|emb|CAR81336.1| C. elegans protein C06H2.5a, partially confirmed by transcript evidence [Caenorhabditis elegans] +HLSPHQVQLLTSTWPRIK--TqSSLFTQVFKVLMQRSPVCREMFQKMsivgGFSsn-SVCDLNSHTKLLCELLDSLMTDLHQPAkIVLakcQDVGAAHV-NMNekCCGVVFDQLGEAFTELITKVECVRSKREAVKSWMCVISYMADSIKSGY-