diff --git a/modules/seq/alg/pymod/wrap_seq_alg.cc b/modules/seq/alg/pymod/wrap_seq_alg.cc index 7ec249db1bfb54da1dae2611603e03140bcf8f51..f632d3d6eb4656a2d546c9542cf28321c5a15c9a 100644 --- a/modules/seq/alg/pymod/wrap_seq_alg.cc +++ b/modules/seq/alg/pymod/wrap_seq_alg.cc @@ -35,8 +35,8 @@ BOOST_PYTHON_MODULE(_seq_alg) .value("LONGER_SEQUENCE", RefMode::LONGER_SEQUENCE) .export_values() ; - def("SequenceIdentity", &SequenceIdentity, arg("seq_a")=0, arg("seq_b")=1); - + def("SequenceIdentity", &SequenceIdentity, + (arg("ref_mode")=RefMode::ALIGNMENT, arg("seq_a")=0, arg("seq_b")=1)); class_<AlignedRegionList>("AlignedRegionList", init<>()) .def(vector_indexing_suite<AlignedRegionList>()) ; diff --git a/modules/seq/alg/src/sequence_identity.cc b/modules/seq/alg/src/sequence_identity.cc index 855d3ec48efe79ae4e4941c0d94b07f08f25f5dc..5d0a7dc2d2237f3abaf1cfd8b2badfe75385f3ff 100644 --- a/modules/seq/alg/src/sequence_identity.cc +++ b/modules/seq/alg/src/sequence_identity.cc @@ -30,28 +30,43 @@ Real SequenceIdentity(const AlignmentHandle& aln, RefMode::Type ref_mode, { int non_gap_count=0; int identical_count=0; - for (AlignedColumnIterator i=aln.begin(), e=aln.end(); i!=e; ++i) { - AlignedColumn col=*i; - if (!(col[seq_a] == '-' && col[seq_b]=='-')) { + const String& sa=aln.GetSequence(seq_a).GetString(); + const String& sb=aln.GetSequence(seq_b).GetString(); + for (String::const_iterator + i=sa.begin(), e=sa.end(), j=sb.begin(); i!=e; ++i, ++j) { + if (!((*i)== '-' || (*j)=='-')) { non_gap_count++; } - if (col[seq_a]!='-' && col[seq_a]==col[seq_b]) { + if ((*i)!='-' && (*i)==(*j)) { identical_count++; } } Real seq_id=0.0; - // devide by length of longer sequence: + // divide by length of longer sequence: if (ref_mode==RefMode::LONGER_SEQUENCE) { int wo_gaps_a=aln.GetSequence(seq_a).GetGaplessString().length(); int wo_gaps_b=aln.GetSequence(seq_b).GetGaplessString().length(); if(wo_gaps_a>=wo_gaps_b) { - seq_id=100*static_cast<Real>(identical_count)/wo_gaps_a; + if (wo_gaps_a==0) { + seq_id=0; + } else { + seq_id=100*static_cast<Real>(identical_count)/wo_gaps_a; + } + } else { - seq_id=100*static_cast<Real>(identical_count)/wo_gaps_b; + if (wo_gaps_b==0) { + seq_id=0; + } else { + seq_id=100*static_cast<Real>(identical_count)/wo_gaps_b; + } } } else { - // divide by alignment length: - seq_id=100*static_cast<Real>(identical_count)/non_gap_count; + if (non_gap_count==0) { + seq_id=0; + } else { + // divide by alignment length: + seq_id=100*static_cast<Real>(identical_count)/non_gap_count; + } } return seq_id; } diff --git a/modules/seq/alg/tests/CMakeLists.txt b/modules/seq/alg/tests/CMakeLists.txt index 512de085684ed3392d006a2f3151f2174b1a744c..37969a869b14c0f740f2b8e221509120f1cd1a6e 100644 --- a/modules/seq/alg/tests/CMakeLists.txt +++ b/modules/seq/alg/tests/CMakeLists.txt @@ -1,5 +1,6 @@ set(OST_SEQ_ALG_UNIT_TESTS test_merge_pairwise_alignments.cc + test_sequence_identity.cc tests.cc ) diff --git a/modules/seq/alg/tests/test_sequence_identity.cc b/modules/seq/alg/tests/test_sequence_identity.cc new file mode 100644 index 0000000000000000000000000000000000000000..fa2d58eaface67484bfe5c69ed6486c10c33c02b --- /dev/null +++ b/modules/seq/alg/tests/test_sequence_identity.cc @@ -0,0 +1,132 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2010 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ +/* + * Authors: Marco Biasini + */ + +#define BOOST_TEST_DYN_LINK +#include <boost/test/unit_test.hpp> + +#include <ost/seq/alg/sequence_identity.hh> + + + +using namespace ost; +using namespace ost::seq; + +BOOST_AUTO_TEST_SUITE(ost_seq_alg) + +BOOST_AUTO_TEST_CASE(seqid_one) +{ + SequenceHandle s1=CreateSequence("S1", "abcdefgh"); + SequenceHandle s2=CreateSequence("S2", "abcd----"); + + AlignmentHandle aln=CreateAlignment(); + aln.AddSequence(s1); + aln.AddSequence(s2); + + Real seqid=alg::SequenceIdentity(aln, alg::RefMode::LONGER_SEQUENCE, 0, 1); + BOOST_CHECK_CLOSE(seqid, 50.0, 1e-6); + + // check the default sequence indices + seqid=alg::SequenceIdentity(aln, alg::RefMode::LONGER_SEQUENCE); + BOOST_CHECK_CLOSE(seqid, 50.0, 1e-6); + + // check the default value for reference mode + seqid=alg::SequenceIdentity(aln); + BOOST_CHECK_CLOSE(seqid, 50.0, 1e-6); + + seqid=alg::SequenceIdentity(aln, alg::RefMode::ALIGNMENT); + BOOST_CHECK_CLOSE(seqid, 100.0, 1e-6); +} + +BOOST_AUTO_TEST_CASE(seqid_two) +{ + SequenceHandle s1=CreateSequence("S1", "abcd"); + SequenceHandle s2=CreateSequence("S2", "xbcx"); + + AlignmentHandle aln=CreateAlignment(); + aln.AddSequence(s1); + aln.AddSequence(s2); + + Real seqid=alg::SequenceIdentity(aln, alg::RefMode::LONGER_SEQUENCE, 0, 1); + BOOST_CHECK_CLOSE(seqid, 50.0, 1e-6); + + // check the default sequence indices + seqid=alg::SequenceIdentity(aln, alg::RefMode::LONGER_SEQUENCE); + BOOST_CHECK_CLOSE(seqid, 50.0, 1e-6); + + // check the default value for reference mode + seqid=alg::SequenceIdentity(aln); + BOOST_CHECK_CLOSE(seqid, 50.0, 1e-6); + + seqid=alg::SequenceIdentity(aln, alg::RefMode::ALIGNMENT); + BOOST_CHECK_CLOSE(seqid, 50.0, 1e-6); +} + +BOOST_AUTO_TEST_CASE(seqid_three) +{ + SequenceHandle s1=CreateSequence("S1", "a--d"); + SequenceHandle s2=CreateSequence("S2", "x--x"); + + AlignmentHandle aln=CreateAlignment(); + aln.AddSequence(s1); + aln.AddSequence(s2); + + Real seqid=alg::SequenceIdentity(aln, alg::RefMode::LONGER_SEQUENCE, 0, 1); + BOOST_CHECK_CLOSE(seqid, 0.0, 1e-6); + + // check the default sequence indices + seqid=alg::SequenceIdentity(aln, alg::RefMode::LONGER_SEQUENCE); + BOOST_CHECK_CLOSE(seqid, 0.0, 1e-6); + + // check the default value for reference mode + seqid=alg::SequenceIdentity(aln); + BOOST_CHECK_CLOSE(seqid, 0.0, 1e-6); + + seqid=alg::SequenceIdentity(aln, alg::RefMode::ALIGNMENT); + BOOST_CHECK_CLOSE(seqid, 0.0, 1e-6); +} + +BOOST_AUTO_TEST_CASE(seqid_empty) +{ + SequenceHandle s1=CreateSequence("S1", "---"); + SequenceHandle s2=CreateSequence("S2", "---"); + + AlignmentHandle aln=CreateAlignment(); + aln.AddSequence(s1); + aln.AddSequence(s2); + + Real seqid=alg::SequenceIdentity(aln, alg::RefMode::LONGER_SEQUENCE, 0, 1); + BOOST_CHECK_CLOSE(seqid, 0.0, 1e-6); + + // check the default sequence indices + seqid=alg::SequenceIdentity(aln, alg::RefMode::LONGER_SEQUENCE); + BOOST_CHECK_CLOSE(seqid, 0.0, 1e-6); + + // check the default value for reference mode + seqid=alg::SequenceIdentity(aln); + BOOST_CHECK_CLOSE(seqid, 0.0, 1e-6); + + seqid=alg::SequenceIdentity(aln, alg::RefMode::ALIGNMENT); + BOOST_CHECK_CLOSE(seqid, 0.0, 1e-6); +} + + +BOOST_AUTO_TEST_SUITE_END()