Skip to content
Snippets Groups Projects
Commit 54b00456 authored by marco's avatar marco
Browse files

speedup and unit test for sequence identity calculation

git-svn-id: https://dng.biozentrum.unibas.ch/svn/openstructure/trunk@2676 5a81b35b-ba03-0410-adc8-b2c5c5119f08
parent 9ababd6b
No related branches found
No related tags found
No related merge requests found
......@@ -35,8 +35,8 @@ BOOST_PYTHON_MODULE(_seq_alg)
.value("LONGER_SEQUENCE", RefMode::LONGER_SEQUENCE)
.export_values()
;
def("SequenceIdentity", &SequenceIdentity, arg("seq_a")=0, arg("seq_b")=1);
def("SequenceIdentity", &SequenceIdentity,
(arg("ref_mode")=RefMode::ALIGNMENT, arg("seq_a")=0, arg("seq_b")=1));
class_<AlignedRegionList>("AlignedRegionList", init<>())
.def(vector_indexing_suite<AlignedRegionList>())
;
......
......@@ -30,28 +30,43 @@ Real SequenceIdentity(const AlignmentHandle& aln, RefMode::Type ref_mode,
{
int non_gap_count=0;
int identical_count=0;
for (AlignedColumnIterator i=aln.begin(), e=aln.end(); i!=e; ++i) {
AlignedColumn col=*i;
if (!(col[seq_a] == '-' && col[seq_b]=='-')) {
const String& sa=aln.GetSequence(seq_a).GetString();
const String& sb=aln.GetSequence(seq_b).GetString();
for (String::const_iterator
i=sa.begin(), e=sa.end(), j=sb.begin(); i!=e; ++i, ++j) {
if (!((*i)== '-' || (*j)=='-')) {
non_gap_count++;
}
if (col[seq_a]!='-' && col[seq_a]==col[seq_b]) {
if ((*i)!='-' && (*i)==(*j)) {
identical_count++;
}
}
Real seq_id=0.0;
// devide by length of longer sequence:
// divide by length of longer sequence:
if (ref_mode==RefMode::LONGER_SEQUENCE) {
int wo_gaps_a=aln.GetSequence(seq_a).GetGaplessString().length();
int wo_gaps_b=aln.GetSequence(seq_b).GetGaplessString().length();
if(wo_gaps_a>=wo_gaps_b) {
seq_id=100*static_cast<Real>(identical_count)/wo_gaps_a;
if (wo_gaps_a==0) {
seq_id=0;
} else {
seq_id=100*static_cast<Real>(identical_count)/wo_gaps_a;
}
} else {
seq_id=100*static_cast<Real>(identical_count)/wo_gaps_b;
if (wo_gaps_b==0) {
seq_id=0;
} else {
seq_id=100*static_cast<Real>(identical_count)/wo_gaps_b;
}
}
} else {
// divide by alignment length:
seq_id=100*static_cast<Real>(identical_count)/non_gap_count;
if (non_gap_count==0) {
seq_id=0;
} else {
// divide by alignment length:
seq_id=100*static_cast<Real>(identical_count)/non_gap_count;
}
}
return seq_id;
}
......
set(OST_SEQ_ALG_UNIT_TESTS
test_merge_pairwise_alignments.cc
test_sequence_identity.cc
tests.cc
)
......
//------------------------------------------------------------------------------
// This file is part of the OpenStructure project <www.openstructure.org>
//
// Copyright (C) 2008-2010 by the OpenStructure authors
//
// This library is free software; you can redistribute it and/or modify it under
// the terms of the GNU Lesser General Public License as published by the Free
// Software Foundation; either version 3.0 of the License, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
// details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
//------------------------------------------------------------------------------
/*
* Authors: Marco Biasini
*/
#define BOOST_TEST_DYN_LINK
#include <boost/test/unit_test.hpp>
#include <ost/seq/alg/sequence_identity.hh>
using namespace ost;
using namespace ost::seq;
BOOST_AUTO_TEST_SUITE(ost_seq_alg)
BOOST_AUTO_TEST_CASE(seqid_one)
{
SequenceHandle s1=CreateSequence("S1", "abcdefgh");
SequenceHandle s2=CreateSequence("S2", "abcd----");
AlignmentHandle aln=CreateAlignment();
aln.AddSequence(s1);
aln.AddSequence(s2);
Real seqid=alg::SequenceIdentity(aln, alg::RefMode::LONGER_SEQUENCE, 0, 1);
BOOST_CHECK_CLOSE(seqid, 50.0, 1e-6);
// check the default sequence indices
seqid=alg::SequenceIdentity(aln, alg::RefMode::LONGER_SEQUENCE);
BOOST_CHECK_CLOSE(seqid, 50.0, 1e-6);
// check the default value for reference mode
seqid=alg::SequenceIdentity(aln);
BOOST_CHECK_CLOSE(seqid, 50.0, 1e-6);
seqid=alg::SequenceIdentity(aln, alg::RefMode::ALIGNMENT);
BOOST_CHECK_CLOSE(seqid, 100.0, 1e-6);
}
BOOST_AUTO_TEST_CASE(seqid_two)
{
SequenceHandle s1=CreateSequence("S1", "abcd");
SequenceHandle s2=CreateSequence("S2", "xbcx");
AlignmentHandle aln=CreateAlignment();
aln.AddSequence(s1);
aln.AddSequence(s2);
Real seqid=alg::SequenceIdentity(aln, alg::RefMode::LONGER_SEQUENCE, 0, 1);
BOOST_CHECK_CLOSE(seqid, 50.0, 1e-6);
// check the default sequence indices
seqid=alg::SequenceIdentity(aln, alg::RefMode::LONGER_SEQUENCE);
BOOST_CHECK_CLOSE(seqid, 50.0, 1e-6);
// check the default value for reference mode
seqid=alg::SequenceIdentity(aln);
BOOST_CHECK_CLOSE(seqid, 50.0, 1e-6);
seqid=alg::SequenceIdentity(aln, alg::RefMode::ALIGNMENT);
BOOST_CHECK_CLOSE(seqid, 50.0, 1e-6);
}
BOOST_AUTO_TEST_CASE(seqid_three)
{
SequenceHandle s1=CreateSequence("S1", "a--d");
SequenceHandle s2=CreateSequence("S2", "x--x");
AlignmentHandle aln=CreateAlignment();
aln.AddSequence(s1);
aln.AddSequence(s2);
Real seqid=alg::SequenceIdentity(aln, alg::RefMode::LONGER_SEQUENCE, 0, 1);
BOOST_CHECK_CLOSE(seqid, 0.0, 1e-6);
// check the default sequence indices
seqid=alg::SequenceIdentity(aln, alg::RefMode::LONGER_SEQUENCE);
BOOST_CHECK_CLOSE(seqid, 0.0, 1e-6);
// check the default value for reference mode
seqid=alg::SequenceIdentity(aln);
BOOST_CHECK_CLOSE(seqid, 0.0, 1e-6);
seqid=alg::SequenceIdentity(aln, alg::RefMode::ALIGNMENT);
BOOST_CHECK_CLOSE(seqid, 0.0, 1e-6);
}
BOOST_AUTO_TEST_CASE(seqid_empty)
{
SequenceHandle s1=CreateSequence("S1", "---");
SequenceHandle s2=CreateSequence("S2", "---");
AlignmentHandle aln=CreateAlignment();
aln.AddSequence(s1);
aln.AddSequence(s2);
Real seqid=alg::SequenceIdentity(aln, alg::RefMode::LONGER_SEQUENCE, 0, 1);
BOOST_CHECK_CLOSE(seqid, 0.0, 1e-6);
// check the default sequence indices
seqid=alg::SequenceIdentity(aln, alg::RefMode::LONGER_SEQUENCE);
BOOST_CHECK_CLOSE(seqid, 0.0, 1e-6);
// check the default value for reference mode
seqid=alg::SequenceIdentity(aln);
BOOST_CHECK_CLOSE(seqid, 0.0, 1e-6);
seqid=alg::SequenceIdentity(aln, alg::RefMode::ALIGNMENT);
BOOST_CHECK_CLOSE(seqid, 0.0, 1e-6);
}
BOOST_AUTO_TEST_SUITE_END()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment