From 4ac959df53fa2050b46127c753c36a129e619e10 Mon Sep 17 00:00:00 2001
From: marco <marco@5a81b35b-ba03-0410-adc8-b2c5c5119f08>
Date: Thu, 19 Aug 2010 11:00:47 +0000
Subject: [PATCH] added import of PIR files

This is BZDNG-153

git-svn-id: https://dng.biozentrum.unibas.ch/svn/openstructure/trunk@2656 5a81b35b-ba03-0410-adc8-b2c5c5119f08
---
Reviewer notes (v2):
 * PirIOHandler::Import: the blank-line branch reached `continue` without
   reading a new line, so a blank line in front of a record made the loop
   spin forever. The next line is now fetched (and counted) first.
 * The record loop tests fail() instead of eof(). A header on the last line
   of an input without trailing newline is now reported as a premature end
   instead of being dropped silently.
 * The header line must carry ';' after the two-letter sequence type.
 * The "unknown sequence type" error now names the type and the line; its
   format arguments had been commented out.
 * isspace() is called with an unsigned char value.
 * The try/catch that only re-threw a copy of seq::InvalidSequence is gone;
   the exception propagates unchanged.
 * test_pir.cc: added checks for leading blank lines and for a truncated
   last record.
 * NOTE(review): indentation and blank lines in this patch were reconstructed
   from the hunk line counts, and the index hashes still name the original
   blobs - confirm against the repository before applying.

 modules/io/src/seq/pir_io_handler.cc         | 97 ++++++++++++++------
 modules/io/tests/CMakeLists.txt              |  1 +
 modules/io/tests/test_pir.cc                 | 83 +++++++++++++++++
 modules/io/tests/testfiles/pir/no-star.pir   | 21 +++++
 modules/io/tests/testfiles/pir/seq-types.pir | 21 +++++
 modules/io/tests/testfiles/pir/simple.pir    |  4 +
 modules/seq/base/src/invalid_sequence.hh     |  1 +
 7 files changed, 198 insertions(+), 30 deletions(-)
 create mode 100644 modules/io/tests/test_pir.cc
 create mode 100644 modules/io/tests/testfiles/pir/no-star.pir
 create mode 100644 modules/io/tests/testfiles/pir/seq-types.pir
 create mode 100644 modules/io/tests/testfiles/pir/simple.pir

diff --git a/modules/io/src/seq/pir_io_handler.cc b/modules/io/src/seq/pir_io_handler.cc
index a5d07c9ee..2974dcd51 100644
--- a/modules/io/src/seq/pir_io_handler.cc
+++ b/modules/io/src/seq/pir_io_handler.cc
@@ -72,50 +72,87 @@ bool PirIOHandler::ProvidesExport(const boost::filesystem::path& loc,
   return PirIOHandler::ProvidesImport(loc, format);
 }
 
-void PirIOHandler::Import(seq::SequenceList& aln,
-                          std::istream& instream)
+void PirIOHandler::Import(seq::SequenceList& aln, std::istream& instream)
 {
-  const char* error_msg="Bad Pir file: Expected '>', but '%1%' found.";
-
+  const char* error_msg="Expected %1%, but '%2%' found.";
+  const char* premature_end="Premature end in PIR file on line %1%";
+  const char* empty_seq="PIR file contains empty sequence on line %1%";
+  const char* unknown_type="Bad PIR file: Unknown sequence type '%1%' on "
+                           "line %2%";
   String line;
-  std::getline(instream, line);
-  int seq_count=0;
-  //not yet supported
-  throw IOException("Import of PIR format is not yet supported!");
-  while (!instream.eof()) {
-    // parse header information. cut after first "|"
+  int line_num=1, seq_count=0;
+  // we can't use a normal while(std::getline(...)) here, because the PIR format
+  // requires us to perform a one-line look-ahead. The loop tests fail() and
+  // not eof(): reading a last line that lacks a trailing newline succeeds but
+  // sets eof, and that line must still be processed.
+  std::getline(instream, line);
+  while (!instream.fail()) {
+    // skip empty lines; fetch the next line before going round again
     if (line.find_first_not_of("\n\t ")==String::npos) {
       std::getline(instream, line);
+      line_num+=1;
       continue;
     }
-    if (line.length()==0 || line[0]!='>') {
-      String error=str(format(error_msg) % line);
+    if (line[0]!='>') {
+      String error=str(format(error_msg) % "'>'" % line);
+      throw IOException(error);
+    }
+    // a header reads ">XX;name": two-letter sequence type, ';', identifier
+    if (line.length()<4 || line[3]!=';') {
+      String error=str(format(error_msg) % "seq-type;identifier" % line);
       throw IOException(error);
     }
-    String identifier=line.substr(1);
-    String sequence_str;
-    while (std::getline(instream, line) && line.size()>0 && line[0]!='>') {
-      if (line.find_first_not_of("\n\t ")==String::npos) {
-        continue;
+    String seq_type=line.substr(1, 2);
+    if (!(seq_type=="P1" || seq_type=="F1" || seq_type=="DL" ||
+          seq_type=="DC" || seq_type=="RL" || seq_type=="RC" ||
+          seq_type=="XX")) {
+      throw IOException(str(format(unknown_type) % seq_type % line_num));
+    }
+    String name=line.substr(4);
+    int subject_line_num=line_num;
+    // the line after the header is free-text description; read and drop it
+    if (!std::getline(instream, line)) {
+      throw IOException(str(format(premature_end) % line_num));
+    }
+    line_num+=1;
+    std::stringstream seq_string;
+    bool end_seq=false;
+    // collect residues up to the terminating '*', ignoring whitespace
+    while (!end_seq && std::getline(instream, line)) {
+      line_num+=1;
+      for (String::iterator i=line.begin(), e=line.end(); i!=e; ++i) {
+        // isspace() is undefined for negative values, hence the cast
+        if (isspace(static_cast<unsigned char>(*i))) {
+          continue;
+        }
+        if (*i=='*') {
+          end_seq=true;
+          break;
+        }
+        seq_string << *i;
       }
-      sequence_str+=line;
     }
-    if (sequence_str.length()>0) {
-      try {
-        seq::SequenceHandle seq=seq::CreateSequence(identifier, sequence_str);
-        aln.AddSequence(seq);
-        seq_count+=1;
-      } catch (seq::InvalidSequence& e) {
-        throw e;
+    // look ahead for the header of the next record (or the end of input)
+    while (std::getline(instream, line)) {
+      line_num+=1;
+      if (!line.empty() && line[0]=='>') {
+        break;
       }
-    } else {
-      throw IOException("Bad Pir file: Sequence is empty.");
     }
-  }
+    if (!end_seq) {
+      throw IOException(str(format(premature_end) % line_num));
+    }
+    if (seq_string.str().empty()) {
+      throw IOException(str(format(empty_seq) % subject_line_num));
+    }
+    // a seq::InvalidSequence raised while creating or adding the sequence
+    // propagates to the caller unchanged
+    seq::SequenceHandle ss=seq::CreateSequence(name, seq_string.str());
+    aln.AddSequence(ss);
+    seq_count+=1;
+  }
   if (seq_count==0) {
     throw IOException("Bad Pir file: File is empty");
-  }
-
+  }
 }
 
 void PirIOHandler::Export(const seq::ConstSequenceList& seqs,
diff --git a/modules/io/tests/CMakeLists.txt b/modules/io/tests/CMakeLists.txt
index f0c230801..1cb26f5fd 100644
--- a/modules/io/tests/CMakeLists.txt
+++ b/modules/io/tests/CMakeLists.txt
@@ -3,6 +3,7 @@ set(OST_IO_UNIT_TESTS
   test_io_pdb.cc
   test_io_crd.cc
   test_io_sdf.cc
+  test_pir.cc
   test_iomanager.cc
   tests.cc
 )
diff --git a/modules/io/tests/test_pir.cc b/modules/io/tests/test_pir.cc
new file mode 100644
index 000000000..1829404cc
--- /dev/null
+++ b/modules/io/tests/test_pir.cc
@@ -0,0 +1,83 @@
+//------------------------------------------------------------------------------
+// This file is part of the OpenStructure project <www.openstructure.org>
+//
+// Copyright (C) 2008-2010 by the OpenStructure authors
+//
+// This library is free software; you can redistribute it and/or modify it under
+// the terms of the GNU Lesser General Public License as published by the Free
+// Software Foundation; either version 3.0 of the License, or (at your option)
+// any later version.
+// This library is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with this library; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+//------------------------------------------------------------------------------
+
+/*
+  Author: Marco Biasini
+ */
+
+#define BOOST_TEST_DYN_LINK
+#include <boost/test/unit_test.hpp>
+#include <ost/seq/invalid_sequence.hh>
+#include <ost/io/seq/pir_io_handler.hh>
+#include <ost/io/seq/load.hh>
+#include <ost/io/io_exception.hh>
+
+
+using namespace ost;
+using namespace ost::io;
+
+
+BOOST_AUTO_TEST_SUITE( io )
+
+
+BOOST_AUTO_TEST_CASE(pir_filetypes)
+{
+  BOOST_CHECK(PirIOHandler::ProvidesImport("","pir"));
+  BOOST_CHECK(PirIOHandler::ProvidesImport("alignment.pir"));
+}
+
+BOOST_AUTO_TEST_CASE(pir_simple)
+{
+  seq::SequenceHandle s=LoadSequence("testfiles/pir/simple.pir");
+  BOOST_CHECK_EQUAL(s.GetName(), "name");
+  BOOST_CHECK_EQUAL(s.GetString(), "ABCDEFGHIJKLMNOP");
+}
+
+BOOST_AUTO_TEST_CASE(pir_seq_types)
+{
+  seq::SequenceList s=LoadSequenceList("testfiles/pir/seq-types.pir");
+}
+
+BOOST_AUTO_TEST_CASE(pir_errors)
+{
+  BOOST_CHECK_THROW(SequenceFromString(">", "pir"), IOException);
+  BOOST_CHECK_THROW(SequenceFromString(">P1", "pir"), IOException);
+  BOOST_CHECK_THROW(SequenceFromString(">P1;", "pir"), IOException);
+  BOOST_CHECK_THROW(SequenceFromString(">P1;name\n", "pir"), IOException);
+  BOOST_CHECK_THROW(SequenceFromString(">P1;name\ndescription\n", "pir"),
+                    IOException);
+  // a header on the last line, without trailing newline, is a truncated record
+  BOOST_CHECK_THROW(SequenceFromString(">P1;a\ndesc\nABCD*\n>P1;b", "pir"),
+                    IOException);
+}
+
+BOOST_AUTO_TEST_CASE(pir_blank_lines)
+{
+  // blank lines ahead of the first record are skipped
+  BOOST_CHECK_NO_THROW(SequenceFromString("\n \n>P1;name\ndescription\nABCD*",
+                                          "pir"));
+}
+
+BOOST_AUTO_TEST_CASE(pir_no_star)
+{
+  BOOST_CHECK_THROW(LoadSequence("testfiles/pir/no-star.pir"),
+                    seq::InvalidSequence);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
\ No newline at end of file
diff --git a/modules/io/tests/testfiles/pir/no-star.pir b/modules/io/tests/testfiles/pir/no-star.pir
new file mode 100644
index 000000000..98bc371aa
--- /dev/null
+++ b/modules/io/tests/testfiles/pir/no-star.pir
@@ -0,0 +1,21 @@
+>P1;name
+description
+ABCD EFGH*
+>F1;name
+description
+ABCD EFGH
+>DL;name*
+description
+ABCD EFGH
+>DC;name*
+description
+ABCD EFGH
+>RL;name*
+description
+ABCD EFGH
+>RC;name*
+description
+ABCD EFGH
+>XX;name*
+description
+ABCD EFGH*
\ No newline at end of file
diff --git a/modules/io/tests/testfiles/pir/seq-types.pir b/modules/io/tests/testfiles/pir/seq-types.pir
new file mode 100644
index 000000000..ed8569f43
--- /dev/null
+++ b/modules/io/tests/testfiles/pir/seq-types.pir
@@ -0,0 +1,21 @@
+>P1;name
+description
+ABCD EFGH*
+>F1;name
+description
+ABCD EFGH*
+>DL;name
+description
+ABCD EFGH*
+>DC;name
+description
+ABCD EFGH*
+>RL;name
+description
+ABCD EFGH*
+>RC;name*
+description
+ABCD EFGH*
+>XX;name
+description
+ABCD EFGH*
\ No newline at end of file
diff --git a/modules/io/tests/testfiles/pir/simple.pir b/modules/io/tests/testfiles/pir/simple.pir
new file mode 100644
index 000000000..6e8f03e4e
--- /dev/null
+++ b/modules/io/tests/testfiles/pir/simple.pir
@@ -0,0 +1,4 @@
+>P1;name
+description
+ABCD EFGH
+IJKL MNOP*
\ No newline at end of file
diff --git a/modules/seq/base/src/invalid_sequence.hh b/modules/seq/base/src/invalid_sequence.hh
index b112df8f3..453916403 100644
--- a/modules/seq/base/src/invalid_sequence.hh
+++ b/modules/seq/base/src/invalid_sequence.hh
@@ -23,6 +23,7 @@
   Author: Marco Biasini
  */
 #include <ost/seq/module_config.hh>
+#include <ost/message.hh>
 
 namespace ost { namespace seq {
 class DLLEXPORT InvalidSequence : public Error {
-- 
GitLab