diff --git a/modules/io/src/mol/CMakeLists.txt b/modules/io/src/mol/CMakeLists.txt index 1201eb58fcff984f0bf0b5887a29f011ed6fd7ac..8551f74234504a85005e20e4822e565383f594a2 100644 --- a/modules/io/src/mol/CMakeLists.txt +++ b/modules/io/src/mol/CMakeLists.txt @@ -12,6 +12,7 @@ load_entity.cc surface_io_msms_handler.cc load_surface.cc chemdict_parser.cc +mmcif_parser.cc io_profile.cc dcd_io.cc star_parser.cc @@ -20,6 +21,7 @@ PARENT_SCOPE set(OST_IO_MOL_HEADERS chemdict_parser.hh +mmcif_parser.hh star_parser.hh io_profile.hh dcd_io.hh diff --git a/modules/io/src/mol/star_parser.cc b/modules/io/src/mol/star_parser.cc index 049b026a2f7826f9a4c49b4224942314bfc82045..d21d00278bacc559ad382fbd2246f37bff67c65e 100644 --- a/modules/io/src/mol/star_parser.cc +++ b/modules/io/src/mol/star_parser.cc @@ -27,10 +27,11 @@ namespace ost { namespace io { -StarParser::StarParser(std::istream& stream): - stream_(stream), line_num_(0), has_current_line_(false), current_line_() +StarParser::StarParser(std::istream& stream, bool items_as_row): + stream_(stream), line_num_(0), has_current_line_(false), current_line_(), + items_row_header_(), items_row_columns_(), items_row_values_() { - + items_as_row_ = items_as_row; } bool StarParser::SplitLine(const StringRef& line, @@ -200,6 +201,52 @@ void StarParser::ParseLoop() } } +void StarParser::ParseLastDataItemRow() +{ + if (items_row_header_.GetCategory().size() > 0) { + if (this->OnBeginLoop(items_row_header_)) { + // Create the column views only now: items_row_values_ has stopped + // growing, so no vector reallocation can leave a StringRef dangling. + for (size_t i=0; i<items_row_values_.size(); ++i) { + items_row_columns_.push_back(StringRef(items_row_values_[i].data(), + items_row_values_[i].length()).trim()); + } + this->OnDataRow(items_row_header_, items_row_columns_); + this->OnEndLoop(); + } + items_row_values_.clear(); + items_row_columns_.clear(); + items_row_header_.Clear(); + } +} + +void StarParser::ParseDataItemOrRow(StarDataItem& item) +{ + if (items_as_row_) { + // header + if (StringRef(items_row_header_.GetCategory().c_str(), + items_row_header_.GetCategory().size())!=item.GetCategory()) { + this->ParseLastDataItemRow(); + // set category for new section + items_row_header_.SetCategory(item.GetCategory()); + } + + // row: only the value is stored here, its StringRef view is created in + // ParseLastDataItemRow() once the row is complete +
items_row_header_.Add(item.GetName()); + items_row_values_.push_back(item.GetValue().str()); + } else { + this->OnDataItem(item); + } +} + +void StarParser::ParseEndDataItemRow() +{ + if (items_as_row_) { + this->ParseLastDataItemRow(); + } +} + void StarParser::ParseDataItem() { StringRef line; @@ -242,7 +289,7 @@ void StarParser::ParseDataItem() StringRef value_ref=StringRef(value.data(), value.length()).trim(); StarDataItem data_item(cat, name, value_ref); - this->OnDataItem(data_item); + this->ParseDataItemOrRow(data_item); } else { if (nv.size()!=2) { std::cout << "ERROR:" << line_num_ << ":" << line << std::endl; @@ -253,10 +300,9 @@ void StarParser::ParseDataItem() StringRef cat=nv[0].substr(1, i-nv[0].begin()-1); StringRef name=nv[0].substr(i-nv[0].begin()+1); StarDataItem data_item(cat, name, nv[1]); - this->OnDataItem(data_item); + this->ParseDataItemOrRow(data_item); this->ConsumeLine(); } - } void StarParser::ParseData() @@ -283,6 +329,7 @@ void StarParser::ParseData() break; case 'd': if (tline.length()>=5 && StringRef("data_", 5)==tline.substr(0, 5)) { + this->ParseEndDataItemRow(); this->OnEndData(); return; } @@ -297,6 +344,7 @@ void StarParser::ParseData() break; case 'l': if (tline==StringRef("loop_", 5)) { + this->ParseEndDataItemRow(); this->ParseLoop(); break; } @@ -310,6 +358,7 @@ void StarParser::ParseData() return; } } + this->ParseEndDataItemRow(); this->OnEndData(); } diff --git a/modules/io/src/mol/star_parser.hh b/modules/io/src/mol/star_parser.hh index 272a11898b1a9dc8197b0ab596a3d9bcb7068362..47aa98b0eb3fff87af2844a611e338b400b4570f 100644 --- a/modules/io/src/mol/star_parser.hh +++ b/modules/io/src/mol/star_parser.hh @@ -71,6 +71,12 @@ public: { return index_map_.size(); } + void Clear() + { + category_.clear(); + index_map_.clear(); + } + const String& GetCategory() const { return category_; } private: String
category_; @@ -95,8 +101,14 @@ private: /// a list of column names and values. class DLLEXPORT_OST_IO StarParser { public: - StarParser(std::istream& istream); - + /// \brief create a StarParser + /// + /// \param stream input stream + /// \param items_as_row if true, data-items are first gathered (identifier as + /// header, values as row) and then parsed like a loop + /// (OnBeginLoop(), OnDataRow(), OnEndLoop()) + explicit StarParser(std::istream& stream, bool items_as_row=false); + virtual ~StarParser() { } // callback interface public: @@ -108,7 +120,7 @@ public: /// \brief invoked when leaving a loop /// /// OnEndLoop() is only invoked, when OnBeginLoop() returned true. - virtual void OnEndLoop() { } + virtual void OnEndLoop() { } /// \brief invoked when a data row in a loop is encountered. /// \param header describes the row format /// \param columns contains the data columns @@ -135,6 +147,14 @@ public: std::vector<StringRef>& parts, bool clear=true); private: void ParseLoop(); + /// \brief Passes the data items gathered so far to the loop callbacks as a + /// single row, then clears the gathered header and values + void ParseLastDataItemRow(); + /// \brief Calls functions for parsing a data item, either as singleton + /// (default) or loop.
+ void ParseDataItemOrRow(StarDataItem& item); + /// \brief If enabled, calls ParseLastDataItemRow() + void ParseEndDataItemRow(); private: /// \brief read next line, replacing the current line bool NextLine(StringRef& str) @@ -171,6 +191,10 @@ private: int line_num_; bool has_current_line_; String current_line_; + bool items_as_row_; + StarLoopDesc items_row_header_; + std::vector<StringRef> items_row_columns_; + std::vector<String> items_row_values_; }; }} diff --git a/modules/io/tests/CMakeLists.txt b/modules/io/tests/CMakeLists.txt index abbd589147f64663b2ab685d7deab0ef8ffa8679..dc2c8bfdfc87a8a9780da0749d4648552b3678cb 100644 --- a/modules/io/tests/CMakeLists.txt +++ b/modules/io/tests/CMakeLists.txt @@ -9,6 +9,7 @@ set(OST_IO_UNIT_TESTS test_iomanager.cc tests.cc test_star_parser.cc + test_mmcif_parser.cc ) ost_unittest(MODULE io diff --git a/modules/io/tests/test_star_parser.cc b/modules/io/tests/test_star_parser.cc index 0b31e3dd829ddcdd3298731ecaa6b4b1215b4a2f..fc58a63b62265264e3c06d27abd15524cfae3f1e 100644 --- a/modules/io/tests/test_star_parser.cc +++ b/modules/io/tests/test_star_parser.cc @@ -20,6 +20,7 @@ Author: Marco Biasini */ #include <fstream> +#include <math.h> #include <ost/mol/mol.hh> #include <ost/io/io_exception.hh> #include <ost/io/mol/star_parser.hh> @@ -52,6 +53,16 @@ public: s4=item.GetValue().str(); } } + virtual bool OnBeginLoop(const StarLoopDesc& header) + { + return true; + } + virtual void OnDataRow(const StarLoopDesc& header, + const std::vector<StringRef>& columns) + { + BOOST_CHECK_EQUAL(header.GetCategory(), "loop"); + BOOST_CHECK_EQUAL(columns.size(), size_t(4)); + } String s1; String s2; String s3; @@ -66,7 +77,7 @@ public: { BOOST_CHECK_EQUAL(header.GetCategory(), "loop"); return true; - } + } virtual void OnDataRow(const StarLoopDesc& header, const std::vector<StringRef>& columns) { @@ -103,6 +114,90 @@ public: bool visit_two; }; +class ItemsAsRowTestParser : public StarParser { +public: + ItemsAsRowTestParser(std::istream&
stream): StarParser(stream, true), + category(""), i1(0), i2(0), i3(0), i4(0), f1(0), f2(0), f3(0), f4(0) + { } // numeric members start at 0 so the checks stay defined if no row is parsed + + virtual bool OnBeginLoop(const StarLoopDesc& header) + { + if ((header.GetCategory() == "data-item1") || + (header.GetCategory() == "data-item2") || + (header.GetCategory() == "data-item3") || + (header.GetCategory() == "data-item4")) { + return true; + } + return false; + } + + virtual void OnDataRow(const StarLoopDesc& header, + const std::vector<StringRef>& columns) + { + BOOST_CHECK_EQUAL(columns.size(), size_t(4)); + category = header.GetCategory(); + if (header.GetCategory() == "data-item1") { + s1 = columns[header.GetIndex("s1")].str(); + s2 = columns[header.GetIndex("s2")].str(); + s3 = columns[header.GetIndex("s3")].str(); + s4 = columns[header.GetIndex("s4")].str(); + return; + } + if (header.GetCategory() == "data-item2") { + i1 = columns[header.GetIndex("i1")].to_int().second; + i2 = columns[header.GetIndex("i2")].to_int().second; + i3 = columns[header.GetIndex("i3")].to_int().second; + i4 = columns[header.GetIndex("i4")].to_int().second; + return; + } + if (header.GetCategory() == "data-item3") { + f1 = columns[header.GetIndex("f1")].to_float().second; + f2 = columns[header.GetIndex("f2")].to_float().second; + f3 = columns[header.GetIndex("f3")].to_float().second; + f4 = columns[header.GetIndex("f4")].to_float().second; + return; + } + if (header.GetCategory() == "data-item4") { + s5 = columns[header.GetIndex("s1")].str(); + s6 = columns[header.GetIndex("s2")].str(); + s7 = columns[header.GetIndex("s3")].str(); + s8 = columns[header.GetIndex("s4")].str(); + return; + } + } + + void OnEndLoop() + { + if (category == "data-item3") { + f1 = ceil(f1*2); + f2 = ceil(f2*2); + f3 = ceil(f3*2); + f4 = ceil(f4*2); + } + } + + String category; + + String s1; + String s2; + String s3; + String s4; + String s5; + String s6; + String s7; + String s8; + + int i1; + int i2; + int i3; + int i4; + + float f1; + float f2; + float f3; + float f4; +}; + BOOST_AUTO_TEST_SUITE( io );
BOOST_AUTO_TEST_CASE(star_split1) @@ -182,6 +277,30 @@ BOOST_AUTO_TEST_CASE(star_loop) BOOST_CHECK_EQUAL(star_p.lines[4][2], "15"); } +BOOST_AUTO_TEST_CASE(star_items_as_row) +{ + std::ifstream s("testfiles/items-as-row.cif"); + ItemsAsRowTestParser star_p(s); + star_p.Parse(); + BOOST_CHECK_EQUAL(star_p.s1, "a"); + BOOST_CHECK_EQUAL(star_p.s2, "a b c"); + BOOST_CHECK_EQUAL(star_p.s3, "a\nb\nc"); + BOOST_CHECK_EQUAL(star_p.s4, "a'b"); + BOOST_CHECK_EQUAL(star_p.i1, 1); + BOOST_CHECK_EQUAL(star_p.i2, 2); + BOOST_CHECK_EQUAL(star_p.i3, 3); + BOOST_CHECK_EQUAL(star_p.i4, 4); + BOOST_CHECK_EQUAL(ceil(star_p.f1), ceil(2.22)); /* OnEndLoop() stored ceil(2*value) */ + BOOST_CHECK_EQUAL(ceil(star_p.f2), ceil(2.44)); + BOOST_CHECK_EQUAL(ceil(star_p.f3), ceil(2.66)); + BOOST_CHECK_EQUAL(ceil(star_p.f4), ceil(2.88)); + + BOOST_CHECK_EQUAL(star_p.s5, "1.11"); /* _data-item4, listed after the loops */ + BOOST_CHECK_EQUAL(star_p.s6, "1.22"); + BOOST_CHECK_EQUAL(star_p.s7, "1.33"); + BOOST_CHECK_EQUAL(star_p.s8, "1.44"); +} + BOOST_AUTO_TEST_CASE(star_missing_data) { std::ifstream s("testfiles/missing_data.cif"); diff --git a/modules/io/tests/testfiles/items-as-row.cif b/modules/io/tests/testfiles/items-as-row.cif new file mode 100644 index 0000000000000000000000000000000000000000..db06b3565b3fbd4a5d4aadf225c2ade79008c6ff --- /dev/null +++ b/modules/io/tests/testfiles/items-as-row.cif @@ -0,0 +1,52 @@ +data_items-as-row + +_data-item1.s1 a +_data-item1.s2 'a b c' +_data-item1.s3 +; +a +b +c +; +_data-item1.s4 'a'b' +_data-item2.i1 1 +_data-item2.i2 '2' +_data-item2.i3 +; +3 +; +_data-item2.i4 +4 + +data_more-items-as-row + +_data-item3.f1 1.11 +_data-item3.f2 1.22 +_data-item3.f3 +; +1.33 +; +_data-item3.f4 1.44 + +loop_ +_loop.s1 +_loop.s2 +_loop.s3 +_loop.s4 +1 2 3 4 + +loop_ +_loop.s1 +_loop.s2 +_loop.s3 +_loop.s4 +1 2 3 6 +4 5 6 8 + +_data-item4.s1 1.11 +_data-item4.s2 1.22 +_data-item4.s3 +; +1.33 +; +_data-item4.s4 1.44 diff --git a/modules/io/tests/testfiles/missing_data.cif b/modules/io/tests/testfiles/missing_data.cif new file mode 100644 index
0000000000000000000000000000000000000000..a239bd0ef9f8dd9fd79b8280b02323f18a5c0be7 --- /dev/null +++ b/modules/io/tests/testfiles/missing_data.cif @@ -0,0 +1,4 @@ +this file has no 'data_' tag. + +data_ +