diff --git a/modules/io/src/mol/star_parser.cc b/modules/io/src/mol/star_parser.cc index b440181c98e98f573719fc51798e59366ee1440d..25b71ed93c142386bb905f966b93b4ac97c6c8d0 100644 --- a/modules/io/src/mol/star_parser.cc +++ b/modules/io/src/mol/star_parser.cc @@ -96,11 +96,7 @@ bool StarParser::SplitLine(const StringRef& line, while (s!=line.end() && !isspace(*s)) { ++s; } - if (s-start) { - parts.push_back(StringRef(start, s-start)); - } else { - return false; - } + parts.push_back(StringRef(start, s-start)); } } return true; @@ -110,18 +106,28 @@ bool StarParser::ParseMultilineValue(String& value, bool skip) { std::stringstream valuebuf; StringRef line; - bool r=this->GetLine(line); - assert(r);r=r; + if (!this->GetLine(line)) { + throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR, + "Unexpected end of input", + line_num_)); + } valuebuf << line.substr(1); + bool found_semicolon = false; while (this->NextLine(line)) { StringRef tline=line.rtrim(); if (!tline.empty() && tline[0]==';') { + found_semicolon = true; break; } if (!skip) { valuebuf << tline << "\n"; } - } + } + if (!found_semicolon) { + throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR, + "Unterminated multiline value", + line_num_)); + } if (!skip) { value=valuebuf.str(); } @@ -148,7 +154,14 @@ void StarParser::ParseLoop() prefix_len=tline.find('.')-tline.begin(); header.SetCategory(tline.substr(1, prefix_len-1)); } else { - assert(tline[prefix_len]=='.'); + if (tline[prefix_len] != '.' || + StringRef(header.GetCategory().data(), + header.GetCategory().size())!=tline.substr(1, + prefix_len-1)) { + throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR, + "Change of category in loop", + line_num_)); + } } header.Add(tline.substr(prefix_len+1)); this->ConsumeLine(); @@ -182,8 +195,7 @@ void StarParser::ParseLoop() case ';': if (process_rows) { tmp_values.push_back(String()); - bool r=this->ParseMultilineValue(tmp_values.back()); - assert(r);r=r; + this->ParseMultilineValue(tmp_values.back()); columns.push_back(StringRef(tmp_values.back().data(), tmp_values.back().length()).trim()); if (columns.size()==header.GetSize()) { @@ -193,8 +205,7 @@ void StarParser::ParseLoop() } } else { String s; - bool r=this->ParseMultilineValue(s, true); - assert(r);r=r; + this->ParseMultilineValue(s, true); } break; case 'd': @@ -210,8 +221,7 @@ void StarParser::ParseLoop() default: if (process_rows) { int before=columns.size(); - bool r=StarParser::SplitLine(tline, columns, false); - assert(r);r=r; + StarParser::SplitLine(tline, columns, false); if (columns.size()==header.GetSize()) { this->OnDataRow(header, columns); tmp_values.clear(); @@ -278,8 +288,7 @@ void StarParser::ParseEndDataItemRow() void StarParser::ParseDataItem() { StringRef line; - bool r=this->GetLine(line); - assert(r);r=r; + this->GetLine(line); // optimize for common case when name/value are present on the same line. // We don't have to allocate any additional strings in that case. std::vector<StringRef> nv; @@ -302,42 +311,59 @@ void StarParser::ParseDataItem() StarParser::SplitLine(StringRef(value.data(), value.length()), nv, false); if (nv.size()!=2) { - std::cout << "ERROR:" << line_num_ << ":" << tline << std::endl; + throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR, + "More than 1 value for data item "+ identifier, + line_num_)); } - assert(nv.size()==2); this->ConsumeLine(); } break; } - size_t i=identifier.find('.'); - assert(i!=String::npos); + if (value.empty()) { + throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR, + "Unexpected end of input", + line_num_)); + } StringRef id_ref(identifier.data(), identifier.size()); - StringRef cat=StringRef(id_ref.substr(1, i-1)); - StringRef name=id_ref.substr(i+1); + StringRef cat; + StringRef name; StringRef value_ref=StringRef(value.data(), value.length()).trim(); + this->ParseDataItemIdent(id_ref, cat, name); StarDataItem data_item(cat, name, value_ref); this->ParseDataItemOrRow(data_item); } else { if (nv.size()!=2) { - std::cout << "ERROR:" << line_num_ << ":" << line << std::endl; - } - assert(nv.size()==2); - StringRef::const_iterator i=nv[0].find('.'); - assert(i!=nv[0].end()); - StringRef cat=nv[0].substr(1, i-nv[0].begin()-1); - StringRef name=nv[0].substr(i-nv[0].begin()+1); + throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR, + "More than 1 value for data item "+ line.str(), + line_num_)); + } + StringRef cat; + StringRef name; + this->ParseDataItemIdent(nv[0], cat, name); StarDataItem data_item(cat, name, nv[1]); this->ParseDataItemOrRow(data_item); this->ConsumeLine(); } } +void StarParser::ParseDataItemIdent(const StringRef ident, + StringRef& cat, StringRef& name) +{ + StringRef::const_iterator i=ident.find('.'); + if (i == ident.end()) { + throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR, + "Invalid data-item identifier '" + ident.str() + "'", + line_num_)); + } + cat=ident.substr(1, i-ident.begin()-1); + name=ident.substr(i-ident.begin()+1); +} + void StarParser::ParseData() { StringRef line; - bool r=this->GetLine(line); - assert(r);r=r; + this->GetLine(line); StringRef data_id=line.rtrim().substr(5); bool skip=!this->OnBeginData(data_id); this->ConsumeLine(); @@ -364,10 +390,7 @@ void StarParser::ParseData() case ';': if (skip) { String s; - bool r=this->ParseMultilineValue(s, true); - assert(r);r=r; - } else { - assert(0 && "';' when skip==false"); + this->ParseMultilineValue(s, true); } break; case 'l': @@ -394,8 +417,7 @@ void StarParser::DiagnoseUnknown() { std::stringstream ss; StringRef line; - bool r=this->GetLine(line); - assert(r);r=r; + this->GetLine(line); ss << "unknown control structure '"<< line.rtrim() << "'"; throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR, ss.str(), line_num_)); diff --git a/modules/io/src/mol/star_parser.hh b/modules/io/src/mol/star_parser.hh index e151e6c093b1cb6f2d214e707f55893148dae43a..382356627b6f97851a3d4feaf7bc8752a92ae2c8 100644 --- a/modules/io/src/mol/star_parser.hh +++ b/modules/io/src/mol/star_parser.hh @@ -151,6 +151,11 @@ public: /// \brief format diagnostic and returns it as a string. String FormatDiagnostic(StarDiagType type, const String& message, int line=-1); + + void SetFilename(const String& filename) + { + filename_ = filename; + } public: void Parse(); @@ -194,6 +199,9 @@ private: assert(has_current_line_); has_current_line_=false; } + + void ParseDataItemIdent(const StringRef ident, + StringRef& cat, StringRef& name); void ParseGlobal(); void ParseData(); void ParseDataItem(); diff --git a/modules/io/tests/test_star_parser.cc b/modules/io/tests/test_star_parser.cc index 67a818fc24df8500c40a7db3f86b8c7aedc4b768..d91f19d6b1381a16a10cce3b66d2a0b20c3ed8d1 100644 --- a/modules/io/tests/test_star_parser.cc +++ b/modules/io/tests/test_star_parser.cc @@ -247,7 +247,7 @@ BOOST_AUTO_TEST_CASE(star_data_item) BOOST_AUTO_TEST_CASE(format_diag_stream) { - BOOST_MESSAGE(" Running star_data_item tests..."); + BOOST_MESSAGE(" Running format_diag_stream tests..."); std::ifstream s("testfiles/data-item.cif"); DataItemTestParser star_p(s); BOOST_CHECK_EQUAL(star_p.FormatDiagnostic(STAR_DIAG_WARNING, "bad", -1), @@ -256,11 +256,14 @@ BOOST_AUTO_TEST_CASE(format_diag_stream) "<stream>: error: really bad"); BOOST_CHECK_EQUAL(star_p.FormatDiagnostic(STAR_DIAG_ERROR, "bad", 55), "<stream>:55: error: bad"); + star_p.SetFilename("testname"); + BOOST_CHECK_EQUAL(star_p.FormatDiagnostic(STAR_DIAG_ERROR, "bad", 55), + "testname:55: error: bad"); } BOOST_AUTO_TEST_CASE(format_diag_filename) { - BOOST_MESSAGE(" Running star_data_item tests..."); + BOOST_MESSAGE(" Running format_diag_filename tests..."); DataItemTestParser star_p("testfiles/data-item.cif"); BOOST_CHECK_EQUAL(star_p.FormatDiagnostic(STAR_DIAG_WARNING, "bad", -1), "testfiles/data-item.cif: warning: bad"); @@ -335,5 +338,62 @@ BOOST_AUTO_TEST_CASE(star_missing_data) LoopTestParser star_p(s); BOOST_CHECK_THROW(star_p.Parse(), IOException); } + +BOOST_AUTO_TEST_CASE(star_unterminated_dataitem) +{ + std::ifstream s("testfiles/unterminated_dataitem.cif"); + LoopTestParser star_p(s); + BOOST_CHECK_THROW(star_p.Parse(), IOException); +} + +BOOST_AUTO_TEST_CASE(star_incomplete_dataitem) +{ + std::ifstream s("testfiles/incomplete_dataitem.cif"); + LoopTestParser star_p(s); + BOOST_CHECK_THROW(star_p.Parse(), IOException); +} + +BOOST_AUTO_TEST_CASE(star_singleline_multiple_values) +{ + std::ifstream s("testfiles/singleline_multivalue_dataitem.cif"); + DataItemTestParser star_p(s); + BOOST_CHECK_THROW(star_p.Parse(), IOException); +} + +BOOST_AUTO_TEST_CASE(star_multiline_multiple_values) +{ + std::ifstream s("testfiles/multiline_multivalue_dataitem.cif"); + DataItemTestParser star_p(s); + BOOST_CHECK_THROW(star_p.Parse(), IOException); +} + +BOOST_AUTO_TEST_CASE(star_multiline_invalid_ident) +{ + std::ifstream s("testfiles/multiline_invalid_ident.cif"); + DataItemTestParser star_p(s); + BOOST_CHECK_THROW(star_p.Parse(), IOException); +} + +BOOST_AUTO_TEST_CASE(star_singleline_invalid_ident) +{ + std::ifstream s("testfiles/singleline_invalid_ident.cif"); + DataItemTestParser star_p(s); + BOOST_CHECK_THROW(star_p.Parse(), IOException); +} + +BOOST_AUTO_TEST_CASE(star_loop_category_change) +{ + std::ifstream s("testfiles/loop_category_change.cif"); + DataItemTestParser star_p(s); + BOOST_CHECK_THROW(star_p.Parse(), IOException); +} + +BOOST_AUTO_TEST_CASE(star_loop_category_change_inplace) +{ + std::ifstream s("testfiles/loop_category_change_inplace.cif"); + DataItemTestParser star_p(s); + BOOST_CHECK_THROW(star_p.Parse(), IOException); +} + BOOST_AUTO_TEST_SUITE_END(); diff --git a/modules/io/tests/testfiles/incomplete_dataitem.cif b/modules/io/tests/testfiles/incomplete_dataitem.cif new file mode 100644 index 0000000000000000000000000000000000000000..f2b94524e5bcc084994cf6e12a77c4ac0ccfcb0b --- /dev/null +++ b/modules/io/tests/testfiles/incomplete_dataitem.cif @@ -0,0 +1,3 @@ +data_incomplete data-item + +_a.x diff --git a/modules/io/tests/testfiles/loop_category_change.cif b/modules/io/tests/testfiles/loop_category_change.cif new file mode 100644 index 0000000000000000000000000000000000000000..422d3b4b625b33b747a64b0650aa6ae938191c4a --- /dev/null +++ b/modules/io/tests/testfiles/loop_category_change.cif @@ -0,0 +1,7 @@ +data_singleline multi value data-item + +_loop +_a.x +_a.y +_ab.z +1 2 3 diff --git a/modules/io/tests/testfiles/loop_category_change_inplace.cif b/modules/io/tests/testfiles/loop_category_change_inplace.cif new file mode 100644 index 0000000000000000000000000000000000000000..5b69631545a3690ef842ac1d946e115b919f1335 --- /dev/null +++ b/modules/io/tests/testfiles/loop_category_change_inplace.cif @@ -0,0 +1,7 @@ +data_singleline multi value data-item + +_loop +_a.x +_a.y +_b.z +1 2 3 diff --git a/modules/io/tests/testfiles/multiline_invalid_ident.cif b/modules/io/tests/testfiles/multiline_invalid_ident.cif new file mode 100644 index 0000000000000000000000000000000000000000..b4aa41c5fb6f87effdcc2778c35bdcc270d1b5e7 --- /dev/null +++ b/modules/io/tests/testfiles/multiline_invalid_ident.cif @@ -0,0 +1,6 @@ +data_singleline multi value data-item + +_ax +; +b +; diff --git a/modules/io/tests/testfiles/multiline_multivalue_dataitem.cif b/modules/io/tests/testfiles/multiline_multivalue_dataitem.cif new file mode 100644 index 0000000000000000000000000000000000000000..d4e736cb1171f00db8c210dddfa9fd1973d5fa53 --- /dev/null +++ b/modules/io/tests/testfiles/multiline_multivalue_dataitem.cif @@ -0,0 +1,4 @@ +data_singleline multi value data-item + +_a.x +a b diff --git a/modules/io/tests/testfiles/singleline_invalid_ident.cif b/modules/io/tests/testfiles/singleline_invalid_ident.cif new file mode 100644 index 0000000000000000000000000000000000000000..be64cbc7146d4e69d6964ccdb07e80c4471740d3 --- /dev/null +++ b/modules/io/tests/testfiles/singleline_invalid_ident.cif @@ -0,0 +1,3 @@ +data_singleline multi value data-item + +_ax b diff --git a/modules/io/tests/testfiles/singleline_multivalue_dataitem.cif b/modules/io/tests/testfiles/singleline_multivalue_dataitem.cif new file mode 100644 index 0000000000000000000000000000000000000000..f3c64e5549b836034ccbf6e4036eb8c4870c75f3 --- /dev/null +++ b/modules/io/tests/testfiles/singleline_multivalue_dataitem.cif @@ -0,0 +1,3 @@ +data_singleline multi value data-item + +_a.x a b diff --git a/modules/io/tests/testfiles/unterminated_dataitem.cif b/modules/io/tests/testfiles/unterminated_dataitem.cif new file mode 100644 index 0000000000000000000000000000000000000000..928074684830186dc7c464d4843606aeb5c0ef52 --- /dev/null +++ b/modules/io/tests/testfiles/unterminated_dataitem.cif @@ -0,0 +1,5 @@ +data_incomplete data-item + +_a.x +; +2