From 8b33268ab4d9356d4bb6fbe12b4b1a77e70c5218 Mon Sep 17 00:00:00 2001 From: Gabriel Studer <gabriel.studer@unibas.ch> Date: Tue, 13 Jun 2017 23:24:15 +0200 Subject: [PATCH] Bugfix regarding StringRef StringRefs only contain pointers onto data managed by std::strings. If those strings are copied or deleted, the StringRefs might point into nirvana --- modules/io/src/mol/star_parser.cc | 49 ++++++++++++++++--------------- modules/io/src/mol/star_parser.hh | 12 +++++++- 2 files changed, 36 insertions(+), 25 deletions(-) diff --git a/modules/io/src/mol/star_parser.cc b/modules/io/src/mol/star_parser.cc index 0de35fec5..80043259b 100644 --- a/modules/io/src/mol/star_parser.cc +++ b/modules/io/src/mol/star_parser.cc @@ -33,7 +33,7 @@ namespace ost { namespace io { StarParser::StarParser(std::istream& stream, bool items_as_row): filename_("<stream>"), line_num_(0), has_current_line_(false), current_line_(), - items_row_header_(), file_open_(true), items_row_columns_(), + items_row_header_(), file_open_(true), items_row_values_() { items_as_row_ = items_as_row; @@ -48,7 +48,7 @@ StarParser::StarParser(std::istream& stream, bool items_as_row): StarParser::StarParser(const String& filename, bool items_as_row): fstream_(filename.c_str()), filename_(filename), line_num_(0), has_current_line_(false), current_line_(), - items_row_header_(), file_open_(true), items_row_columns_(), + items_row_header_(), file_open_(true), items_row_values_() { items_as_row_=items_as_row; @@ -292,7 +292,6 @@ void StarParser::ParseLoop() } } bool process_rows=this->OnBeginLoop(header); - std::vector<StringRef> columns; std::vector<String> tmp_values; // optimized for the common case where all values are present on the same // line. @@ -310,12 +309,9 @@ void StarParser::ParseLoop() if (process_rows) { tmp_values.push_back(String()); this->ParseMultilineValue(tmp_values.back()); - columns.push_back(StringRef(tmp_values.back().data(), - tmp_values.back().length()).trim()); - if (columns.size()==header.GetSize()) { - this->OnDataRow(header, columns); + if (tmp_values.size()==header.GetSize()) { + this->CallOnDataRow(header, tmp_values); tmp_values.clear(); - columns.clear(); } } else { String s; @@ -334,19 +330,14 @@ void StarParser::ParseLoop() } default: if (process_rows) { - int before=columns.size(); - StarParser::SplitLine(tline, columns, false); - if (columns.size()==header.GetSize()) { - this->OnDataRow(header, columns); + std::vector<StringRef> split_elements; + StarParser::SplitLine(tline, split_elements); + for(uint i = 0; i < split_elements.size(); ++i) { + tmp_values.push_back(split_elements[i].str()); + } + if (tmp_values.size()==header.GetSize()) { + this->CallOnDataRow(header, tmp_values); tmp_values.clear(); - columns.clear(); - } else { - tmp_values.push_back(tline.str()); - const char* d=tmp_values.back().c_str(); - for (size_t i=before; i<columns.size(); ++i) { - columns[i]=StringRef(d+(columns[i].begin()-tline.begin()), - columns[i].size()); - } } } this->ConsumeLine(); @@ -362,11 +353,10 @@ void StarParser::ParseLastDataItemRow() { if (items_row_header_.GetCategory().size() > 0) { if (this->OnBeginLoop(items_row_header_)) { - this->OnDataRow(items_row_header_, items_row_columns_); + this->CallOnDataRow(items_row_header_, items_row_values_); this->OnEndLoop(); } items_row_values_.clear(); - items_row_columns_.clear(); items_row_header_.Clear(); } } @@ -385,8 +375,7 @@ void StarParser::ParseDataItemOrRow(StarDataItem& item) // row items_row_header_.Add(item.GetName()); items_row_values_.push_back(item.GetValue().str()); - items_row_columns_.push_back(StringRef(items_row_values_.back().data(), - items_row_values_.back().length()).trim()); + } else { this->OnDataItem(item); } @@ -579,4 +568,16 @@ void StarParser::Parse() } } +void StarParser::CallOnDataRow(const StarLoopDesc& header, + const std::vector<String>& columns) { + + std::vector<StringRef> string_refs; + string_refs.reserve(columns.size()); + for(uint i = 0; i < columns.size(); ++i) { + string_refs.push_back(StringRef(columns[i].data(), + columns[i].length()).trim()); + } + this->OnDataRow(header, string_refs); +} + }} diff --git a/modules/io/src/mol/star_parser.hh b/modules/io/src/mol/star_parser.hh index 53e170e72..139c39a84 100644 --- a/modules/io/src/mol/star_parser.hh +++ b/modules/io/src/mol/star_parser.hh @@ -73,6 +73,8 @@ public: category_=category.str(); } + + void Add(const StringRef& name) { index_map_.insert(std::make_pair(name.str(), index_map_.size())); @@ -262,6 +264,15 @@ private: has_current_line_=false; } + // the reason for having this function are problems when generating + // StringRefs that contain pointers to strings in a vector. + // When the vector grows, the memory of the strings might get + // reallocated and the StringRefs point into nothing. + // This function Prepares the stringrefs given a constant + // vector and calls OnDataRow to minimize the change in interface. + void CallOnDataRow(const StarLoopDesc& header, + const std::vector<String>& columns); + void ParseDataItemIdent(const StringRef ident, StringRef& cat, StringRef& name); void ParseGlobal(); @@ -278,7 +289,6 @@ private: bool items_as_row_; StarLoopDesc items_row_header_; bool file_open_; - std::vector<StringRef> items_row_columns_; std::vector<String> items_row_values_; }; -- GitLab