Browse Source

[3360] Added CSV file validation.

Marcin Siodelski 11 years ago
parent
commit
6199f3a5ea
3 changed files with 334 additions and 34 deletions
  1. 124 20
      src/lib/util/csv_file.cc
  2. 75 2
      src/lib/util/csv_file.h
  3. 135 12
      src/lib/util/tests/csv_file_unittest.cc

+ 124 - 20
src/lib/util/csv_file.cc

@@ -124,7 +124,8 @@ CSVRow::checkIndex(const size_t at) const {
 }
 
 CSVFile::CSVFile(const std::string& filename)
-    : primary_separator_(','), filename_(filename), fs_() {
+    : primary_separator_(','), filename_(filename), fs_(), cols_(0),
+      read_msg_() {
 }
 
 CSVFile::~CSVFile() {
@@ -142,6 +143,12 @@ CSVFile::close() {
 }
 
 void
+CSVFile::flush() const {
+    checkStreamStatus("flush");
+    fs_->flush();
+}
+
+void
 CSVFile::addColumn(const std::string& col_name) {
     if (getColumnIndex(col_name) >= 0) {
         isc_throw(CSVFileError, "attempt to add duplicate column '"
@@ -152,10 +159,10 @@ CSVFile::addColumn(const std::string& col_name) {
 
 void
 CSVFile::append(const CSVRow& row) const {
-    if (!fs_) {
-        isc_throw(CSVFileError, "unable to write a row to the CSV file '"
-                  << filename_ << "', which is closed");
-    }
+    checkStreamStatus("append");
+
+    // If a stream is in invalid state, reset the state.
+    fs_->clear();
 
     if (row.getValuesCount() != getColumnCount()) {
         isc_throw(CSVFileError, "number of values in the CSV row '"
@@ -176,6 +183,19 @@ CSVFile::append(const CSVRow& row) const {
     }
 }
 
+void
+CSVFile::checkStreamStatus(const std::string& operation) const {
+    if (!fs_) {
+        isc_throw(CSVFileError, "NULL stream pointer when performing '"
+                  << operation << "' on file '" << filename_ << "'");
+
+    } else if (!fs_->is_open()) {
+        isc_throw(CSVFileError, "closed stream when performing '"
+                  << operation << "' on file '" << filename_ << "'");
+
+    }
+}
+
 std::ifstream::pos_type
 CSVFile::size() const {
     std::ifstream fs(filename_.c_str());
@@ -186,11 +206,16 @@ CSVFile::size() const {
         fs.close();
         return (0);
     }
-    // Seek to the end of file and see where we are. This is a size of
-    // the file.
-    fs.seekg(0, std::ifstream::end);
-    std::ifstream::pos_type pos = fs.tellg();
-    fs.close();
+    std::ifstream::pos_type pos;
+    try {
+        // Seek to the end of file and see where we are. This is a size of
+        // the file.
+        fs.seekg(0, std::ifstream::end);
+        pos = fs.tellg();
+        fs.close();
+    } catch (const std::exception& ex) {
+        return (0);
+    }
     return (pos);
 }
 
@@ -214,8 +239,25 @@ CSVFile::getColumnName(const size_t col_index) const {
     return (cols_[col_index]);
 }
 
-void
-CSVFile::next(CSVRow& row) {
+bool
+CSVFile::next(CSVRow& row, const bool skip_validation) {
+    // Set somethings as row validation error. Although, we haven't started
+    // actual row validation we should get rid of any previously recorded
+    // errors so as the caller doesn't interpret them as the current one.
+    setReadMsg("validation not started");
+
+    try {
+        // Check that stream is "ready" for any IO operations.
+        checkStreamStatus("get next row");
+
+    } catch (isc::Exception& ex) {
+        setReadMsg(ex.what());
+        return (false);
+    }
+
+    // If a stream is in invalid state, reset the state.
+    fs_->clear();
+
     // Get exactly one line of the file.
     std::string line;
     std::getline(*fs_, line);
@@ -223,10 +265,20 @@ CSVFile::next(CSVRow& row) {
     // return an empty row.
     if (line.empty() && fs_->eof()) {
         row = EMPTY_ROW();
-        return;
+        return (true);
+
+    } else if (!fs_->good()) {
+        // If we hit an IO error, communicate it to the caller but do NOT close
+        // the stream. Caller may try again.
+        setReadMsg("error reading a row from CSV file '"
+                   + std::string(filename_) + "'");
+        return (false);
     }
     // If we read anything, parse it.
     row.parse(line.c_str());
+
+    // And check if it is correct.
+    return (skip_validation ? true : validate(row));
 }
 
 void
@@ -248,14 +300,34 @@ CSVFile::open() {
         // Make sure we are on the beginning of the file, so as we can parse
         // the header.
         fs_->seekg(0);
+        if (!fs_->good()) {
+            close();
+            isc_throw(CSVFileError, "unable to set read pointer in the file '"
+                      << filename_ << "'");
+        }
 
-        // Get the header.
-        std::string line;
-        std::getline(*fs_, line);
-        CSVRow header(line.c_str(), primary_separator_);
-        // Initialize columns.
-        for (size_t i = 0; i < header.getValuesCount(); ++i) {
-            addColumn(header.readAt(i));
+        // Read the header.
+        CSVRow header;
+        if (!next(header, true)) {
+            close();
+            isc_throw(CSVFileError, "failed to read and parse header of the"
+                      " CSV file '" << filename_ << "': "
+                      << getReadMsg());
+        }
+
+        // Check the header against the columns specified for the CSV file.
+        if (!validateHeader(header)) {
+            close();
+            isc_throw(CSVFileError, "invalid header '" << header
+                      << "' in CSV file '" << filename_ << "'");
+        }
+
+        // Everything is good, so if we haven't added any columns yet,
+        // add them.
+        if (getColumnCount() == 0) {
+            for (size_t i = 0; i < header.getValuesCount(); ++i) {
+                addColumn(header.readAt(i));
+            }
         }
     }
 }
@@ -291,5 +363,37 @@ CSVFile::recreate() {
 
 }
 
+bool
+CSVFile::validate(const CSVRow& row) {
+    setReadMsg("success");
+    bool ok = (row.getValuesCount() == getColumnCount());
+    if (!ok) {
+        std::ostringstream s;
+        s << "the size of the row '" << row << "' doesn't match the number of"
+            " columns '" << getColumnCount() << "' of the CSV file '"
+          << filename_ << "'";
+        setReadMsg(s.str());
+    }
+    return (ok);
 }
+
+bool
+CSVFile::validateHeader(const CSVRow& header) {
+    if (getColumnCount() == 0) {
+        return (true);
+    }
+
+    if (getColumnCount() != header.getValuesCount()) {
+        return (false);
+    }
+
+    for (int i = 0; i < getColumnCount(); ++i) {
+        if (getColumnName(i) != header.readAt(i)) {
+            return (false);
+        }
+    }
+    return (true);
 }
+
+} // end of isc::util namespace
+} // end of isc namespace

+ 75 - 2
src/lib/util/csv_file.h

@@ -73,7 +73,7 @@ public:
     /// @param cols Number of values in the row.
     /// @param separator Character being a separator between values in the
     /// text representation of the row.
-    CSVRow(const size_t cols, const char separator = ',');
+    CSVRow(const size_t cols = 0, const char separator = ',');
 
     /// @brief Constructor, parses a single row of the CSV file.
     ///
@@ -270,11 +270,22 @@ public:
     /// @brief Closes the CSV file.
     void close();
 
+    /// @brief Flushes a file.
+    void flush() const;
+
     /// @brief Returns the number of columns in the file.
     size_t getColumnCount() const {
         return (cols_.size());
     }
 
+    /// @brief Returns the description of the last error returned by the
+    /// @c CSVFile::next function.
+    ///
+    /// @return Description of the last error during row validation.
+    std::string getReadMsg() const {
+        return (read_msg_);
+    }
+
     /// @brief Returns the index of the column having specified name.
     ///
     /// This function is exception safe.
@@ -299,7 +310,11 @@ public:
     /// reached, the empty row is returned (a row containing no values).
     ///
     /// @param [out] row Object receiving the parsed CSV file.
-    void next(CSVRow& row);
+    /// @param skip_validation Do not perform validation.
+    ///
+    /// @return true if row has been read and validated; false if validation
+    /// failed.
+    bool next(CSVRow& row, const bool skip_validation = false);
 
     /// @brief Opens existing file or creates a new one.
     ///
@@ -322,14 +337,69 @@ public:
     /// should be called.
     void recreate();
 
+    /// @brief Sets error message after row validation.
+    ///
+    /// The @c CSVFile::validate function is responsible for setting the
+    /// error message after validation of the row read from the CSV file.
+    /// It will use this function to set this message. Note, that the
+    /// @c validate function can set a message after successful validation
+    /// too. Such message could say "success", or something similar.
+    ///
+    /// @param val_msg Error message to be set.
+    void setReadMsg(const std::string& read_msg) {
+        read_msg_ = read_msg;
+    }
+
     /// @brief Represents empty row.
     static CSVRow EMPTY_ROW() {
         static CSVRow row(0);
         return (row);
     }
 
+protected:
+
+    /// @brief Validate the row read from a file.
+    ///
+    /// This function implements a basic validation for the row read from the
+    /// CSV file. It is virtual so as it may be customized in derived classes.
+    ///
+    /// This default implementation checks that the number of values in the
+    /// row corresponds to the number of columns specified for this file.
+    ///
+    /// If row validation fails, the error message is noted and can be retrieved
+    /// using @c CSVFile::getReadMsg. The function which overrides this
+    /// base implementation is responsible for setting the error message using
+    /// @c CSVFile::setReadMsg.
+    ///
+    /// @param row A row to be validated.
+    ///
+    /// @return true if the column is valid; false otherwise.
+    virtual bool validate(const CSVRow& row);
+
 private:
 
+    /// @brief This function validates the header of the CSV file.
+    ///
+    /// If there are any columns added to the @c CSVFile object, it will
+    /// compare that they exactly match (including order) the header read
+    /// from the file.
+    ///
+    /// This function is called internally by @CSVFile::open.
+    ///
+    /// @param header A row holding a header.
+    /// @return true if header matches the columns; false otherwise.
+    bool validateHeader(const CSVRow& header);
+
+    /// @brief Sanity check if stream is open.
+    ///
+    /// Checks if the file stream is open so as IO operations can be performed
+    /// on it. This is internally called by the public class members to prevent
+    /// them from performing IO operations on invalid stream and using NULL
+    /// pointer to a stream.
+    ///
+    /// @throw CSVFileError if stream is closed or pointer to it is NULL.
+    void checkStreamStatus(const std::string& operation) const;
+
     /// @brief Returns size of the CSV file.
     std::ifstream::pos_type size() const;
 
@@ -344,6 +414,9 @@ private:
 
     /// @brief Holds CSV file columns.
     std::vector<std::string> cols_;
+
+    /// @brief Holds last error during row reading or validation.
+    std::string read_msg_;
 };
 
 } // namespace isc::util

+ 135 - 12
src/lib/util/tests/csv_file_unittest.cc

@@ -136,6 +136,7 @@ public:
     /// @param contents Contents of the file.
     void writeFile(const std::string& contents) const;
 
+    /// @brief Absolute path to the file used in the tests.
     std::string testfile_;
 
 };
@@ -190,13 +191,16 @@ CSVFileTest::writeFile(const std::string& contents) const {
     }
 }
 
-// This test checks that the file can be opened and its content
-// parsed correctly. It also checks that empty row is returned
-// when EOF is reached.
-TEST_F(CSVFileTest, open) {
+// This test checks that the file can be opened,  its whole content is
+// parsed correctly and data may be appended. It also checks that empty
+// row is returned when EOF is reached.
+TEST_F(CSVFileTest, openReadAllWrite) {
+    // Create a new CSV file that contains a header and two data rows.
     writeFile("animal,age,color\n"
               "cat,10,white\n"
               "lion,15,yellow\n");
+
+    // Open this file and check that the header is parsed.
     boost::scoped_ptr<CSVFile> csv(new CSVFile(testfile_));
     ASSERT_NO_THROW(csv->open());
     ASSERT_EQ(3, csv->getColumnCount());
@@ -204,33 +208,65 @@ TEST_F(CSVFileTest, open) {
     EXPECT_EQ("age", csv->getColumnName(1));
     EXPECT_EQ("color", csv->getColumnName(2));
 
-    CSVRow row(0);
-    ASSERT_NO_THROW(csv->next(row));
+    // Read first row.
+    CSVRow row;
+    ASSERT_TRUE(csv->next(row));
     ASSERT_EQ(3, row.getValuesCount());
     EXPECT_EQ("cat", row.readAt(0));
     EXPECT_EQ("10", row.readAt(1));
     EXPECT_EQ("white", row.readAt(2));
 
-    ASSERT_NO_THROW(csv->next(row));
+    // Read second row.
+    ASSERT_TRUE(csv->next(row));
     ASSERT_EQ(3, row.getValuesCount());
     EXPECT_EQ("lion", row.readAt(0));
     EXPECT_EQ("15", row.readAt(1));
     EXPECT_EQ("yellow", row.readAt(2));
 
-    ASSERT_NO_THROW(csv->next(row));
+    // There is no 3rd row, so the empty one should be returned.
+    ASSERT_TRUE(csv->next(row));
+    EXPECT_EQ(CSVFile::EMPTY_ROW(), row);
+
+    // It should be fine to read again, but again empty row should be returned.
+    ASSERT_TRUE(csv->next(row));
     EXPECT_EQ(CSVFile::EMPTY_ROW(), row);
+
+    // Now, let's try to append something to this file.
+    CSVRow row_write(3);
+    row_write.writeAt(0, "dog");
+    row_write.writeAt(1, 2);
+    row_write.writeAt(2, "blue");
+    ASSERT_NO_THROW(csv->append(row_write));
+
+    // Close the file.
+    ASSERT_NO_THROW(csv->flush());
+    csv->close();
+
+    // Check the the file contents are correct.
+    EXPECT_EQ("animal,age,color\n"
+              "cat,10,white\n"
+              "lion,15,yellow\n"
+              "dog,2,blue\n",
+              readFile());
+
+    // Any attempt to read from the file or write to it should now fail.
+    EXPECT_FALSE(csv->next(row));
+    EXPECT_THROW(csv->append(row_write), CSVFileError);
 }
 
-// This test checks that a file can be used both for reading
-// and writing. When content is appended to the end of file,
-// an attempt to read results in empty row returned.
-TEST_F(CSVFileTest, openReadWrite) {
+// This test checks that contents may be appended to a file which hasn't
+// been fully parsed/read.
+TEST_F(CSVFileTest, openReadPartialWrite) {
+    // Create a CSV file with two rows in it.
     writeFile("animal,age,color\n"
               "cat,10,white\n"
               "lion,15,yellow\n");
+
+    // Open this file.
     boost::scoped_ptr<CSVFile> csv(new CSVFile(testfile_));
     ASSERT_NO_THROW(csv->open());
 
+    // Read the first row.
     CSVRow row0(0);
     ASSERT_NO_THROW(csv->next(row0));
     ASSERT_EQ(3, row0.getValuesCount());
@@ -238,15 +274,31 @@ TEST_F(CSVFileTest, openReadWrite) {
     EXPECT_EQ("10", row0.readAt(1));
     EXPECT_EQ("white", row0.readAt(2));
 
+    // There is still second row to be read. But, it should be possible to
+    // skip reading it and append new row to the end of file.
     CSVRow row_write(3);
     row_write.writeAt(0, "dog");
     row_write.writeAt(1, 2);
     row_write.writeAt(2, "blue");
     ASSERT_NO_THROW(csv->append(row_write));
 
+    // At this point, the file pointer is at the end of file, so reading
+    // should return empty row.
     CSVRow row1(0);
     ASSERT_NO_THROW(csv->next(row1));
     EXPECT_EQ(CSVFile::EMPTY_ROW(), row1);
+
+    // Close the file.
+    ASSERT_NO_THROW(csv->flush());
+    csv->close();
+
+    // Check that there are two initial lines and one new there.
+    EXPECT_EQ("animal,age,color\n"
+              "cat,10,white\n"
+              "lion,15,yellow\n"
+              "dog,2,blue\n",
+              readFile());
+
 }
 
 // This test checks that the new CSV file is created and header
@@ -274,11 +326,82 @@ TEST_F(CSVFileTest, recreate) {
     row1.writeAt(2, 2);
     ASSERT_NO_THROW(csv->append(row1));
 
+    ASSERT_NO_THROW(csv->flush());
+    csv->close();
+
     EXPECT_EQ("animal,color,age,comments\n"
               "dog,grey,3,nice one\n"
               "cat,black,2,\n",
               readFile());
 }
 
+// This test checks that the error is reported when the size of the row being
+// read doesn't match the number of columns of the CSV file.
+TEST_F(CSVFileTest, validate) {
+    // Create CSV file with 2 invalid rows in it: one too long, one too short.
+    // Apart from that, there are two valid columns that should be read
+    // successfuly.
+    writeFile("animal,age,color\n"
+              "cat,10,white\n"
+              "lion,15,yellow,black\n"
+              "dog,3,green\n"
+              "elephant,11\n");
+
+    boost::scoped_ptr<CSVFile> csv(new CSVFile(testfile_));
+    ASSERT_NO_THROW(csv->open());
+    // First row is correct.
+    CSVRow row0;
+    ASSERT_TRUE(csv->next(row0));
+    EXPECT_EQ("cat", row0.readAt(0));
+    EXPECT_EQ("10", row0.readAt(1));
+    EXPECT_EQ("white", row0.readAt(2));
+    EXPECT_EQ("success", csv->getReadMsg());
+    // This row is too long.
+    CSVRow row1;
+    EXPECT_FALSE(csv->next(row1));
+    EXPECT_NE("success", csv->getReadMsg());
+    // This row is correct.
+    CSVRow row2;
+    ASSERT_TRUE(csv->next(row2));
+    EXPECT_EQ("dog", row2.readAt(0));
+    EXPECT_EQ("3", row2.readAt(1));
+    EXPECT_EQ("green", row2.readAt(2));
+    EXPECT_EQ("success", csv->getReadMsg());
+    // This row is too short.
+    CSVRow row3;
+    EXPECT_FALSE(csv->next(row3));
+    EXPECT_NE("success", csv->getReadMsg());
+}
+
+// Test test checks that exception is thrown when the header of the CSV file
+// parsed, doesn't match the columns specified.
+TEST_F(CSVFileTest, validateHeader) {
+    // Create CSV file with 3 columns.
+    writeFile("animal,age,color\n"
+              "cat,10,white\n"
+              "lion,15,yellow,black\n");
+
+    // Invalid order of columns.
+    boost::scoped_ptr<CSVFile> csv(new CSVFile(testfile_));
+    csv->addColumn("color");
+    csv->addColumn("animal");
+    csv->addColumn("age");
+    EXPECT_THROW(csv->open(), CSVFileError);
+
+    // Too many columns.
+    csv.reset(new CSVFile(testfile_));
+    csv->addColumn("animal");
+    csv->addColumn("age");
+    csv->addColumn("color");
+    csv->addColumn("notes");
+    EXPECT_THROW(csv->open(), CSVFileError);
+
+    // Too few columns.
+    csv.reset(new CSVFile(testfile_));
+    csv->addColumn("animal");
+    csv->addColumn("age");
+    EXPECT_THROW(csv->open(), CSVFileError);
+}
+
 
 } // end of anonymous namespace