Browse Source

[2573] changed lexer's getTotalSourceSize/Position with accumulation.

now the size is not decreased when it's popped.  on second thought, I think
it makes more sense to eventually get more precise estimation of the
total progress.
JINMEI Tatuya 12 years ago
parent
commit
1b32af4e5c
3 changed files with 82 additions and 32 deletions
  1. 23 12
      src/lib/dns/master_lexer.cc
  2. 29 13
      src/lib/dns/master_lexer.h
  3. 30 7
      src/lib/dns/tests/master_lexer_unittest.cc

+ 23 - 12
src/lib/dns/master_lexer.cc

@@ -48,6 +48,7 @@ using namespace master_lexer_internal;
 
 struct MasterLexer::MasterLexerImpl {
     MasterLexerImpl() : source_(NULL), token_(MasterToken::NOT_STARTED),
+                        total_size_(0), popped_size_(0),
                         paren_count_(0), last_was_eol_(true),
                         has_previous_(false),
                         previous_paren_count_(0),
@@ -91,11 +92,28 @@ struct MasterLexer::MasterLexerImpl {
                 separators_.test(c & 0x7f));
     }
 
+    void setTotalSize() {
+        assert(source_ != NULL);
+        if (total_size_ != SOURCE_SIZE_UNKNOWN) {
+            const size_t current_size = source_->getSize();
+            if (current_size != SOURCE_SIZE_UNKNOWN) {
+                total_size_ += current_size;
+            } else {
+                total_size_ = SOURCE_SIZE_UNKNOWN;
+            }
+        }
+    }
+
     std::vector<InputSourcePtr> sources_;
     InputSource* source_;       // current source (NULL if sources_ is empty)
     MasterToken token_;         // currently recognized token (set by a state)
     std::vector<char> data_;    // placeholder for string data
 
+    // Keep track of the total size of all sources and characters that have
+    // been read from sources already popped.
+    size_t total_size_;         // accumulated size (# of chars) of sources
+    size_t popped_size_;        // total size of sources that have been popped
+
     // These are used in states, and defined here only as a placeholder.
     // The main lexer class does not need these members.
     size_t paren_count_;        // nest count of the parentheses
@@ -139,6 +157,7 @@ MasterLexer::pushSource(const char* filename, std::string* error) {
     impl_->source_ = impl_->sources_.back().get();
     impl_->has_previous_ = false;
     impl_->last_was_eol_ = true;
+    impl_->setTotalSize();
     return (true);
 }
 
@@ -148,6 +167,7 @@ MasterLexer::pushSource(std::istream& input) {
     impl_->source_ = impl_->sources_.back().get();
     impl_->has_previous_ = false;
     impl_->last_was_eol_ = true;
+    impl_->setTotalSize();
 }
 
 void
@@ -156,6 +176,7 @@ MasterLexer::popSource() {
         isc_throw(InvalidOperation,
                   "MasterLexer::popSource on an empty source");
     }
+    impl_->popped_size_ += impl_->source_->getPosition();
     impl_->sources_.pop_back();
     impl_->source_ = impl_->sources_.empty() ? NULL :
         impl_->sources_.back().get();
@@ -185,22 +206,12 @@ MasterLexer::getSourceLine() const {
 
 size_t
 MasterLexer::getTotalSourceSize() const {
-    size_t total_size = 0;
-    BOOST_FOREACH(InputSourcePtr& src, impl_->sources_) {
-        // If the size of any pushed source is unknown, the total is also
-        // considered unknown.
-        if (src->getSize() == SOURCE_SIZE_UNKNOWN) {
-            return (SOURCE_SIZE_UNKNOWN);
-        }
-
-        total_size += src->getSize();
-    }
-    return (total_size);
+    return (impl_->total_size_);
 }
 
 size_t
 MasterLexer::getPosition() const {
-    size_t position = 0;
+    size_t position = impl_->popped_size_;
     BOOST_FOREACH(InputSourcePtr& src, impl_->sources_) {
         position += src->getPosition();
     }

+ 29 - 13
src/lib/dns/master_lexer.h

@@ -461,7 +461,7 @@ public:
     ///
     /// This method returns the sum of the size of sources that have been
     /// pushed to the lexer by the time of the call.  It would give the
-    /// caller of some hint about the amount of data the lexer is working on.
+    /// caller some hint about the amount of data the lexer is working on.
     ///
     /// The size of a normal file is equal to the file size at the time of
     /// the source is pushed.  The size of other type of input stream is
@@ -472,25 +472,42 @@ public:
     /// stream is unknown.  It happens, for example, if the standard input
     /// is associated with a pipe from the output of another process and it's
     /// specified as an input source.  If the size of some of the pushed
-    /// pushed source is unknown, this method returns SOURCE_SIZE_UNKNOWN.
+    /// source is unknown, this method returns SOURCE_SIZE_UNKNOWN.
     ///
-    /// If there is no source pushed in the lexer, it returns 0.
+    /// The total size won't change when a source is popped.  So the return
+    /// values of this method will monotonically increase or
+    /// \c SOURCE_SIZE_UNKNOWN; once it returns \c SOURCE_SIZE_UNKNOWN,
+    /// any subsequent call will also result in that value, by the above
+    /// definition.
+    ///
+    /// Before pushing any source, it returns 0.
     ///
     /// \throw None
     size_t getTotalSourceSize() const;
 
-    /// \brief Return the position of lexer in the currently pushed sources.
+    /// \brief Return the position of lexer in the pushed sources so far.
     ///
     /// This method returns the position in terms of the number of recognized
-    /// characters from all sources.  Roughly speaking, the position in a
-    /// single source is the offset from the beginning of the file or stream
-    /// to the current "read cursor" of the lexer, and the return value of
-    /// this method is the sum of the position in all the pushed sources.
+    /// characters from all sources that have been pushed by the time of the
+    /// call.  Conceptually, the position in a single source is the offset
+    /// from the beginning of the file or stream to the current "read cursor"
+    /// of the lexer.  The return value of this method is the sum of the
+    /// positions in all the pushed sources.  If any of the sources has
+    /// already been popped, the position of the source at the time of the
+    /// pop operation will be used for the calculation.
     ///
     /// If the lexer reaches the end for each of all the pushed sources,
     /// the return value should be equal to that of \c getTotalSourceSize().
+    /// It's generally expected that a source is popped when the lexer
+    /// reaches the end of the source.  So, when the application of this
+    /// class parses all contents of all sources, possibly with multiple
+    /// pushes and pops, the return value of this method and
+    /// \c getTotalSourceSize() should be identical (unless the latter
+    /// returns SOURCE_SIZE_UNKNOWN).  But this is not necessarily
+    /// guaranteed as the application can pop a source in the middle of
+    /// parsing it.
     ///
-    /// If there is no source pushed in the lexer, it returns 0.
+    /// Before pushing any source, it returns 0.
     ///
     /// The return values of this method and \c getTotalSourceSize() would
     /// give the caller an idea of the progress of the lexer at the time of
@@ -499,11 +516,10 @@ public:
     /// this way may not make much sense; it can only give an informational
     /// hint of the progress.
     ///
-    /// Note also that if a source is popped, this method will normally return
-    /// a smaller number by definition (and so will \c getTotalSourceSize()).
-    /// Likewise, the conceptual "read cursor" would move backward after a
+    /// Note that the conceptual "read cursor" would move backward after a
     /// call to \c ungetToken(), in which case this method will return a
-    /// smaller value, too.
+    /// smaller value.  That is, unlike \c getTotalSourceSize(), return
+    /// values of this method may not always monotonically increase.
     ///
     /// \throw None
     size_t getPosition() const;

+ 30 - 7
src/lib/dns/tests/master_lexer_unittest.cc

@@ -52,7 +52,6 @@ void
 checkEmptySource(const MasterLexer& lexer) {
     EXPECT_TRUE(lexer.getSourceName().empty());
     EXPECT_EQ(0, lexer.getSourceLine());
-    EXPECT_EQ(0, lexer.getTotalSourceSize());
     EXPECT_EQ(0, lexer.getPosition());
 }
 
@@ -78,6 +77,7 @@ TEST_F(MasterLexerTest, pushStream) {
     lexer.popSource();
     EXPECT_EQ(0, lexer.getSourceCount());
     checkEmptySource(lexer);
+    EXPECT_EQ(4, lexer.getTotalSourceSize()); // this shouldn't change
 }
 
 TEST_F(MasterLexerTest, pushFile) {
@@ -96,6 +96,7 @@ TEST_F(MasterLexerTest, pushFile) {
     lexer.popSource();
     checkEmptySource(lexer);
     EXPECT_EQ(0, lexer.getSourceCount());
+    EXPECT_EQ(143, lexer.getTotalSourceSize()); // this shouldn't change
 
     // If we give a non NULL string pointer, its content will be intact
     // if pushSource succeeds.
@@ -124,22 +125,44 @@ TEST_F(MasterLexerTest, pushFileFail) {
 }
 
 TEST_F(MasterLexerTest, nestedPush) {
-    ss << "test";
+    const string test_txt = "test";
+    ss << test_txt;
     lexer.pushSource(ss);
+
+    EXPECT_EQ(test_txt.size(), lexer.getTotalSourceSize());
+    EXPECT_EQ(0, lexer.getPosition());
+
     EXPECT_EQ(expected_stream_name, lexer.getSourceName());
 
+    // Read the string; getPosition() should reflect that.
+    EXPECT_EQ(MasterToken::STRING, lexer.getNextToken().getType());
+    EXPECT_EQ(test_txt.size(), lexer.getPosition());
+
     // We can push another source without popping the previous one.
     lexer.pushSource(TEST_DATA_SRCDIR "/masterload.txt");
     EXPECT_EQ(TEST_DATA_SRCDIR "/masterload.txt", lexer.getSourceName());
-    EXPECT_EQ(143 + 4, lexer.getTotalSourceSize()); // see above for magic nums
+    EXPECT_EQ(143 + test_txt.size(),
+              lexer.getTotalSourceSize()); // see above for magic nums
+
+    // the next token should be the EOL (skipping a comment line), its
+    // position in the file is 35 (hardcoded).
+    EXPECT_EQ(MasterToken::END_OF_LINE, lexer.getNextToken().getType());
+    EXPECT_EQ(test_txt.size() + 35, lexer.getPosition());
 
     // popSource() works on the "topmost" (last-pushed) source
     lexer.popSource();
     EXPECT_EQ(expected_stream_name, lexer.getSourceName());
-    EXPECT_EQ(4, lexer.getTotalSourceSize());
+
+    // pop shouldn't change the total size and the current position
+    EXPECT_EQ(143 + test_txt.size(), lexer.getTotalSourceSize());
+    EXPECT_EQ(test_txt.size() + 35, lexer.getPosition());
 
     lexer.popSource();
     EXPECT_TRUE(lexer.getSourceName().empty());
+
+    // size and position still shouldn't change
+    EXPECT_EQ(143 + test_txt.size(), lexer.getTotalSourceSize());
+    EXPECT_EQ(test_txt.size() + 35, lexer.getPosition());
 }
 
 TEST_F(MasterLexerTest, unknownSourceSize) {
@@ -155,9 +178,9 @@ TEST_F(MasterLexerTest, unknownSourceSize) {
     // Then the total size is also unknown.
     EXPECT_EQ(MasterLexer::SOURCE_SIZE_UNKNOWN, lexer.getTotalSourceSize());
 
-    // If we pop that source, the size becomes known again.
+    // Even if we pop that source, the size is still unknown.
     lexer.popSource();
-    EXPECT_EQ(4, lexer.getTotalSourceSize());
+    EXPECT_EQ(MasterLexer::SOURCE_SIZE_UNKNOWN, lexer.getTotalSourceSize());
 }
 
 TEST_F(MasterLexerTest, invalidPop) {
@@ -308,7 +331,7 @@ TEST_F(MasterLexerTest, includeAndInitialWS) {
     lexer.pushSource(ss2);
     EXPECT_EQ(MasterToken::INITIAL_WS,
               lexer.getNextToken(MasterLexer::INITIAL_WS).getType());
-    EXPECT_EQ(2, lexer.getPosition()); // should be sum of position positions.
+    EXPECT_EQ(2, lexer.getPosition()); // should be sum of pushed positions.
 }
 
 // Test only one token can be ungotten