Michal 'vorner' Vaner 12 years ago
parent
commit
dfb173cea5

+ 64 - 12
src/lib/dns/master_lexer.cc

@@ -33,9 +33,13 @@ typedef boost::shared_ptr<master_lexer_internal::InputSource> InputSourcePtr;
 }
 using namespace master_lexer_internal;
 
+
 struct MasterLexer::MasterLexerImpl {
     MasterLexerImpl() : source_(NULL), token_(Token::NOT_STARTED),
-                        paren_count_(0), last_was_eol_(false)
+                        paren_count_(0), last_was_eol_(false),
+                        has_previous_(false),
+                        previous_paren_count_(0),
+                        previous_was_eol_(false)
     {
         separators_.set('\r');
         separators_.set('\n');
@@ -91,6 +95,11 @@ struct MasterLexer::MasterLexerImpl {
     // if escaped by a backslash.  See isTokenEnd() for the bitmap size.
     std::bitset<128> separators_;
     std::bitset<128> esc_separators_;
+
+    // These are to allow restoring state before previous token.
+    bool has_previous_;
+    size_t previous_paren_count_;
+    bool previous_was_eol_;
 };
 
 MasterLexer::MasterLexer() : impl_(new MasterLexerImpl) {
@@ -116,6 +125,7 @@ MasterLexer::pushSource(const char* filename, std::string* error) {
     }
 
     impl_->source_ = impl_->sources_.back().get();
+    impl_->has_previous_ = false;
     return (true);
 }
 
@@ -123,6 +133,7 @@ void
 MasterLexer::pushSource(std::istream& input) {
     impl_->sources_.push_back(InputSourcePtr(new InputSource(input)));
     impl_->source_ = impl_->sources_.back().get();
+    impl_->has_previous_ = false;
 }
 
 void
@@ -134,6 +145,7 @@ MasterLexer::popSource() {
     impl_->sources_.pop_back();
     impl_->source_ = impl_->sources_.empty() ? NULL :
         impl_->sources_.back().get();
+    impl_->has_previous_ = false;
 }
 
 std::string
@@ -152,12 +164,53 @@ MasterLexer::getSourceLine() const {
     return (impl_->sources_.back()->getCurrentLine());
 }
 
+const MasterLexer::Token&
+MasterLexer::getNextToken(Options options) {
+    // There's no current source (none pushed yet, or all popped again)
+    if (impl_->source_ == NULL) {
+        isc_throw(isc::InvalidOperation, "No source to read tokens from");
+    }
+    // Store the current state so we can restore it in ungetToken()
+    impl_->previous_paren_count_ = impl_->paren_count_;
+    impl_->previous_was_eol_ = impl_->last_was_eol_;
+    impl_->source_->mark();
+    impl_->has_previous_ = true;
+    // Reset the token now, so we can check a token was actually produced.
+    // This is only a debugging aid.
+    impl_->token_ = Token(Token::NO_TOKEN_PRODUCED);
+    // And get the token

+    // start() produces some tokens (e.g. EOF) itself; it returns NULL then.
+    const State* state = State::start(*this, options);
+    if (state != NULL) {
+        state->handle(*this);
+    }
+    // Make sure a token was produced. A missing one would be a bug in the
+    // lexer itself (it Can Not Happen), so we assert instead of throwing.
+    assert(impl_->token_.getType() != Token::ERROR ||
+           impl_->token_.getErrorCode() != Token::NO_TOKEN_PRODUCED);
+    return (impl_->token_);
+}
+
+void
+MasterLexer::ungetToken() {
+    if (impl_->has_previous_) {
+        impl_->has_previous_ = false; // only one token can be returned
+        impl_->source_->ungetAll(); // assumed to rewind to mark() - confirm
+        impl_->last_was_eol_ = impl_->previous_was_eol_;
+        impl_->paren_count_ = impl_->previous_paren_count_;
+    } else { // nothing fetched since the last unget or source change
+        isc_throw(isc::InvalidOperation, "No token to unget ready");
+    }
+}
+
 namespace {
 const char* const error_text[] = {
     "lexer not started",        // NOT_STARTED
     "unbalanced parentheses",   // UNBALANCED_PAREN
     "unexpected end of input",  // UNEXPECTED_END
-    "unbalanced quotes"         // UNBALANCED_QUOTES
+    "unbalanced quotes",        // UNBALANCED_QUOTES
+    "no token produced"         // NO_TOKEN_PRODUCED
 };
 const size_t error_text_max_count = sizeof(error_text) / sizeof(error_text[0]);
 }
@@ -201,7 +254,7 @@ class CRLF : public State {
 public:
     CRLF() {}
     virtual ~CRLF() {}          // see the base class for the destructor
-    virtual const State* handle(MasterLexer& lexer) const {
+    virtual void handle(MasterLexer& lexer) const {
         // We've just seen '\r'.  If this is part of a sequence of '\r\n',
         // we combine them as a single END-OF-LINE.  Otherwise we treat the
         // single '\r' as an EOL and continue tokenization from the character
@@ -218,7 +271,6 @@ public:
         }
         getLexerImpl(lexer)->token_ = Token(Token::END_OF_LINE);
         getLexerImpl(lexer)->last_was_eol_ = true;
-        return (NULL);
     }
 };
 
@@ -226,14 +278,14 @@ class String : public State {
 public:
     String() {}
     virtual ~String() {}      // see the base class for the destructor
-    virtual const State* handle(MasterLexer& lexer) const;
+    virtual void handle(MasterLexer& lexer) const;
 };
 
 class QString : public State {
 public:
     QString() {}
     virtual ~QString() {}      // see the base class for the destructor
-    virtual const State* handle(MasterLexer& lexer) const;
+    virtual void handle(MasterLexer& lexer) const;
 };
 
 // We use a common instance of each state in a singleton-like way to save
@@ -325,7 +377,7 @@ State::start(MasterLexer& lexer, MasterLexer::Options options) {
     }
 }
 
-const State*
+void
 String::handle(MasterLexer& lexer) const {
     std::vector<char>& data = getLexerImpl(lexer)->data_;
     data.clear();
@@ -339,14 +391,14 @@ String::handle(MasterLexer& lexer) const {
             getLexerImpl(lexer)->source_->ungetChar();
             getLexerImpl(lexer)->token_ =
                 MasterLexer::Token(&data.at(0), data.size());
-            return (NULL);
+            return;
         }
         escaped = (c == '\\' && !escaped);
         data.push_back(c);
     }
 }
 
-const State*
+void
 QString::handle(MasterLexer& lexer) const {
     MasterLexer::Token& token = getLexerImpl(lexer)->token_;
     std::vector<char>& data = getLexerImpl(lexer)->data_;
@@ -357,7 +409,7 @@ QString::handle(MasterLexer& lexer) const {
         const int c = getLexerImpl(lexer)->source_->getChar();
         if (c == InputSource::END_OF_STREAM) {
             token = Token(Token::UNEXPECTED_END);
-            return (NULL);
+            return;
         } else if (c == '"') {
             if (escaped) {
                 // found escaped '"'. overwrite the preceding backslash.
@@ -366,12 +418,12 @@ QString::handle(MasterLexer& lexer) const {
                 data.back() = '"';
             } else {
                 token = MasterLexer::Token(&data.at(0), data.size(), true);
-                return (NULL);
+                return;
             }
         } else if (c == '\n' && !escaped) {
             getLexerImpl(lexer)->source_->ungetChar();
             token = Token(Token::UNBALANCED_QUOTES);
-            return (NULL);
+            return;
         } else {
             escaped = (c == '\\' && !escaped);
             data.push_back(c);

+ 57 - 1
src/lib/dns/master_lexer.h

@@ -69,6 +69,14 @@ class State;
 class MasterLexer {
     friend class master_lexer_internal::State;
 public:
+    /// \brief Exception thrown when reading from the input stream or file
+    /// fails (it is raised by InputSource::getChar()).
+    struct ReadError : public Unexpected {
+        ReadError(const char* file, size_t line, const char* what) :
+            Unexpected(file, line, what)
+        {}
+    };
+
     class Token;       // we define it separately for better readability
 
     /// \brief Options for getNextToken.
@@ -178,6 +186,52 @@ public:
     /// \return The current line number of the source (see the description)
     size_t getSourceLine() const;
 
+    /// \brief Parse and return another token from the input.
+    ///
+    /// It reads a bit of the most recently pushed source and produces the
+    /// next token found in it (END_OF_FILE once the source is exhausted).
+    ///
+    /// This method does not provide the strong exception guarantee. Generally,
+    /// if it throws, the object should not be used any more and should be
+    /// discarded. It was decided all the exceptions thrown from here are
+    /// serious enough that aborting the loading process is the only reasonable
+    /// recovery anyway, so the strong exception guarantee is not needed.
+    ///
+    /// \param options The options can be used to modify the tokenization.
+    ///     This parameter can make the method report things which are
+    ///     usually ignored. Multiple options can be passed at once by
+    ///     bitwise or (e.g. option1 | option2). See description of available
+    ///     options.
+    /// \return Next token found in the input. Note that the token refers to
+    ///     some internal data in the lexer. It is valid only until
+    ///     getNextToken or ungetToken is called. Also, the token becomes
+    ///     invalid when the lexer is destroyed.
+    /// \throw isc::InvalidOperation in case the source is not available. This
+    ///     may mean the pushSource() has not been called yet, or that all
+    ///     the sources have been removed again by popSource().
+    /// \throw ReadError in case there's a problem reading from the underlying
+    ///     source (e.g. I/O error in the file on the disk).
+    /// \throw std::bad_alloc in case allocation of some internal resources
+    ///     or the token fails.
+    const Token& getNextToken(Options options = NONE);
+
+    /// \brief Return the last token back to the lexer.
+    ///
+    /// The method undoes the last call to getNextToken(). If you call
+    /// getNextToken() again with the same options, it'll return the same
+    /// token. If the options are different, it may return a different token,
+    /// but it acts as if the previous getNextToken() was never called.
+    ///
+    /// It is possible to return only one token back in time (you can't call
+    /// ungetToken() twice in a row without successfully calling
+    /// getNextToken() in between).
+    ///
+    /// It does not work after change of source (by pushSource or popSource).
+    ///
+    /// \throw isc::InvalidOperation If called a second time in a row or if
+    ///     getNextToken() was not called since the last change of the source.
+    void ungetToken();
+
 private:
     struct MasterLexerImpl;
     MasterLexerImpl* impl_;
@@ -234,8 +288,10 @@ public:
         NOT_STARTED, ///< The lexer is just initialized and has no token
         UNBALANCED_PAREN,       ///< Unbalanced parentheses detected
         UNEXPECTED_END, ///< The lexer reaches the end of line or file
-                       /// unexpectedly
+                        /// unexpectedly
         UNBALANCED_QUOTES,      ///< Unbalanced quotations detected
+        NO_TOKEN_PRODUCED, ///< No token was produced. This means programmer
+                           /// error and should never get out of the lexer.
         MAX_ERROR_CODE ///< Max integer corresponding to valid error codes.
                        /// (excluding this one). Mainly for internal use.
     };

+ 2 - 1
src/lib/dns/master_lexer_inputsource.cc

@@ -13,6 +13,7 @@
 // PERFORMANCE OF THIS SOFTWARE.
 
 #include <dns/master_lexer_inputsource.h>
+#include <dns/master_lexer.h>
 
 #include <cerrno>
 #include <cstring>
@@ -94,7 +95,7 @@ InputSource::getChar() {
         // This has to come after the .eof() check as some
         // implementations seem to check the eofbit also in .fail().
         if (input_.fail()) {
-            isc_throw(ReadError,
+            isc_throw(MasterLexer::ReadError,
                       "Error reading from the input stream: " << getName());
         }
         buffer_.push_back(c);

+ 2 - 10
src/lib/dns/master_lexer_inputsource.h

@@ -56,14 +56,6 @@ public:
         {}
     };
 
-    /// \brief Exception thrown when we fail to read from the input
-    /// stream or file.
-    struct ReadError : public Unexpected {
-        ReadError(const char* file, size_t line, const char* what) :
-            Unexpected(file, line, what)
-        {}
-    };
-
     /// \brief Exception thrown when we fail to open the input file.
     struct OpenError : public Unexpected {
         OpenError(const char* file, size_t line, const char* what) :
@@ -124,8 +116,8 @@ public:
     /// \brief Returns a single character from the input source. If end
     /// of file is reached, \c END_OF_STREAM is returned.
     ///
-    /// \throws ReadError when reading from the input stream or file
-    /// fails.
+    /// \throws MasterLexer::ReadError when reading from the input stream or
+    /// file fails.
     int getChar();
 
     /// \brief Skips backward a single character in the input

+ 9 - 7
src/lib/dns/master_lexer_state.h

@@ -17,6 +17,8 @@
 
 #include <dns/master_lexer.h>
 
+#include <boost/function.hpp>
+
 namespace isc {
 namespace dns {
 
@@ -67,7 +69,7 @@ public:
     /// tokenization session.  The lexer passes a reference to itself
     /// and options given in \c getNextToken().
     ///
-    /// \throw InputSource::ReadError Unexpected I/O error
+    /// \throw MasterLexer::ReadError Unexpected I/O error
     /// \throw std::bad_alloc Internal resource allocation failure
     ///
     /// \param lexer The lexer object that holds the main context.
@@ -80,16 +82,16 @@ public:
     /// \brief Handle the process of one specific state.
     ///
     /// This method is expected to be called on the object returned by
-    /// start(), and keep called on the returned object until NULL is
-    /// returned.  The call chain will form the complete state transition.
+    /// start(). In the usual state transition design pattern, it would
+    /// return the next state. But as we noticed, we never have another
+    /// state, so we simplify it by not returning anything instead of
+    /// returning NULL every time.
     ///
-    /// \throw InputSource::ReadError Unexpected I/O error
+    /// \throw MasterLexer::ReadError Unexpected I/O error
     /// \throw std::bad_alloc Internal resource allocation failure
     ///
     /// \param lexer The lexer object that holds the main context.
-    /// \return A pointer to the next state object or NULL if the transition
-    /// is completed.
-    virtual const State* handle(MasterLexer& lexer) const = 0;
+    virtual void handle(MasterLexer& lexer) const = 0;
 
     /// \brief Types of states.
     ///

+ 31 - 31
src/lib/dns/tests/master_lexer_state_unittest.cc

@@ -227,7 +227,7 @@ TEST_F(MasterLexerStateTest, crlf) {
 
     // 1. A sequence of \r, \n is recognized as a single 'end-of-line'
     EXPECT_EQ(&s_crlf, State::start(lexer, common_options)); // recognize '\r'
-    EXPECT_EQ(s_null, s_crlf.handle(lexer));   // recognize '\n'
+    s_crlf.handle(lexer);   // recognize '\n'
     EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
     EXPECT_TRUE(s_crlf.wasLastEOL(lexer));
 
@@ -235,22 +235,22 @@ TEST_F(MasterLexerStateTest, crlf) {
     // 'end-of-line'.  then there will be "initial WS"
     EXPECT_EQ(&s_crlf, State::start(lexer, common_options)); // recognize '\r'
     // see ' ', "unget" it
-    EXPECT_EQ(s_null, s_crlf.handle(lexer));
+    s_crlf.handle(lexer);
     EXPECT_EQ(s_null, State::start(lexer, common_options)); // recognize ' '
     EXPECT_EQ(Token::INITIAL_WS, s_crlf.getToken(lexer).getType());
 
     // 3. comment between \r and \n
     EXPECT_EQ(&s_crlf, State::start(lexer, common_options)); // recognize '\r'
     // skip comments, recognize '\n'
-    EXPECT_EQ(s_null, s_crlf.handle(lexer));
+    s_crlf.handle(lexer);
     EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
     EXPECT_EQ(&s_string, State::start(lexer, common_options));
-    EXPECT_EQ(s_null, s_string.handle(lexer)); // skip 'a'
+    s_string.handle(lexer); // skip 'a'
 
     // 4. \r then EOF
     EXPECT_EQ(&s_crlf, State::start(lexer, common_options)); // recognize '\r'
     // see EOF, then "unget" it
-    EXPECT_EQ(s_null, s_crlf.handle(lexer));
+    s_crlf.handle(lexer);
     EXPECT_EQ(s_null, State::start(lexer, common_options));  // recognize EOF
     EXPECT_EQ(Token::END_OF_FILE, s_crlf.getToken(lexer).getType());
 }
@@ -281,41 +281,41 @@ TEST_F(MasterLexerStateTest, string) {
     lexer.pushSource(ss);
 
     EXPECT_EQ(&s_string, State::start(lexer, common_options));
-    EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see \n
+    s_string.handle(lexer); // recognize str, see \n
     EXPECT_FALSE(s_string.wasLastEOL(lexer));
     stringTokenCheck("followed-by-EOL", s_string.getToken(lexer));
     EXPECT_EQ(s_null, State::start(lexer, common_options)); // skip \n
 
     EXPECT_EQ(&s_string, State::start(lexer, common_options));
-    EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see \r
+    s_string.handle(lexer); // recognize str, see \r
     stringTokenCheck("followed-by-CR", s_string.getToken(lexer));
     EXPECT_EQ(&s_crlf, State::start(lexer, common_options)); // handle \r...
-    EXPECT_EQ(s_null, s_crlf.handle(lexer)); // ...and skip it
+    s_crlf.handle(lexer); // ...and skip it
 
     EXPECT_EQ(&s_string, State::start(lexer, common_options));
-    EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see ' '
+    s_string.handle(lexer); // recognize str, see ' '
     stringTokenCheck("followed-by-space", s_string.getToken(lexer));
 
     // skip ' ', then recognize the next string
     EXPECT_EQ(&s_string, State::start(lexer, common_options));
-    EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see \t
+    s_string.handle(lexer); // recognize str, see \t
     stringTokenCheck("followed-by-tab", s_string.getToken(lexer));
 
     // skip \t, then recognize the next string
     EXPECT_EQ(&s_string, State::start(lexer, common_options));
-    EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see comment
+    s_string.handle(lexer); // recognize str, see comment
     stringTokenCheck("followed-by-comment", s_string.getToken(lexer));
     EXPECT_EQ(s_null, State::start(lexer, common_options)); // skip \n after it
 
     EXPECT_EQ(&s_string, State::start(lexer, common_options));
-    EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see '('
+    s_string.handle(lexer); // recognize str, see '('
     stringTokenCheck("followed-by-paren", s_string.getToken(lexer));
     EXPECT_EQ(&s_string, State::start(lexer, common_options)); // str in ()
-    EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize the str, see ')'
+    s_string.handle(lexer); // recognize the str, see ')'
     stringTokenCheck("closing", s_string.getToken(lexer));
 
     EXPECT_EQ(&s_string, State::start(lexer, common_options));
-    EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see EOF
+    s_string.handle(lexer); // recognize str, see EOF
     stringTokenCheck("followed-by-EOF", s_string.getToken(lexer));
 }
 
@@ -331,32 +331,32 @@ TEST_F(MasterLexerStateTest, stringEscape) {
     lexer.pushSource(ss);
 
     EXPECT_EQ(&s_string, State::start(lexer, common_options));
-    EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see ' ' at end
+    s_string.handle(lexer); // recognize str, see ' ' at end
     stringTokenCheck("escaped\\ space", s_string.getToken(lexer));
 
     EXPECT_EQ(&s_string, State::start(lexer, common_options));
-    EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see ' ' at end
+    s_string.handle(lexer); // recognize str, see ' ' at end
     stringTokenCheck("escaped\\\ttab", s_string.getToken(lexer));
 
     EXPECT_EQ(&s_string, State::start(lexer, common_options));
-    EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see ' ' at end
+    s_string.handle(lexer); // recognize str, see ' ' at end
     stringTokenCheck("escaped\\(paren", s_string.getToken(lexer));
 
     EXPECT_EQ(&s_string, State::start(lexer, common_options));
-    EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see ' ' at end
+    s_string.handle(lexer); // recognize str, see ' ' at end
     stringTokenCheck("escaped\\)close", s_string.getToken(lexer));
 
     EXPECT_EQ(&s_string, State::start(lexer, common_options));
-    EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see ' ' at end
+    s_string.handle(lexer); // recognize str, see ' ' at end
     stringTokenCheck("escaped\\;comment", s_string.getToken(lexer));
 
     EXPECT_EQ(&s_string, State::start(lexer, common_options));
-    EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see ' ' in mid
+    s_string.handle(lexer); // recognize str, see ' ' in mid
     stringTokenCheck("escaped\\\\", s_string.getToken(lexer));
 
     // Confirm the word that follows the escaped '\' is correctly recognized.
     EXPECT_EQ(&s_string, State::start(lexer, common_options));
-    EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see ' ' at end
+    s_string.handle(lexer); // recognize str, see ' ' at end
     stringTokenCheck("backslash", s_string.getToken(lexer));
 }
 
@@ -376,7 +376,7 @@ TEST_F(MasterLexerStateTest, quotedString) {
 
     // by default, '"' doesn't have any special meaning and part of string
     EXPECT_EQ(&s_string, State::start(lexer, common_options));
-    EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see \n
+    s_string.handle(lexer); // recognize str, see \n
     stringTokenCheck("\"ignore-quotes\"", s_string.getToken(lexer));
     EXPECT_EQ(s_null, State::start(lexer, common_options)); // skip \n after it
     EXPECT_TRUE(s_string.wasLastEOL(lexer));
@@ -386,35 +386,35 @@ TEST_F(MasterLexerStateTest, quotedString) {
     const MasterLexer::Options options = common_options | MasterLexer::QSTRING;
     EXPECT_EQ(&s_qstring, State::start(lexer, options));
     EXPECT_FALSE(s_string.wasLastEOL(lexer)); // EOL is canceled due to '"'
-    EXPECT_EQ(s_null, s_qstring.handle(lexer));
+    s_qstring.handle(lexer);
     stringTokenCheck("quoted string", s_string.getToken(lexer), true);
 
     // Also checks other separator characters within a qstring
     EXPECT_EQ(&s_qstring, State::start(lexer, options));
-    EXPECT_EQ(s_null, s_qstring.handle(lexer));
+    s_qstring.handle(lexer);
     stringTokenCheck("quoted()\t\rstring", s_string.getToken(lexer), true);
 
     // escape character mostly doesn't have any effect in the qstring
     // processing
     EXPECT_EQ(&s_qstring, State::start(lexer, options));
-    EXPECT_EQ(s_null, s_qstring.handle(lexer));
+    s_qstring.handle(lexer);
     stringTokenCheck("escape\\ in quote", s_string.getToken(lexer), true);
 
     // The only exception is the quotation mark itself.  Note that the escape
     // only works on the quotation mark immediately after it.
     EXPECT_EQ(&s_qstring, State::start(lexer, options));
-    EXPECT_EQ(s_null, s_qstring.handle(lexer));
+    s_qstring.handle(lexer);
     stringTokenCheck("escaped\"", s_string.getToken(lexer), true);
 
     // quoted '\' then '"'.  Unlike the previous case '"' shouldn't be
     // escaped.
     EXPECT_EQ(&s_qstring, State::start(lexer, options));
-    EXPECT_EQ(s_null, s_qstring.handle(lexer));
+    s_qstring.handle(lexer);
     stringTokenCheck("escaped backslash\\\\", s_string.getToken(lexer), true);
 
     // ';' has no meaning in a quoted string (not indicating a comment)
     EXPECT_EQ(&s_qstring, State::start(lexer, options));
-    EXPECT_EQ(s_null, s_qstring.handle(lexer));
+    s_qstring.handle(lexer);
     stringTokenCheck("no;comment", s_string.getToken(lexer), true);
 }
 
@@ -427,7 +427,7 @@ TEST_F(MasterLexerStateTest, brokenQuotedString) {
     // EOL is encountered without closing the quote
     const MasterLexer::Options options = common_options | MasterLexer::QSTRING;
     EXPECT_EQ(&s_qstring, State::start(lexer, options));
-    EXPECT_EQ(s_null, s_qstring.handle(lexer));
+    s_qstring.handle(lexer);
     ASSERT_EQ(Token::ERROR, s_qstring.getToken(lexer).getType());
     EXPECT_EQ(Token::UNBALANCED_QUOTES,
               s_qstring.getToken(lexer).getErrorCode());
@@ -437,12 +437,12 @@ TEST_F(MasterLexerStateTest, brokenQuotedString) {
 
     // \n is okay in a quoted string if escaped
     EXPECT_EQ(&s_qstring, State::start(lexer, options));
-    EXPECT_EQ(s_null, s_qstring.handle(lexer));
+    s_qstring.handle(lexer);
     stringTokenCheck("quoted\\\n", s_string.getToken(lexer), true);
 
     // EOF is encountered without closing the quote
     EXPECT_EQ(&s_qstring, State::start(lexer, options));
-    EXPECT_EQ(s_null, s_qstring.handle(lexer));
+    s_qstring.handle(lexer);
     ASSERT_EQ(Token::ERROR, s_qstring.getToken(lexer).getType());
     EXPECT_EQ(Token::UNEXPECTED_END, s_qstring.getToken(lexer).getErrorCode());
     // If we continue we'll simply see the EOF

+ 5 - 2
src/lib/dns/tests/master_lexer_token_unittest.cc

@@ -142,15 +142,18 @@ TEST_F(MasterLexerTokenTest, errors) {
     EXPECT_EQ("unbalanced quotes",
               MasterLexer::Token(MasterLexer::Token::UNBALANCED_QUOTES).
               getErrorText());
+    EXPECT_EQ("no token produced",
+              MasterLexer::Token(MasterLexer::Token::NO_TOKEN_PRODUCED).
+              getErrorText());
 
     // getErrorCode/Text() isn't allowed for non error types
     EXPECT_THROW(token_num.getErrorCode(), isc::InvalidOperation);
     EXPECT_THROW(token_num.getErrorText(), isc::InvalidOperation);
 
-    // Only the pre-defined error code is accepted.  Hardcoding '4' (max code
+    // Only the pre-defined error code is accepted.  Hardcoding '5' (max code
     // + 1) is intentional; it'd be actually better if we notice it when we
     // update the enum list (which shouldn't happen too often).
-    EXPECT_THROW(MasterLexer::Token(MasterLexer::Token::ErrorCode(4)),
+    EXPECT_THROW(MasterLexer::Token(MasterLexer::Token::ErrorCode(5)),
                  isc::InvalidParameter);
 
     // Check the coexistence of "from number" and "from error-code"

+ 160 - 0
src/lib/dns/tests/master_lexer_unittest.cc

@@ -15,10 +15,14 @@
 #include <exceptions/exceptions.h>
 
 #include <dns/master_lexer.h>
+#include <dns/master_lexer_state.h>
 
 #include <gtest/gtest.h>
 
 #include <boost/lexical_cast.hpp>
+#include <boost/function.hpp>
+#include <boost/scoped_ptr.hpp>
+#include <boost/bind.hpp>
 
 #include <string>
 #include <sstream>
@@ -27,6 +31,8 @@ using namespace isc::dns;
 using std::string;
 using std::stringstream;
 using boost::lexical_cast;
+using boost::scoped_ptr;
+using master_lexer_internal::State;
 
 namespace {
 
@@ -124,4 +130,158 @@ TEST_F(MasterLexerTest, invalidPop) {
     EXPECT_THROW(lexer.popSource(), isc::InvalidOperation);
 }
 
+// Test that getNextToken() throws when no source has been pushed yet.
+TEST_F(MasterLexerTest, noSource) {
+    EXPECT_THROW(lexer.getNextToken(), isc::InvalidOperation);
+}
+
+// Test getting tokens of several types one after another
+TEST_F(MasterLexerTest, getNextToken) {
+    ss << "\n   \n\"STRING\"\n";
+    lexer.pushSource(ss);
+
+    // First, the leading newline comes out.
+    EXPECT_EQ(MasterLexer::Token::END_OF_LINE, lexer.getNextToken().getType());
+    // Then the initial whitespace, because we pass the INITIAL_WS option.
+    EXPECT_EQ(MasterLexer::Token::INITIAL_WS,
+              lexer.getNextToken(MasterLexer::INITIAL_WS).getType());
+    // The newline ending the whitespace-only line
+    EXPECT_EQ(MasterLexer::Token::END_OF_LINE, lexer.getNextToken().getType());
+    // The quoted string (recognized as such because of the QSTRING option)
+    EXPECT_EQ(MasterLexer::Token::QSTRING,
+              lexer.getNextToken(MasterLexer::QSTRING).getType());
+
+    // And finally the end of line, followed by the end of file
+    EXPECT_EQ(MasterLexer::Token::END_OF_LINE, lexer.getNextToken().getType());
+    EXPECT_EQ(MasterLexer::Token::END_OF_FILE, lexer.getNextToken().getType());
+}
+
+// Test that we correctly find the end of file.
+TEST_F(MasterLexerTest, eof) {
+    // Leave the ss empty.
+    lexer.pushSource(ss);
+
+    // The first one is found to be EOF
+    EXPECT_EQ(MasterLexer::Token::END_OF_FILE, lexer.getNextToken().getType());
+    // And it stays on EOF for any following attempts
+    EXPECT_EQ(MasterLexer::Token::END_OF_FILE, lexer.getNextToken().getType());
+    // And we can step back one token, but that is the EOF too.
+    lexer.ungetToken();
+    EXPECT_EQ(MasterLexer::Token::END_OF_FILE, lexer.getNextToken().getType());
+}
+
+// Check that we properly return an error when there's an opening
+// parenthesis and no closing one
+TEST_F(MasterLexerTest, getUnbalancedParen) {
+    ss << "(\"string\"";
+    lexer.pushSource(ss);
+
+    // The string gets out first (quotes are part of it without QSTRING)
+    EXPECT_EQ(MasterLexer::Token::STRING, lexer.getNextToken().getType());
+    // Then the unbalanced parenthesis is reported as an error
+    EXPECT_EQ(MasterLexer::Token::UNBALANCED_PAREN,
+              lexer.getNextToken().getErrorCode());
+    // And then EOF
+    EXPECT_EQ(MasterLexer::Token::END_OF_FILE, lexer.getNextToken().getType());
+}
+
+// Check that we properly return an error when there's an opened quoted
+// string and no closing quote
+TEST_F(MasterLexerTest, getUnbalancedString) {
+    ss << "\"string";
+    lexer.pushSource(ss);
+
+    // First an unbalanced qstring (reported as an unexpected end)
+    EXPECT_EQ(MasterLexer::Token::UNEXPECTED_END,
+              lexer.getNextToken(MasterLexer::QSTRING).getErrorCode());
+    // And then EOF
+    EXPECT_EQ(MasterLexer::Token::END_OF_FILE, lexer.getNextToken().getType());
+}
+
+// Test that ungetting tokens works
+TEST_F(MasterLexerTest, ungetToken) {
+    ss << "\n (\"string\"\n) more";
+    lexer.pushSource(ss);
+
+    // Try getting the newline
+    EXPECT_EQ(MasterLexer::Token::END_OF_LINE, lexer.getNextToken().getType());
+    // Return it and get it again
+    lexer.ungetToken();
+    EXPECT_EQ(MasterLexer::Token::END_OF_LINE, lexer.getNextToken().getType());
+    // Get the quoted string and return it back
+    EXPECT_EQ(MasterLexer::Token::QSTRING,
+              lexer.getNextToken(MasterLexer::QSTRING).getType());
+    lexer.ungetToken();
+    // But if we change the options, it honors them (reports the leading ' ')
+    EXPECT_EQ(MasterLexer::Token::INITIAL_WS,
+              lexer.getNextToken(MasterLexer::QSTRING |
+                                 MasterLexer::INITIAL_WS).getType());
+    // Get the quoted string again and then the "more" string
+    EXPECT_EQ(MasterLexer::Token::QSTRING,
+              lexer.getNextToken(MasterLexer::QSTRING).getType());
+    EXPECT_EQ(MasterLexer::Token::STRING,
+              lexer.getNextToken(MasterLexer::QSTRING).getType());
+    // Return it back. That should put us back inside the parentheses.
+    // Upon the next attempt to get it again, the newline inside the
+    // parentheses should still be ignored.
+    lexer.ungetToken();
+    EXPECT_EQ(MasterLexer::Token::STRING,
+              lexer.getNextToken(MasterLexer::QSTRING).getType());
+}
+
+// Check ungetting token without overriding the start method. We also
+// check it works well with changing options between the calls.
+TEST_F(MasterLexerTest, ungetRealOptions) {
+    ss << "\n    \n";
+    lexer.pushSource(ss);
+    // Skip the first newline
+    EXPECT_EQ(MasterLexer::Token::END_OF_LINE, lexer.getNextToken().getType());
+
+    // If we call it the usual way, it skips the spaces and returns the
+    // newline after them
+    EXPECT_EQ(MasterLexer::Token::END_OF_LINE, lexer.getNextToken().getType());
+
+    // Now we return it. If we call it again, but with the INITIAL_WS option,
+    // we get the initial whitespace instead.
+    lexer.ungetToken();
+    EXPECT_EQ(MasterLexer::Token::INITIAL_WS,
+              lexer.getNextToken(MasterLexer::INITIAL_WS).getType());
+}
+
+// Test that only one token can be ungotten in a row
+TEST_F(MasterLexerTest, ungetTwice) {
+    ss << "\n";
+    lexer.pushSource(ss);
+
+    EXPECT_EQ(MasterLexer::Token::END_OF_LINE, lexer.getNextToken().getType());
+    // Unget the token. This can be done once
+    lexer.ungetToken();
+    // But not twice without getting a token in between
+    EXPECT_THROW(lexer.ungetToken(), isc::InvalidOperation);
+}
+
+// Test that we can't unget a token before we have got one
+TEST_F(MasterLexerTest, ungetBeforeGet) {
+    lexer.pushSource(ss); // Just to eliminate the missing source problem
+    EXPECT_THROW(lexer.ungetToken(), isc::InvalidOperation);
+}
+
+// Test that we can't unget a token after a source switch, even when we
+// got something before.
+TEST_F(MasterLexerTest, ungetAfterSwitch) {
+    ss << "\n\n";
+    lexer.pushSource(ss);
+    EXPECT_EQ(MasterLexer::Token::END_OF_LINE, lexer.getNextToken().getType());
+    // Switch the source by pushing another one
+    std::stringstream ss2;
+    ss2 << "\n\n";
+    lexer.pushSource(ss2);
+    EXPECT_THROW(lexer.ungetToken(), isc::InvalidOperation);
+    // We can get from the new source
+    EXPECT_EQ(MasterLexer::Token::END_OF_LINE, lexer.getNextToken().getType());
+    // And when we drop the current source, we can't unget either
+    lexer.popSource();
+    EXPECT_THROW(lexer.ungetToken(), isc::InvalidOperation);
+}
+
 }