12 years ago · 9a11ef62ae
--- a/src/lib/dns/Makefile.am
+++ b/src/lib/dns/Makefile.am
@@ -97,6 +97,7 @@ libb10_dns___la_SOURCES += master_lexer_inputsource.h master_lexer_inputsource.c
 
				 libb10_dns___la_SOURCES += labelsequence.h labelsequence.cc
			
 
				 libb10_dns___la_SOURCES += masterload.h masterload.cc
			
 
				 libb10_dns___la_SOURCES += master_lexer.h master_lexer.cc
			
 
				+libb10_dns___la_SOURCES += master_lexer_state.h
			
 
				 libb10_dns___la_SOURCES += message.h message.cc
			
 
				 libb10_dns___la_SOURCES += messagerenderer.h messagerenderer.cc
			
 
				 libb10_dns___la_SOURCES += name.h name.cc
			
--- a/src/lib/dns/master_lexer.cc
+++ b/src/lib/dns/master_lexer.cc
@@ -16,6 +16,7 @@
 
				 
			
 
				 #include <dns/master_lexer.h>
			
 
				 #include <dns/master_lexer_inputsource.h>
			
 
				+#include <dns/master_lexer_state.h>
			
 
				 
			
 
				 #include <boost/shared_ptr.hpp>
			
 
				 
			
@@ -32,10 +33,34 @@ typedef boost::shared_ptr<master_lexer_internal::InputSource> InputSourcePtr;
 
				 using namespace master_lexer_internal;
			
 
				 
			
 
				 struct MasterLexer::MasterLexerImpl {
			
 
				-    MasterLexerImpl() : token_(Token::NOT_STARTED) {}
			
 
				+    MasterLexerImpl() : source_(NULL), token_(Token::NOT_STARTED),
			
 
				+                        paren_count_(0), last_was_eol_(false)
			
 
				+    {}
			
 
				+
			
 
				+    // A helper method to skip a possible comment up to the next EOL or EOF,
			
 
				+    // commonly used by state classes.  It returns the corresponding "end-of"
			
 
				+    // character in case it's a comment; otherwise it simply returns the
			
 
				+    // current character.
			
 
				+    int skipComment(int c) {
			
 
				+        if (c == ';') {
			
 
				+            while (true) {
			
 
				+                c = source_->getChar();
			
 
				+                if (c == '\n' || c == InputSource::END_OF_STREAM) {
			
 
				+                    return (c);
			
 
				+                }
			
 
				+            }
			
 
				+        }
			
 
				+        return (c);
			
 
				+    }
			
 
				 
			
 
				     std::vector<InputSourcePtr> sources_;
			
 
				-    Token token_;
			
 
				+    InputSource* source_;       // current source (NULL if sources_ is empty)
			
 
				+    Token token_;               // currently recognized token (set by a state)
			
 
				+
			
 
				+    // These are used in states, and defined here only as a placeholder.
			
 
				+    // The main lexer class does not need these members.
			
 
				+    size_t paren_count_;        // nest count of the parentheses
			
 
				+    bool last_was_eol_; // whether the lexer just passed an end-of-line
			
 
				 };
			
 
				 
			
 
				 MasterLexer::MasterLexer() : impl_(new MasterLexerImpl) {
			
@@ -60,12 +85,14 @@ MasterLexer::pushSource(const char* filename, std::string* error) {
 
				         return (false);
			
 
				     }
			
 
				 
			
 
				+    impl_->source_ = impl_->sources_.back().get();
			
 
				     return (true);
			
 
				 }
			
 
				 
			
 
				 void
			
 
				 MasterLexer::pushSource(std::istream& input) {
			
 
				     impl_->sources_.push_back(InputSourcePtr(new InputSource(input)));
			
 
				+    impl_->source_ = impl_->sources_.back().get();
			
 
				 }
			
 
				 
			
 
				 void
			
@@ -75,6 +102,8 @@ MasterLexer::popSource() {
 
				                   "MasterLexer::popSource on an empty source");
			
 
				     }
			
 
				     impl_->sources_.pop_back();
			
 
				+    impl_->source_ = impl_->sources_.empty() ? NULL :
			
 
				+        impl_->sources_.back().get();
			
 
				 }
			
 
				 
			
 
				 std::string
			
@@ -115,5 +144,142 @@ MasterLexer::Token::getErrorText() const {
 
				     return (error_text[val_.error_code_]);
			
 
				 }
			
 
				 
			
 
				+namespace master_lexer_internal {
			
 
				+// Below we implement state classes for state transitions of MasterLexer.
			
 
				+// Note that these need to be defined here so that they can refer to
			
 
				+// the details of MasterLexerImpl.
			
 
				+
			
 
				+typedef MasterLexer::Token Token; // convenience shortcut
			
 
				+
			
 
				+bool
			
 
				+State::wasLastEOL(const MasterLexer& lexer) const {
			
 
				+    return (lexer.impl_->last_was_eol_);
			
 
				+}
			
 
				+
			
 
				+const MasterLexer::Token&
			
 
				+State::getToken(const MasterLexer& lexer) const {
			
 
				+    return (lexer.impl_->token_);
			
 
				+}
			
 
				+
			
 
				+size_t
			
 
				+State::getParenCount(const MasterLexer& lexer) const {
			
 
				+    return (lexer.impl_->paren_count_);
			
 
				+}
			
 
				+
			
 
				+namespace {
			
 
				+class CRLF : public State {
			
 
				+public:
			
 
				+    CRLF() {}
			
 
				+    virtual const State* handle(MasterLexer& lexer) const {
			
 
				+        // We've just seen '\r'.  If this is part of a sequence of '\r\n',
			
 
				+        // we combine them as a single END-OF-LINE.  Otherwise we treat the
			
 
				+        // single '\r' as an EOL and continue tokenization from the character
			
 
				+        // immediately after '\r'.  One tricky case is that there's a comment
			
 
				+        // between '\r' and '\n'.  This implementation combines these
			
 
				+        // characters and treats them as a single EOL (the behavior derived
			
 
				+        // from BIND 9).  Technically this may not be correct, but in practice
			
 
				+        // the caller wouldn't distinguish this case from the case where it has
			
 
				+        // two EOLs, so we simplify the process.
			
 
				+        const int c = getLexerImpl(lexer)->skipComment(
			
 
				+            getLexerImpl(lexer)->source_->getChar());
			
 
				+        if (c != '\n') {
			
 
				+            getLexerImpl(lexer)->source_->ungetChar();
			
 
				+        }
			
 
				+        getLexerImpl(lexer)->token_ = Token(Token::END_OF_LINE);
			
 
				+        getLexerImpl(lexer)->last_was_eol_ = true;
			
 
				+        return (NULL);
			
 
				+    }
			
 
				+};
			
 
				+
			
 
				+// Currently this is provided mostly as a placeholder.
			
 
				+class String : public State {
			
 
				+public:
			
 
				+    String() {}
			
 
				+    virtual const State* handle(MasterLexer& /*lexer*/) const {
			
 
				+        return (NULL);
			
 
				+    }
			
 
				+};
			
 
				+
			
 
				+// We use a common instance of each state in a singleton-like way to save
			
 
				+// construction overhead.  They are not singletons in the strict sense as
			
 
				+// we don't prohibit direct construction of these objects.  But that doesn't
			
 
				+// matter much anyway, because the definitions are completely hidden within
			
 
				+// this file.
			
 
				+const CRLF CRLF_STATE;
			
 
				+const String STRING_STATE;
			
 
				+}
			
 
				+
			
 
				+const State&
			
 
				+State::getInstance(ID state_id) {
			
 
				+    switch (state_id) {
			
 
				+    case CRLF:
			
 
				+        return (CRLF_STATE);
			
 
				+    case String:
			
 
				+        return (STRING_STATE);
			
 
				+    }
			
 
				+
			
 
				+    // This is a bug of the caller, and this method is only expected to be
			
 
				+    // used by tests, so we just forcefully make it fail by asserting the
			
 
				+    // condition.
			
 
				+    assert(false);
			
 
				+    return (STRING_STATE); // a dummy return, to silence some compilers.
			
 
				+}
			
 
				+
			
 
				+const State*
			
 
				+State::start(MasterLexer& lexer, MasterLexer::Options options) {
			
 
				+    // define some shortcuts
			
 
				+    MasterLexer::MasterLexerImpl& lexerimpl = *lexer.impl_;
			
 
				+    size_t& paren_count = lexerimpl.paren_count_;
			
 
				+
			
 
				+    while (true) {
			
 
				+        const int c = lexerimpl.skipComment(lexerimpl.source_->getChar());
			
 
				+        if (c == InputSource::END_OF_STREAM) {
			
 
				+            lexerimpl.last_was_eol_ = false;
			
 
				+            if (paren_count != 0) {
			
 
				+                lexerimpl.token_ = Token(Token::UNBALANCED_PAREN);
			
 
				+                paren_count = 0; // reset to 0; this helps in lenient mode.
			
 
				+                return (NULL);
			
 
				+            }
			
 
				+            lexerimpl.token_ = Token(Token::END_OF_FILE);
			
 
				+            return (NULL);
			
 
				+        } else if (c == ' ' || c == '\t') {
			
 
				+            // If requested and we are not in (), recognize the initial space.
			
 
				+            if (lexerimpl.last_was_eol_ && paren_count == 0 &&
			
 
				+                (options & MasterLexer::INITIAL_WS) != 0) {
			
 
				+                lexerimpl.last_was_eol_ = false;
			
 
				+                lexerimpl.token_ = Token(Token::INITIAL_WS);
			
 
				+                return (NULL);
			
 
				+            }
			
 
				+        } else if (c == '\n') {
			
 
				+            lexerimpl.last_was_eol_ = true;
			
 
				+            if (paren_count == 0) { // we don't recognize EOL if we are in ()
			
 
				+                lexerimpl.token_ = Token(Token::END_OF_LINE);
			
 
				+                return (NULL);
			
 
				+            }
			
 
				+        } else if (c == '\r') {
			
 
				+            if (paren_count == 0) { // check if we are in () (see above)
			
 
				+                return (&CRLF_STATE);
			
 
				+            }
			
 
				+        } else if (c == '(') {
			
 
				+            lexerimpl.last_was_eol_ = false;
			
 
				+            ++paren_count;
			
 
				+        } else if (c == ')') {
			
 
				+            lexerimpl.last_was_eol_ = false;
			
 
				+            if (paren_count == 0) {
			
 
				+                lexerimpl.token_ = Token(Token::UNBALANCED_PAREN);
			
 
				+                return (NULL);
			
 
				+            }
			
 
				+            --paren_count;
			
 
				+        } else {
			
 
				+            // Note: in #2373 we should probably ungetChar().
			
 
				+            lexerimpl.last_was_eol_ = false;
			
 
				+            return (&STRING_STATE);
			
 
				+        }
			
 
				+        // no code should be here; we just continue the loop.
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+} // namespace master_lexer_internal
			
 
				+
			
 
				 } // end of namespace dns
			
 
				 } // end of namespace isc
			
--- a/src/lib/dns/master_lexer.h
+++ b/src/lib/dns/master_lexer.h
@@ -24,6 +24,9 @@
 
				 
			
 
				 namespace isc {
			
 
				 namespace dns {
			
 
				+namespace master_lexer_internal {
			
 
				+class State;
			
 
				+}
			
 
				 
			
 
				 /// \brief Tokenizer for parsing DNS master files.
			
 
				 ///
			
@@ -64,9 +67,22 @@ namespace dns {
 
				 /// this class does not throw for an error that would be reported as an
			
 
				 /// exception in other classes.
			
 
				 class MasterLexer {
			
 
				+    friend class master_lexer_internal::State;
			
 
				 public:
			
 
				     class Token;       // we define it separately for better readability
			
 
				 
			
 
				+    /// \brief Options for getNextToken.
			
 
				+    ///
			
 
				+    /// A compound option, indicating multiple options are set, can be
			
 
				+    /// specified using the bitwise OR operator (operator|()).
			
 
				+    enum Options {
			
 
				+        NONE = 0,               ///< No option
			
 
				+        INITIAL_WS = 1, ///< recognize begin-of-line spaces after an
			
 
				+                        ///< end-of-line
			
 
				+        QSTRING = 2,    ///< recognize quoted string
			
 
				+        NUMBER = 4   ///< recognize numeric text as integer
			
 
				+    };
			
 
				+
			
 
				     /// \brief The constructor.
			
 
				     ///
			
 
				     /// \throw std::bad_alloc Internal resource allocation fails (rare case).
			
@@ -167,6 +183,16 @@ private:
 
				     MasterLexerImpl* impl_;
			
 
				 };
			
 
				 
			
 
				+/// \brief Operator to combine \c MasterLexer options
			
 
				+///
			
 
				+/// This is a trivial shortcut so that compound options can be specified
			
 
				+/// in an intuitive way.
			
 
				+inline MasterLexer::Options
			
 
				+operator|(MasterLexer::Options o1, MasterLexer::Options o2) {
			
 
				+    return (static_cast<MasterLexer::Options>(
			
 
				+                static_cast<unsigned>(o1) | static_cast<unsigned>(o2)));
			
 
				+}
			
 
				+
			
 
				 /// \brief Tokens for \c MasterLexer
			
 
				 ///
			
 
				 /// This is a simple value-class encapsulating a type of a lexer token and
			
@@ -192,7 +218,8 @@ public:
 
				     enum Type {
			
 
				         END_OF_LINE, ///< End of line detected (if asked for detecting it)
			
 
				         END_OF_FILE, ///< End of file detected (if asked for detecting it)
			
 
				-        INITIAL_WS,  ///< White spaces at the beginning of a line
			
 
				+        INITIAL_WS,  ///< White spaces at the beginning of a line after an
			
 
				+                     ///< end of line
			
 
				         NOVALUE_TYPE_MAX = INITIAL_WS, ///< Max integer corresponding to
			
 
				                                        /// no-value (type only) types.
			
 
				                                        /// Mainly for internal use.
			
--- a/src/lib/dns/master_lexer_state.h
+++ b/src/lib/dns/master_lexer_state.h
@@ -0,0 +1,138 @@
 
				+// Copyright (C) 2012  Internet Systems Consortium, Inc. ("ISC")
			
 
				+//
			
 
				+// Permission to use, copy, modify, and/or distribute this software for any
			
 
				+// purpose with or without fee is hereby granted, provided that the above
			
 
				+// copyright notice and this permission notice appear in all copies.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
			
 
				+// REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
			
 
				+// AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
			
 
				+// INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
			
 
				+// LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
			
 
				+// OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
			
 
				+// PERFORMANCE OF THIS SOFTWARE.
			
 
				+
			
 
				+#ifndef MASTER_LEXER_STATE_H
			
 
				+#define MASTER_LEXER_STATE_H 1
			
 
				+
			
 
				+#include <dns/master_lexer.h>
			
 
				+
			
 
				+namespace isc {
			
 
				+namespace dns {
			
 
				+
			
 
				+namespace master_lexer_internal {
			
 
				+
			
 
				+/// \brief Tokenization state for \c MasterLexer.
			
 
				+///
			
 
				+/// This is a base class of classes that represent various states of a single
			
 
				+/// tokenization session of \c MasterLexer, i.e., the states used for a
			
 
				+/// single call to \c MasterLexer::getNextToken().
			
 
				+///
			
 
				+/// It follows the convention of the state design pattern: each derived class
			
 
				+/// corresponds to a specific state, and the state transition takes place
			
 
				+/// through the virtual method named \c handle().  The \c handle() method
			
 
				+/// takes the main \c MasterLexer object that holds all necessary internal
			
 
				+/// context, and updates it as necessary; each \c State derived class is
			
 
				+/// completely stateless.
			
 
				+///
			
 
				+/// The initial transition takes place in a static method of the base class,
			
 
				+/// \c start().  This is mainly for implementation convenience; we need to
			
 
				+/// pass options given to \c MasterLexer::getNextToken() for the initial
			
 
				+/// state, so it makes more sense to separate the interface for the transition
			
 
				+/// from the initial state.
			
 
				+///
			
 
				+/// When an object of a specific state class completes the session, it
			
 
				+/// normally sets the identified token in the lexer, and returns NULL;
			
 
				+/// if more transitions are necessary, it returns a pointer to the next state
			
 
				+/// object.
			
 
				+///
			
 
				+/// As is usual in the state design pattern, the \c State class is made
			
 
				+/// a friend class of \c MasterLexer and can refer to its internal details.
			
 
				+/// This is intentional; essentially it's a part of \c MasterLexer and
			
 
				+/// is defined as a separate class only for implementation clarity and better
			
 
				+/// testability.  It's defined in a publicly visible header, but that's only
			
 
				+/// for testing purposes.  No normal application or even no other classes of
			
 
				+/// this library are expected to use this class.
			
 
				+class State {
			
 
				+public:
			
 
				+    /// \brief Begin state transitions to get the next token.
			
 
				+    ///
			
 
				+    /// This is the first method that \c MasterLexer needs to call for a
			
 
				+    /// tokenization session.  The lexer passes a reference to itself
			
 
				+    /// and options given in \c getNextToken().
			
 
				+    ///
			
 
				+    /// \throw InputSource::ReadError Unexpected I/O error
			
 
				+    /// \throw std::bad_alloc Internal resource allocation failure
			
 
				+    ///
			
 
				+    /// \param lexer The lexer object that holds the main context.
			
 
				+    /// \param options The options passed to getNextToken().
			
 
				+    /// \return A pointer to the next state object or NULL if the transition
			
 
				+    /// is completed.
			
 
				+    static const State* start(MasterLexer& lexer,
			
 
				+                              MasterLexer::Options options);
			
 
				+
			
 
				+    /// \brief Handle the process of one specific state.
			
 
				+    ///
			
 
				+    /// This method is expected to be called on the object returned by
			
 
				+    /// start(), and to keep being called on the returned object until NULL is
			
 
				+    /// returned.  The call chain will form the complete state transition.
			
 
				+    ///
			
 
				+    /// \throw InputSource::ReadError Unexpected I/O error
			
 
				+    /// \throw std::bad_alloc Internal resource allocation failure
			
 
				+    ///
			
 
				+    /// \param lexer The lexer object that holds the main context.
			
 
				+    /// \return A pointer to the next state object or NULL if the transition
			
 
				+    /// is completed.
			
 
				+    virtual const State* handle(MasterLexer& lexer) const = 0;
			
 
				+
			
 
				+    /// \brief Types of states.
			
 
				+    ///
			
 
				+    /// Specific states are basically hidden within the implementation,
			
 
				+    /// but we'd like to allow tests to examine them, so we provide
			
 
				+    /// a way to get an instance of a specific state.
			
 
				+    enum ID {
			
 
				+        CRLF,                  ///< Just seen a carriage-return character
			
 
				+        String                 ///< Handling a string token
			
 
				+    };
			
 
				+
			
 
				+    /// \brief Returns a \c State instance of the given state.
			
 
				+    ///
			
 
				+    /// This is provided only for testing purposes so tests can check
			
 
				+    /// the behavior of each state separately.  \c MasterLexer shouldn't
			
 
				+    /// need this method.
			
 
				+    static const State& getInstance(ID state_id);
			
 
				+
			
 
				+    /// \name Read-only accessors for testing purposes.
			
 
				+    ///
			
 
				+    /// These allow tests to inspect some selected portion of the internal
			
 
				+    /// states of \c MasterLexer.  These shouldn't be used except for testing
			
 
				+    /// purposes.
			
 
				+    ///@{
			
 
				+    bool wasLastEOL(const MasterLexer& lexer) const;
			
 
				+    const MasterLexer::Token& getToken(const MasterLexer& lexer) const;
			
 
				+    size_t getParenCount(const MasterLexer& lexer) const;
			
 
				+    ///@}
			
 
				+
			
 
				+protected:
			
 
				+    /// \brief An accessor to the internal implementation class of
			
 
				+    /// \c MasterLexer.
			
 
				+    ///
			
 
				+    /// This is provided for specific derived classes as they are not direct
			
 
				+    /// friends of \c MasterLexer.
			
 
				+    ///
			
 
				+    /// \param lexer The lexer object that holds the main context.
			
 
				+    /// \return A pointer to the implementation class object of the given
			
 
				+    /// lexer.  This is never NULL.
			
 
				+    MasterLexer::MasterLexerImpl* getLexerImpl(MasterLexer& lexer) const {
			
 
				+        return (lexer.impl_);
			
 
				+    }
			
 
				+};
			
 
				+
			
 
				+} // namespace master_lexer_internal
			
 
				+} // namespace dns
			
 
				+} // namespace isc
			
 
				+#endif  // MASTER_LEXER_STATE_H
			
 
				+
			
 
				+// Local Variables:
			
 
				+// mode: c++
			
 
				+// End:
			
--- a/src/lib/dns/tests/Makefile.am
+++ b/src/lib/dns/tests/Makefile.am
@@ -27,6 +27,7 @@ run_unittests_SOURCES += labelsequence_unittest.cc
 
				 run_unittests_SOURCES += messagerenderer_unittest.cc
			
 
				 run_unittests_SOURCES += master_lexer_token_unittest.cc
			
 
				 run_unittests_SOURCES += master_lexer_unittest.cc
			
 
				+run_unittests_SOURCES += master_lexer_state_unittest.cc
			
 
				 run_unittests_SOURCES += name_unittest.cc
			
 
				 run_unittests_SOURCES += nsec3hash_unittest.cc
			
 
				 run_unittests_SOURCES += rrclass_unittest.cc rrtype_unittest.cc
			
--- a/src/lib/dns/tests/master_lexer_state_unittest.cc
+++ b/src/lib/dns/tests/master_lexer_state_unittest.cc
@@ -0,0 +1,256 @@
 
				+// Copyright (C) 2012  Internet Systems Consortium, Inc. ("ISC")
			
 
				+//
			
 
				+// Permission to use, copy, modify, and/or distribute this software for any
			
 
				+// purpose with or without fee is hereby granted, provided that the above
			
 
				+// copyright notice and this permission notice appear in all copies.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
			
 
				+// REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
			
 
				+// AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
			
 
				+// INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
			
 
				+// LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
			
 
				+// OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
			
 
				+// PERFORMANCE OF THIS SOFTWARE.
			
 
				+
			
 
				+#include <dns/master_lexer.h>
			
 
				+#include <dns/master_lexer_inputsource.h>
			
 
				+#include <dns/master_lexer_state.h>
			
 
				+
			
 
				+#include <gtest/gtest.h>
			
 
				+
			
 
				+#include <sstream>
			
 
				+
			
 
				+using namespace isc::dns;
			
 
				+using namespace master_lexer_internal;
			
 
				+
			
 
				+namespace {
			
 
				+typedef MasterLexer::Token Token; // shortcut
			
 
				+
			
 
				+class MasterLexerStateTest : public ::testing::Test {
			
 
				+protected:
			
 
				+    MasterLexerStateTest() : common_options(MasterLexer::INITIAL_WS),
			
 
				+                             s_null(NULL),
			
 
				+                             s_crlf(State::getInstance(State::CRLF)),
			
 
				+                             s_string(State::getInstance(State::String)),
			
 
				+                             options(MasterLexer::NONE),
			
 
				+                             orig_options(options)
			
 
				+    {}
			
 
				+
			
 
				+    // Specify INITIAL_WS as common initial options.
			
 
				+    const MasterLexer::Options common_options;
			
 
				+    MasterLexer lexer;
			
 
				+    const State* const s_null;
			
 
				+    const State& s_crlf;
			
 
				+    const State& s_string;
			
 
				+    std::stringstream ss;
			
 
				+    MasterLexer::Options options, orig_options;
			
 
				+};
			
 
				+
			
 
				+// Common check for the end-of-file condition.
			
 
				+// Token is set to END_OF_FILE, and the lexer is NOT in the "last was EOL" state.
			
 
				+// Passed state can be any valid one; they are stateless, just providing the
			
 
				+// interface for inspection.
			
 
				+void
			
 
				+eofCheck(const State& state, MasterLexer& lexer) {
			
 
				+    EXPECT_EQ(Token::END_OF_FILE, state.getToken(lexer).getType());
			
 
				+    EXPECT_FALSE(state.wasLastEOL(lexer));
			
 
				+}
			
 
				+
			
 
				+TEST_F(MasterLexerStateTest, startAndEnd) {
			
 
				+    // A simple case: the input is empty, so we begin with start and
			
 
				+    // are immediately done.
			
 
				+    lexer.pushSource(ss);
			
 
				+    EXPECT_EQ(s_null, State::start(lexer, common_options));
			
 
				+    eofCheck(s_crlf, lexer);
			
 
				+}
			
 
				+
			
 
				+TEST_F(MasterLexerStateTest, startToEOL) {
			
 
				+    ss << "\n";
			
 
				+    lexer.pushSource(ss);
			
 
				+
			
 
				+    EXPECT_EQ(s_null, State::start(lexer, common_options));
			
 
				+    EXPECT_TRUE(s_crlf.wasLastEOL(lexer));
			
 
				+    EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
			
 
				+
			
 
				+    // The next lexer session will reach EOF.  Same eof check should pass.
			
 
				+    EXPECT_EQ(s_null, State::start(lexer, common_options));
			
 
				+    eofCheck(s_crlf, lexer);
			
 
				+}
			
 
				+
			
 
				+TEST_F(MasterLexerStateTest, space) {
			
 
				+    // repeat '\t\n' twice (see below), then space after EOL
			
 
				+    ss << " \t\n\t\n ";
			
 
				+    lexer.pushSource(ss);
			
 
				+
			
 
				+    // by default space characters and tabs will be ignored.  We check this
			
 
				+    // twice; at the second iteration, it's a white space at the beginning
			
 
				+    // of line, but since we don't specify INITIAL_WS option, it's treated as
			
 
				+    // normal space and ignored.
			
 
				+    for (size_t i = 0; i < 2; ++i) {
			
 
				+        EXPECT_EQ(s_null, State::start(lexer, MasterLexer::NONE));
			
 
				+        EXPECT_TRUE(s_crlf.wasLastEOL(lexer));
			
 
				+        EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
			
 
				+    }
			
 
				+
			
 
				+    // Now we specify the INITIAL_WS option.  It will be recognized and the
			
 
				+    // corresponding token will be returned.
			
 
				+    EXPECT_EQ(s_null, State::start(lexer, MasterLexer::INITIAL_WS));
			
 
				+    EXPECT_FALSE(s_crlf.wasLastEOL(lexer));
			
 
				+    EXPECT_EQ(Token::INITIAL_WS, s_crlf.getToken(lexer).getType());
			
 
				+}
			
 
				+
			
 
				+TEST_F(MasterLexerStateTest, parentheses) {
			
 
				+    ss << "\n(\na\n )\n "; // 1st \n is to check if 'was EOL' is set to false
			
 
				+    lexer.pushSource(ss);
			
 
				+
			
 
				+    EXPECT_EQ(s_null, State::start(lexer, common_options)); // handle \n
			
 
				+
			
 
				+    // Now handle '('.  It skips \n and recognizes 'a' as a string
			
 
				+    EXPECT_EQ(0, s_crlf.getParenCount(lexer)); // check pre condition
			
 
				+    EXPECT_EQ(&s_string, State::start(lexer, common_options));
			
 
				+    EXPECT_EQ(1, s_crlf.getParenCount(lexer)); // check post condition
			
 
				+    EXPECT_FALSE(s_crlf.wasLastEOL(lexer));
			
 
				+
			
 
				+    // skip 'a' (note: until #2373 it's actually skipped as part of the '('
			
 
				+    // handling)
			
 
				+    s_string.handle(lexer);
			
 
				+
			
 
				+    // Then handle ')'.  '\n' before ')' isn't recognized because
			
 
				+    // it's canceled due to the '('.  Likewise, the space after the '\n'
			
 
				+    // shouldn't be recognized but should be just ignored.
			
 
				+    EXPECT_EQ(s_null, State::start(lexer, common_options));
			
 
				+    EXPECT_EQ(0, s_crlf.getParenCount(lexer));
			
 
				+
			
 
				+    // Now, temporarily disabled options are restored: Both EOL and the
			
 
				+    // initial WS are recognized
			
 
				+    EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
			
 
				+    EXPECT_EQ(s_null, State::start(lexer, common_options));
			
 
				+    EXPECT_EQ(Token::INITIAL_WS, s_crlf.getToken(lexer).getType());
			
 
				+}
			
 
				+
			
 
				+TEST_F(MasterLexerStateTest, nestedParentheses) {
			
 
				+    // This is an unusual, but allowed (in this implementation) case.
			
 
				+    ss << "(a(b)\n c)\n ";
			
 
				+    lexer.pushSource(ss);
			
 
				+
			
 
				+    EXPECT_EQ(&s_string, State::start(lexer, common_options)); // consume '('
			
 
				+    s_string.handle(lexer);                      // consume 'a'
			
 
				+    EXPECT_EQ(&s_string, State::start(lexer, common_options)); // consume '('
			
 
				+    s_string.handle(lexer);                     // consume 'b'
			
 
				+    EXPECT_EQ(2, s_crlf.getParenCount(lexer)); // now the count is 2
			
 
				+
			
 
				+    // Close the innermost parentheses.  count will be decreased, but option
			
 
				+    // shouldn't be restored yet, so the intermediate EOL or initial WS won't
			
 
				+    // be recognized.
			
 
				+    EXPECT_EQ(&s_string, State::start(lexer, common_options)); // consume ')'
			
 
				+    s_string.handle(lexer);                      // consume 'c'
			
 
				+    EXPECT_EQ(1, s_crlf.getParenCount(lexer));
			
 
				+
			
 
				+    // Close the outermost parentheses.  count will be reset to 0, and original
			
 
				+    // options are restored.
			
 
				+    EXPECT_EQ(s_null, State::start(lexer, common_options));
			
 
				+
			
 
				+    // Now, temporarily disabled options are restored: Both EOL and the
			
 
				+    // initial WS are recognized
			
 
				+    EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
			
 
				+    EXPECT_EQ(s_null, State::start(lexer, common_options));
			
 
				+    EXPECT_EQ(Token::INITIAL_WS, s_crlf.getToken(lexer).getType());
			
 
				+}
			
 
				+
			
 
				+TEST_F(MasterLexerStateTest, unbalancedParentheses) {
			
 
				+    // Only closing paren is provided.  We prepend a \n to check if it's
			
 
				+    // correctly canceled after detecting the error.
			
 
				+    ss << "\n)";
			
 
				+    ss << "(a";
			
 
				+    lexer.pushSource(ss);
			
 
				+
			
 
				+    EXPECT_EQ(s_null, State::start(lexer, common_options)); // consume '\n'
			
 
				+    EXPECT_TRUE(s_crlf.wasLastEOL(lexer)); // this \n was remembered
			
 
				+
			
 
				+    // Now checking ')'.  The result should be error, count shouldn't be
			
 
				+    // changed.  "last EOL" should be canceled.
			
 
				+    EXPECT_EQ(0, s_crlf.getParenCount(lexer));
			
 
				+    EXPECT_EQ(s_null, State::start(lexer, common_options));
			
 
				+    EXPECT_EQ(0, s_crlf.getParenCount(lexer));
			
 
				+    ASSERT_EQ(Token::ERROR, s_crlf.getToken(lexer).getType());
			
 
				+    EXPECT_EQ(Token::UNBALANCED_PAREN, s_crlf.getToken(lexer).getErrorCode());
			
 
				+    EXPECT_FALSE(s_crlf.wasLastEOL(lexer));
			
 
				+
			
 
				+    // Reach EOF with a dangling open parenthesis.
			
 
				+    EXPECT_EQ(&s_string, State::start(lexer, common_options)); // consume '('
			
 
				+    s_string.handle(lexer);                      // consume 'a'
			
 
				+    EXPECT_EQ(1, s_crlf.getParenCount(lexer));
			
 
				+    EXPECT_EQ(s_null, State::start(lexer, common_options));    // reach EOF
			
 
				+    ASSERT_EQ(Token::ERROR, s_crlf.getToken(lexer).getType());
			
 
				+    EXPECT_EQ(Token::UNBALANCED_PAREN, s_crlf.getToken(lexer).getErrorCode());
			
 
				+    EXPECT_EQ(0, s_crlf.getParenCount(lexer)); // should be reset to 0
			
 
				+}
			
 
				+
			
 
				+TEST_F(MasterLexerStateTest, startToComment) {
			
 
				+    // Begin with 'start', skip space, then encounter a comment.  Skip
			
 
				+    // the rest of the line, and recognize the new line.  Note that the
			
 
				+    // second ';' is simply ignored.
			
 
				+    ss << "  ;a;\n";
			
 
				+    ss << ";a;";           // Likewise, but the comment ends with EOF.
			
 
				+    lexer.pushSource(ss);
			
 
				+
			
 
				+    // Comment ending with EOL
			
 
				+    EXPECT_EQ(s_null, State::start(lexer, common_options));
			
 
				+    EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
			
 
				+
			
 
				+    // Comment ending with EOF
			
 
				+    EXPECT_EQ(s_null, State::start(lexer, common_options));
			
 
				+    EXPECT_EQ(Token::END_OF_FILE, s_crlf.getToken(lexer).getType());
			
 
				+}
			
 
				+
			
 
				+TEST_F(MasterLexerStateTest, commentAfterParen) {
			
 
				+    // comment after an opening parenthesis.  The code that is tested by
			
 
				+    // other tests should also ensure that it works correctly, but we
			
 
				+    // check it explicitly.
			
 
				+    ss << "( ;this is a comment\na)\n";
			
 
				+    lexer.pushSource(ss);
			
 
				+
			
 
				+    // consume '(', skip comments, consume 'a', then consume ')'
			
 
				+    EXPECT_EQ(&s_string, State::start(lexer, common_options));
			
 
				+    s_string.handle(lexer);
			
 
				+    EXPECT_EQ(s_null, State::start(lexer, common_options));
			
 
				+    EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
			
 
				+}
			
 
				+
			
 
				+TEST_F(MasterLexerStateTest, crlf) {
			
 
				+    ss << "\r\n";               // case 1
			
 
				+    ss << "\r ";                // case 2
			
 
				+    ss << "\r;comment\na";      // case 3
			
 
				+    ss << "\r";                 // case 4
			
 
				+    lexer.pushSource(ss);
			
 
				+
			
 
				+    // 1. A sequence of \r, \n is recognized as a single 'end-of-line'
			
 
				+    EXPECT_EQ(&s_crlf, State::start(lexer, common_options)); // recognize '\r'
			
 
				+    EXPECT_EQ(s_null, s_crlf.handle(lexer));   // recognize '\n'
			
 
				+    EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
			
 
				+    EXPECT_TRUE(s_crlf.wasLastEOL(lexer));
			
 
				+
			
 
				+    // 2. Single '\r' (not followed by \n) is recognized as a single
			
 
				+    // 'end-of-line'.  then there will be "initial WS"
			
 
				+    EXPECT_EQ(&s_crlf, State::start(lexer, common_options)); // recognize '\r'
			
 
				+    // see ' ', "unget" it
			
 
				+    EXPECT_EQ(s_null, s_crlf.handle(lexer));
			
 
				+    EXPECT_EQ(s_null, State::start(lexer, common_options)); // recognize ' '
			
 
				+    EXPECT_EQ(Token::INITIAL_WS, s_crlf.getToken(lexer).getType());
			
 
				+
			
 
				+    // 3. comment between \r and \n
			
 
				+    EXPECT_EQ(&s_crlf, State::start(lexer, common_options)); // recognize '\r'
			
 
				+    // skip comments, recognize '\n'
			
 
				+    EXPECT_EQ(s_null, s_crlf.handle(lexer));
			
 
				+    EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
			
 
				+    EXPECT_EQ(&s_string, State::start(lexer, common_options));
			
 
				+
			
 
				+    // 4. \r then EOF
			
 
				+    EXPECT_EQ(&s_crlf, State::start(lexer, common_options)); // recognize '\r'
			
 
				+    // see EOF, then "unget" it
			
 
				+    EXPECT_EQ(s_null, s_crlf.handle(lexer));
			
 
				+    EXPECT_EQ(s_null, State::start(lexer, common_options));  // recognize EOF
			
 
				+    EXPECT_EQ(Token::END_OF_FILE, s_crlf.getToken(lexer).getType());
			
 
				+}
			
 
				+
			
 
				+}