Browse Source

[master] Merge branch 'trac2372'

JINMEI Tatuya 12 years ago
parent
commit
9a11ef62ae

+ 1 - 0
src/lib/dns/Makefile.am

@@ -97,6 +97,7 @@ libb10_dns___la_SOURCES += master_lexer_inputsource.h master_lexer_inputsource.c
 libb10_dns___la_SOURCES += labelsequence.h labelsequence.cc
 libb10_dns___la_SOURCES += masterload.h masterload.cc
 libb10_dns___la_SOURCES += master_lexer.h master_lexer.cc
+libb10_dns___la_SOURCES += master_lexer_state.h
 libb10_dns___la_SOURCES += message.h message.cc
 libb10_dns___la_SOURCES += messagerenderer.h messagerenderer.cc
 libb10_dns___la_SOURCES += name.h name.cc

+ 168 - 2
src/lib/dns/master_lexer.cc

@@ -16,6 +16,7 @@
 
 #include <dns/master_lexer.h>
 #include <dns/master_lexer_inputsource.h>
+#include <dns/master_lexer_state.h>
 
 #include <boost/shared_ptr.hpp>
 
@@ -32,10 +33,34 @@ typedef boost::shared_ptr<master_lexer_internal::InputSource> InputSourcePtr;
 using namespace master_lexer_internal;
 
 struct MasterLexer::MasterLexerImpl {
-    MasterLexerImpl() : token_(Token::NOT_STARTED) {}
+    MasterLexerImpl() : source_(NULL), token_(Token::NOT_STARTED),
+                        paren_count_(0), last_was_eol_(false)
+    {}
+
+    // A helper method to skip a possible comment up to the end of line (EOL)
+    // or EOF; commonly used by state classes.  It returns the corresponding
+    // "end-of" character in case it's a comment; otherwise it simply returns
+    // the current character.
+    int skipComment(int c) {
+        if (c == ';') {
+            while (true) {
+                c = source_->getChar();
+                if (c == '\n' || c == InputSource::END_OF_STREAM) {
+                    return (c);
+                }
+            }
+        }
+        return (c);
+    }
 
     std::vector<InputSourcePtr> sources_;
-    Token token_;
+    InputSource* source_;       // current source (NULL if sources_ is empty)
+    Token token_;               // currently recognized token (set by a state)
+
+    // These are used in states, and defined here only as a placeholder.
+    // The main lexer class does not need these members.
+    size_t paren_count_;        // nest count of the parentheses
+    bool last_was_eol_; // whether the lexer just passed an end-of-line
 };
 
 MasterLexer::MasterLexer() : impl_(new MasterLexerImpl) {
@@ -60,12 +85,14 @@ MasterLexer::pushSource(const char* filename, std::string* error) {
         return (false);
     }
 
+    impl_->source_ = impl_->sources_.back().get();
     return (true);
 }
 
 void
 MasterLexer::pushSource(std::istream& input) {
     impl_->sources_.push_back(InputSourcePtr(new InputSource(input)));
+    impl_->source_ = impl_->sources_.back().get();
 }
 
 void
@@ -75,6 +102,8 @@ MasterLexer::popSource() {
                   "MasterLexer::popSource on an empty source");
     }
     impl_->sources_.pop_back();
+    impl_->source_ = impl_->sources_.empty() ? NULL :
+        impl_->sources_.back().get();
 }
 
 std::string
@@ -115,5 +144,142 @@ MasterLexer::Token::getErrorText() const {
     return (error_text[val_.error_code_]);
 }
 
+namespace master_lexer_internal {
+// Below we implement state classes for state transitions of MasterLexer.
+// Note that these need to be defined here so that they can refer to
+// the details of MasterLexerImpl.
+
+typedef MasterLexer::Token Token; // convenience shortcut
+
+bool
+State::wasLastEOL(const MasterLexer& lexer) const {
+    return (lexer.impl_->last_was_eol_);
+}
+
+const MasterLexer::Token&
+State::getToken(const MasterLexer& lexer) const {
+    return (lexer.impl_->token_);
+}
+
+size_t
+State::getParenCount(const MasterLexer& lexer) const {
+    return (lexer.impl_->paren_count_);
+}
+
+namespace {
+class CRLF : public State {
+public:
+    CRLF() {}
+    virtual const State* handle(MasterLexer& lexer) const {
+        // We've just seen '\r'.  If this is part of a sequence of '\r\n',
+        // we combine them as a single END-OF-LINE.  Otherwise we treat the
+        // single '\r' as an EOL and continue tokenization from the character
+        // immediately after '\r'.  One tricky case is when there's a comment
+        // between '\r' and '\n'.  This implementation combines these
+        // characters and treats them as a single EOL (the behavior derived
+        // from BIND 9).  Technically this may not be correct, but in practice
+        // the caller wouldn't distinguish this case from the case where it
+        // has two EOLs, so we simplify the process.
+        const int c = getLexerImpl(lexer)->skipComment(
+            getLexerImpl(lexer)->source_->getChar());
+        if (c != '\n') {
+            getLexerImpl(lexer)->source_->ungetChar();
+        }
+        getLexerImpl(lexer)->token_ = Token(Token::END_OF_LINE);
+        getLexerImpl(lexer)->last_was_eol_ = true;
+        return (NULL);
+    }
+};
+
+// Currently this is provided mostly as a place holder
+class String : public State {
+public:
+    String() {}
+    virtual const State* handle(MasterLexer& /*lexer*/) const {
+        return (NULL);
+    }
+};
+
+// We use a common instance of each state in a singleton-like way to save
+// construction overhead.  They are not singletons in the strict sense as
+// we don't prohibit direct construction of these objects.  But that doesn't
+// matter much anyway, because the definitions are completely hidden within
+// this file.
+const CRLF CRLF_STATE;
+const String STRING_STATE;
+}
+
+const State&
+State::getInstance(ID state_id) {
+    switch (state_id) {
+    case CRLF:
+        return (CRLF_STATE);
+    case String:
+        return (STRING_STATE);
+    }
+
+    // This is a bug of the caller, and this method is only expected to be
+    // used by tests, so we just forcefully make it fail by asserting the
+    // condition.
+    assert(false);
+    return (STRING_STATE); // a dummy return, to silence some compilers.
+}
+
+const State*
+State::start(MasterLexer& lexer, MasterLexer::Options options) {
+    // define some shortcuts
+    MasterLexer::MasterLexerImpl& lexerimpl = *lexer.impl_;
+    size_t& paren_count = lexerimpl.paren_count_;
+
+    while (true) {
+        const int c = lexerimpl.skipComment(lexerimpl.source_->getChar());
+        if (c == InputSource::END_OF_STREAM) {
+            lexerimpl.last_was_eol_ = false;
+            if (paren_count != 0) {
+                lexerimpl.token_ = Token(Token::UNBALANCED_PAREN);
+                paren_count = 0; // reset to 0; this helps in lenient mode.
+                return (NULL);
+            }
+            lexerimpl.token_ = Token(Token::END_OF_FILE);
+            return (NULL);
+        } else if (c == ' ' || c == '\t') {
+            // If requested and we are not in (), recognize the initial space.
+            if (lexerimpl.last_was_eol_ && paren_count == 0 &&
+                (options & MasterLexer::INITIAL_WS) != 0) {
+                lexerimpl.last_was_eol_ = false;
+                lexerimpl.token_ = Token(Token::INITIAL_WS);
+                return (NULL);
+            }
+        } else if (c == '\n') {
+            lexerimpl.last_was_eol_ = true;
+            if (paren_count == 0) { // we don't recognize EOL if we are in ()
+                lexerimpl.token_ = Token(Token::END_OF_LINE);
+                return (NULL);
+            }
+        } else if (c == '\r') {
+            if (paren_count == 0) { // check if we are in () (see above)
+                return (&CRLF_STATE);
+            }
+        } else if (c == '(') {
+            lexerimpl.last_was_eol_ = false;
+            ++paren_count;
+        } else if (c == ')') {
+            lexerimpl.last_was_eol_ = false;
+            if (paren_count == 0) {
+                lexerimpl.token_ = Token(Token::UNBALANCED_PAREN);
+                return (NULL);
+            }
+            --paren_count;
+        } else {
+            // Note: in #2373 we should probably ungetChar().
+            lexerimpl.last_was_eol_ = false;
+            return (&STRING_STATE);
+        }
+        // no code should be here; we just continue the loop.
+    }
+}
+
+} // namespace master_lexer_internal
+
 } // end of namespace dns
 } // end of namespace isc

+ 28 - 1
src/lib/dns/master_lexer.h

@@ -24,6 +24,9 @@
 
 namespace isc {
 namespace dns {
+namespace master_lexer_internal {
+class State;
+}
 
 /// \brief Tokenizer for parsing DNS master files.
 ///
@@ -64,9 +67,22 @@ namespace dns {
 /// this class does not throw for an error that would be reported as an
 /// exception in other classes.
 class MasterLexer {
+    friend class master_lexer_internal::State;
 public:
     class Token;       // we define it separately for better readability
 
+    /// \brief Options for getNextToken.
+    ///
+    /// A compound option, indicating multiple options are set, can be
+    /// specified using the bitwise OR operator (operator|()).
+    enum Options {
+        NONE = 0,               ///< No option
+        INITIAL_WS = 1, ///< recognize begin-of-line spaces after an
+                        ///< end-of-line
+        QSTRING = 2,    ///< recognize quoted string
+        NUMBER = 4   ///< recognize numeric text as integer
+    };
+
     /// \brief The constructor.
     ///
     /// \throw std::bad_alloc Internal resource allocation fails (rare case).
@@ -167,6 +183,16 @@ private:
     MasterLexerImpl* impl_;
 };
 
+/// \brief Operator to combine \c MasterLexer options
+///
+/// This is a trivial shortcut so that compound options can be specified
+/// in an intuitive way.
+inline MasterLexer::Options
+operator|(MasterLexer::Options o1, MasterLexer::Options o2) {
+    return (static_cast<MasterLexer::Options>(
+                static_cast<unsigned>(o1) | static_cast<unsigned>(o2)));
+}
+
 /// \brief Tokens for \c MasterLexer
 ///
 /// This is a simple value-class encapsulating a type of a lexer token and
@@ -192,7 +218,8 @@ public:
     enum Type {
         END_OF_LINE, ///< End of line detected (if asked for detecting it)
         END_OF_FILE, ///< End of file detected (if asked for detecting it)
-        INITIAL_WS,  ///< White spaces at the beginning of a line
+        INITIAL_WS,  ///< White spaces at the beginning of a line after an
+                     ///< end of line
         NOVALUE_TYPE_MAX = INITIAL_WS, ///< Max integer corresponding to
                                        /// no-value (type only) types.
                                        /// Mainly for internal use.

+ 138 - 0
src/lib/dns/master_lexer_state.h

@@ -0,0 +1,138 @@
+// Copyright (C) 2012  Internet Systems Consortium, Inc. ("ISC")
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+// REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+// AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+// INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+// LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+// OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+// PERFORMANCE OF THIS SOFTWARE.
+
+#ifndef MASTER_LEXER_STATE_H
+#define MASTER_LEXER_STATE_H 1
+
+#include <dns/master_lexer.h>
+
+namespace isc {
+namespace dns {
+
+namespace master_lexer_internal {
+
+/// \brief Tokenization state for \c MasterLexer.
+///
+/// This is a base class of classes that represent various states of a single
+/// tokenization session of \c MasterLexer, i.e., the states used for a
+/// single call to \c MasterLexer::getNextToken().
+///
+/// It follows the convention of the state design pattern: each derived class
+/// corresponds to a specific state, and the state transition takes place
+/// through the virtual method named \c handle().  The \c handle() method
+/// takes the main \c MasterLexer object that holds all necessary internal
+/// context, and updates it as necessary; each \c State derived class is
+/// completely stateless.
+///
+/// The initial transition takes place in a static method of the base class,
+/// \c start().  This is mainly for implementation convenience; we need to
+/// pass options given to \c MasterLexer::getNextToken() for the initial
+/// state, so it makes more sense to separate the interface for the transition
+/// from the initial state.
+///
+/// When an object of a specific state class completes the session, it
+/// normally sets the identified token in the lexer, and returns NULL;
+/// if more transition is necessary, it returns a pointer to the next state
+/// object.
+///
+/// As is usual in the state design pattern, the \c State class is made
+/// a friend class of \c MasterLexer and can refer to its internal details.
+/// This is intentional; essentially it's a part of \c MasterLexer and
+/// is defined as a separate class only for implementation clarity and better
+/// testability.  It's defined in a publicly visible header, but that's only
+/// for testing purposes.  Neither normal applications nor other classes of
+/// this library are expected to use this class.
+class State {
+public:
+    /// \brief Begin state transitions to get the next token.
+    ///
+    /// This is the first method that \c MasterLexer needs to call for a
+    /// tokenization session.  The lexer passes a reference to itself
+    /// and options given in \c getNextToken().
+    ///
+    /// \throw InputSource::ReadError Unexpected I/O error
+    /// \throw std::bad_alloc Internal resource allocation failure
+    ///
+    /// \param lexer The lexer object that holds the main context.
+    /// \param options The options passed to getNextToken().
+    /// \return A pointer to the next state object or NULL if the transition
+    /// is completed.
+    static const State* start(MasterLexer& lexer,
+                              MasterLexer::Options options);
+
+    /// \brief Handle the process of one specific state.
+    ///
+    /// This method is expected to be called on the object returned by
+    /// start(), and then on each subsequently returned object until NULL is
+    /// returned.  The call chain will form the complete state transition.
+    ///
+    /// \throw InputSource::ReadError Unexpected I/O error
+    /// \throw std::bad_alloc Internal resource allocation failure
+    ///
+    /// \param lexer The lexer object that holds the main context.
+    /// \return A pointer to the next state object or NULL if the transition
+    /// is completed.
+    virtual const State* handle(MasterLexer& lexer) const = 0;
+
+    /// \brief Types of states.
+    ///
+    /// Specific states are basically hidden within the implementation,
+    /// but we'd like to allow tests to examine them, so we provide
+    /// a way to get an instance of a specific state.
+    enum ID {
+        CRLF,                  ///< Just seen a carriage-return character
+        String                 ///< Handling a string token
+    };
+
+    /// \brief Returns a \c State instance of the given state.
+    ///
+    /// This is provided only for testing purposes so tests can check
+    /// the behavior of each state separately.  \c MasterLexer shouldn't
+    /// need this method.
+    static const State& getInstance(ID state_id);
+
+    /// \name Read-only accessors for testing purposes.
+    ///
+    /// These allow tests to inspect some selected portion of the internal
+    /// states of \c MasterLexer.  These shouldn't be used except for testing
+    /// purposes.
+    ///@{
+    bool wasLastEOL(const MasterLexer& lexer) const;
+    const MasterLexer::Token& getToken(const MasterLexer& lexer) const;
+    size_t getParenCount(const MasterLexer& lexer) const;
+    ///@}
+
+protected:
+    /// \brief An accessor to the internal implementation class of
+    /// \c MasterLexer.
+    ///
+    /// This is provided for specific derived classes as they are not direct
+    /// friends of \c MasterLexer.
+    ///
+    /// \param lexer The lexer object that holds the main context.
+    /// \return A pointer to the implementation class object of the given
+    /// lexer.  This is never NULL.
+    MasterLexer::MasterLexerImpl* getLexerImpl(MasterLexer& lexer) const {
+        return (lexer.impl_);
+    }
+};
+
+} // namespace master_lexer_internal
+} // namespace dns
+} // namespace isc
+#endif  // MASTER_LEXER_STATE_H
+
+// Local Variables:
+// mode: c++
+// End:

+ 1 - 0
src/lib/dns/tests/Makefile.am

@@ -27,6 +27,7 @@ run_unittests_SOURCES += labelsequence_unittest.cc
 run_unittests_SOURCES += messagerenderer_unittest.cc
 run_unittests_SOURCES += master_lexer_token_unittest.cc
 run_unittests_SOURCES += master_lexer_unittest.cc
+run_unittests_SOURCES += master_lexer_state_unittest.cc
 run_unittests_SOURCES += name_unittest.cc
 run_unittests_SOURCES += nsec3hash_unittest.cc
 run_unittests_SOURCES += rrclass_unittest.cc rrtype_unittest.cc

+ 256 - 0
src/lib/dns/tests/master_lexer_state_unittest.cc

@@ -0,0 +1,256 @@
+// Copyright (C) 2012  Internet Systems Consortium, Inc. ("ISC")
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+// REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+// AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+// INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+// LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+// OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+// PERFORMANCE OF THIS SOFTWARE.
+
+#include <dns/master_lexer.h>
+#include <dns/master_lexer_inputsource.h>
+#include <dns/master_lexer_state.h>
+
+#include <gtest/gtest.h>
+
+#include <sstream>
+
+using namespace isc::dns;
+using namespace master_lexer_internal;
+
+namespace {
+typedef MasterLexer::Token Token; // shortcut
+
+class MasterLexerStateTest : public ::testing::Test {
+protected:
+    MasterLexerStateTest() : common_options(MasterLexer::INITIAL_WS),
+                             s_null(NULL),
+                             s_crlf(State::getInstance(State::CRLF)),
+                             s_string(State::getInstance(State::String)),
+                             options(MasterLexer::NONE),
+                             orig_options(options)
+    {}
+
+    // Specify INITIAL_WS as common initial options.
+    const MasterLexer::Options common_options;
+    MasterLexer lexer;
+    const State* const s_null;
+    const State& s_crlf;
+    const State& s_string;
+    std::stringstream ss;
+    MasterLexer::Options options, orig_options;
+};
+
+// Common check for the end-of-file condition.
+// Token is set to END_OF_FILE, and the lexer is NOT in the "last was EOL"
+// state.  The passed state can be any valid one; states are stateless and
+// just provide the interface for inspection.
+void
+eofCheck(const State& state, MasterLexer& lexer) {
+    EXPECT_EQ(Token::END_OF_FILE, state.getToken(lexer).getType());
+    EXPECT_FALSE(state.wasLastEOL(lexer));
+}
+
+TEST_F(MasterLexerStateTest, startAndEnd) {
+    // A simple case: the input is empty, so we begin with start and
+    // are immediately done.
+    lexer.pushSource(ss);
+    EXPECT_EQ(s_null, State::start(lexer, common_options));
+    eofCheck(s_crlf, lexer);
+}
+
+TEST_F(MasterLexerStateTest, startToEOL) {
+    ss << "\n";
+    lexer.pushSource(ss);
+
+    EXPECT_EQ(s_null, State::start(lexer, common_options));
+    EXPECT_TRUE(s_crlf.wasLastEOL(lexer));
+    EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
+
+    // The next lexer session will reach EOF.  Same eof check should pass.
+    EXPECT_EQ(s_null, State::start(lexer, common_options));
+    eofCheck(s_crlf, lexer);
+}
+
+TEST_F(MasterLexerStateTest, space) {
+    // repeat '\t\n' twice (see below), then space after EOL
+    ss << " \t\n\t\n ";
+    lexer.pushSource(ss);
+
+    // by default space characters and tabs will be ignored.  We check this
+    // twice; at the second iteration, it's a white space at the beginning
+    // of line, but since we don't specify INITIAL_WS option, it's treated as
+    // normal space and ignored.
+    for (size_t i = 0; i < 2; ++i) {
+        EXPECT_EQ(s_null, State::start(lexer, MasterLexer::NONE));
+        EXPECT_TRUE(s_crlf.wasLastEOL(lexer));
+        EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
+    }
+
+    // Now we specify the INITIAL_WS option.  It will be recognized and the
+    // corresponding token will be returned.
+    EXPECT_EQ(s_null, State::start(lexer, MasterLexer::INITIAL_WS));
+    EXPECT_FALSE(s_crlf.wasLastEOL(lexer));
+    EXPECT_EQ(Token::INITIAL_WS, s_crlf.getToken(lexer).getType());
+}
+
+TEST_F(MasterLexerStateTest, parentheses) {
+    ss << "\n(\na\n )\n "; // 1st \n is to check if 'was EOL' is set to false
+    lexer.pushSource(ss);
+
+    EXPECT_EQ(s_null, State::start(lexer, common_options)); // handle \n
+
+    // Now handle '('.  It skips \n and recognize 'a' as string
+    EXPECT_EQ(0, s_crlf.getParenCount(lexer)); // check pre condition
+    EXPECT_EQ(&s_string, State::start(lexer, common_options));
+    EXPECT_EQ(1, s_crlf.getParenCount(lexer)); // check post condition
+    EXPECT_FALSE(s_crlf.wasLastEOL(lexer));
+
+    // skip 'a' (note: until #2373 it's actually skipped as part of the '('
+    // handling)
+    s_string.handle(lexer);
+
+    // Then handle ')'.  '\n' before ')' isn't recognized because
+    // it's canceled due to the '('.  Likewise, the space after the '\n'
+    // shouldn't be recognized but should be just ignored.
+    EXPECT_EQ(s_null, State::start(lexer, common_options));
+    EXPECT_EQ(0, s_crlf.getParenCount(lexer));
+
+    // Now, temporarily disabled options are restored: Both EOL and the
+    // initial WS are recognized
+    EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
+    EXPECT_EQ(s_null, State::start(lexer, common_options));
+    EXPECT_EQ(Token::INITIAL_WS, s_crlf.getToken(lexer).getType());
+}
+
+TEST_F(MasterLexerStateTest, nestedParentheses) {
+    // This is an unusual, but allowed (in this implementation) case.
+    ss << "(a(b)\n c)\n ";
+    lexer.pushSource(ss);
+
+    EXPECT_EQ(&s_string, State::start(lexer, common_options)); // consume '('
+    s_string.handle(lexer);                      // consume 'a'
+    EXPECT_EQ(&s_string, State::start(lexer, common_options)); // consume '('
+    s_string.handle(lexer);                     // consume 'b'
+    EXPECT_EQ(2, s_crlf.getParenCount(lexer)); // now the count is 2
+
+    // Close the inner most parentheses.  count will be decreased, but option
+    // shouldn't be restored yet, so the intermediate EOL or initial WS won't
+    // be recognized.
+    EXPECT_EQ(&s_string, State::start(lexer, common_options)); // consume ')'
+    s_string.handle(lexer);                      // consume 'c'
+    EXPECT_EQ(1, s_crlf.getParenCount(lexer));
+
+    // Close the outermost parentheses.  count will be reset to 0, and original
+    // options are restored.
+    EXPECT_EQ(s_null, State::start(lexer, common_options));
+
+    // Now, temporarily disabled options are restored: Both EOL and the
+    // initial WS are recognized
+    EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
+    EXPECT_EQ(s_null, State::start(lexer, common_options));
+    EXPECT_EQ(Token::INITIAL_WS, s_crlf.getToken(lexer).getType());
+}
+
+TEST_F(MasterLexerStateTest, unbalancedParentheses) {
+    // Only closing paren is provided.  We prepend a \n to check if it's
+    // correctly canceled after detecting the error.
+    ss << "\n)";
+    ss << "(a";
+    lexer.pushSource(ss);
+
+    EXPECT_EQ(s_null, State::start(lexer, common_options)); // consume '\n'
+    EXPECT_TRUE(s_crlf.wasLastEOL(lexer)); // this \n was remembered
+
+    // Now checking ')'.  The result should be error, count shouldn't be
+    // changed.  "last EOL" should be canceled.
+    EXPECT_EQ(0, s_crlf.getParenCount(lexer));
+    EXPECT_EQ(s_null, State::start(lexer, common_options));
+    EXPECT_EQ(0, s_crlf.getParenCount(lexer));
+    ASSERT_EQ(Token::ERROR, s_crlf.getToken(lexer).getType());
+    EXPECT_EQ(Token::UNBALANCED_PAREN, s_crlf.getToken(lexer).getErrorCode());
+    EXPECT_FALSE(s_crlf.wasLastEOL(lexer));
+
+    // Reach EOF with a dangling open parenthesis.
+    EXPECT_EQ(&s_string, State::start(lexer, common_options)); // consume '('
+    s_string.handle(lexer);                      // consume 'a'
+    EXPECT_EQ(1, s_crlf.getParenCount(lexer));
+    EXPECT_EQ(s_null, State::start(lexer, common_options));    // reach EOF
+    ASSERT_EQ(Token::ERROR, s_crlf.getToken(lexer).getType());
+    EXPECT_EQ(Token::UNBALANCED_PAREN, s_crlf.getToken(lexer).getErrorCode());
+    EXPECT_EQ(0, s_crlf.getParenCount(lexer)); // should be reset to 0
+}
+
+TEST_F(MasterLexerStateTest, startToComment) {
+    // Begin with 'start', skip space, then encounter a comment.  Skip
+    // the rest of the line, and recognize the new line.  Note that the
+    // second ';' is simply ignored.
+    ss << "  ;a;\n";
+    ss << ";a;";           // Likewise, but the comment ends with EOF.
+    lexer.pushSource(ss);
+
+    // Comment ending with EOL
+    EXPECT_EQ(s_null, State::start(lexer, common_options));
+    EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
+
+    // Comment ending with EOF
+    EXPECT_EQ(s_null, State::start(lexer, common_options));
+    EXPECT_EQ(Token::END_OF_FILE, s_crlf.getToken(lexer).getType());
+}
+
+TEST_F(MasterLexerStateTest, commentAfterParen) {
+    // comment after an opening parenthesis.  The code that is tested by
+    // other tests should also ensure that it works correctly, but we
+    // check it explicitly.
+    ss << "( ;this is a comment\na)\n";
+    lexer.pushSource(ss);
+
+    // consume '(', skip comments, consume 'a', then consume ')'
+    EXPECT_EQ(&s_string, State::start(lexer, common_options));
+    s_string.handle(lexer);
+    EXPECT_EQ(s_null, State::start(lexer, common_options));
+    EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
+}
+
+TEST_F(MasterLexerStateTest, crlf) {
+    ss << "\r\n";               // case 1
+    ss << "\r ";                // case 2
+    ss << "\r;comment\na";      // case 3
+    ss << "\r";                 // case 4
+    lexer.pushSource(ss);
+
+    // 1. A sequence of \r, \n is recognized as a single 'end-of-line'
+    EXPECT_EQ(&s_crlf, State::start(lexer, common_options)); // recognize '\r'
+    EXPECT_EQ(s_null, s_crlf.handle(lexer));   // recognize '\n'
+    EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
+    EXPECT_TRUE(s_crlf.wasLastEOL(lexer));
+
+    // 2. Single '\r' (not followed by \n) is recognized as a single
+    // 'end-of-line'.  then there will be "initial WS"
+    EXPECT_EQ(&s_crlf, State::start(lexer, common_options)); // recognize '\r'
+    // see ' ', "unget" it
+    EXPECT_EQ(s_null, s_crlf.handle(lexer));
+    EXPECT_EQ(s_null, State::start(lexer, common_options)); // recognize ' '
+    EXPECT_EQ(Token::INITIAL_WS, s_crlf.getToken(lexer).getType());
+
+    // 3. comment between \r and \n
+    EXPECT_EQ(&s_crlf, State::start(lexer, common_options)); // recognize '\r'
+    // skip comments, recognize '\n'
+    EXPECT_EQ(s_null, s_crlf.handle(lexer));
+    EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
+    EXPECT_EQ(&s_string, State::start(lexer, common_options));
+
+    // 4. \r then EOF
+    EXPECT_EQ(&s_crlf, State::start(lexer, common_options)); // recognize '\r'
+    // see EOF, then "unget" it
+    EXPECT_EQ(s_null, s_crlf.handle(lexer));
+    EXPECT_EQ(s_null, State::start(lexer, common_options));  // recognize EOF
+    EXPECT_EQ(Token::END_OF_FILE, s_crlf.getToken(lexer).getType());
+}
+
+}