Browse Source

[2372] even further simplification: deprecate start state

I recognized we only need to pass options to the initial state.  So it makes
more sense to handle this case separately.  this will simplify the interface
for the other classes, and it will make the entry point for the lexer clearer.
JINMEI Tatuya 12 years ago
parent
commit
3be2894baf

+ 19 - 30
src/lib/dns/master_lexer.cc

@@ -156,18 +156,10 @@ State::getParenCount(const MasterLexer& lexer) const {
     return (lexer.impl_->paren_count_);
 }
 
-class Start : public State {
-public:
-    Start() {}
-    virtual const State* handle(MasterLexer& lexer,
-                                MasterLexer::Options options) const;
-};
-
 class CRLF : public State {
 public:
     CRLF() {}
-    virtual const State* handle(MasterLexer& lexer, MasterLexer::Options) const
-    {
+    virtual const State* handle(MasterLexer& lexer) const {
         // We've just seen '\r'.  If this is part of a sequence of '\r\n',
         // we combine them as a single END-OF-LINE.  Otherwise we treat the
         // single '\r' as an EOL and continue tokeniziation from the character
@@ -192,15 +184,13 @@ public:
 class String : public State {
 public:
     String() {}
-    virtual const State* handle(MasterLexer& /*lexer*/,
-                                MasterLexer::Options /*options*/) const
+    virtual const State* handle(MasterLexer& /*lexer*/) const
     {
         return (NULL);
     }
 };
 
 namespace {
-const Start START_STATE;
 const CRLF CRLF_STATE;
 const String STRING_STATE;
 }
@@ -208,8 +198,6 @@ const String STRING_STATE;
 const State&
 State::getInstance(ID state_id) {
     switch (state_id) {
-    case Start:
-        return (START_STATE);
     case CRLF:
         return (CRLF_STATE);
     case String:
@@ -218,34 +206,35 @@ State::getInstance(ID state_id) {
 }
 
 const State*
-Start::handle(MasterLexer& lexer, MasterLexer::Options options) const {
-    size_t& paren_count = getLexerImpl(lexer)->paren_count_; // shortcut
+State::start(MasterLexer& lexer, MasterLexer::Options options) {
+    // define some shortcuts
+    MasterLexer::MasterLexerImpl& lexerimpl = *lexer.impl_;
+    size_t& paren_count = lexerimpl.paren_count_;
 
     while (true) {
-        const int c = getLexerImpl(lexer)->skipComment(
-            getLexerImpl(lexer)->source_->getChar());
+        const int c = lexerimpl.skipComment(lexerimpl.source_->getChar());
         if (c == InputSource::END_OF_STREAM) {
-            getLexerImpl(lexer)->last_was_eol_ = false;
+            lexerimpl.last_was_eol_ = false;
             if (paren_count != 0) {
-                getLexerImpl(lexer)->token_ = Token(Token::UNBALANCED_PAREN);
+                lexerimpl.token_ = Token(Token::UNBALANCED_PAREN);
                 paren_count = 0; // reset to 0; this helps in lenient mode.
                 return (NULL);
             }
-            getLexerImpl(lexer)->token_ = Token(Token::END_OF_FILE);
+            lexerimpl.token_ = Token(Token::END_OF_FILE);
             return (NULL);
         } else if (c == ' ' || c == '\t') {
             // If requested and we are not in (), recognize the initial space.
-            if (getLexerImpl(lexer)->last_was_eol_ && paren_count == 0 &&
+            if (lexerimpl.last_was_eol_ && paren_count == 0 &&
                 (options & MasterLexer::INITIAL_WS) != 0) {
-                getLexerImpl(lexer)->last_was_eol_ = false;
-                getLexerImpl(lexer)->token_ = Token(Token::INITIAL_WS);
+                lexerimpl.last_was_eol_ = false;
+                lexerimpl.token_ = Token(Token::INITIAL_WS);
                 return (NULL);
             }
             continue;
         } else if (c == '\n') {
-            getLexerImpl(lexer)->last_was_eol_ = true;
+            lexerimpl.last_was_eol_ = true;
             if (paren_count == 0) { // we don't recognize EOL if we are in ()
-                getLexerImpl(lexer)->token_ = Token(Token::END_OF_LINE);
+                lexerimpl.token_ = Token(Token::END_OF_LINE);
                 return (NULL);
             }
         } else if (c == '\r') {
@@ -253,20 +242,20 @@ Start::handle(MasterLexer& lexer, MasterLexer::Options options) const {
                 return (&CRLF_STATE);
             }
         } else if (c == '(') {
-            getLexerImpl(lexer)->last_was_eol_ = false;
+            lexerimpl.last_was_eol_ = false;
             ++paren_count;
             continue;
         } else if (c == ')') {
-            getLexerImpl(lexer)->last_was_eol_ = false;
+            lexerimpl.last_was_eol_ = false;
             if (paren_count == 0) {
-                getLexerImpl(lexer)->token_ = Token(Token::UNBALANCED_PAREN);
+                lexerimpl.token_ = Token(Token::UNBALANCED_PAREN);
                 return (NULL);
             }
             --paren_count;
             continue;
         } else {
             // Note: in #2373 we should probably ungetChar().
-            getLexerImpl(lexer)->last_was_eol_ = false;
+            lexerimpl.last_was_eol_ = false;
             return (&STRING_STATE);
         }
     }

+ 4 - 4
src/lib/dns/master_lexer_state.h

@@ -25,15 +25,15 @@ class InputSource;
 
 class State {
 public:
-    virtual const State* handle(MasterLexer& lexer,
-                                MasterLexer::Options options) const = 0;
+    static const State* start(MasterLexer& lexer,
+                              MasterLexer::Options options);
+    virtual const State* handle(MasterLexer& lexer) const = 0;
 
     /// Specific states are basically hidden within the implementation,
     /// but we'd like to allow tests to examine them, so we provide
     /// a way to get an instance of a specific state.
     enum ID {
-        Start,                  ///< TBD
-        CRLF,
+        CRLF,                  ///< TBD
         String
     };
     static const State& getInstance(ID state_id);

+ 75 - 78
src/lib/dns/tests/master_lexer_state_unittest.cc

@@ -30,7 +30,6 @@ class MasterLexerStateTest : public ::testing::Test {
 protected:
     MasterLexerStateTest() : common_options(MasterLexer::INITIAL_WS),
                              s_null(NULL),
-                             s_start(State::getInstance(State::Start)),
                              s_crlf(State::getInstance(State::CRLF)),
                              s_string(State::getInstance(State::String)),
                              options(MasterLexer::NONE),
@@ -43,9 +42,7 @@ protected:
     const MasterLexer::Options common_options;
     MasterLexer lexer;
     const State* const s_null;
-    const State& s_start;
     const State& s_crlf;
-
     const State& s_string;
     std::stringstream ss;
     MasterLexer::Options options, orig_options;
@@ -64,19 +61,19 @@ eofCheck(const State& state, MasterLexer& lexer) {
 TEST_F(MasterLexerStateTest, startAndEnd) {
     // A simple case: the input is empty, so we begin with start and
     // are immediately done.
-    EXPECT_EQ(s_null, s_start.handle(lexer, common_options));
-    eofCheck(s_start, lexer);
+    EXPECT_EQ(s_null, State::start(lexer, common_options));
+    eofCheck(s_crlf, lexer);
 }
 
 TEST_F(MasterLexerStateTest, startToEOL) {
     ss << "\n";
-    EXPECT_EQ(s_null, s_start.handle(lexer, common_options));
-    EXPECT_TRUE(s_start.wasLastEOL(lexer));
-    EXPECT_EQ(Token::END_OF_LINE, s_start.getToken(lexer).getType());
+    EXPECT_EQ(s_null, State::start(lexer, common_options));
+    EXPECT_TRUE(s_crlf.wasLastEOL(lexer));
+    EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
 
     // The next lexer session will reach EOF.  Same eof check should pass.
-    EXPECT_EQ(s_null, s_start.handle(lexer, common_options));
-    eofCheck(s_start, lexer);
+    EXPECT_EQ(s_null, State::start(lexer, common_options));
+    eofCheck(s_crlf, lexer);
 }
 
 TEST_F(MasterLexerStateTest, space) {
@@ -86,72 +83,72 @@ TEST_F(MasterLexerStateTest, space) {
     // normal space and ignored.
     for (size_t i = 0; i < 2; ++i) {
         ss << " \t\n";
-        EXPECT_EQ(s_null, s_start.handle(lexer, MasterLexer::NONE));
-        EXPECT_TRUE(s_start.wasLastEOL(lexer));
-        EXPECT_EQ(Token::END_OF_LINE, s_start.getToken(lexer).getType());
+        EXPECT_EQ(s_null, State::start(lexer, MasterLexer::NONE));
+        EXPECT_TRUE(s_crlf.wasLastEOL(lexer));
+        EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
     }
 
     // Now we specify the INITIAL_WS option.  It will be recognized and the
     // corresponding token will be returned.
     ss << " ";
-    EXPECT_EQ(s_null, s_start.handle(lexer, MasterLexer::INITIAL_WS));
-    EXPECT_FALSE(s_start.wasLastEOL(lexer));
-    EXPECT_EQ(Token::INITIAL_WS, s_start.getToken(lexer).getType());
+    EXPECT_EQ(s_null, State::start(lexer, MasterLexer::INITIAL_WS));
+    EXPECT_FALSE(s_crlf.wasLastEOL(lexer));
+    EXPECT_EQ(Token::INITIAL_WS, s_crlf.getToken(lexer).getType());
 }
 
 TEST_F(MasterLexerStateTest, parentheses) {
     ss << "\n(\na\n )\n "; // 1st \n is to check if 'was EOL' is set to false
 
-    EXPECT_EQ(s_null, s_start.handle(lexer, common_options)); // handle \n
+    EXPECT_EQ(s_null, State::start(lexer, common_options)); // handle \n
 
     // Now handle '('.  It skips \n and recognize 'a' as string
-    EXPECT_EQ(0, s_start.getParenCount(lexer)); // check pre condition
-    EXPECT_EQ(&s_string, s_start.handle(lexer, common_options));
-    EXPECT_EQ(1, s_start.getParenCount(lexer)); // check post condition
-    EXPECT_FALSE(s_start.wasLastEOL(lexer));
+    EXPECT_EQ(0, s_crlf.getParenCount(lexer)); // check pre condition
+    EXPECT_EQ(&s_string, State::start(lexer, common_options));
+    EXPECT_EQ(1, s_crlf.getParenCount(lexer)); // check post condition
+    EXPECT_FALSE(s_crlf.wasLastEOL(lexer));
 
     // skip 'a' (note: until #2373 it's actually skipped as part of the '('
     // handling)
-    s_string.handle(lexer, common_options);
+    s_string.handle(lexer);
 
     // Then handle ')'.  '\n' before ')' isn't recognized because
     // it's canceled due to the '('.  Likewise, the space after the '\n'
     // shouldn't be recognized but should be just ignored.
-    EXPECT_EQ(s_null, s_start.handle(lexer, common_options));
-    EXPECT_EQ(0, s_start.getParenCount(lexer));
+    EXPECT_EQ(s_null, State::start(lexer, common_options));
+    EXPECT_EQ(0, s_crlf.getParenCount(lexer));
 
     // Now, temporarily disabled options are restored: Both EOL and the
     // initial WS are recognized
-    EXPECT_EQ(Token::END_OF_LINE, s_start.getToken(lexer).getType());
-    EXPECT_EQ(s_null, s_start.handle(lexer, common_options));
-    EXPECT_EQ(Token::INITIAL_WS, s_start.getToken(lexer).getType());
+    EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
+    EXPECT_EQ(s_null, State::start(lexer, common_options));
+    EXPECT_EQ(Token::INITIAL_WS, s_crlf.getToken(lexer).getType());
 }
 
 TEST_F(MasterLexerStateTest, nestedParentheses) {
     // This is an unusual, but allowed (in this implementation) case.
     ss << "(a(b)\n c)\n ";
-    EXPECT_EQ(&s_string, s_start.handle(lexer, common_options)); // consume '('
-    s_string.handle(lexer, common_options);                      // consume 'a'
-    EXPECT_EQ(&s_string, s_start.handle(lexer, common_options)); // consume '('
-    s_string.handle(lexer, common_options);                     // consume 'b'
-    EXPECT_EQ(2, s_start.getParenCount(lexer)); // now the count is 2
+    EXPECT_EQ(&s_string, State::start(lexer, common_options)); // consume '('
+    s_string.handle(lexer);                      // consume 'a'
+    EXPECT_EQ(&s_string, State::start(lexer, common_options)); // consume '('
+    s_string.handle(lexer);                     // consume 'b'
+    EXPECT_EQ(2, s_crlf.getParenCount(lexer)); // now the count is 2
 
     // Close the inner most parentheses.  count will be decreased, but option
     // shouldn't be restored yet, so the intermediate EOL or initial WS won't
     // be recognized.
-    EXPECT_EQ(&s_string, s_start.handle(lexer, common_options)); // consume ')'
-    s_string.handle(lexer, common_options);                      // consume 'c'
-    EXPECT_EQ(1, s_start.getParenCount(lexer));
+    EXPECT_EQ(&s_string, State::start(lexer, common_options)); // consume ')'
+    s_string.handle(lexer);                      // consume 'c'
+    EXPECT_EQ(1, s_crlf.getParenCount(lexer));
 
     // Close the outermost parentheses.  count will be reset to 0, and original
     // options are restored.
-    EXPECT_EQ(s_null, s_start.handle(lexer, common_options));
+    EXPECT_EQ(s_null, State::start(lexer, common_options));
 
     // Now, temporarily disabled options are restored: Both EOL and the
     // initial WS are recognized
-    EXPECT_EQ(Token::END_OF_LINE, s_start.getToken(lexer).getType());
-    EXPECT_EQ(s_null, s_start.handle(lexer, common_options));
-    EXPECT_EQ(Token::INITIAL_WS, s_start.getToken(lexer).getType());
+    EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
+    EXPECT_EQ(s_null, State::start(lexer, common_options));
+    EXPECT_EQ(Token::INITIAL_WS, s_crlf.getToken(lexer).getType());
 }
 
 TEST_F(MasterLexerStateTest, unbalancedParentheses) {
@@ -159,27 +156,27 @@ TEST_F(MasterLexerStateTest, unbalancedParentheses) {
     // correctly canceled after detecting the error.
     ss << "\n)";
 
-    EXPECT_EQ(s_null, s_start.handle(lexer, common_options)); // consume '\n'
-    EXPECT_TRUE(s_start.wasLastEOL(lexer)); // this \n was remembered
+    EXPECT_EQ(s_null, State::start(lexer, common_options)); // consume '\n'
+    EXPECT_TRUE(s_crlf.wasLastEOL(lexer)); // this \n was remembered
 
     // Now checking ')'.  The result should be error, count shouldn't be
     // changed.  "last EOL" should be canceled.
-    EXPECT_EQ(0, s_start.getParenCount(lexer));
-    EXPECT_EQ(s_null, s_start.handle(lexer, common_options));
-    EXPECT_EQ(0, s_start.getParenCount(lexer));
-    ASSERT_EQ(Token::ERROR, s_start.getToken(lexer).getType());
-    EXPECT_EQ(Token::UNBALANCED_PAREN, s_start.getToken(lexer).getErrorCode());
-    EXPECT_FALSE(s_start.wasLastEOL(lexer));
+    EXPECT_EQ(0, s_crlf.getParenCount(lexer));
+    EXPECT_EQ(s_null, State::start(lexer, common_options));
+    EXPECT_EQ(0, s_crlf.getParenCount(lexer));
+    ASSERT_EQ(Token::ERROR, s_crlf.getToken(lexer).getType());
+    EXPECT_EQ(Token::UNBALANCED_PAREN, s_crlf.getToken(lexer).getErrorCode());
+    EXPECT_FALSE(s_crlf.wasLastEOL(lexer));
 
     // Reach EOF with a dangling open parenthesis.
     ss << "(a";
-    EXPECT_EQ(&s_string, s_start.handle(lexer, common_options)); // consume '('
-    s_string.handle(lexer, common_options);                      // consume 'a'
-    EXPECT_EQ(1, s_start.getParenCount(lexer));
-    EXPECT_EQ(s_null, s_start.handle(lexer, common_options));    // reach EOF
-    ASSERT_EQ(Token::ERROR, s_start.getToken(lexer).getType());
-    EXPECT_EQ(Token::UNBALANCED_PAREN, s_start.getToken(lexer).getErrorCode());
-    EXPECT_EQ(0, s_start.getParenCount(lexer)); // should be reset to 0
+    EXPECT_EQ(&s_string, State::start(lexer, common_options)); // consume '('
+    s_string.handle(lexer);                      // consume 'a'
+    EXPECT_EQ(1, s_crlf.getParenCount(lexer));
+    EXPECT_EQ(s_null, State::start(lexer, common_options));    // reach EOF
+    ASSERT_EQ(Token::ERROR, s_crlf.getToken(lexer).getType());
+    EXPECT_EQ(Token::UNBALANCED_PAREN, s_crlf.getToken(lexer).getErrorCode());
+    EXPECT_EQ(0, s_crlf.getParenCount(lexer)); // should be reset to 0
 }
 
 TEST_F(MasterLexerStateTest, startToComment) {
@@ -187,13 +184,13 @@ TEST_F(MasterLexerStateTest, startToComment) {
     // the rest of the line, and recognize the new line.  Note that the
     // second ';' is simply ignored.
     ss << "  ;a;\n";
-    EXPECT_EQ(s_null, s_start.handle(lexer, common_options));
-    EXPECT_EQ(Token::END_OF_LINE, s_start.getToken(lexer).getType());
+    EXPECT_EQ(s_null, State::start(lexer, common_options));
+    EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
 
     // Likewise, but the comment ends with EOF.
     ss << ";a;";
-    EXPECT_EQ(s_null, s_start.handle(lexer, common_options));
-    EXPECT_EQ(Token::END_OF_FILE, s_start.getToken(lexer).getType());
+    EXPECT_EQ(s_null, State::start(lexer, common_options));
+    EXPECT_EQ(Token::END_OF_FILE, s_crlf.getToken(lexer).getType());
 }
 
 TEST_F(MasterLexerStateTest, commentAfterParen) {
@@ -202,42 +199,42 @@ TEST_F(MasterLexerStateTest, commentAfterParen) {
     // check it explicitly.
     ss << "( ;this is a comment\na)\n";
     // consume '(', skip comments, consume 'a', then consume ')'
-    EXPECT_EQ(&s_string, s_start.handle(lexer, common_options));
-    s_string.handle(lexer, common_options);
-    EXPECT_EQ(s_null, s_start.handle(lexer, common_options));
-    EXPECT_EQ(Token::END_OF_LINE, s_start.getToken(lexer).getType());
+    EXPECT_EQ(&s_string, State::start(lexer, common_options));
+    s_string.handle(lexer);
+    EXPECT_EQ(s_null, State::start(lexer, common_options));
+    EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
 }
 
 TEST_F(MasterLexerStateTest, crlf) {
     // A sequence of \r, \n is recognized as a single 'end-of-line'
     ss << "\r\n";
-    EXPECT_EQ(&s_crlf, s_start.handle(lexer, common_options)); // recognize '\r'
-    EXPECT_EQ(s_null, s_crlf.handle(lexer, common_options));   // recognize '\n'
-    EXPECT_EQ(Token::END_OF_LINE, s_start.getToken(lexer).getType());
-    EXPECT_TRUE(s_start.wasLastEOL(lexer));
+    EXPECT_EQ(&s_crlf, State::start(lexer, common_options)); // recognize '\r'
+    EXPECT_EQ(s_null, s_crlf.handle(lexer));   // recognize '\n'
+    EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
+    EXPECT_TRUE(s_crlf.wasLastEOL(lexer));
 
     // Single '\r' (not followed by \n) is recognized as a single 'end-of-line'
     ss << "\r ";                // then there will be "initial WS"
-    EXPECT_EQ(&s_crlf, s_start.handle(lexer, common_options)); // recognize '\r'
+    EXPECT_EQ(&s_crlf, State::start(lexer, common_options)); // recognize '\r'
     // see ' ', "unget" it
-    EXPECT_EQ(s_null, s_crlf.handle(lexer, common_options));
-    EXPECT_EQ(s_null, s_start.handle(lexer, common_options)); // recognize ' '
-    EXPECT_EQ(Token::INITIAL_WS, s_start.getToken(lexer).getType());
+    EXPECT_EQ(s_null, s_crlf.handle(lexer));
+    EXPECT_EQ(s_null, State::start(lexer, common_options)); // recognize ' '
+    EXPECT_EQ(Token::INITIAL_WS, s_crlf.getToken(lexer).getType());
 
     ss << "\r;comment\na";
-    EXPECT_EQ(&s_crlf, s_start.handle(lexer, common_options)); // recognize '\r'
+    EXPECT_EQ(&s_crlf, State::start(lexer, common_options)); // recognize '\r'
     // skip comments, recognize '\n'
-    EXPECT_EQ(s_null, s_crlf.handle(lexer, common_options));
-    EXPECT_EQ(Token::END_OF_LINE, s_start.getToken(lexer).getType());
-    EXPECT_EQ(&s_string, s_start.handle(lexer, common_options));
+    EXPECT_EQ(s_null, s_crlf.handle(lexer));
+    EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
+    EXPECT_EQ(&s_string, State::start(lexer, common_options));
 
     // \r then EOF
     ss << "\r";
-    EXPECT_EQ(&s_crlf, s_start.handle(lexer, common_options)); // recognize '\r'
+    EXPECT_EQ(&s_crlf, State::start(lexer, common_options)); // recognize '\r'
     // see EOF, then "unget" it
-    EXPECT_EQ(s_null, s_crlf.handle(lexer, common_options));
-    EXPECT_EQ(s_null, s_start.handle(lexer, common_options));  // recognize EOF
-    EXPECT_EQ(Token::END_OF_FILE, s_start.getToken(lexer).getType());
+    EXPECT_EQ(s_null, s_crlf.handle(lexer));
+    EXPECT_EQ(s_null, State::start(lexer, common_options));  // recognize EOF
+    EXPECT_EQ(Token::END_OF_FILE, s_crlf.getToken(lexer).getType());
 }
 
 }