Browse Source

[2372] handled spaces.

handle() signature was extended so it can refer to and update lexer
options.
JINMEI Tatuya 12 years ago
parent
commit
d6ed107c38

+ 35 - 16
src/lib/dns/master_lexer.cc

@@ -142,33 +142,25 @@ State::getToken(MasterLexer& lexer) const {
 class Start : public State {
 public:
     Start() {}
-    virtual const State* handle(MasterLexer& lexer) const {
-        const int c = getLexerImpl(lexer)->source_->getChar();
-        if (c < 0) {
-            // TODO: handle unbalance cases
-            getLexerImpl(lexer)->last_was_eol_ = false;
-            getLexerImpl(lexer)->token_ = Token(Token::END_OF_FILE);
-            return (NULL);
-        } else if (c == '\n') {
-            getLexerImpl(lexer)->last_was_eol_ = true;
-            getLexerImpl(lexer)->token_ = Token(Token::END_OF_LINE);
-            return (NULL);
-        }
-        return (&State::getInstance(State::CRLF)); // placeholder
-    }
+    virtual const State* handle(MasterLexer& lexer,
+                                MasterLexer::Options& options,
+                                MasterLexer::Options orig_options) const;
 };
 
 class CRLF : public State {
 public:
     CRLF() {}
-    virtual const State* handle(MasterLexer& /*lexer*/) const {
+    virtual const State* handle(MasterLexer& /*lexer*/,
+                                MasterLexer::Options& /*options*/,
+                                MasterLexer::Options /*orig_options*/) const
+    {
         return (NULL);
     }
 };
 
 namespace {
 const Start START_STATE;
-const CRLF CRLF_STARTE;
+const CRLF CRLF_STATE;
 }
 
 const State&
@@ -176,6 +168,33 @@ State::getInstance(ID /*state_id*/) {
     return (START_STATE);
 }
 
+const State*
+Start::handle(MasterLexer& lexer, MasterLexer::Options& options,
+              MasterLexer::Options /*orig_options*/) const
+{
+    while (true) { // loop so that ignorable white space can be skipped
+        const int c = getLexerImpl(lexer)->source_->getChar();
+        if (c < 0) { // a negative value is reported as END_OF_FILE
+            // TODO: handle unbalanced cases
+            getLexerImpl(lexer)->last_was_eol_ = false;
+            getLexerImpl(lexer)->token_ = Token(Token::END_OF_FILE);
+            return (NULL);
+        } else if (c == ' ' || c == '\t') {
+            if (getLexerImpl(lexer)->last_was_eol_ &&
+                (options & MasterLexer::INITIAL_WS) != 0) {
+                getLexerImpl(lexer)->last_was_eol_ = false;
+                getLexerImpl(lexer)->token_ = Token(Token::INITIAL_WS);
+                return (NULL);
+            }
+            continue; // any other space or tab is silently skipped
+        } else if (c == '\n') {
+            getLexerImpl(lexer)->last_was_eol_ = true;
+            getLexerImpl(lexer)->token_ = Token(Token::END_OF_LINE);
+            return (NULL);
+        }
+        return (&CRLF_STATE); // placeholder
+    }
+}
 
 } // namespace master_lexer_internal
 

+ 7 - 0
src/lib/dns/master_lexer.h

@@ -50,6 +50,13 @@ class MasterLexer {
 public:
     class Token;       // we define it separately for better readability
 
+    enum Options {
+        NONE = 0, ///< No option
+        INITIAL_WS = 1, ///< recognize begin-of-line spaces
+        QSTRING = INITIAL_WS << 1, ///< recognize quoted string
+        NUMBER = QSTRING << 1, ///< recognize numeric text as integer
+    };
+
     /// \brief The constructor.
     ///
     /// \throw std::bad_alloc Internal resource allocation fails (rare case).

+ 4 - 1
src/lib/dns/master_lexer_state.h

@@ -30,7 +30,10 @@ public:
         CRLF,
         EatLine
     };
-    virtual const State* handle(MasterLexer& lexer) const = 0;
+    virtual const State* handle(MasterLexer& lexer,
+                                MasterLexer::Options& options,
+                                MasterLexer::Options orig_options =
+                                MasterLexer::NONE) const = 0;
 
     static const State& getInstance(ID state_id);
 

+ 30 - 4
src/lib/dns/tests/master_lexer_state_unittest.cc

@@ -29,7 +29,8 @@ typedef MasterLexer::Token Token; // shortcut
 class MasterLexerStateTest : public ::testing::Test {
 protected:
     MasterLexerStateTest() : s_start(State::getInstance(State::Start)),
-                             s_crlf(State::getInstance(State::CRLF))
+                             s_crlf(State::getInstance(State::CRLF)),
+                             options(MasterLexer::NONE)
     {
         lexer.open(ss);
     }
@@ -37,6 +38,7 @@ protected:
     const State& s_crlf;
     MasterLexer lexer;
     std::stringstream ss;
+    MasterLexer::Options options;
 };
 
 // Common check for the end-of-file condition.
@@ -52,21 +54,45 @@ eofCheck(const State& state, MasterLexer& lexer) {
 TEST_F(MasterLexerStateTest, startAndEnd) {
     // A simple case: the input is empty, so we begin with start and
     // are immediately done.
-    const State* s_next = s_start.handle(lexer);
+    const State* s_next = s_start.handle(lexer, options);
     EXPECT_EQ(static_cast<const State*>(NULL), s_next);
     eofCheck(s_start, lexer);
 }
 
 TEST_F(MasterLexerStateTest, startToEOL) {
     ss << "\n";
-    const State* s_next = s_start.handle(lexer);
+    const State* s_next = s_start.handle(lexer, options);
     EXPECT_EQ(static_cast<const State*>(NULL), s_next);
     EXPECT_TRUE(s_start.wasLastEOL(lexer));
     EXPECT_EQ(Token::END_OF_LINE, s_start.getToken(lexer).getType());
 
     // The next lexer session will reach EOF.  Same eof check should pass.
-    s_start.handle(lexer);
+    s_start.handle(lexer, options);
     eofCheck(s_start, lexer);
 }
 
+TEST_F(MasterLexerStateTest, space) {
+    // By default space characters and tabs will be ignored.  We check this
+    // twice; at the second iteration, it's a white space at the beginning
+    // of a line, but since we don't specify the INITIAL_WS option, it's
+    // treated as normal space and ignored.
+    const State* s_next;
+    for (size_t i = 0; i < 2; ++i) {
+        ss << " \t\n"; // space, tab, then the newline ending the line
+        s_next = s_start.handle(lexer, options);
+        EXPECT_EQ(static_cast<const State*>(NULL), s_next);
+        EXPECT_TRUE(s_start.wasLastEOL(lexer));
+        EXPECT_EQ(Token::END_OF_LINE, s_start.getToken(lexer).getType());
+    }
+
+    // Now we specify the INITIAL_WS option.  It will be recognized and the
+    // corresponding token will be returned.
+    ss << " "; // follows the newline written by the loop above
+    options = MasterLexer::INITIAL_WS;
+    s_next = s_start.handle(lexer, options);
+    EXPECT_EQ(static_cast<const State*>(NULL), s_next);
+    EXPECT_FALSE(s_start.wasLastEOL(lexer));
+    EXPECT_EQ(Token::INITIAL_WS, s_start.getToken(lexer).getType());
+}
+
 }