Browse Source

[2374] Add Number lexer state

Jelte Jansen 12 years ago
parent
commit
70a7eb4ae1

+ 51 - 0
src/lib/dns/master_lexer.cc

@@ -236,6 +236,13 @@ public:
     virtual const State* handle(MasterLexer& lexer) const;
 };
 
+class Number : public State { // lexer state for tokens that start with a digit
+public:
+    Number() {} // explicit ctor: NUMBER_STATE is a const object with no initializer
+    virtual ~Number() {}
+    virtual const State* handle(MasterLexer& lexer) const; // number or string token
+};
+
 // We use a common instance of each state in a singleton-like way to save
 // construction overhead.  They are not singletons in the strict sense as
 // we don't prohibit direct construction of these objects.  But that doesn't
@@ -244,6 +251,7 @@ public:
 const CRLF CRLF_STATE;
 const String STRING_STATE;
 const QString QSTRING_STATE;
+const Number NUMBER_STATE;
 }
 
 const State&
@@ -255,6 +263,8 @@ State::getInstance(ID state_id) {
         return (STRING_STATE);
     case QString:
         return (QSTRING_STATE);
+    case Number:
+        return (NUMBER_STATE);
     }
 
     // This is a bug of the caller, and this method is only expected to be
@@ -315,6 +325,11 @@ State::start(MasterLexer& lexer, MasterLexer::Options options) {
                 return (NULL);
             }
             --paren_count;
+        } else if (isdigit(c)) {
+            lexerimpl.last_was_eol_ = false;
+            // this character will be handled in the number state
+            lexerimpl.source_->ungetChar();
+            return (&NUMBER_STATE);
         } else {
             // this character will be handled in the string state
             lexerimpl.source_->ungetChar();
@@ -379,6 +394,42 @@ QString::handle(MasterLexer& lexer) const {
     }
 }
 
+// Handle a token that starts with a digit.
+//
+// State::start() selects this state after seeing (and pushing back) a
+// digit.  Characters are read from the current source into the lexer's
+// data buffer until isTokenEnd() reports the end of the token; that
+// terminating character is pushed back so it can be handled afterwards.
+// If every collected character is a decimal digit the resulting token
+// is a number converted with strtoul(); otherwise (e.g. "123abc456")
+// it is a string token referring to the collected data.
+//
+// Always returns NULL.
+const State*
+Number::handle(MasterLexer& lexer) const {
+    MasterLexer::Token& token = getLexerImpl(lexer)->token_;
+    // Do we want to support octal and/or hex here?
+    const unsigned int base = 10;
+
+    // It may yet turn out to be a string, so we first
+    // collect all the data
+    bool digits_only = true;
+    std::vector<char>& data = getLexerImpl(lexer)->data_;
+    data.clear();
+    bool escaped = false;
+
+    while (true) {
+        const int c = getLexerImpl(lexer)->source_->getChar();
+        if (getLexerImpl(lexer)->isTokenEnd(c, escaped)) {
+            break;
+        }
+        // Test the digit range directly rather than calling isdigit():
+        // c is a plain int, and isdigit() is undefined for values that
+        // are neither EOF nor representable as unsigned char.
+        if (c < '0' || c > '9') {
+            digits_only = false;
+        }
+        // A backslash escapes the next character unless it is itself
+        // escaped.
+        escaped = (c == '\\' && !escaped);
+        data.push_back(static_cast<char>(c));
+    }
+
+    // The character that ended the token is not part of it.
+    getLexerImpl(lexer)->source_->ungetChar();
+
+    if (digits_only) {
+        // Close the string for strtoul
+        data.push_back('\0');
+        // NOTE(review): out-of-range values are not rejected here; the
+        // unit test expects wraparound (2^32 reads back as 0), which
+        // presumably relies on unsigned long being wider than the
+        // token's number type -- confirm on platforms where it is not.
+        token = MasterLexer::Token(strtoul(&data.at(0), NULL, base));
+    } else {
+        token = MasterLexer::Token(&data.at(0), data.size());
+    }
+    return (NULL);
+}
+
 } // namespace master_lexer_internal
 
 } // end of namespace dns

+ 2 - 1
src/lib/dns/master_lexer_state.h

@@ -99,7 +99,8 @@ public:
     enum ID {
         CRLF,                  ///< Just seen a carriage-return character
         String,                ///< Handling a string token
-        QString                ///< Handling a quoted string token
+        QString,               ///< Handling a quoted string token
+        Number                 ///< Handling a number
     };
 
     /// \brief Returns a \c State instance of the given state.

+ 63 - 0
src/lib/dns/tests/master_lexer_state_unittest.cc

@@ -33,6 +33,7 @@ protected:
                              s_crlf(State::getInstance(State::CRLF)),
                              s_string(State::getInstance(State::String)),
                              s_qstring(State::getInstance(State::QString)),
+                             s_number(State::getInstance(State::Number)),
                              options(MasterLexer::NONE),
                              orig_options(options)
     {}
@@ -44,6 +45,7 @@ protected:
     const State& s_crlf;
     const State& s_string;
     const State& s_qstring;
+    const State& s_number;
     std::stringstream ss;
     MasterLexer::Options options, orig_options;
 };
@@ -450,4 +452,65 @@ TEST_F(MasterLexerStateTest, brokenQuotedString) {
     EXPECT_EQ(Token::END_OF_FILE, s_crlf.getToken(lexer).getType());
 }
 
+// Checks the Number state: tokens made only of digits become number
+// tokens, anything else ends up as a string token.
+TEST_F(MasterLexerStateTest, number) {
+    ss << "0 ";
+    ss << "1 ";
+    ss << "12345 ";
+    ss << "4294967295 "; // 2^32-1
+    ss << "4294967296 "; // 2^32 (this overflows to 0, we
+                         // can consider failing on it, but
+                         // this is what bind9 does as well)
+    ss << "4294967297 "; // 2^32+1 (this overflows to 1, see
+                         // above)
+    ss << "1000000000000000000 "; // overflows to 2808348672
+    ss << "005 ";        // Leading zeroes are ignored
+    ss << "-1 ";         // Negative numbers are interpreted
+                         // as strings (unsigned integers only)
+    ss << "123abc456";   // 'Numbers' containing non-digits should
+                         // be interpreted as strings
+
+    lexer.pushSource(ss);
+
+    EXPECT_EQ(&s_number, State::start(lexer, common_options));
+    EXPECT_EQ(s_null, s_number.handle(lexer));
+    EXPECT_EQ(0, s_number.getToken(lexer).getNumber());
+
+    EXPECT_EQ(&s_number, State::start(lexer, common_options));
+    EXPECT_EQ(s_null, s_number.handle(lexer));
+    EXPECT_EQ(1, s_number.getToken(lexer).getNumber());
+
+    EXPECT_EQ(&s_number, State::start(lexer, common_options));
+    EXPECT_EQ(s_null, s_number.handle(lexer));
+    EXPECT_EQ(12345, s_number.getToken(lexer).getNumber());
+
+    EXPECT_EQ(&s_number, State::start(lexer, common_options));
+    EXPECT_EQ(s_null, s_number.handle(lexer));
+    EXPECT_EQ(4294967295, s_number.getToken(lexer).getNumber());
+
+    EXPECT_EQ(&s_number, State::start(lexer, common_options));
+    EXPECT_EQ(s_null, s_number.handle(lexer));
+    EXPECT_EQ(0, s_number.getToken(lexer).getNumber());
+
+    EXPECT_EQ(&s_number, State::start(lexer, common_options));
+    EXPECT_EQ(s_null, s_number.handle(lexer));
+    EXPECT_EQ(1, s_number.getToken(lexer).getNumber());
+
+    EXPECT_EQ(&s_number, State::start(lexer, common_options));
+    EXPECT_EQ(s_null, s_number.handle(lexer));
+    EXPECT_EQ(2808348672, s_number.getToken(lexer).getNumber());
+
+    EXPECT_EQ(&s_number, State::start(lexer, common_options));
+    EXPECT_EQ(s_null, s_number.handle(lexer));
+    EXPECT_EQ(5, s_number.getToken(lexer).getNumber());
+
+    // A leading '-' is not a digit, so this goes to the string state
+    EXPECT_EQ(&s_string, State::start(lexer, common_options));
+    EXPECT_EQ(s_null, s_string.handle(lexer));
+    stringTokenCheck("-1", s_string.getToken(lexer), false);
+
+    // Starts out as a number, but ends up being a string.  This must go
+    // through the number state's own handler; calling the string state
+    // here would leave the fallback path in Number::handle() untested.
+    EXPECT_EQ(&s_number, State::start(lexer, common_options));
+    EXPECT_EQ(s_null, s_number.handle(lexer));
+    stringTokenCheck("123abc456", s_number.getToken(lexer), false);
+}
+
 }