Browse Source

[2382] unrelated fix to lexer: support empty qstring and nul termination.

An empty qstring previously caused an exception, which is a clear bug
and should be fixed.  Nul-terminating string regions is an extension,
but I found it useful when implementing RDATA parsers.
JINMEI Tatuya 12 years ago
parent
commit
f73f27474f

+ 10 - 3
src/lib/dns/master_lexer.cc

@@ -458,8 +458,11 @@ String::handle(MasterLexer& lexer) const {
 
         if (getLexerImpl(lexer)->isTokenEnd(c, escaped)) {
             getLexerImpl(lexer)->source_->ungetChar();
+            // ensure it is nul-terminated as a c-str (excluded from token
+            // data).
+            data.push_back('\0');
             getLexerImpl(lexer)->token_ =
-                MasterToken(&data.at(0), data.size());
+                MasterToken(&data.at(0), data.size() - 1);
             return;
         }
         escaped = (c == '\\' && !escaped);
@@ -486,7 +489,10 @@ QString::handle(MasterLexer& lexer) const {
                 escaped = false;
                 data.back() = '"';
             } else {
-                token = MasterToken(&data.at(0), data.size(), true);
+                // ensure it is nul-terminated as a c-str (excluded from token
+                // data).  This also simplifies the case of an empty string.
+                data.push_back('\0');
+                token = MasterToken(&data.at(0), data.size() - 1, true);
                 return;
             }
         } else if (c == '\n' && !escaped) {
@@ -529,7 +535,8 @@ Number::handle(MasterLexer& lexer) const {
                     token = MasterToken(MasterToken::NUMBER_OUT_OF_RANGE);
                 }
             } else {
-                token = MasterToken(&data.at(0), data.size());
+                data.push_back('\0'); // see String::handle()
+                token = MasterToken(&data.at(0), data.size() - 1);
             }
             return;
         }

+ 7 - 0
src/lib/dns/master_lexer.h

@@ -90,6 +90,13 @@ public:
     /// the region.  On the other hand, it is not ensured that the string
     /// is nul-terminated.  So the usual string manipulation API may not work
     /// as expected.
+    ///
+    /// The `MasterLexer` implementation ensures that there are at least
+    /// len + 1 bytes of valid memory region starting from beg, and that
+    /// beg[len] is \0.  This means the application can use the bytes as a
+    /// validly nul-terminated C string if there is no intermediate nul
+    /// character.  Note also that due to this property beg is always
+    /// non-NULL; for an empty string len will be set to 0 and beg[0] is \0.
     struct StringRegion {
         const char* beg;        ///< The start address of the string
         size_t len;             ///< The length of the string in bytes

+ 10 - 0
src/lib/dns/tests/master_lexer_state_unittest.cc

@@ -269,6 +269,10 @@ stringTokenCheck(const std::string& expected, const MasterToken& token,
                              token.getStringRegion().beg +
                              token.getStringRegion().len);
     EXPECT_EQ(expected, actual);
+
+    // There should be a "hidden" nul terminator after the string data.
+    ASSERT_NE(static_cast<const char*>(NULL), token.getStringRegion().beg);
+    EXPECT_EQ(0, *(token.getStringRegion().beg + token.getStringRegion().len));
 }
 
 TEST_F(MasterLexerStateTest, string) {
@@ -365,6 +369,7 @@ TEST_F(MasterLexerStateTest, stringEscape) {
 TEST_F(MasterLexerStateTest, quotedString) {
     ss << "\"ignore-quotes\"\n";
     ss << "\"quoted string\" "; // space is part of the qstring
+    ss << "\"\" "; // empty quoted string
     // also check other separator characters. note that \r doesn't cause
     // UNBALANCED_QUOTES.  Not sure if it's intentional, but that's how the
     // BIND 9 version works, so we follow it (it should be too minor to matter
@@ -391,6 +396,11 @@ TEST_F(MasterLexerStateTest, quotedString) {
     s_qstring.handle(lexer);
     stringTokenCheck("quoted string", s_string.getToken(lexer), true);
 
+    // Empty string is okay as qstring
+    EXPECT_EQ(&s_qstring, State::start(lexer, options));
+    s_qstring.handle(lexer);
+    stringTokenCheck("", s_string.getToken(lexer), true);
+
     // Also checks other separator characters within a qstring
     EXPECT_EQ(&s_qstring, State::start(lexer, options));
     s_qstring.handle(lexer);