Browse Source

[2373] supported quoted string state

JINMEI Tatuya 12 years ago
parent
commit
31b507f443

+ 49 - 4
src/lib/dns/master_lexer.cc

@@ -193,7 +193,6 @@ public:
     }
 };
 
-// Currently this is provided mostly as a place holder
 class String : public State {
 public:
     String() {}
@@ -201,6 +200,13 @@ public:
     virtual const State* handle(MasterLexer& lexer) const;
 };
 
+class QString : public State {  // state for handling a quoted string token
+public:
+    QString() {}
+    virtual ~QString() {}      // trivial; see the base class for the destructor
+    virtual const State* handle(MasterLexer& lexer) const; // reads up to the closing '"'
+};
+
 // We use a common instance of each state in a singleton-like way to save
 // construction overhead.  They are not singletons in the strict sense as
 // we don't prohibit direct construction of these objects.  But that doesn't
@@ -208,6 +214,7 @@ public:
 // this file.
 const CRLF CRLF_STATE;
 const String STRING_STATE;
+const QString QSTRING_STATE;
 }
 
 const State&
@@ -217,6 +224,8 @@ State::getInstance(ID state_id) {
         return (CRLF_STATE);
     case String:
         return (STRING_STATE);
+    case QString:
+        return (QSTRING_STATE);
     }
 
     // This is a bug of the caller, and this method is only expected to be
@@ -261,6 +270,9 @@ State::start(MasterLexer& lexer, MasterLexer::Options options) {
             if (paren_count == 0) { // check if we are in () (see above)
                 return (&CRLF_STATE);
             }
+        } else if (c == '"' && (options & MasterLexer::QSTRING) != 0) {
+            lexerimpl.last_was_eol_ = false;
+            return (&QSTRING_STATE);
         } else if (c == '(') {
             lexerimpl.last_was_eol_ = false;
             ++paren_count;
@@ -284,7 +296,6 @@ State::start(MasterLexer& lexer, MasterLexer::Options options) {
 const State*
 String::handle(MasterLexer& lexer) const {
     std::vector<char>& data = getLexerImpl(lexer)->data_;
-    MasterLexer::Token& token = getLexerImpl(lexer)->token_;
     data.clear();
 
     bool escaped = false;
@@ -298,14 +309,48 @@ String::handle(MasterLexer& lexer) const {
             (!escaped &&
              (c == ' ' || c == '\t' || c == '(' || c == ')'))) {
             getLexerImpl(lexer)->source_->ungetChar();
-            token = MasterLexer::Token(&data.at(0), data.size());
+            getLexerImpl(lexer)->token_ =
+                MasterLexer::Token(&data.at(0), data.size());
             return (NULL);
         }
-        escaped = (!escaped && (c == '\\'));
+        escaped = (c == '\\' && !escaped);
         data.push_back(c);
     }
 }
 
+// Handle the body of a quoted string.  The opening '"' is expected to have
+// been consumed already (State::start() returns this state on seeing it).
+//
+// Characters are read from the lexer's current source until an unescaped
+// '"' is found; the lexer's token is then set to a quoted string token
+// referring to the text collected in the lexer's data buffer.  A backslash
+// is kept as is, except immediately before a '"', in which case the pair
+// is replaced with a single '"'.  A newline is accepted only right after
+// a backslash.
+//
+// Error cases (the token is set to an error token):
+// - end of input before the closing quote: UNEXPECTED_END
+// - unescaped newline before the closing quote: UNBALANCED_QUOTES; the
+//   newline is pushed back to the source so it can be read again.
+//
+// This method always returns NULL.
+const State*
+QString::handle(MasterLexer& lexer) const {
+    MasterLexer::Token& token = getLexerImpl(lexer)->token_;
+    std::vector<char>& data = getLexerImpl(lexer)->data_;
+    data.clear();
+
+    bool escaped = false;       // true iff the previous char was an
+                                // unescaped backslash
+    while (true) {
+        const int c = getLexerImpl(lexer)->source_->getChar();
+        if (c == InputSource::END_OF_STREAM) {
+            token = Token(Token::UNEXPECTED_END);
+            return (NULL);
+        } else if (c == '"') {
+            if (escaped) {
+                // found escaped '"'. overwrite the preceding backslash.
+                assert(!data.empty());
+                escaped = false;
+                data.back() = '"';
+            } else {
+                // An empty quoted string ("") leaves data empty, in which
+                // case data.at(0) would throw std::out_of_range.  Use a
+                // static empty string for the zero-length region instead.
+                const char* const beg = data.empty() ? "" : &data.at(0);
+                token = MasterLexer::Token(beg, data.size(), true);
+                return (NULL);
+            }
+        } else if (c == '\n' && !escaped) {
+            // Leave the newline in the source for whoever reads next.
+            getLexerImpl(lexer)->source_->ungetChar();
+            token = Token(Token::UNBALANCED_QUOTES);
+            return (NULL);
+        } else {
+            // A backslash escapes only the character right after it, and
+            // an escaped backslash doesn't start a new escape.
+            escaped = (c == '\\' && !escaped);
+            data.push_back(c);
+        }
+    }
+}
+
 } // namespace master_lexer_internal
 
 } // end of namespace dns

+ 2 - 1
src/lib/dns/master_lexer_state.h

@@ -98,7 +98,8 @@ public:
     /// a way to get an instance of a specific state.
     enum ID {
         CRLF,                  ///< Just seen a carriage-return character
-        String                 ///< Handling a string token
+        String,                ///< Handling a string token
+        QString                ///< Handling a quoted string token
     };
 
     /// \brief Returns a \c State instance of the given state.

+ 85 - 2
src/lib/dns/tests/master_lexer_state_unittest.cc

@@ -32,6 +32,7 @@ protected:
                              s_null(NULL),
                              s_crlf(State::getInstance(State::CRLF)),
                              s_string(State::getInstance(State::String)),
+                             s_qstring(State::getInstance(State::QString)),
                              options(MasterLexer::NONE),
                              orig_options(options)
     {}
@@ -42,6 +43,7 @@ protected:
     const State* const s_null;
     const State& s_crlf;
     const State& s_string;
+    const State& s_qstring;
     std::stringstream ss;
     MasterLexer::Options options, orig_options;
 };
@@ -254,9 +256,10 @@ TEST_F(MasterLexerStateTest, crlf) {
 }
 
 void
-stringTokenCheck(const std::string& expected, const MasterLexer::Token& token)
+stringTokenCheck(const std::string& expected, const MasterLexer::Token& token,
+                 bool quoted = false)
 {
-    EXPECT_EQ(Token::STRING, token.getType());
+    EXPECT_EQ(quoted ? Token::QSTRING : Token::STRING, token.getType());
     EXPECT_EQ(expected, token.getString());
     const std::string actual(token.getStringRegion().beg,
                              token.getStringRegion().beg +
@@ -350,4 +353,84 @@ TEST_F(MasterLexerStateTest, stringEscape) {
     stringTokenCheck("escaped\\\\", s_string.getToken(lexer));
 }
 
+TEST_F(MasterLexerStateTest, quotedString) {
+    ss << "\"ignore-quotes\"\n";
+    ss << "\"quoted string\" ";
+    ss << "\"escape\\ in quote\" ";
+    ss << "\"escaped\\\"\" ";
+    ss << "\"escaped backslash\\\\\" ";
+    ss << "\"no;comment\"";
+    lexer.pushSource(ss);
+
+    // By default, '"' has no special meaning and is just part of a string.
+    EXPECT_EQ(&s_string, State::start(lexer, common_options));
+    EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see \n
+    stringTokenCheck("\"ignore-quotes\"", s_string.getToken(lexer));
+    EXPECT_EQ(s_null, State::start(lexer, common_options)); // skip \n after it
+    EXPECT_TRUE(s_string.wasLastEOL(lexer));
+
+    // If QSTRING is specified in the options, '"' is regarded as the
+    // beginning of a quoted string.
+    const MasterLexer::Options options = common_options | MasterLexer::QSTRING;
+    EXPECT_EQ(&s_qstring, State::start(lexer, options));
+    EXPECT_FALSE(s_string.wasLastEOL(lexer)); // EOL is canceled due to '"'
+    EXPECT_EQ(s_null, s_qstring.handle(lexer));
+    stringTokenCheck("quoted string", s_string.getToken(lexer), true);
+
+    // The escape character (backslash) mostly has no effect in qstring
+    // processing: it is kept as is in the resulting token.
+    EXPECT_EQ(&s_qstring, State::start(lexer, options));
+    EXPECT_EQ(s_null, s_qstring.handle(lexer));
+    stringTokenCheck("escape\\ in quote", s_string.getToken(lexer), true);
+
+    // The only exception is the quotation mark itself.  Note that the escape
+    // only works on the quotation mark immediately after it.
+    EXPECT_EQ(&s_qstring, State::start(lexer, options));
+    EXPECT_EQ(s_null, s_qstring.handle(lexer));
+    stringTokenCheck("escaped\"", s_string.getToken(lexer), true);
+
+    // Escaped '\' then '"'.  Unlike the previous case, the '"' isn't
+    // escaped and closes the string; both backslashes are kept.
+    EXPECT_EQ(&s_qstring, State::start(lexer, options));
+    EXPECT_EQ(s_null, s_qstring.handle(lexer));
+    stringTokenCheck("escaped backslash\\\\", s_string.getToken(lexer), true);
+
+    // ';' has no meaning in a quoted string (not indicating a comment)
+    EXPECT_EQ(&s_qstring, State::start(lexer, options));
+    EXPECT_EQ(s_null, s_qstring.handle(lexer));
+    stringTokenCheck("no;comment", s_string.getToken(lexer), true);
+}
+
+TEST_F(MasterLexerStateTest, brokenQuotedString) {
+    ss << "\"unbalanced-quote\n";
+    ss << "\"quoted\\\n\" ";
+    ss << "\"unclosed quote and EOF";
+    lexer.pushSource(ss);
+
+    // EOL is encountered without closing the quote: UNBALANCED_QUOTES error
+    const MasterLexer::Options options = common_options | MasterLexer::QSTRING;
+    EXPECT_EQ(&s_qstring, State::start(lexer, options));
+    EXPECT_EQ(s_null, s_qstring.handle(lexer));
+    ASSERT_EQ(Token::ERROR, s_qstring.getToken(lexer).getType());
+    EXPECT_EQ(Token::UNBALANCED_QUOTES,
+              s_qstring.getToken(lexer).getErrorCode());
+    // The '\n' was pushed back on the error, so we can resume from it
+    EXPECT_EQ(s_null, State::start(lexer, options));
+    EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
+
+    // \n is okay in a quoted string if escaped (the backslash is kept)
+    EXPECT_EQ(&s_qstring, State::start(lexer, options));
+    EXPECT_EQ(s_null, s_qstring.handle(lexer));
+    stringTokenCheck("quoted\\\n", s_string.getToken(lexer), true);
+
+    // EOF is encountered without closing the quote: UNEXPECTED_END error
+    EXPECT_EQ(&s_qstring, State::start(lexer, options));
+    EXPECT_EQ(s_null, s_qstring.handle(lexer));
+    ASSERT_EQ(Token::ERROR, s_qstring.getToken(lexer).getType());
+    EXPECT_EQ(Token::UNEXPECTED_END, s_qstring.getToken(lexer).getErrorCode());
+    // If we continue we'll simply see the EOF
+    EXPECT_EQ(s_null, State::start(lexer, options));
+    EXPECT_EQ(Token::END_OF_FILE, s_crlf.getToken(lexer).getType());
+}
+
 }