Parcourir la source

Merge branch 'master' into trac2451

Mukund Sivaraman il y a 12 ans
Parent
commit
ac0d3badda

+ 106 - 8
src/lib/dns/master_lexer.cc

@@ -20,6 +20,7 @@
 
 
 #include <boost/shared_ptr.hpp>
 #include <boost/shared_ptr.hpp>
 
 
+#include <bitset>
 #include <cassert>
 #include <cassert>
 #include <string>
 #include <string>
 #include <vector>
 #include <vector>
@@ -35,14 +36,23 @@ using namespace master_lexer_internal;
 struct MasterLexer::MasterLexerImpl {
 struct MasterLexer::MasterLexerImpl {
     MasterLexerImpl() : source_(NULL), token_(Token::NOT_STARTED),
     MasterLexerImpl() : source_(NULL), token_(Token::NOT_STARTED),
                         paren_count_(0), last_was_eol_(false)
                         paren_count_(0), last_was_eol_(false)
-    {}
+    {
+        separators_.set('\r');
+        separators_.set('\n');
+        separators_.set(' ');
+        separators_.set('\t');
+        separators_.set('(');
+        separators_.set(')');
+        esc_separators_.set('\r');
+        esc_separators_.set('\n');
+    }
 
 
     // A helper method to skip possible comments toward the end of EOL or EOF.
     // A helper method to skip possible comments toward the end of EOL or EOF.
     // commonly used by state classes.  It returns the corresponding "end-of"
     // commonly used by state classes.  It returns the corresponding "end-of"
     // character in case it's a comment; otherwise it simply returns the
     // character in case it's a comment; otherwise it simply returns the
     // current character.
     // current character.
-    int skipComment(int c) {
-        if (c == ';') {
+    int skipComment(int c, bool escaped = false) {
+        if (c == ';' && !escaped) {
             while (true) {
             while (true) {
                 c = source_->getChar();
                 c = source_->getChar();
                 if (c == '\n' || c == InputSource::END_OF_STREAM) {
                 if (c == '\n' || c == InputSource::END_OF_STREAM) {
@@ -53,14 +63,34 @@ struct MasterLexer::MasterLexerImpl {
         return (c);
         return (c);
     }
     }
 
 
+    bool isTokenEnd(int c, bool escaped) {
+        // Special case of EOF (end of stream); this is not in the bitmaps
+        if (c == InputSource::END_OF_STREAM) {
+            return (true);
+        }
+        // In this implementation we only ensure the behavior for unsigned
+        // range of characters, so we restrict the range of the values up to
+        // 0x7f = 127
+        return (escaped ? esc_separators_.test(c & 0x7f) :
+                separators_.test(c & 0x7f));
+    }
+
     std::vector<InputSourcePtr> sources_;
     std::vector<InputSourcePtr> sources_;
     InputSource* source_;       // current source (NULL if sources_ is empty)
     InputSource* source_;       // current source (NULL if sources_ is empty)
     Token token_;               // currently recognized token (set by a state)
     Token token_;               // currently recognized token (set by a state)
+    std::vector<char> data_;    // placeholder for string data
 
 
     // These are used in states, and defined here only as a placeholder.
     // These are used in states, and defined here only as a placeholder.
     // The main lexer class does not need these members.
     // The main lexer class does not need these members.
     size_t paren_count_;        // nest count of the parentheses
     size_t paren_count_;        // nest count of the parentheses
     bool last_was_eol_; // whether the lexer just passed an end-of-line
     bool last_was_eol_; // whether the lexer just passed an end-of-line
+
+    // Bitmaps that gives whether a given (positive) character should be
+    // considered a separator of a string/number token.  The esc_ version
+    // is a subset of the other, excluding characters that can be ignored
+    // if escaped by a backslash.  See isTokenEnd() for the bitmap size.
+    std::bitset<128> separators_;
+    std::bitset<128> esc_separators_;
 };
 };
 
 
 MasterLexer::MasterLexer() : impl_(new MasterLexerImpl) {
 MasterLexer::MasterLexer() : impl_(new MasterLexerImpl) {
@@ -192,14 +222,18 @@ public:
     }
     }
 };
 };
 
 
-// Currently this is provided mostly as a place holder
 class String : public State {
 class String : public State {
 public:
 public:
     String() {}
     String() {}
     virtual ~String() {}      // see the base class for the destructor
     virtual ~String() {}      // see the base class for the destructor
-    virtual const State* handle(MasterLexer& /*lexer*/) const {
-        return (NULL);
-    }
+    virtual const State* handle(MasterLexer& lexer) const;
+};
+
+class QString : public State {
+public:
+    QString() {}
+    virtual ~QString() {}      // see the base class for the destructor
+    virtual const State* handle(MasterLexer& lexer) const;
 };
 };
 
 
 // We use a common instance of a each state in a singleton-like way to save
 // We use a common instance of a each state in a singleton-like way to save
@@ -209,6 +243,7 @@ public:
 // this file.
 // this file.
 const CRLF CRLF_STATE;
 const CRLF CRLF_STATE;
 const String STRING_STATE;
 const String STRING_STATE;
+const QString QSTRING_STATE;
 }
 }
 
 
 const State&
 const State&
@@ -218,6 +253,8 @@ State::getInstance(ID state_id) {
         return (CRLF_STATE);
         return (CRLF_STATE);
     case String:
     case String:
         return (STRING_STATE);
         return (STRING_STATE);
+    case QString:
+        return (QSTRING_STATE);
     }
     }
 
 
     // This is a bug of the caller, and this method is only expected to be
     // This is a bug of the caller, and this method is only expected to be
@@ -233,6 +270,9 @@ State::start(MasterLexer& lexer, MasterLexer::Options options) {
     MasterLexer::MasterLexerImpl& lexerimpl = *lexer.impl_;
     MasterLexer::MasterLexerImpl& lexerimpl = *lexer.impl_;
     size_t& paren_count = lexerimpl.paren_count_;
     size_t& paren_count = lexerimpl.paren_count_;
 
 
+    // Note: the if-else in the loop is getting complicated.  When we complete
+    // #2374, revisit the organization to see if we need a fundamental
+    // refactoring.
     while (true) {
     while (true) {
         const int c = lexerimpl.skipComment(lexerimpl.source_->getChar());
         const int c = lexerimpl.skipComment(lexerimpl.source_->getChar());
         if (c == InputSource::END_OF_STREAM) {
         if (c == InputSource::END_OF_STREAM) {
@@ -262,6 +302,9 @@ State::start(MasterLexer& lexer, MasterLexer::Options options) {
             if (paren_count == 0) { // check if we are in () (see above)
             if (paren_count == 0) { // check if we are in () (see above)
                 return (&CRLF_STATE);
                 return (&CRLF_STATE);
             }
             }
+        } else if (c == '"' && (options & MasterLexer::QSTRING) != 0) {
+            lexerimpl.last_was_eol_ = false;
+            return (&QSTRING_STATE);
         } else if (c == '(') {
         } else if (c == '(') {
             lexerimpl.last_was_eol_ = false;
             lexerimpl.last_was_eol_ = false;
             ++paren_count;
             ++paren_count;
@@ -273,7 +316,8 @@ State::start(MasterLexer& lexer, MasterLexer::Options options) {
             }
             }
             --paren_count;
             --paren_count;
         } else {
         } else {
-            // Note: in #2373 we should probably ungetChar().
+            // this character will be handled in the string state
+            lexerimpl.source_->ungetChar();
             lexerimpl.last_was_eol_ = false;
             lexerimpl.last_was_eol_ = false;
             return (&STRING_STATE);
             return (&STRING_STATE);
         }
         }
@@ -281,6 +325,60 @@ State::start(MasterLexer& lexer, MasterLexer::Options options) {
     }
     }
 }
 }
 
 
+const State*
+String::handle(MasterLexer& lexer) const {
+    std::vector<char>& data = getLexerImpl(lexer)->data_;
+    data.clear();
+
+    bool escaped = false;
+    while (true) {
+        const int c = getLexerImpl(lexer)->skipComment(
+            getLexerImpl(lexer)->source_->getChar(), escaped);
+
+        if (getLexerImpl(lexer)->isTokenEnd(c, escaped)) {
+            getLexerImpl(lexer)->source_->ungetChar();
+            getLexerImpl(lexer)->token_ =
+                MasterLexer::Token(&data.at(0), data.size());
+            return (NULL);
+        }
+        escaped = (c == '\\' && !escaped);
+        data.push_back(c);
+    }
+}
+
+const State*
+QString::handle(MasterLexer& lexer) const {
+    MasterLexer::Token& token = getLexerImpl(lexer)->token_;
+    std::vector<char>& data = getLexerImpl(lexer)->data_;
+    data.clear();
+
+    bool escaped = false;
+    while (true) {
+        const int c = getLexerImpl(lexer)->source_->getChar();
+        if (c == InputSource::END_OF_STREAM) {
+            token = Token(Token::UNEXPECTED_END);
+            return (NULL);
+        } else if (c == '"') {
+            if (escaped) {
+                // found escaped '"'. overwrite the preceding backslash.
+                assert(!data.empty());
+                escaped = false;
+                data.back() = '"';
+            } else {
+                token = MasterLexer::Token(&data.at(0), data.size(), true);
+                return (NULL);
+            }
+        } else if (c == '\n' && !escaped) {
+            getLexerImpl(lexer)->source_->ungetChar();
+            token = Token(Token::UNBALANCED_QUOTES);
+            return (NULL);
+        } else {
+            escaped = (c == '\\' && !escaped);
+            data.push_back(c);
+        }
+    }
+}
+
 } // namespace master_lexer_internal
 } // namespace master_lexer_internal
 
 
 } // end of namespace dns
 } // end of namespace dns

+ 27 - 5
src/lib/dns/master_lexer.h

@@ -216,10 +216,10 @@ public:
     /// as an unsigned 32-bit integer.  If we see the need for larger integers
     /// as an unsigned 32-bit integer.  If we see the need for larger integers
     /// or negative numbers, we can then extend the token types.
     /// or negative numbers, we can then extend the token types.
     enum Type {
     enum Type {
-        END_OF_LINE, ///< End of line detected (if asked for detecting it)
-        END_OF_FILE, ///< End of file detected (if asked for detecting it)
+        END_OF_LINE, ///< End of line detected
+        END_OF_FILE, ///< End of file detected
         INITIAL_WS,  ///< White spaces at the beginning of a line after an
         INITIAL_WS,  ///< White spaces at the beginning of a line after an
-                     ///< end of line
+                     ///< end of line (if asked for detecting it)
         NOVALUE_TYPE_MAX = INITIAL_WS, ///< Max integer corresponding to
         NOVALUE_TYPE_MAX = INITIAL_WS, ///< Max integer corresponding to
                                        /// no-value (type only) types.
                                        /// no-value (type only) types.
                                        /// Mainly for internal use.
                                        /// Mainly for internal use.
@@ -340,12 +340,34 @@ public:
     ///                       string object.
     ///                       string object.
     /// \return A std::string object corresponding to the string token value.
     /// \return A std::string object corresponding to the string token value.
     std::string getString() const {
     std::string getString() const {
+        std::string ret;
+        getString(ret);
+        return (ret);
+    }
+
+    /// \brief Fill in a string with the value of a string-variant token.
+    ///
+    /// This is similar to the other version of \c getString(), but
+    /// the caller is supposed to pass a placeholder string object.
+    /// This will be more efficient if the caller uses the same
+    /// \c MasterLexer repeatedly and needs to get string token in the
+    /// form of a string object many times as this version could reuse
+    /// the existing internal storage of the passed string.
+    ///
+    /// Any existing content of the passed string will be removed.
+    ///
+    /// \throw InvalidOperation Called on a non string-variant types of token.
+    /// \throw std::bad_alloc Resource allocation failure in constructing the
+    ///                       string object.
+    ///
+    /// \param ret A string object to be filled with the token string.
+    void getString(std::string& ret) const {
         if (type_ != STRING && type_ != QSTRING) {
         if (type_ != STRING && type_ != QSTRING) {
             isc_throw(InvalidOperation,
             isc_throw(InvalidOperation,
                       "Token::getString() for non string-variant type");
                       "Token::getString() for non string-variant type");
         }
         }
-        return (std::string(val_.str_region_.beg,
-                            val_.str_region_.beg + val_.str_region_.len));
+        ret.assign(val_.str_region_.beg,
+                   val_.str_region_.beg + val_.str_region_.len);
     }
     }
 
 
     /// \brief Return the value of a string-variant token as a string object.
     /// \brief Return the value of a string-variant token as a string object.

+ 2 - 1
src/lib/dns/master_lexer_state.h

@@ -98,7 +98,8 @@ public:
     /// a way to get an instance of a specific state.
     /// a way to get an instance of a specific state.
     enum ID {
     enum ID {
         CRLF,                  ///< Just seen a carriage-return character
         CRLF,                  ///< Just seen a carriage-return character
-        String                 ///< Handling a string token
+        String,                ///< Handling a string token
+        QString                ///< Handling a quoted string token
     };
     };
 
 
     /// \brief Returns a \c State instance of the given state.
     /// \brief Returns a \c State instance of the given state.

+ 199 - 2
src/lib/dns/tests/master_lexer_state_unittest.cc

@@ -32,6 +32,7 @@ protected:
                              s_null(NULL),
                              s_null(NULL),
                              s_crlf(State::getInstance(State::CRLF)),
                              s_crlf(State::getInstance(State::CRLF)),
                              s_string(State::getInstance(State::String)),
                              s_string(State::getInstance(State::String)),
+                             s_qstring(State::getInstance(State::QString)),
                              options(MasterLexer::NONE),
                              options(MasterLexer::NONE),
                              orig_options(options)
                              orig_options(options)
     {}
     {}
@@ -42,6 +43,7 @@ protected:
     const State* const s_null;
     const State* const s_null;
     const State& s_crlf;
     const State& s_crlf;
     const State& s_string;
     const State& s_string;
+    const State& s_qstring;
     std::stringstream ss;
     std::stringstream ss;
     MasterLexer::Options options, orig_options;
     MasterLexer::Options options, orig_options;
 };
 };
@@ -111,8 +113,7 @@ TEST_F(MasterLexerStateTest, parentheses) {
     EXPECT_EQ(1, s_crlf.getParenCount(lexer)); // check post condition
     EXPECT_EQ(1, s_crlf.getParenCount(lexer)); // check post condition
     EXPECT_FALSE(s_crlf.wasLastEOL(lexer));
     EXPECT_FALSE(s_crlf.wasLastEOL(lexer));
 
 
-    // skip 'a' (note: until #2373 it's actually skipped as part of the '('
-    // handling)
+    // skip 'a'
     s_string.handle(lexer);
     s_string.handle(lexer);
 
 
     // Then handle ')'.  '\n' before ')' isn't recognized because
     // Then handle ')'.  '\n' before ')' isn't recognized because
@@ -244,6 +245,7 @@ TEST_F(MasterLexerStateTest, crlf) {
     EXPECT_EQ(s_null, s_crlf.handle(lexer));
     EXPECT_EQ(s_null, s_crlf.handle(lexer));
     EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
     EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
     EXPECT_EQ(&s_string, State::start(lexer, common_options));
     EXPECT_EQ(&s_string, State::start(lexer, common_options));
+    EXPECT_EQ(s_null, s_string.handle(lexer)); // skip 'a'
 
 
     // 4. \r then EOF
     // 4. \r then EOF
     EXPECT_EQ(&s_crlf, State::start(lexer, common_options)); // recognize '\r'
     EXPECT_EQ(&s_crlf, State::start(lexer, common_options)); // recognize '\r'
@@ -253,4 +255,199 @@ TEST_F(MasterLexerStateTest, crlf) {
     EXPECT_EQ(Token::END_OF_FILE, s_crlf.getToken(lexer).getType());
     EXPECT_EQ(Token::END_OF_FILE, s_crlf.getToken(lexer).getType());
 }
 }
 
 
+// Commonly used check for string related test cases, checking if the given
+// token has expected values.
+void
+stringTokenCheck(const std::string& expected, const MasterLexer::Token& token,
+                 bool quoted = false)
+{
+    EXPECT_EQ(quoted ? Token::QSTRING : Token::STRING, token.getType());
+    EXPECT_EQ(expected, token.getString());
+    const std::string actual(token.getStringRegion().beg,
+                             token.getStringRegion().beg +
+                             token.getStringRegion().len);
+    EXPECT_EQ(expected, actual);
+}
+
+TEST_F(MasterLexerStateTest, string) {
+    // Check with simple strings followed by separate characters
+    ss << "followed-by-EOL\n";
+    ss << "followed-by-CR\r";
+    ss << "followed-by-space ";
+    ss << "followed-by-tab\t";
+    ss << "followed-by-comment;this is comment and ignored\n";
+    ss << "followed-by-paren(closing)";
+    ss << "followed-by-EOF";
+    lexer.pushSource(ss);
+
+    EXPECT_EQ(&s_string, State::start(lexer, common_options));
+    EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see \n
+    EXPECT_FALSE(s_string.wasLastEOL(lexer));
+    stringTokenCheck("followed-by-EOL", s_string.getToken(lexer));
+    EXPECT_EQ(s_null, State::start(lexer, common_options)); // skip \n
+
+    EXPECT_EQ(&s_string, State::start(lexer, common_options));
+    EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see \r
+    stringTokenCheck("followed-by-CR", s_string.getToken(lexer));
+    EXPECT_EQ(&s_crlf, State::start(lexer, common_options)); // handle \r...
+    EXPECT_EQ(s_null, s_crlf.handle(lexer)); // ...and skip it
+
+    EXPECT_EQ(&s_string, State::start(lexer, common_options));
+    EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see ' '
+    stringTokenCheck("followed-by-space", s_string.getToken(lexer));
+
+    // skip ' ', then recognize the next string
+    EXPECT_EQ(&s_string, State::start(lexer, common_options));
+    EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see \t
+    stringTokenCheck("followed-by-tab", s_string.getToken(lexer));
+
+    // skip \t, then recognize the next string
+    EXPECT_EQ(&s_string, State::start(lexer, common_options));
+    EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see comment
+    stringTokenCheck("followed-by-comment", s_string.getToken(lexer));
+    EXPECT_EQ(s_null, State::start(lexer, common_options)); // skip \n after it
+
+    EXPECT_EQ(&s_string, State::start(lexer, common_options));
+    EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see '('
+    stringTokenCheck("followed-by-paren", s_string.getToken(lexer));
+    EXPECT_EQ(&s_string, State::start(lexer, common_options)); // str in ()
+    EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize the str, see ')'
+    stringTokenCheck("closing", s_string.getToken(lexer));
+
+    EXPECT_EQ(&s_string, State::start(lexer, common_options));
+    EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see EOF
+    stringTokenCheck("followed-by-EOF", s_string.getToken(lexer));
+}
+
+TEST_F(MasterLexerStateTest, stringEscape) {
+    // some of the separate characters should be considered part of the
+    // string if escaped.
+    ss << "escaped\\ space ";
+    ss << "escaped\\\ttab ";
+    ss << "escaped\\(paren ";
+    ss << "escaped\\)close ";
+    ss << "escaped\\;comment ";
+    ss << "escaped\\\\ backslash "; // second '\' shouldn't escape ' '
+    lexer.pushSource(ss);
+
+    EXPECT_EQ(&s_string, State::start(lexer, common_options));
+    EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see ' ' at end
+    stringTokenCheck("escaped\\ space", s_string.getToken(lexer));
+
+    EXPECT_EQ(&s_string, State::start(lexer, common_options));
+    EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see ' ' at end
+    stringTokenCheck("escaped\\\ttab", s_string.getToken(lexer));
+
+    EXPECT_EQ(&s_string, State::start(lexer, common_options));
+    EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see ' ' at end
+    stringTokenCheck("escaped\\(paren", s_string.getToken(lexer));
+
+    EXPECT_EQ(&s_string, State::start(lexer, common_options));
+    EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see ' ' at end
+    stringTokenCheck("escaped\\)close", s_string.getToken(lexer));
+
+    EXPECT_EQ(&s_string, State::start(lexer, common_options));
+    EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see ' ' at end
+    stringTokenCheck("escaped\\;comment", s_string.getToken(lexer));
+
+    EXPECT_EQ(&s_string, State::start(lexer, common_options));
+    EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see ' ' in mid
+    stringTokenCheck("escaped\\\\", s_string.getToken(lexer));
+
+    // Confirm the word that follows the escaped '\' is correctly recognized.
+    EXPECT_EQ(&s_string, State::start(lexer, common_options));
+    EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see ' ' at end
+    stringTokenCheck("backslash", s_string.getToken(lexer));
+}
+
+TEST_F(MasterLexerStateTest, quotedString) {
+    ss << "\"ignore-quotes\"\n";
+    ss << "\"quoted string\" "; // space is part of the qstring
+    // also check other separator characters. note that \r doesn't cause
+    // UNBALANCED_QUOTES.  Not sure if it's intentional, but that's how the
+    // BIND 9 version works, so we follow it (it should be too minor to matter
+    // in practice anyway)
+    ss << "\"quoted()\t\rstring\" ";
+    ss << "\"escape\\ in quote\" ";
+    ss << "\"escaped\\\"\" ";
+    ss << "\"escaped backslash\\\\\" ";
+    ss << "\"no;comment\"";
+    lexer.pushSource(ss);
+
+    // by default, '"' doesn't have any special meaning and part of string
+    EXPECT_EQ(&s_string, State::start(lexer, common_options));
+    EXPECT_EQ(s_null, s_string.handle(lexer)); // recognize str, see \n
+    stringTokenCheck("\"ignore-quotes\"", s_string.getToken(lexer));
+    EXPECT_EQ(s_null, State::start(lexer, common_options)); // skip \n after it
+    EXPECT_TRUE(s_string.wasLastEOL(lexer));
+
+    // If QSTRING is specified in option, '"' is regarded as a beginning of
+    // a quoted string.
+    const MasterLexer::Options options = common_options | MasterLexer::QSTRING;
+    EXPECT_EQ(&s_qstring, State::start(lexer, options));
+    EXPECT_FALSE(s_string.wasLastEOL(lexer)); // EOL is canceled due to '"'
+    EXPECT_EQ(s_null, s_qstring.handle(lexer));
+    stringTokenCheck("quoted string", s_string.getToken(lexer), true);
+
+    // Also checks other separator characters within a qstring
+    EXPECT_EQ(&s_qstring, State::start(lexer, options));
+    EXPECT_EQ(s_null, s_qstring.handle(lexer));
+    stringTokenCheck("quoted()\t\rstring", s_string.getToken(lexer), true);
+
+    // escape character mostly doesn't have any effect in the qstring
+    // processing
+    EXPECT_EQ(&s_qstring, State::start(lexer, options));
+    EXPECT_EQ(s_null, s_qstring.handle(lexer));
+    stringTokenCheck("escape\\ in quote", s_string.getToken(lexer), true);
+
+    // The only exception is the quotation mark itself.  Note that the escape
+    // only works on the quotation mark immediately after it.
+    EXPECT_EQ(&s_qstring, State::start(lexer, options));
+    EXPECT_EQ(s_null, s_qstring.handle(lexer));
+    stringTokenCheck("escaped\"", s_string.getToken(lexer), true);
+
+    // quoted '\' then '"'.  Unlike the previous case '"' shouldn't be
+    // escaped.
+    EXPECT_EQ(&s_qstring, State::start(lexer, options));
+    EXPECT_EQ(s_null, s_qstring.handle(lexer));
+    stringTokenCheck("escaped backslash\\\\", s_string.getToken(lexer), true);
+
+    // ';' has no meaning in a quoted string (not indicating a comment)
+    EXPECT_EQ(&s_qstring, State::start(lexer, options));
+    EXPECT_EQ(s_null, s_qstring.handle(lexer));
+    stringTokenCheck("no;comment", s_string.getToken(lexer), true);
+}
+
+TEST_F(MasterLexerStateTest, brokenQuotedString) {
+    ss << "\"unbalanced-quote\n";
+    ss << "\"quoted\\\n\" ";
+    ss << "\"unclosed quote and EOF";
+    lexer.pushSource(ss);
+
+    // EOL is encountered without closing the quote
+    const MasterLexer::Options options = common_options | MasterLexer::QSTRING;
+    EXPECT_EQ(&s_qstring, State::start(lexer, options));
+    EXPECT_EQ(s_null, s_qstring.handle(lexer));
+    ASSERT_EQ(Token::ERROR, s_qstring.getToken(lexer).getType());
+    EXPECT_EQ(Token::UNBALANCED_QUOTES,
+              s_qstring.getToken(lexer).getErrorCode());
+    // We can resume after the error from the '\n'
+    EXPECT_EQ(s_null, State::start(lexer, options));
+    EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
+
+    // \n is okay in a quoted string if escaped
+    EXPECT_EQ(&s_qstring, State::start(lexer, options));
+    EXPECT_EQ(s_null, s_qstring.handle(lexer));
+    stringTokenCheck("quoted\\\n", s_string.getToken(lexer), true);
+
+    // EOF is encountered without closing the quote
+    EXPECT_EQ(&s_qstring, State::start(lexer, options));
+    EXPECT_EQ(s_null, s_qstring.handle(lexer));
+    ASSERT_EQ(Token::ERROR, s_qstring.getToken(lexer).getType());
+    EXPECT_EQ(Token::UNEXPECTED_END, s_qstring.getToken(lexer).getErrorCode());
+    // If we continue we'll simply see the EOF
+    EXPECT_EQ(s_null, State::start(lexer, options));
+    EXPECT_EQ(Token::END_OF_FILE, s_crlf.getToken(lexer).getType());
+}
+
 }
 }

+ 7 - 0
src/lib/dns/tests/master_lexer_token_unittest.cc

@@ -48,6 +48,9 @@ TEST_F(MasterLexerTokenTest, strings) {
     // basic construction and getter checks
     // basic construction and getter checks
     EXPECT_EQ(MasterLexer::Token::STRING, token_str.getType());
     EXPECT_EQ(MasterLexer::Token::STRING, token_str.getType());
     EXPECT_EQ(std::string("string token"), token_str.getString());
     EXPECT_EQ(std::string("string token"), token_str.getString());
+    std::string strval = "dummy"; // this should be replaced
+    token_str.getString(strval);
+    EXPECT_EQ(std::string("string token"), strval);
     const MasterLexer::Token::StringRegion str_region =
     const MasterLexer::Token::StringRegion str_region =
         token_str.getStringRegion();
         token_str.getStringRegion();
     EXPECT_EQ(TEST_STRING, str_region.beg);
     EXPECT_EQ(TEST_STRING, str_region.beg);
@@ -60,6 +63,8 @@ TEST_F(MasterLexerTokenTest, strings) {
     expected_str.push_back('\0');
     expected_str.push_back('\0');
     EXPECT_EQ(expected_str,
     EXPECT_EQ(expected_str,
               MasterLexer::Token(TEST_STRING, TEST_STRING_LEN + 1).getString());
               MasterLexer::Token(TEST_STRING, TEST_STRING_LEN + 1).getString());
+    MasterLexer::Token(TEST_STRING, TEST_STRING_LEN + 1).getString(strval);
+    EXPECT_EQ(expected_str, strval);
 
 
     // Construct type of qstring
     // Construct type of qstring
     EXPECT_EQ(MasterLexer::Token::QSTRING,
     EXPECT_EQ(MasterLexer::Token::QSTRING,
@@ -72,7 +77,9 @@ TEST_F(MasterLexerTokenTest, strings) {
 
 
     // getString/StringRegion() aren't allowed for non string(-variant) types
     // getString/StringRegion() aren't allowed for non string(-variant) types
     EXPECT_THROW(token_eof.getString(), isc::InvalidOperation);
     EXPECT_THROW(token_eof.getString(), isc::InvalidOperation);
+    EXPECT_THROW(token_eof.getString(strval), isc::InvalidOperation);
     EXPECT_THROW(token_num.getString(), isc::InvalidOperation);
     EXPECT_THROW(token_num.getString(), isc::InvalidOperation);
+    EXPECT_THROW(token_num.getString(strval), isc::InvalidOperation);
     EXPECT_THROW(token_eof.getStringRegion(), isc::InvalidOperation);
     EXPECT_THROW(token_eof.getStringRegion(), isc::InvalidOperation);
     EXPECT_THROW(token_num.getStringRegion(), isc::InvalidOperation);
     EXPECT_THROW(token_num.getStringRegion(), isc::InvalidOperation);
 }
 }