Parcourir la source

[master] Merge branch 'trac2370'

JINMEI Tatuya il y a 12 ans
Parent
commit
27e863962f

+ 3 - 0
src/lib/dns/Makefile.am

@@ -95,6 +95,7 @@ libb10_dns___la_SOURCES += edns.h edns.cc
 libb10_dns___la_SOURCES += exceptions.h exceptions.cc
 libb10_dns___la_SOURCES += labelsequence.h labelsequence.cc
 libb10_dns___la_SOURCES += masterload.h masterload.cc
+libb10_dns___la_SOURCES += master_lexer.h master_lexer.cc
 libb10_dns___la_SOURCES += message.h message.cc
 libb10_dns___la_SOURCES += messagerenderer.h messagerenderer.cc
 libb10_dns___la_SOURCES += name.h name.cc
@@ -145,6 +146,8 @@ libdns___include_HEADERS = \
 	exceptions.h \
 	labelsequence.h \
 	message.h \
+	master_lexer.h \
+	masterload.h \
 	messagerenderer.h \
 	name.h \
 	question.h \

+ 47 - 0
src/lib/dns/master_lexer.cc

@@ -0,0 +1,47 @@
+// Copyright (C) 2012  Internet Systems Consortium, Inc. ("ISC")
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+// REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+// AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+// INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+// LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+// OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+// PERFORMANCE OF THIS SOFTWARE.
+
+#include <dns/master_lexer.h>
+
+#include <cassert>
+#include <string>
+
+namespace {
+const char* const error_text[] = {
+    "lexer not started",        // NOT_STARTED
+    "unbalanced parentheses",   // UNBALANCED_PAREN
+    "unexpected end of input",  // UNEXPECTED_END
+    "unbalanced quotes"         // UNBALANCED_QUOTES
+};
+const size_t error_text_max_count = sizeof(error_text) / sizeof(error_text[0]);
+}
+
+namespace isc {
+namespace dns {
+
+std::string
+MasterLexer::Token::getErrorText() const {
+    if (type_ != ERROR) {
+        isc_throw(InvalidOperation,
+                  "Token::getErrorText() for non error type");
+    }
+
+    // The class integrity ensures the following:
+    assert(val_.error_code_ < error_text_max_count);
+    return (error_text[val_.error_code_]);
+}
+
+
+} // end of namespace dns
+} // end of namespace isc

+ 241 - 0
src/lib/dns/master_lexer.h

@@ -0,0 +1,241 @@
+// Copyright (C) 2012  Internet Systems Consortium, Inc. ("ISC")
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+// REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+// AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+// INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+// LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+// OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+// PERFORMANCE OF THIS SOFTWARE.
+
+#ifndef MASTER_LEXER_H
+#define MASTER_LEXER_H 1
+
+#include <exceptions/exceptions.h>
+
+#include <string>
+
+#include <stdint.h>
+
+namespace isc {
+namespace dns {
+
+class MasterLexer {
+public:
+    class Token;       // we define it separately for better readability
+};
+
+/// \brief Tokens for \c MasterLexer
+///
+/// This is a simple value-class encapsulating a type of a lexer token and
+/// (if it has a value) its value.  Essentially, the class provides
+/// constructors corresponding to different types of tokens, and corresponding
+/// getter methods.  The type and value are fixed at the time of construction
+/// and will never be modified throughout the lifetime of the object.
+/// The getter methods are still provided to maximize the safety; an
+/// application cannot refer to a value that is invalid for the type of token.
+///
+/// This class is intentionally implemented as copyable and assignable
+/// (using the default version of copy constructor and assignment operator),
+/// but it's mainly for internal implementation convenience.  Applications will
+/// simply refer to Token object as a reference via the \c MasterLexer class.
+class MasterLexer::Token {
+public:
+    /// \brief Enumeration for token types
+    ///
+    /// \note At the time of initial implementation, all numeric tokens
+    /// that would be extracted from \c MasterLexer should be represented
+    /// as an unsigned 32-bit integer.  If we see the need for larger integers
+    /// or negative numbers, we can then extend the token types.
+    enum Type {
+        END_OF_LINE, ///< End of line detected (if asked for detecting it)
+        END_OF_FILE, ///< End of file detected (if asked for detecting it)
+        INITIAL_WS,  ///< White spaces at the beginning of a line
+        NOVALUE_TYPE_MAX = INITIAL_WS, ///< Max integer corresponding to
+                                       /// no-value (type only) types.
+                                       /// Mainly for internal use.
+        STRING, ///< A single string
+        QSTRING, ///< A single string quoted by double-quotes (").
+        NUMBER,  ///< A decimal number (unsigned 32-bit)
+        ERROR    ///< Error detected in getting a token
+    };
+
+    /// \brief Enumeration for lexer error codes
+    enum ErrorCode {
+        NOT_STARTED, ///< The lexer is just initialized and has no token
+        UNBALANCED_PAREN,       ///< Unbalanced parentheses detected
+        UNEXPECTED_END, ///< The lexer reaches the end of line or file
+                       /// unexpectedly
+        UNBALANCED_QUOTES,      ///< Unbalanced quotations detected
+        MAX_ERROR_CODE ///< Max integer corresponding to valid error codes.
+                       /// (excluding this one). Mainly for internal use.
+    };
+
+    /// \brief A simple representation of a range of a string.
+    ///
+    /// This is a straightforward pair of the start pointer of a string
+    /// and its length.  The \c STRING and \c QSTRING types of tokens
+    /// will be primarily represented in this form.
+    ///
+    /// Any character can be stored in the valid range of the region.
+    /// In particular, there can be a nul character (\0) in the middle of
+    /// the region.  On the other hand, it is not ensured that the string
+    /// is nul-terminated.  So the usual string manipulation API may not work
+    /// as expected.
+    struct StringRegion {
+        const char* beg;        ///< The start address of the string
+        size_t len;             ///< The length of the string in bytes
+    };
+
+    /// \brief Constructor for non-value type of token.
+    ///
+    /// \throw InvalidParameter A value type token is specified.
+    /// \param type The type of the token.  It must indicate a non-value
+    /// type (not larger than \c NOVALUE_TYPE_MAX).
+    explicit Token(Type type) : type_(type) {
+        if (type > NOVALUE_TYPE_MAX) {
+            isc_throw(InvalidParameter, "Token per-type constructor "
+                      "called with invalid type: " << type);
+        }
+    }
+
+    /// \brief Constructor for string and quoted-string types of token.
+    ///
+    /// The optional \c quoted parameter specifies whether it's a quoted or
+    /// non quoted string.
+    ///
+    /// The string is specified as a pair of a pointer to the start address
+    /// and its length.  Any character can be contained in any position of
+    /// the valid range (see \c StringRegion).
+    ///
+    /// When it's a quoted string, the quotation marks must be excluded
+    /// from the specified range.
+    ///
+    /// \param str_beg The start address of the string
+    /// \param str_len The size of the string in bytes
+    /// \param quoted true if it's a quoted string; false otherwise.
+    Token(const char* str_beg, size_t str_len, bool quoted = false) :
+        type_(quoted ? QSTRING : STRING)
+    {
+        val_.str_region_.beg = str_beg;
+        val_.str_region_.len = str_len;
+    }
+
+    /// \brief Constructor for number type of token.
+    ///
+    /// \brief number An unsigned 32-bit integer corresponding to the token
+    /// value.
+    explicit Token(uint32_t number) : type_(NUMBER) {
+        val_.number_ = number;
+    }
+
+    /// \brief Constructor for error type of token.
+    ///
+    /// \throw InvalidParameter Invalid error code value is specified.
+    /// \brief error_code A pre-defined constant of \c ErrorCode.
+    explicit Token(ErrorCode error_code) : type_(ERROR) {
+        if (!(error_code < MAX_ERROR_CODE)) {
+            isc_throw(InvalidParameter, "Invalid master lexer error code: "
+                      << error_code);
+        }
+        val_.error_code_ = error_code;
+    }
+
+    /// \brief Return the token type.
+    ///
+    /// \throw none
+    Type getType() const { return (type_); }
+
+    /// \brief Return the value of a string-variant token.
+    ///
+    /// \throw InvalidOperation Called on a non string-variant types of token.
+    /// \return A reference to \c StringRegion corresponding to the string
+    ///         token value.
+    const StringRegion& getStringRegion() const {
+        if (type_ != STRING && type_ != QSTRING) {
+            isc_throw(InvalidOperation,
+                      "Token::getStringRegion() for non string-variant type");
+        }
+        return (val_.str_region_);
+    }
+
+    /// \brief Return the value of a string-variant token as a string object.
+    ///
+    /// Note that the underlying string may contain a nul (\0) character
+    /// in the middle.  The returned string object will contain all characters
+    /// of the valid range of the underlying string.  So some string
+    /// operations such as c_str() may not work as expected.
+    ///
+    /// \throw InvalidOperation Called on a non string-variant types of token.
+    /// \throw std::bad_alloc Resource allocation failure in constructing the
+    ///                       string object.
+    /// \return A std::string object corresponding to the string token value.
+    std::string getString() const {
+        if (type_ != STRING && type_ != QSTRING) {
+            isc_throw(InvalidOperation,
+                      "Token::getString() for non string-variant type");
+        }
+        return (std::string(val_.str_region_.beg,
+                            val_.str_region_.beg + val_.str_region_.len));
+    }
+
+    /// \brief Return the value of a string-variant token as a string object.
+    ///
+    /// \throw InvalidOperation Called on a non number type of token.
+    /// \return The integer corresponding to the number token value.
+    uint32_t getNumber() const {
+        if (type_ != NUMBER) {
+            isc_throw(InvalidOperation,
+                      "Token::getNumber() for non number type");
+        }
+        return (val_.number_);
+    }
+
+    /// \brief Return the error code of a error type token.
+    ///
+    /// \throw InvalidOperation Called on a non error type of token.
+    /// \return The error code of the token.
+    ErrorCode getErrorCode() const {
+        if (type_ != ERROR) {
+            isc_throw(InvalidOperation,
+                      "Token::getErrorCode() for non error type");
+        }
+        return (val_.error_code_);
+    };
+
+    /// \brief Return a textual description of the error of a error type token.
+    ///
+    /// The returned string would be useful to produce a log message when
+    /// a zone file parser encounters an error.
+    ///
+    /// \throw InvalidOperation Called on a non error type of token.
+    /// \throw std::bad_alloc Resource allocation failure in constructing the
+    ///                       string object.
+    /// \return A string object that describes the meaning of the error.
+    std::string getErrorText() const;
+
+private:
+    Type type_;    // this is not const so the class can be assignable
+
+    // We use a union to represent different types of token values via the
+    // unified Token class.  The class integrity should ensure valid operation
+    // on the union; getter methods should only refer to the member set at
+    // the construction.
+    union {
+        StringRegion str_region_;
+        uint32_t number_;
+        ErrorCode error_code_;
+    } val_;
+};
+
+} // namespace dns
+} // namespace isc
+#endif  // MASTER_LEXER_H
+
+// Local Variables:
+// mode: c++
+// End:

+ 1 - 0
src/lib/dns/tests/Makefile.am

@@ -24,6 +24,7 @@ run_unittests_SOURCES = unittest_util.h unittest_util.cc
 run_unittests_SOURCES += edns_unittest.cc
 run_unittests_SOURCES += labelsequence_unittest.cc
 run_unittests_SOURCES += messagerenderer_unittest.cc
+run_unittests_SOURCES += master_lexer_token_unittest.cc
 run_unittests_SOURCES += name_unittest.cc
 run_unittests_SOURCES += nsec3hash_unittest.cc
 run_unittests_SOURCES += rrclass_unittest.cc rrtype_unittest.cc

+ 156 - 0
src/lib/dns/tests/master_lexer_token_unittest.cc

@@ -0,0 +1,156 @@
+// Copyright (C) 2012  Internet Systems Consortium, Inc. ("ISC")
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+// REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+// AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+// INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+// LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+// OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+// PERFORMANCE OF THIS SOFTWARE.
+
+#include <exceptions/exceptions.h>
+
+#include <dns/master_lexer.h>
+
+#include <gtest/gtest.h>
+
+#include <string>
+
+using namespace isc::dns;
+
+namespace {
+
+const char TEST_STRING[] = "string token";
+// This excludes the ending \0 character
+const size_t TEST_STRING_LEN = sizeof(TEST_STRING) - 1;
+
+class MasterLexerTokenTest : public ::testing::Test {
+protected:
+    MasterLexerTokenTest() :
+        token_eof(MasterLexer::Token::END_OF_FILE),
+        token_str(TEST_STRING, TEST_STRING_LEN),
+        token_num(42),
+        token_err(MasterLexer::Token::UNEXPECTED_END)
+    {}
+
+    const MasterLexer::Token token_eof; // an example of non-value type token
+    const MasterLexer::Token token_str;
+    const MasterLexer::Token token_num;
+    const MasterLexer::Token token_err;
+};
+
+
+TEST_F(MasterLexerTokenTest, strings) {
+    // basic construction and getter checks
+    EXPECT_EQ(MasterLexer::Token::STRING, token_str.getType());
+    EXPECT_EQ(std::string("string token"), token_str.getString());
+    const MasterLexer::Token::StringRegion str_region =
+        token_str.getStringRegion();
+    EXPECT_EQ(TEST_STRING, str_region.beg);
+    EXPECT_EQ(TEST_STRING_LEN, str_region.len);
+
+    // Even if the stored string contains a nul character (in this case,
+    // it happens to be at the end of the string, but could be in the middle),
+    // getString() should return a string object containing the nul.
+    std::string expected_str("string token");
+    expected_str.push_back('\0');
+    EXPECT_EQ(expected_str,
+              MasterLexer::Token(TEST_STRING, TEST_STRING_LEN + 1).getString());
+
+    // Construct type of qstring
+    EXPECT_EQ(MasterLexer::Token::QSTRING,
+              MasterLexer::Token(TEST_STRING, sizeof(TEST_STRING), true).
+              getType());
+    // if we explicitly set 'quoted' to false, it should be normal string
+    EXPECT_EQ(MasterLexer::Token::STRING,
+              MasterLexer::Token(TEST_STRING, sizeof(TEST_STRING), false).
+              getType());
+
+    // getString/StringRegion() aren't allowed for non string(-variant) types
+    EXPECT_THROW(token_eof.getString(), isc::InvalidOperation);
+    EXPECT_THROW(token_num.getString(), isc::InvalidOperation);
+    EXPECT_THROW(token_eof.getStringRegion(), isc::InvalidOperation);
+    EXPECT_THROW(token_num.getStringRegion(), isc::InvalidOperation);
+}
+
+TEST_F(MasterLexerTokenTest, numbers) {
+    EXPECT_EQ(42, token_num.getNumber());
+    EXPECT_EQ(MasterLexer::Token::NUMBER, token_num.getType());
+
+    // It's copyable and assignable.
+    MasterLexer::Token token(token_num);
+    EXPECT_EQ(42, token.getNumber());
+    EXPECT_EQ(MasterLexer::Token::NUMBER, token.getType());
+
+    token = token_num;
+    EXPECT_EQ(42, token.getNumber());
+    EXPECT_EQ(MasterLexer::Token::NUMBER, token.getType());
+
+    // it's okay to replace it with a different type of token
+    token = token_eof;
+    EXPECT_EQ(MasterLexer::Token::END_OF_FILE, token.getType());
+
+    // Possible max value
+    token = MasterLexer::Token(0xffffffff);
+    EXPECT_EQ(4294967295u, token.getNumber());
+
+    // getNumber() isn't allowed for non number types
+    EXPECT_THROW(token_eof.getNumber(), isc::InvalidOperation);
+    EXPECT_THROW(token_str.getNumber(), isc::InvalidOperation);
+}
+
+TEST_F(MasterLexerTokenTest, novalues) {
+    // Just checking we can construct them and getType() returns correct value.
+    EXPECT_EQ(MasterLexer::Token::END_OF_FILE, token_eof.getType());
+    EXPECT_EQ(MasterLexer::Token::END_OF_LINE,
+              MasterLexer::Token(MasterLexer::Token::END_OF_LINE).getType());
+    EXPECT_EQ(MasterLexer::Token::INITIAL_WS,
+              MasterLexer::Token(MasterLexer::Token::INITIAL_WS).getType());
+
+    // Special types of tokens cannot have value-based types
+    EXPECT_THROW(MasterLexer::Token t(MasterLexer::Token::STRING),
+                 isc::InvalidParameter);
+    EXPECT_THROW(MasterLexer::Token t(MasterLexer::Token::QSTRING),
+                 isc::InvalidParameter);
+    EXPECT_THROW(MasterLexer::Token t(MasterLexer::Token::NUMBER),
+                 isc::InvalidParameter);
+    EXPECT_THROW(MasterLexer::Token t(MasterLexer::Token::ERROR),
+                 isc::InvalidParameter);
+}
+
+TEST_F(MasterLexerTokenTest, errors) {
+    EXPECT_EQ(MasterLexer::Token::ERROR, token_err.getType());
+    EXPECT_EQ(MasterLexer::Token::UNEXPECTED_END, token_err.getErrorCode());
+    EXPECT_EQ("unexpected end of input", token_err.getErrorText());
+    EXPECT_EQ("lexer not started",
+              MasterLexer::Token(MasterLexer::Token::NOT_STARTED).
+              getErrorText());
+    EXPECT_EQ("unbalanced parentheses",
+              MasterLexer::Token(MasterLexer::Token::UNBALANCED_PAREN).
+              getErrorText());
+    EXPECT_EQ("unbalanced quotes",
+              MasterLexer::Token(MasterLexer::Token::UNBALANCED_QUOTES).
+              getErrorText());
+
+    // getErrorCode/Text() isn't allowed for non number types
+    EXPECT_THROW(token_num.getErrorCode(), isc::InvalidOperation);
+    EXPECT_THROW(token_num.getErrorText(), isc::InvalidOperation);
+
+    // Only the pre-defined error code is accepted.  Hardcoding '4' (max code
+    // + 1) is intentional; it'd be actually better if we notice it when we
+    // update the enum list (which shouldn't happen too often).
+    EXPECT_THROW(MasterLexer::Token(MasterLexer::Token::ErrorCode(4)),
+                 isc::InvalidParameter);
+
+    // Check the coexistence of "from number" and "from error-code"
+    // constructors won't cause confusion.
+    EXPECT_EQ(MasterLexer::Token::NUMBER,
+              MasterLexer::Token(static_cast<uint32_t>(
+                                     MasterLexer::Token::NOT_STARTED)).
+              getType());
+}
+}