Browse Source

[master] Merge branch 'trac1603'

JINMEI Tatuya 13 years ago
parent
commit
9a2a86f3f4

+ 2 - 1
src/lib/dns/Makefile.am

@@ -98,6 +98,7 @@ libdns___la_SOURCES += masterload.h masterload.cc
 libdns___la_SOURCES += message.h message.cc
 libdns___la_SOURCES += messagerenderer.h messagerenderer.cc
 libdns___la_SOURCES += name.h name.cc
+libdns___la_SOURCES += name_internal.h
 libdns___la_SOURCES += nsec3hash.h nsec3hash.cc
 libdns___la_SOURCES += opcode.h opcode.cc
 libdns___la_SOURCES += rcode.h rcode.cc
@@ -156,7 +157,7 @@ libdns___include_HEADERS = \
 	rrttl.h \
 	tsigkey.h
 # Purposely not installing these headers:
-# util/*.h: used only internally, and not actually DNS specific
+# name_internal.h: used only internally, and not actually DNS specific
 # rdata/*/detail/*.h: these are internal use only
 # rrclass-placeholder.h
 # rrtype-placeholder.h

+ 8 - 2
src/lib/dns/benchmarks/Makefile.am

@@ -9,10 +9,16 @@ endif
 
 CLEANFILES = *.gcno *.gcda
 
-noinst_PROGRAMS = rdatarender_bench
+noinst_PROGRAMS = rdatarender_bench message_renderer_bench
+
 rdatarender_bench_SOURCES = rdatarender_bench.cc
 
 rdatarender_bench_LDADD = $(top_builddir)/src/lib/dns/libdns++.la
 rdatarender_bench_LDADD += $(top_builddir)/src/lib/util/libutil.la
 rdatarender_bench_LDADD += $(top_builddir)/src/lib/exceptions/libexceptions.la
-rdatarender_bench_LDADD += $(SQLITE_LIBS)
+
+message_renderer_bench_SOURCES = message_renderer_bench.cc
+message_renderer_bench_SOURCES += oldmessagerenderer.h oldmessagerenderer.cc
+message_renderer_bench_LDADD = $(top_builddir)/src/lib/dns/libdns++.la
+message_renderer_bench_LDADD += $(top_builddir)/src/lib/util/libutil.la
+message_renderer_bench_LDADD += $(top_builddir)/src/lib/exceptions/libexceptions.la

+ 176 - 0
src/lib/dns/benchmarks/message_renderer_bench.cc

@@ -0,0 +1,176 @@
+// Copyright (C) 2012  Internet Systems Consortium, Inc. ("ISC")
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+// REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+// AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+// INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+// LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+// OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+// PERFORMANCE OF THIS SOFTWARE.
+
+#include <bench/benchmark.h>
+
+#include <dns/name.h>
+#include <dns/messagerenderer.h>
+#include <oldmessagerenderer.h>
+
+#include <cassert>
+#include <vector>
+
+using namespace std;
+using namespace isc::util;
+using namespace isc::bench;
+using namespace isc::dns;
+
+namespace {
+// This templated test renders a given set of names with a given (templated)
+// MessageRenderer implementation, so that the old and new implementations
+// can be compared on the same data.  The name vector is held by reference
+// and must outlive this object; run() returns the number of names rendered.
+template <typename T>
+class MessageRendererBenchMark {
+public:
+    MessageRendererBenchMark(const vector<Name>& names) :
+        names_(names)
+    {}
+    unsigned int run() {
+        renderer_.clear();      // ask the renderer to reset itself first
+        vector<Name>::const_iterator it = names_.begin();
+        const vector<Name>::const_iterator it_end = names_.end();
+        for (; it != it_end; ++it) {
+            renderer_.writeName(*it);
+        }
+        // Make sure truncation didn't accidentally happen.
+        assert(!renderer_.isTruncated());
+        return (names_.size());
+    }
+private:
+    T renderer_;                    // renderer implementation under test
+    const vector<Name>& names_;     // not owned by this object
+};
+
+//
+// Builtin benchmark data.  Each array below is terminated by a NULL entry.
+//
+// This consists of all names contained in a response from a root server for
+// the query for "www.example.com" (as of this writing).
+const char* const root_to_com_names[] = {
+    // question section
+    "www.example.com",
+    // authority section
+    "com", "a.gtld-servers.net", "com", "b.gtld-servers.net",
+    "com", "c.gtld-servers.net", "com", "d.gtld-servers.net",
+    "com", "e.gtld-servers.net", "com", "f.gtld-servers.net",
+    "com", "g.gtld-servers.net", "com", "h.gtld-servers.net",
+    "com", "i.gtld-servers.net", "com", "j.gtld-servers.net",
+    "com", "k.gtld-servers.net", "com", "l.gtld-servers.net",
+    "com",                      // owner name of DS
+    "com",                      // owner name of RRSIG(DS)
+    // additional section.  a and b have both AAAA and A; others have A only.
+    "a.gtld-servers.net", "a.gtld-servers.net",
+    "b.gtld-servers.net", "b.gtld-servers.net",
+    "c.gtld-servers.net", "d.gtld-servers.net", "e.gtld-servers.net",
+    "f.gtld-servers.net", "g.gtld-servers.net", "h.gtld-servers.net",
+    "i.gtld-servers.net", "j.gtld-servers.net", "k.gtld-servers.net",
+    "l.gtld-servers.net", "m.gtld-servers.net",
+    NULL
+};
+
+// Names contained in a typical "NXDOMAIN" response: the question, the owner
+// name of the SOA, and its MNAME and RNAME.
+const char* const example_nxdomain_names[] = {
+    "www.example.com", "example.com", "ns.example.com", "root.example.com",
+    NULL
+};
+
+// Names contained in a typical "SERVFAIL" response: only the question.
+const char* const example_servfail_names[] = {
+    "www.example.com", NULL
+};
+
+// An experimental "dumb" renderer for comparison.  It doesn't do any name
+// compression: setters are ignored, getters return dummy values, and names
+// are written to the internal buffer as plain binary data.  clear() still
+// resets that buffer so repeated benchmark runs don't accumulate output.
+class DumbMessageRenderer : public AbstractMessageRenderer {
+public:
+    virtual void clear() { AbstractMessageRenderer::clear(); }
+    virtual size_t getLengthLimit() const { return (512); }
+    virtual void setLengthLimit(const size_t) {}
+    virtual bool isTruncated() const { return (false); }
+    virtual void setTruncated() {}
+    virtual CompressMode getCompressMode() const { return (CASE_INSENSITIVE); }
+    virtual void setCompressMode(const CompressMode) {}
+    virtual void writeName(const Name& name, const bool = false) {
+        name.toWire(getBuffer());
+    }
+};
+
+void
+usage() {
+    cerr << "Usage: message_renderer_bench [-n iterations]" << endl;
+    exit (1);   // does not return; exit status 1 reports the usage error
+}
+}
+
+int
+main(int argc, char* argv[]) {
+    int ch;
+    int iteration = 100000;     // default iteration count; override with -n
+    while ((ch = getopt(argc, argv, "n:")) != -1) {
+        switch (ch) {
+        case 'n':
+            iteration = atoi(optarg);   // NOTE(review): value is not validated
+            break;
+        case '?':
+        default:
+            usage();            // usage() exits, so nothing falls through
+        }
+    }
+    argc -= optind;             // skip the options consumed by getopt()
+    argv += optind;
+    if (argc != 0) {            // positional arguments are not accepted
+        usage();
+    }
+
+    cout << "Parameters:" << endl;
+    cout << "  Iterations: " << iteration << endl;
+
+    typedef pair<const char* const*, string> DataSpec;  // (name list, label)
+    vector<DataSpec> spec_list;
+    spec_list.push_back(DataSpec(root_to_com_names, "(positive response)"));
+    spec_list.push_back(DataSpec(example_nxdomain_names,
+                                 "(NXDOMAIN response)"));
+    spec_list.push_back(DataSpec(example_servfail_names,
+                                 "(SERVFAIL response)"));
+    for (vector<DataSpec>::const_iterator it = spec_list.begin();
+         it != spec_list.end();
+         ++it) {
+        vector<Name> names;     // rebuilt for each data set
+        for (size_t i = 0; it->first[i] != NULL; ++i) {     // NULL-terminated
+            names.push_back(Name(it->first[i]));
+        }
+
+        typedef MessageRendererBenchMark<OldMessageRenderer>
+            OldRendererBenchMark;
+        cout << "Benchmark for old MessageRenderer " << it->second << endl;
+        BenchMark<OldRendererBenchMark>(iteration,
+                                        OldRendererBenchMark(names));
+
+        typedef MessageRendererBenchMark<DumbMessageRenderer>
+            DumbRendererBenchMark;
+        cout << "Benchmark for dumb MessageRenderer " << it->second << endl;
+        BenchMark<DumbRendererBenchMark>(iteration,
+                                         DumbRendererBenchMark(names));
+
+        typedef MessageRendererBenchMark<MessageRenderer> RendererBenchMark;
+        cout << "Benchmark for new MessageRenderer " << it->second << endl;
+        BenchMark<RendererBenchMark>(iteration, RendererBenchMark(names));
+    }
+
+    return (0);
+}

+ 278 - 0
src/lib/dns/benchmarks/oldmessagerenderer.cc

@@ -0,0 +1,278 @@
+// Copyright (C) 2009  Internet Systems Consortium, Inc. ("ISC")
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+// REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+// AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+// INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+// LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+// OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+// PERFORMANCE OF THIS SOFTWARE.
+
+#include <exceptions/exceptions.h>
+#include <util/buffer.h>
+#include <dns/name.h>
+#include <oldmessagerenderer.h>
+
+#include <cctype>
+#include <cassert>
+#include <set>
+
+using namespace isc::util;
+
+namespace isc {
+namespace dns {
+
+namespace {     // hide internal-only names from the public namespaces
+///
+/// \brief The \c NameCompressNode class represents a pointer to a name
+/// rendered in the internal buffer for the \c MessageRendererImpl object.
+///
+/// A \c MessageRendererImpl object maintains a set of the \c NameCompressNode
+/// objects, and searches the set for the position of the longest match
+/// (ancestor) name against each new name to be rendered into the buffer.
+struct NameCompressNode {
+    NameCompressNode(const OldMessageRenderer& renderer,
+                     const OutputBuffer& buffer, const size_t pos,
+                     const size_t len) :
+        renderer_(renderer), buffer_(buffer), pos_(pos), len_(len) {} // pos/len narrow to 16 bits
+    /// The renderer that performs name compression using the node.
+    /// This is kept in each node to detect the compression mode
+    /// (case-sensitive or not) in the comparison functor (\c NameCompare).
+    const OldMessageRenderer& renderer_;
+    /// The buffer in which the corresponding name is rendered (not owned).
+    const OutputBuffer& buffer_;
+    /// The position (offset from the beginning) in the buffer where the
+    /// name starts.
+    uint16_t pos_;
+    /// The length of the corresponding name in wire format, in octets.
+    uint16_t len_;
+};
+
+///
+/// \brief The \c NameCompare class is a functor that gives ordering among
+/// \c NameCompressNode objects stored in \c MessageRendererImpl::nodeset_.
+///
+/// Its only public method as a functor, \c operator(), gives the ordering
+/// between two \c NameCompressNode objects in terms of equivalence, that is,
+/// returns whether one is "less than" the other.
+/// For our purpose we only need to distinguish two different names, so the
+/// ordering is different from the canonical DNS name order used in DNSSEC;
+/// basically, it orders names by their length first and then by the octets
+/// of their wire-format representation.
+struct NameCompare : public std::binary_function<NameCompressNode,
+                                                 NameCompressNode,
+                                                 bool> {
+    ///
+    /// Returns true if n1 < n2, otherwise false.  A shorter name sorts first;
+    /// names of equal length are compared octet by octet, case-sensitively or
+    /// not depending on the compress mode of the renderer of \c n1.
+    /// The name corresponding to \c n1 or \c n2 may be compressed, in which
+    /// case we must follow the compression pointer in the associated buffer.
+    /// The helper private method \c nextPosition() gives the position in the
+    /// buffer for the next character, taking into account compression.
+    ///
+    bool operator()(const NameCompressNode& n1,
+                    const NameCompressNode& n2) const
+    {
+        if (n1.len_ < n2.len_) {
+            return (true);
+        } else if (n1.len_ > n2.len_) {
+            return (false);
+        }
+
+        const bool case_sensitive =
+            (n1.renderer_.getCompressMode() == OldMessageRenderer::CASE_SENSITIVE);
+
+        uint16_t pos1 = n1.pos_;
+        uint16_t pos2 = n2.pos_;
+        uint16_t l1 = 0;    // octets left in the current label of n1
+        uint16_t l2 = 0;    // octets left in the current label of n2
+        for (uint16_t i = 0; i < n1.len_; i++, pos1++, pos2++) {
+            pos1 = nextPosition(n1.buffer_, pos1, l1);
+            pos2 = nextPosition(n2.buffer_, pos2, l2);
+            if (case_sensitive) {
+                if (n1.buffer_[pos1] < n2.buffer_[pos2]) {
+                    return (true);
+                } else if (n1.buffer_[pos1] > n2.buffer_[pos2]) {
+                    return (false);
+                }
+            } else {
+                if (tolower(n1.buffer_[pos1]) < tolower(n2.buffer_[pos2])) {
+                    return (true);
+                } else if (tolower(n1.buffer_[pos1]) >
+                           tolower(n2.buffer_[pos2])) {
+                    return (false);
+                }
+            }
+        }
+
+        return (false);     // all octets matched: neither name is "less"
+    }
+
+private:
+    uint16_t nextPosition(const OutputBuffer& buffer,
+                          uint16_t pos, uint16_t& llen) const
+    {
+        if (llen == 0) {    // at a label boundary: pos is a length or pointer
+            size_t i = 0;
+
+            while ((buffer[pos] & Name::COMPRESS_POINTER_MARK8) ==
+                   Name::COMPRESS_POINTER_MARK8) {
+                pos = (buffer[pos] & ~Name::COMPRESS_POINTER_MARK8) *
+                    256 + buffer[pos + 1];
+
+                // This loop should stop as long as the buffer has been
+                // constructed validly and the search/insert argument is based
+                // on a valid name, which is an assumption for this class.
+                // But we'll abort if a bug could cause an infinite loop.
+                i += 2;
+                assert(i < Name::MAX_WIRE);
+            }
+            llen = buffer[pos];     // length octet of the label starting here
+        } else {
+            --llen;
+        }
+        return (pos);
+    }
+};
+}
+
+///
+/// \brief The \c MessageRendererImpl class is the actual implementation of
+/// \c OldMessageRenderer.
+///
+/// The implementation is hidden from applications.  We can refer to specific
+/// members of this class only within the implementation source file.
+///
+struct OldMessageRenderer::MessageRendererImpl {
+    /// \brief Default constructor.
+    /// Starts with a 512-octet limit, no truncation, case-insensitive mode.
+    MessageRendererImpl() :
+        nbuffer_(Name::MAX_WIRE), msglength_limit_(512),
+        truncated_(false), compress_mode_(OldMessageRenderer::CASE_INSENSITIVE)
+    {}
+    /// A local working buffer to convert each given name into wire format.
+    /// This could be a local variable of the \c writeName() method, but
+    /// we keep it in the class so that we can reuse it and avoid construction
+    /// overhead.
+    OutputBuffer nbuffer_;
+    /// A set of compression pointers.
+    std::set<NameCompressNode, NameCompare> nodeset_;
+    /// The maximum length of rendered data that can fit without
+    /// truncation.
+    uint16_t msglength_limit_;
+    /// A boolean flag that indicates truncation has occurred while rendering
+    /// the data.
+    bool truncated_;
+    /// The name compression mode.
+    CompressMode compress_mode_;
+};
+
+OldMessageRenderer::OldMessageRenderer() :
+    AbstractMessageRenderer(),
+    impl_(new MessageRendererImpl)     // released in the destructor
+{}
+
+OldMessageRenderer::~OldMessageRenderer() {
+    delete impl_;       // impl_ was allocated with new in the constructor
+}
+
+void
+OldMessageRenderer::clear() {
+    AbstractMessageRenderer::clear();
+    impl_->nbuffer_.clear();
+    impl_->nodeset_.clear();        // forget all recorded compression targets
+    impl_->msglength_limit_ = 512;  // same defaults as the constructor
+    impl_->truncated_ = false;
+    impl_->compress_mode_ = CASE_INSENSITIVE;
+}
+
+size_t
+OldMessageRenderer::getLengthLimit() const {
+    return (impl_->msglength_limit_);   // 512 unless setLengthLimit() changed it
+}
+
+void
+OldMessageRenderer::setLengthLimit(const size_t len) {
+    impl_->msglength_limit_ = len;  // NOTE(review): stored as uint16_t, so len is narrowed
+}
+
+bool
+OldMessageRenderer::isTruncated() const {
+    return (impl_->truncated_);     // true once setTruncated() has been called
+}
+
+void
+OldMessageRenderer::setTruncated() {
+    impl_->truncated_ = true;       // stays set until clear() resets it
+}
+
+OldMessageRenderer::CompressMode
+OldMessageRenderer::getCompressMode() const {
+    return (impl_->compress_mode_);     // CASE_INSENSITIVE unless changed
+}
+
+void
+OldMessageRenderer::setCompressMode(const CompressMode mode) {
+    impl_->compress_mode_ = mode;   // consulted by NameCompare on each comparison
+}
+
+void
+OldMessageRenderer::writeName(const Name& name, const bool compress) {
+    impl_->nbuffer_.clear();
+    name.toWire(impl_->nbuffer_);
+
+    unsigned int i;     // offset in nbuffer_ where the matched suffix starts
+    std::set<NameCompressNode, NameCompare>::const_iterator notfound =
+        impl_->nodeset_.end();
+    std::set<NameCompressNode, NameCompare>::const_iterator n = notfound;
+
+    // Find the longest ancestor name in the rendered set that matches the
+    // given name.
+    for (i = 0; i < impl_->nbuffer_.getLength(); i += impl_->nbuffer_[i] + 1) {
+        // skip the trailing null label
+        if (impl_->nbuffer_[i] == 0) {
+            continue;
+        }
+        n = impl_->nodeset_.find(NameCompressNode(*this, impl_->nbuffer_, i,
+                                                  impl_->nbuffer_.getLength() -
+                                                  i));
+        if (n != notfound) {
+            break;
+        }
+    }
+
+    // Record the current offset before extending the buffer.
+    const size_t offset = getLength();
+    // Write the uncompressed part (the whole name if not compressing)...
+    writeData(impl_->nbuffer_.getData(),
+              compress ? i : impl_->nbuffer_.getLength());
+    if (compress && n != notfound) {
+        // ...and compression pointer if available.
+        uint16_t pointer = (*n).pos_;
+        pointer |= Name::COMPRESS_POINTER_MARK16;
+        writeUint16(pointer);
+    }
+
+    // Finally, add to the set the newly rendered name and its ancestors that
+    // have not been in the set.
+    for (unsigned int j = 0; j < i; j += impl_->nbuffer_[j] + 1) {
+        if (impl_->nbuffer_[j] == 0) {
+            continue;
+        }
+        if (offset + j > Name::MAX_COMPRESS_POINTER) {  // beyond pointer range
+            break;
+        }
+        impl_->nodeset_.insert(NameCompressNode(*this, getBuffer(),
+                                                offset + j,
+                                                impl_->nbuffer_.getLength() -
+                                                j));
+    }
+}
+
+}
+}

+ 55 - 0
src/lib/dns/benchmarks/oldmessagerenderer.h

@@ -0,0 +1,55 @@
+// Copyright (C) 2009  Internet Systems Consortium, Inc. ("ISC")
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+// REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+// AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+// INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+// LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+// OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+// PERFORMANCE OF THIS SOFTWARE.
+
+#ifndef __OLDMESSAGERENDERER_H
+#define __OLDMESSAGERENDERER_H 1
+
+//
+// This is a copy of an older version of MessageRenderer class.  It is kept
+// here to provide a benchmark target.
+//
+
+#include <dns/messagerenderer.h>
+
+namespace isc {
+namespace dns {
+
+class OldMessageRenderer : public AbstractMessageRenderer {
+public:
+    using AbstractMessageRenderer::CASE_INSENSITIVE;
+    using AbstractMessageRenderer::CASE_SENSITIVE;
+
+    /// \brief Default constructor.
+    OldMessageRenderer();
+
+    virtual ~OldMessageRenderer();
+    virtual bool isTruncated() const;
+    virtual size_t getLengthLimit() const;
+    virtual CompressMode getCompressMode() const;
+    virtual void setTruncated();
+    virtual void setLengthLimit(size_t len);
+    virtual void setCompressMode(CompressMode mode);
+    virtual void clear();
+    virtual void writeName(const Name& name, bool compress = true);
+private:
+    struct MessageRendererImpl;     // defined in the implementation file
+    MessageRendererImpl* impl_;     // NOTE(review): raw owner, no copy control declared here
+};
+}
+}
+#endif // __OLDMESSAGERENDERER_H
+
+// Local Variables:
+// mode: c++
+// End:

+ 43 - 6
src/lib/dns/labelsequence.cc

@@ -13,25 +13,33 @@
 // PERFORMANCE OF THIS SOFTWARE.
 
 #include <dns/labelsequence.h>
+#include <dns/name_internal.h>
 #include <exceptions/exceptions.h>
 
-#include <iostream>
+#include <boost/functional/hash.hpp>
+
 namespace isc {
 namespace dns {
 
 const char*
 LabelSequence::getData(size_t *len) const {
+    *len = getDataLength();
+    return (&name_.ndata_[name_.offsets_[first_label_]]);
+}
+
+size_t
+LabelSequence::getDataLength() const {
     // If the labelsequence is absolute, the current last_label_ falls
     // out of the vector (since it points to the 'label' after the
     // root label, which doesn't exist; in that case, return
     // the length for the 'previous' label (the root label) plus
     // one (for the root label zero octet)
     if (isAbsolute()) {
-        *len = name_.offsets_[last_label_ - 1] - name_.offsets_[first_label_] + 1;
+        return (name_.offsets_[last_label_ - 1] -
+                name_.offsets_[first_label_] + 1);
     } else {
-        *len = name_.offsets_[last_label_] - name_.offsets_[first_label_];
+        return (name_.offsets_[last_label_] - name_.offsets_[first_label_]);
     }
-    return (&name_.ndata_[name_.offsets_[first_label_]]);
 }
 
 bool
@@ -44,10 +52,21 @@ LabelSequence::equals(const LabelSequence& other, bool case_sensitive) const {
         return (false);
     }
     if (case_sensitive) {
-        return (strncasecmp(data, other_data, len) == 0);
-    } else {
         return (strncmp(data, other_data, len) == 0);
     }
+
+    // As long as the data was originally validated as (part of) a name,
+    // label length must never be a capital ascii character, so we can
+    // simply compare them after converting to lower characters.
+    for (size_t i = 0; i < len; ++i) {
+        const unsigned char ch = data[i];
+        const unsigned char other_ch = other_data[i];
+        if (isc::dns::name::internal::maptolower[ch] !=
+            isc::dns::name::internal::maptolower[other_ch]) {
+            return (false);
+        }
+    }
+    return (true);
 }
 
 void
@@ -73,5 +92,23 @@ LabelSequence::isAbsolute() const {
     return (last_label_ == name_.offsets_.size());
 }
 
+size_t
+LabelSequence::getHash(bool case_sensitive) const {
+    size_t length;
+    const char* s = getData(&length);
+    if (length > 16) {      // only the first 16 octets contribute to the hash
+        length = 16;
+    }
+
+    size_t hash_val = 0;
+    while (length > 0) {
+        const unsigned char c = *s++;   // unsigned so it can index maptolower
+        boost::hash_combine(hash_val, case_sensitive ? c :
+                            isc::dns::name::internal::maptolower[c]);
+        --length;
+    }
+    return (hash_val);
+}
+
 } // end namespace dns
 } // end namespace isc

+ 43 - 2
src/lib/dns/labelsequence.h

@@ -69,6 +69,22 @@ public:
     /// \return Pointer to the wire-format data of this label sequence
     const char* getData(size_t* len) const;
 
+    /// \brief Return the length of the wire-format data of this LabelSequence
+    ///
+    /// This method returns the number of octets for the data that would
+    /// be returned by the \c getData() method.
+    ///
+    /// Note that the return value of this method is always positive.
+    /// Note also that if the return value of this method is 1, it means the
+    /// sequence consists of the null label, i.e., a single "dot", and vice
+    /// versa.
+    ///
+    /// \note The returned length is only meaningful while the original Name
+    /// object is still in scope.
+    ///
+    /// \return The length of the data of the label sequence in octets.
+    size_t getDataLength() const;
+
     /// \brief Compares two label sequences.
     ///
     /// Performs a (optionally case-insensitive) comparison between this
@@ -105,7 +121,7 @@ public:
     /// \brief Returns the current number of labels for this LabelSequence
     ///
     /// \return The number of labels
-    size_t getLabelCount() const { return last_label_ - first_label_; }
+    size_t getLabelCount() const { return (last_label_ - first_label_); }
 
     /// \brief Returns the original Name object associated with this
     ///        LabelSequence
@@ -116,7 +132,32 @@ public:
     /// LabelSequence itself.
     ///
     /// \return Reference to the original Name object
-    const Name& getName() const { return name_; }
+    const Name& getName() const { return (name_); }
+
+    /// \brief Calculate a simple hash for the label sequence.
+    ///
+    /// This method calculates a hash value for the label sequence as binary
+    /// data.  If \c case_sensitive is false, it ignores the case stored in
+    /// the labels; specifically, it normalizes the labels by converting all
+    /// upper case characters to lower case ones and calculates the hash value
+    /// for the result.
+    ///
+    /// This method is intended to provide a lightweight way to store a
+    /// relatively small number of label sequences in a hash table.
+    /// For this reason it only takes into account data up to 16 octets
+    /// (16 was derived from BIND 9's implementation).  Also, the function does
+    /// not provide any unpredictability; a specific sequence will always have
+    /// the same hash value.  It should therefore not be used in the context
+    /// where an untrusted third party can mount a denial of service attack by
+    /// forcing the application to create a very large number of label
+    /// sequences that have the same hash value and expected to be stored in
+    /// a hash table.
+    ///
+    /// \exception None
+    ///
+    /// \param case_sensitive
+    /// \return A hash value for this label sequence.
+    size_t getHash(bool case_sensitive) const;
 
     /// \brief Checks whether the label sequence is absolute
     ///

+ 201 - 116
src/lib/dns/messagerenderer.cc

@@ -15,102 +15,105 @@
 #include <exceptions/exceptions.h>
 #include <util/buffer.h>
 #include <dns/name.h>
+#include <dns/name_internal.h>
+#include <dns/labelsequence.h>
 #include <dns/messagerenderer.h>
 
-#include <cctype>
+#include <boost/array.hpp>
+#include <boost/static_assert.hpp>
+
+#include <limits>
 #include <cassert>
-#include <set>
+#include <vector>
 
+using namespace std;
 using namespace isc::util;
+using isc::dns::name::internal::maptolower;
 
 namespace isc {
 namespace dns {
 
 namespace {     // hide internal-only names from the public namespaces
 ///
-/// \brief The \c NameCompressNode class represents a pointer to a name
+/// \brief The \c OffsetItem class represents a pointer to a name
 /// rendered in the internal buffer for the \c MessageRendererImpl object.
 ///
-/// A \c MessageRendererImpl object maintains a set of the \c NameCompressNode
-/// objects, and searches the set for the position of the longest match
-/// (ancestor) name against each new name to be rendered into the buffer.
-struct NameCompressNode {
-    NameCompressNode(const MessageRenderer& renderer,
-                     const OutputBuffer& buffer, const size_t pos,
-                     const size_t len) :
-        renderer_(renderer), buffer_(buffer), pos_(pos), len_(len) {}
-    /// The renderer that performs name compression using the node.
-    /// This is kept in each node to detect the compression mode
-    /// (case-sensitive or not) in the comparison functor (\c NameCompare).
-    const MessageRenderer& renderer_;
-    /// The buffer in which the corresponding name is rendered.
-    const OutputBuffer& buffer_;
+/// A \c MessageRendererImpl object maintains a set of \c OffsetItem
+/// objects in a hash table, and searches the table for the position of the
+/// longest match (ancestor) name against each new name to be rendered into
+/// the buffer.
+struct OffsetItem {
+    OffsetItem(size_t hash, size_t pos, size_t len) :
+        hash_(hash), pos_(pos), len_(len)
+    {}
+
+    /// The hash value for the stored name calculated by LabelSequence.getHash.
+    /// This will help make name comparison in \c NameCompare more efficient.
+    size_t hash_;
+
     /// The position (offset from the beginning) in the buffer where the
     /// name starts.
     uint16_t pos_;
-    /// The length of the corresponding name.
+
+    /// The length of the corresponding sequence (which is a domain name).
     uint16_t len_;
 };
 
+/// \brief The \c NameCompare class is a functor that checks equality
+/// between the name corresponding to an \c OffsetItem object and the name
+/// consisting of labels represented by a \c LabelSequence object.
 ///
-/// \brief The \c NameCompare class is a functor that gives ordering among
-/// \c NameCompressNode objects stored in \c MessageRendererImpl::nodeset_.
-///
-/// Its only public method as a functor, \c operator(), gives the ordering
-/// between two \c NameCompressNode objects in terms of equivalence, that is,
-/// returns whether one is "less than" the other.
-/// For our purpose we only need to distinguish two different names, so the
-/// ordering is different from the canonical DNS name order used in DNSSEC;
-/// basically, it gives the case-insensitive ordering of the two names as their
-/// textual representation.
-struct NameCompare : public std::binary_function<NameCompressNode,
-                                                 NameCompressNode,
-                                                 bool> {
-    ///
-    /// Returns true if n1 < n2 as a result of case-insensitive comparison;
-    /// otherwise return false.
+/// Template parameter CASE_SENSITIVE determines whether to ignore the case
+/// of the names.  This policy doesn't change throughout the lifetime of
+/// this object, so we separate these using template to avoid unnecessary
+/// condition check.
+template <bool CASE_SENSITIVE>
+struct NameCompare {
+    /// \brief Constructor
     ///
-    /// The name corresponding to \c n1 or \c n2 may be compressed, in which
-    /// case we must follow the compression pointer in the associated buffer.
-    /// The helper private method \c nextPosition() gives the position in the
-    /// buffer for the next character, taking into account compression.
-    ///
-    bool operator()(const NameCompressNode& n1,
-                    const NameCompressNode& n2) const
-    {
-        if (n1.len_ < n2.len_) {
-            return (true);
-        } else if (n1.len_ > n2.len_) {
+    /// \param buffer The buffer for rendering used in the caller renderer
+    /// \param name_buf An input buffer storing the wire-format data of the
+    /// name to be newly rendered (and only that data).
+    /// \param hash The hash value for the name.
+    NameCompare(const OutputBuffer& buffer, InputBuffer& name_buf,
+                size_t hash) :
+        buffer_(&buffer), name_buf_(&name_buf), hash_(hash)
+    {}
+
+    bool operator()(const OffsetItem& item) const {
+        // Trivial inequality check.  If either the hash or the total length
+        // doesn't match, the names are obviously different.
+        if (item.hash_  != hash_ || item.len_ != name_buf_->getLength()) {
             return (false);
         }
 
-        const bool case_sensitive =
-            (n1.renderer_.getCompressMode() == MessageRenderer::CASE_SENSITIVE);
-
-        uint16_t pos1 = n1.pos_;
-        uint16_t pos2 = n2.pos_;
-        uint16_t l1 = 0;
-        uint16_t l2 = 0;
-        for (uint16_t i = 0; i < n1.len_; i++, pos1++, pos2++) {
-            pos1 = nextPosition(n1.buffer_, pos1, l1);
-            pos2 = nextPosition(n2.buffer_, pos2, l2);
-            if (case_sensitive) {
-                if (n1.buffer_[pos1] < n2.buffer_[pos2]) {
-                    return (true);
-                } else if (n1.buffer_[pos1] > n2.buffer_[pos2]) {
+        // Compare the name data, character-by-character.
+        // item_pos keeps track of the position in the buffer corresponding to
+        // the character to compare.  item_label_len is the number of
+        // characters in the labels where the character pointed by item_pos
+        // belongs.  When it reaches zero, nextPosition() identifies the
+        // position for the subsequent label, taking into account name
+        // compression, and resets item_label_len to the length of the new
+        // label.
+        name_buf_->setPosition(0); // buffer can be reused, so reset position
+        uint16_t item_pos = item.pos_;
+        uint16_t item_label_len = 0;
+        for (size_t i = 0; i < item.len_; ++i, ++item_pos) {
+            item_pos = nextPosition(*buffer_, item_pos, item_label_len);
+            const unsigned char ch1 = (*buffer_)[item_pos];
+            const unsigned char ch2 = name_buf_->readUint8();
+            if (CASE_SENSITIVE) {
+                if (ch1 != ch2) {
                     return (false);
                 }
             } else {
-                if (tolower(n1.buffer_[pos1]) < tolower(n2.buffer_[pos2])) {
-                    return (true);
-                } else if (tolower(n1.buffer_[pos1]) >
-                           tolower(n2.buffer_[pos2])) {
+                if (maptolower[ch1] != maptolower[ch2]) {
                     return (false);
                 }
             }
         }
 
-        return (false);
+        return (true);
     }
 
 private:
@@ -138,6 +141,10 @@ private:
         }
         return (pos);
     }
+
+    const OutputBuffer* buffer_;
+    InputBuffer* name_buf_;
+    const size_t hash_;
 };
 }
 
@@ -148,20 +155,60 @@ private:
 /// The implementation is hidden from applications.  We can refer to specific
 /// members of this class only within the implementation source file.
 ///
+/// It internally holds a hash table for OffsetItem objects corresponding
+/// to portions of names rendered in this renderer.  The offset information
+/// is used to compress subsequent names to be rendered.
 struct MessageRenderer::MessageRendererImpl {
-    /// \brief Constructor from an output buffer.
-    ///
+    // The number of hash buckets and the number of hash entries per bucket
+    // for which space is preallocated and kept reserved for subsequent
+    // rendering to provide better performance.  These values are derived
+    // from the BIND 9 implementation that uses a similar hash table.
+    static const size_t BUCKETS = 64;
+    static const size_t RESERVED_ITEMS = 16;
+    static const uint16_t NO_OFFSET = 65535; // used as a marker of 'not found'
+
+    /// \brief Constructor
     MessageRendererImpl() :
-        nbuffer_(Name::MAX_WIRE), msglength_limit_(512),
-        truncated_(false), compress_mode_(MessageRenderer::CASE_INSENSITIVE)
-    {}
-    /// A local working buffer to convert each given name into wire format.
-    /// This could be a local variable of the \c writeName() method, but
-    /// we keep it in the class so that we can reuse it and avoid construction
-    /// overhead.
-    OutputBuffer nbuffer_;
-    /// A set of compression pointers.
-    std::set<NameCompressNode, NameCompare> nodeset_;
+        msglength_limit_(512), truncated_(false),
+        compress_mode_(MessageRenderer::CASE_INSENSITIVE)
+    {
+        // Reserve some spaces for hash table items.
+        for (size_t i = 0; i < BUCKETS; ++i) {
+            table_[i].reserve(RESERVED_ITEMS);
+        }
+    }
+
+    uint16_t findOffset(const OutputBuffer& buffer, InputBuffer& name_buf,
+                        size_t hash, bool case_sensitive) const
+    {
+        // Return the offset of an entry matching name_buf, or NO_OFFSET if
+        // none.  Heuristic: the same name often appears consecutively (like
+        // repeating the same owner name for a single RRset), so on a bucket
+        // collision the match is more likely near the tail; hence the bucket
+        // is searched in reverse (rbegin to rend).
+        const size_t bucket_id = hash % BUCKETS;
+        vector<OffsetItem>::const_reverse_iterator found;
+        if (case_sensitive) {
+            found = find_if(table_[bucket_id].rbegin(),
+                            table_[bucket_id].rend(),
+                            NameCompare<true>(buffer, name_buf, hash));
+        } else {
+            found = find_if(table_[bucket_id].rbegin(),
+                            table_[bucket_id].rend(),
+                            NameCompare<false>(buffer, name_buf, hash));
+        }
+        if (found != table_[bucket_id].rend()) {
+            return (found->pos_); // position stored in the matching entry
+        }
+        return (NO_OFFSET);
+    }
+
+    void addOffset(size_t hash, size_t offset, size_t len) { // append to hash's bucket
+        table_[hash % BUCKETS].push_back(OffsetItem(hash, offset, len));
+    }
+
+    // The hash table: one vector of <hash, offset, length> entries per bucket
+    vector<OffsetItem> table_[BUCKETS];
     /// The maximum length of rendered data that can fit without
     /// truncation.
     uint16_t msglength_limit_;
@@ -170,6 +217,11 @@ struct MessageRenderer::MessageRendererImpl {
     bool truncated_;
     /// The name compression mode.
     CompressMode compress_mode_;
+
+    // Placeholder for hash values as they are calculated in writeName().
+    // Note: we may want to make it a local variable of writeName() if it
+    // works more efficiently.
+    boost::array<size_t, Name::MAX_LABELS> seq_hashes_;
 };
 
 MessageRenderer::MessageRenderer() :
@@ -184,11 +236,22 @@ MessageRenderer::~MessageRenderer() {
 void
 MessageRenderer::clear() {
     AbstractMessageRenderer::clear();
-    impl_->nbuffer_.clear();
-    impl_->nodeset_.clear();
     impl_->msglength_limit_ = 512;
     impl_->truncated_ = false;
     impl_->compress_mode_ = CASE_INSENSITIVE;
+
+    // Clear the hash table.  We keep the minimum reserved space in each
+    // bucket for possible subsequent use of the renderer.
+    for (size_t i = 0; i < MessageRendererImpl::BUCKETS; ++i) {
+        if (impl_->table_[i].size() > MessageRendererImpl::RESERVED_ITEMS) {
+            // Trim excessive capacity: swapping with a freshly reserved
+            // vector leaves this bucket with only the reserved space.
+            vector<OffsetItem> new_table;
+            new_table.reserve(MessageRendererImpl::RESERVED_ITEMS);
+            new_table.swap(impl_->table_[i]); // old storage dies with new_table
+        }
+        impl_->table_[i].clear();
+    }
 }
 
 size_t
@@ -218,59 +281,81 @@ MessageRenderer::getCompressMode() const {
 
 void
 MessageRenderer::setCompressMode(const CompressMode mode) {
+    if (getLength() != 0) { // any hashes recorded so far depend on the mode
+        isc_throw(isc::InvalidParameter,
+                  "compress mode cannot be changed during rendering");
+    }
     impl_->compress_mode_ = mode;
 }
 
 void
 MessageRenderer::writeName(const Name& name, const bool compress) {
-    impl_->nbuffer_.clear();
-    name.toWire(impl_->nbuffer_);
-
-    unsigned int i;
-    std::set<NameCompressNode, NameCompare>::const_iterator notfound =
-        impl_->nodeset_.end();
-    std::set<NameCompressNode, NameCompare>::const_iterator n = notfound;
-
-    // Find the longest ancestor name in the rendered set that matches the
-    // given name.
-    for (i = 0; i < impl_->nbuffer_.getLength(); i += impl_->nbuffer_[i] + 1) {
-        // skip the trailing null label
-        if (impl_->nbuffer_[i] == 0) {
-            continue;
+    LabelSequence sequence(name);
+    const size_t nlabels = sequence.getLabelCount();
+    size_t data_len;
+    const char* data;
+
+    // Find the offset in the offset table whose name gives the longest
+    // match against the name to be rendered.
+    size_t nlabels_uncomp;
+    uint16_t ptr_offset = MessageRendererImpl::NO_OFFSET;
+    const bool case_sensitive = (impl_->compress_mode_ ==
+                                 MessageRenderer::CASE_SENSITIVE);
+    for (nlabels_uncomp = 0; nlabels_uncomp < nlabels; ++nlabels_uncomp) {
+        data = sequence.getData(&data_len);
+        if (data_len == 1) { // trailing dot.
+            ++nlabels_uncomp;
+            break;
         }
-        n = impl_->nodeset_.find(NameCompressNode(*this, impl_->nbuffer_, i,
-                                                  impl_->nbuffer_.getLength() -
-                                                  i));
-        if (n != notfound) {
+        // write with range check for safety
+        impl_->seq_hashes_.at(nlabels_uncomp) =
+            sequence.getHash(impl_->compress_mode_); // NOTE(review): pass case_sensitive?
+        InputBuffer name_buf(data, data_len);
+        ptr_offset = impl_->findOffset(getBuffer(), name_buf,
+                                       impl_->seq_hashes_[nlabels_uncomp],
+                                       case_sensitive);
+        if (ptr_offset != MessageRendererImpl::NO_OFFSET) {
             break;
         }
+        sequence.stripLeft(1);
     }
 
-    // Record the current offset before extending the buffer.
-    const size_t offset = getLength();
-    // Write uncompress part...
-    writeData(impl_->nbuffer_.getData(),
-              compress ? i : impl_->nbuffer_.getLength());
-    if (compress && n != notfound) {
-        // ...and compression pointer if available.
-        uint16_t pointer = (*n).pos_;
-        pointer |= Name::COMPRESS_POINTER_MARK16;
-        writeUint16(pointer);
+    // Record the current offset before updating the offset table
+    size_t offset = getLength();
+    // Write the uncompressed part:
+    if (nlabels_uncomp > 0 || !compress) {
+        LabelSequence uncomp_sequence(name);
+        if (compress && nlabels > nlabels_uncomp) {
+            // If there's a compressed part, strip off that part.
+            uncomp_sequence.stripRight(nlabels - nlabels_uncomp);
+        }
+        data = uncomp_sequence.getData(&data_len);
+        writeData(data, data_len);
+    }
+    // And write the compression pointer if available:
+    if (compress && ptr_offset != MessageRendererImpl::NO_OFFSET) {
+        ptr_offset |= Name::COMPRESS_POINTER_MARK16;
+        writeUint16(ptr_offset);
     }
 
-    // Finally, add to the set the newly rendered name and its ancestors that
-    // have not been in the set.
-    for (unsigned int j = 0; j < i; j += impl_->nbuffer_[j] + 1) {
-        if (impl_->nbuffer_[j] == 0) {
-            continue;
+    // Finally, record the offset and length for each uncompressed sequence
+    // in the hash table.  The renderer's buffer has just stored the
+    // corresponding data, so we use the rendered data to get the length
+    // of each label of the name.
+    size_t seqlen = name.getLength(); // shrinks as 'offset' advances per label
+    for (size_t i = 0; i < nlabels_uncomp; ++i) {
+        const uint8_t label_len = getBuffer()[offset];
+        if (label_len == 0) { // offset for root doesn't need to be stored.
+            break;
         }
-        if (offset + j > Name::MAX_COMPRESS_POINTER) {
+        if (offset > Name::MAX_COMPRESS_POINTER) {
             break;
         }
-        impl_->nodeset_.insert(NameCompressNode(*this, getBuffer(),
-                                                offset + j,
-                                                impl_->nbuffer_.getLength() -
-                                                j));
+        // Store the tuple of <hash, offset, len> to the table.  Note that we
+        // already know the hash value for each name.
+        impl_->addOffset(impl_->seq_hashes_[i], offset, seqlen);
+        offset += (label_len + 1);
+        seqlen -= (label_len + 1);
     }
 }
 

+ 10 - 0
src/lib/dns/messagerenderer.h

@@ -359,7 +359,17 @@ public:
     virtual CompressMode getCompressMode() const;
     virtual void setTruncated();
     virtual void setLengthLimit(size_t len);
+
+    /// This implementation does not allow this call in the middle of
+    /// rendering (i.e. once any data, such as a name, has been rendered)
+    /// due to a restriction specific to the internal implementation.  Such
+    /// attempts will result in an \c isc::InvalidParameter exception.
+    ///
+    /// This shouldn't be too restrictive in practice; there's no known
+    /// practical case for such a mixed compression policy in a single
+    /// message.
     virtual void setCompressMode(CompressMode mode);
+
     virtual void clear();
     virtual void writeName(const Name& name, bool compress = true);
 private:

+ 14 - 8
src/lib/dns/name.cc

@@ -23,11 +23,13 @@
 #include <util/buffer.h>
 #include <dns/exceptions.h>
 #include <dns/name.h>
+#include <dns/name_internal.h>
 #include <dns/messagerenderer.h>
 
 using namespace std;
 using namespace isc::util;
 using isc::dns::NameComparisonResult;
+using namespace isc::dns::name::internal;
 
 namespace isc {
 namespace dns {
@@ -46,12 +48,12 @@ namespace {
 /// we chose the naive but simple hardcoding approach.
 ///
 /// These definitions are derived from BIND 9's libdns module.
-/// Note: it was not clear why the maptolower array was needed rather than
-/// using the standard tolower() function.  It was perhaps due performance
-/// concern, but we were not sure.  Even if it was performance reasons, we
-/// should carefully assess the effect via benchmarking to avoid the pitfall of 
-/// premature optimization.  We should revisit this point later.
-static const char digitvalue[256] = {
+/// Note: we could use the standard tolower() function instead of the
+/// maptolower array, but a benchmark indicated that the private array could
+/// improve the performance of message rendering (which internally uses the
+/// array heavily) by about 27%.  Since we want to achieve very good
+/// performance for message rendering in some cases, we'll keep using it.
+const char digitvalue[256] = {
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 16
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 32
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 48
@@ -69,8 +71,11 @@ static const char digitvalue[256] = {
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 256
 };
+}
 
-static const unsigned char maptolower[] = {
+namespace name {
+namespace internal {
+const unsigned char maptolower[] = {
     0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
     0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
     0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
@@ -104,7 +109,8 @@ static const unsigned char maptolower[] = {
     0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
     0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
 };
-}
+} // end of internal
+} // end of name
 
 namespace {
 ///

+ 43 - 0
src/lib/dns/name_internal.h

@@ -0,0 +1,43 @@
+// Copyright (C) 2012  Internet Systems Consortium, Inc. ("ISC")
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+// REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+// AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+// INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+// LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+// OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+// PERFORMANCE OF THIS SOFTWARE.
+
+#ifndef __NAME_INTERNAL_H
+#define __NAME_INTERNAL_H 1
+
+// This is effectively a "private" namespace for the Name class implementation,
+// but exposed publicly so the definitions in it can be shared with other
+// modules of the library (as of its introduction, used by LabelSequence and
+// MessageRenderer).  It's not expected to be used even by normal applications.
+// This header file is therefore not expected to be installed as part of the
+// library.
+//
+// Note: if it turns out that we need this shortcut in many other places
+// we may even want to expose it to other BIND 10 modules, but for now
+// we'll keep it semi-private (note also that except for very performance
+// sensitive applications the standard std::tolower() function should be
+// sufficient).
+namespace isc {
+namespace dns {
+namespace name {
+namespace internal {
+extern const unsigned char maptolower[];
+} // end of internal
+} // end of name
+} // end of dns
+} // end of isc
+#endif // __NAME_INTERNAL_H
+
+// Local Variables:
+// mode: c++
+// End:

+ 159 - 51
src/lib/dns/tests/labelsequence_unittest.cc

@@ -13,11 +13,20 @@
 // PERFORMANCE OF THIS SOFTWARE.
 
 #include <dns/labelsequence.h>
+#include <dns/name.h>
 #include <exceptions/exceptions.h>
 
 #include <gtest/gtest.h>
 
+#include <boost/functional/hash.hpp>
+
+#include <string>
+#include <set>
+
 using namespace isc::dns;
+using namespace std;
+
+namespace {
 
 class LabelSequenceTest : public ::testing::Test {
 public:
@@ -37,51 +46,14 @@ public:
 
 // Basic equality tests
 TEST_F(LabelSequenceTest, equals_sensitive) {
-    EXPECT_TRUE(ls1.equals(ls1));
-    EXPECT_FALSE(ls1.equals(ls2));
-    EXPECT_TRUE(ls1.equals(ls3));
-    EXPECT_FALSE(ls1.equals(ls4));
-    EXPECT_FALSE(ls1.equals(ls5));
-    EXPECT_FALSE(ls1.equals(ls6));
-    EXPECT_FALSE(ls1.equals(ls7));
-    EXPECT_FALSE(ls1.equals(ls8));
-
-    EXPECT_FALSE(ls2.equals(ls1));
-    EXPECT_TRUE(ls2.equals(ls2));
-    EXPECT_FALSE(ls2.equals(ls3));
-    EXPECT_FALSE(ls2.equals(ls4));
-    EXPECT_FALSE(ls2.equals(ls5));
-    EXPECT_FALSE(ls2.equals(ls6));
-    EXPECT_FALSE(ls2.equals(ls7));
-    EXPECT_FALSE(ls2.equals(ls8));
-
-    EXPECT_FALSE(ls4.equals(ls1));
-    EXPECT_FALSE(ls4.equals(ls2));
-    EXPECT_FALSE(ls4.equals(ls3));
-    EXPECT_TRUE(ls4.equals(ls4));
-    EXPECT_FALSE(ls4.equals(ls5));
-    EXPECT_FALSE(ls4.equals(ls6));
-    EXPECT_FALSE(ls4.equals(ls7));
-    EXPECT_FALSE(ls4.equals(ls8));
-
-    EXPECT_FALSE(ls5.equals(ls1));
-    EXPECT_FALSE(ls5.equals(ls2));
-    EXPECT_FALSE(ls5.equals(ls3));
-    EXPECT_FALSE(ls5.equals(ls4));
-    EXPECT_TRUE(ls5.equals(ls5));
-    EXPECT_FALSE(ls5.equals(ls6));
-    EXPECT_FALSE(ls5.equals(ls7));
-    EXPECT_FALSE(ls5.equals(ls8));
-}
-
-TEST_F(LabelSequenceTest, equals_insensitive) {
     EXPECT_TRUE(ls1.equals(ls1, true));
     EXPECT_FALSE(ls1.equals(ls2, true));
     EXPECT_TRUE(ls1.equals(ls3, true));
     EXPECT_FALSE(ls1.equals(ls4, true));
-    EXPECT_TRUE(ls1.equals(ls5, true));
-    EXPECT_TRUE(ls1.equals(ls6, true));
+    EXPECT_FALSE(ls1.equals(ls5, true));
+    EXPECT_FALSE(ls1.equals(ls6, true));
     EXPECT_FALSE(ls1.equals(ls7, true));
+    EXPECT_FALSE(ls1.equals(ls8, true));
 
     EXPECT_FALSE(ls2.equals(ls1, true));
     EXPECT_TRUE(ls2.equals(ls2, true));
@@ -90,14 +62,7 @@ TEST_F(LabelSequenceTest, equals_insensitive) {
     EXPECT_FALSE(ls2.equals(ls5, true));
     EXPECT_FALSE(ls2.equals(ls6, true));
     EXPECT_FALSE(ls2.equals(ls7, true));
-
-    EXPECT_TRUE(ls3.equals(ls1, true));
-    EXPECT_FALSE(ls3.equals(ls2, true));
-    EXPECT_TRUE(ls3.equals(ls3, true));
-    EXPECT_FALSE(ls3.equals(ls4, true));
-    EXPECT_TRUE(ls3.equals(ls5, true));
-    EXPECT_TRUE(ls3.equals(ls6, true));
-    EXPECT_FALSE(ls3.equals(ls7, true));
+    EXPECT_FALSE(ls2.equals(ls8, true));
 
     EXPECT_FALSE(ls4.equals(ls1, true));
     EXPECT_FALSE(ls4.equals(ls2, true));
@@ -106,14 +71,58 @@ TEST_F(LabelSequenceTest, equals_insensitive) {
     EXPECT_FALSE(ls4.equals(ls5, true));
     EXPECT_FALSE(ls4.equals(ls6, true));
     EXPECT_FALSE(ls4.equals(ls7, true));
+    EXPECT_FALSE(ls4.equals(ls8, true));
 
-    EXPECT_TRUE(ls5.equals(ls1, true));
+    EXPECT_FALSE(ls5.equals(ls1, true));
     EXPECT_FALSE(ls5.equals(ls2, true));
-    EXPECT_TRUE(ls5.equals(ls3, true));
+    EXPECT_FALSE(ls5.equals(ls3, true));
     EXPECT_FALSE(ls5.equals(ls4, true));
     EXPECT_TRUE(ls5.equals(ls5, true));
-    EXPECT_TRUE(ls5.equals(ls6, true));
+    EXPECT_FALSE(ls5.equals(ls6, true));
     EXPECT_FALSE(ls5.equals(ls7, true));
+    EXPECT_FALSE(ls5.equals(ls8, true));
+}
+
+TEST_F(LabelSequenceTest, equals_insensitive) {
+    EXPECT_TRUE(ls1.equals(ls1));
+    EXPECT_FALSE(ls1.equals(ls2));
+    EXPECT_TRUE(ls1.equals(ls3));
+    EXPECT_FALSE(ls1.equals(ls4));
+    EXPECT_TRUE(ls1.equals(ls5));
+    EXPECT_TRUE(ls1.equals(ls6));
+    EXPECT_FALSE(ls1.equals(ls7));
+
+    EXPECT_FALSE(ls2.equals(ls1));
+    EXPECT_TRUE(ls2.equals(ls2));
+    EXPECT_FALSE(ls2.equals(ls3));
+    EXPECT_FALSE(ls2.equals(ls4));
+    EXPECT_FALSE(ls2.equals(ls5));
+    EXPECT_FALSE(ls2.equals(ls6));
+    EXPECT_FALSE(ls2.equals(ls7));
+
+    EXPECT_TRUE(ls3.equals(ls1));
+    EXPECT_FALSE(ls3.equals(ls2));
+    EXPECT_TRUE(ls3.equals(ls3));
+    EXPECT_FALSE(ls3.equals(ls4));
+    EXPECT_TRUE(ls3.equals(ls5));
+    EXPECT_TRUE(ls3.equals(ls6));
+    EXPECT_FALSE(ls3.equals(ls7));
+
+    EXPECT_FALSE(ls4.equals(ls1));
+    EXPECT_FALSE(ls4.equals(ls2));
+    EXPECT_FALSE(ls4.equals(ls3));
+    EXPECT_TRUE(ls4.equals(ls4));
+    EXPECT_FALSE(ls4.equals(ls5));
+    EXPECT_FALSE(ls4.equals(ls6));
+    EXPECT_FALSE(ls4.equals(ls7));
+
+    EXPECT_TRUE(ls5.equals(ls1));
+    EXPECT_FALSE(ls5.equals(ls2));
+    EXPECT_TRUE(ls5.equals(ls3));
+    EXPECT_FALSE(ls5.equals(ls4));
+    EXPECT_TRUE(ls5.equals(ls5));
+    EXPECT_TRUE(ls5.equals(ls6));
+    EXPECT_FALSE(ls5.equals(ls7));
 }
 
 void
@@ -124,6 +133,9 @@ getDataCheck(const char* expected_data, size_t expected_len,
     const char* data = ls.getData(&len);
     ASSERT_EQ(expected_len, len) << "Expected data: " << expected_data <<
                                     " name: " << ls.getName().toText();
+    EXPECT_EQ(expected_len, ls.getDataLength()) <<
+        "Expected data: " << expected_data <<
+        " name: " << ls.getName().toText();
     for (size_t i = 0; i < len; ++i) {
         EXPECT_EQ(expected_data[i], data[i]) << "Difference at pos " << i <<
                                                 ": Expected data: " <<
@@ -251,3 +263,99 @@ TEST_F(LabelSequenceTest, isAbsolute) {
     ls3.stripLeft(2);
     ASSERT_TRUE(ls3.isAbsolute());
 }
+
+// A helper function that constructs the textual representation of a name label
+// character for the given char value in the form of \DDD (3 decimal digits).
+string
+getNumericLabel(char ch) { // NOTE(review): a negative (signed) ch breaks the digits
+    string result;
+    result.push_back(0x5c);     // push '\'
+    result.push_back(0x30 + ((ch / 100) % 10)); // encode the 1st digit
+    result.push_back(0x30 + ((ch / 10) % 10)); // encode the 2nd digit
+    result.push_back(0x30 + (ch % 10));        // encode the 3rd digit
+
+    return (result);
+}
+
+// The following are test data used in the getHash test below.  Normally
+// we use example/documentation domain names for testing, but in this case
+// we'd specifically like to use more realistic data, and are intentionally
+// using real-world samples: They are the NS names of root and some top level
+// domains as of this test.
+const char* const root_servers[] = {
+    "a.root-servers.net", "b.root-servers.net", "c.root-servers.net",
+    "d.root-servers.net", "e.root-servers.net", "f.root-servers.net",
+    "g.root-servers.net", "h.root-servers.net", "i.root-servers.net",
+    "j.root-servers.net", "k.root-servers.net", "l.root-servers.net",
+    "m.root-servers.net", NULL
+};
+const char* const gtld_servers[] = {
+    "a.gtld-servers.net", "b.gtld-servers.net", "c.gtld-servers.net",
+    "d.gtld-servers.net", "e.gtld-servers.net", "f.gtld-servers.net",
+    "g.gtld-servers.net", "h.gtld-servers.net", "i.gtld-servers.net",
+    "j.gtld-servers.net", "k.gtld-servers.net", "l.gtld-servers.net",
+    "m.gtld-servers.net", NULL
+};
+const char* const jp_servers[] = {
+    "a.dns.jp", "b.dns.jp", "c.dns.jp", "d.dns.jp", "e.dns.jp",
+    "f.dns.jp", "g.dns.jp", NULL
+};
+const char* const cn_servers[] = {
+    "a.dns.cn", "b.dns.cn", "c.dns.cn", "d.dns.cn", "e.dns.cn",
+    "ns.cernet.net", NULL
+};
+const char* const ca_servers[] = {
+    "k.ca-servers.ca", "e.ca-servers.ca", "a.ca-servers.ca", "z.ca-servers.ca",
+    "tld.isc-sns.net", "c.ca-servers.ca", "j.ca-servers.ca", "l.ca-servers.ca",
+    "sns-pb.isc.org", "f.ca-servers.ca", NULL
+};
+
+// Helper for the getHash test below; 'servers' is a NULL-terminated array.
+void
+hashDistributionCheck(const char* const* servers) {
+    const size_t BUCKETS = 64;  // constant used in the MessageRenderer
+    set<Name> names;
+    vector<size_t> hash_counts(BUCKETS);
+
+    // Store all test names and their super domain names (excluding the
+    // "root" label) in the set, calculate their hash values, and increment
+    // the counter for the corresponding hash "bucket".
+    for (size_t i = 0; servers[i] != NULL; ++i) {
+        const Name name(servers[i]);
+        for (size_t l = 0; l < name.getLabelCount() - 1; ++l) {
+            pair<set<Name>::const_iterator, bool> ret =
+                names.insert(name.split(l));
+            if (ret.second) { // count each distinct name only once
+                hash_counts[LabelSequence((*ret.first)).getHash(false) %
+                            BUCKETS]++;
+            }
+        }
+    }
+
+    // See how many conflicts we have in the buckets.  For testing purposes
+    // we expect at most 2 conflicts (i.e. 3 names) per bucket, which is an
+    // arbitrary choice (it should happen to succeed with the hash function
+    // and data we are using; if it's not the case, maybe with an update to
+    // the hash implementation, we should revise the test).
+    for (size_t i = 0; i < BUCKETS; ++i) {
+        EXPECT_GE(3, hash_counts[i]);
+    }
+}
+
+TEST_F(LabelSequenceTest, getHash) {
+    // Trivial case.  The same sequence should have the same hash.
+    EXPECT_EQ(ls1.getHash(true), ls1.getHash(true));
+
+    // Case-insensitive mode: ls1 and ls5 (equal ignoring case) hash the same.
+    EXPECT_EQ(ls1.getHash(false), ls5.getHash(false));
+
+    // Check that the distribution of hash values is "not too bad" (e.g.
+    // not everything having the same hash value due to a stupid bug).  It's
+    // difficult to check such things reliably.  We do some ad hoc tests here.
+    hashDistributionCheck(root_servers);
+    hashDistributionCheck(jp_servers);
+    hashDistributionCheck(cn_servers);
+    hashDistributionCheck(ca_servers); // NOTE(review): gtld_servers is never checked; intentional?
+}
+
+}

+ 29 - 7
src/lib/dns/tests/messagerenderer_unittest.cc

@@ -21,12 +21,16 @@
 
 #include <gtest/gtest.h>
 
+#include <boost/lexical_cast.hpp>
+
+#include <string>
 #include <vector>
 
 using isc::UnitTestUtil;
 using isc::dns::Name;
 using isc::dns::MessageRenderer;
 using isc::util::OutputBuffer;
+using boost::lexical_cast;
 
 namespace {
 class MessageRendererTest : public ::testing::Test {
@@ -142,13 +146,15 @@ TEST_F(MessageRendererTest, writeNameMixedCaseCompress) {
     renderer.writeName(Name("a.example.com."));
     renderer.writeName(Name("b.eXample.com."));
 
-    // Change the compression mode in the middle of rendering.  This is an
-    // unusual operation and is unlikely to happen in practice, but is still
-    // allowed in this API.
-    renderer.setCompressMode(MessageRenderer::CASE_INSENSITIVE);
-    renderer.writeName(Name("c.b.EXAMPLE.com."));
-    EXPECT_PRED_FORMAT4(UnitTestUtil::matchWireData, renderer.getData(),
-                        renderer.getLength(), &data[0], data.size());
+    // Change the compression mode in the middle of rendering.  This is not
+    // allowed in this implementation.
+    EXPECT_THROW(renderer.setCompressMode(MessageRenderer::CASE_INSENSITIVE),
+                 isc::InvalidParameter);
+
+    // Once the renderer is cleared, it's okay again.
+    renderer.clear();
+    EXPECT_NO_THROW(renderer.setCompressMode(
+                        MessageRenderer::CASE_INSENSITIVE));
 }
 
 TEST_F(MessageRendererTest, writeRootName) {
@@ -211,4 +217,20 @@ TEST_F(MessageRendererTest, setBufferErrors) {
     renderer.setBuffer(&new_buffer);
     EXPECT_NO_THROW(renderer.setBuffer(NULL));
 }
+
+TEST_F(MessageRendererTest, manyRRs) {
+    // Render a large number of names, and then confirm the resulting wire
+    // data stores the expected names in the correct order (1000 is an
+    // arbitrary choice).
+    for (size_t i = 0; i < 1000; ++i) {
+        renderer.writeName(Name(lexical_cast<std::string>(i) + ".example"));
+    }
+    isc::util::InputBuffer b(renderer.getData(), renderer.getLength());
+    for (size_t i = 0; i < 1000; ++i) {
+        EXPECT_EQ(Name(lexical_cast<std::string>(i) + ".example"), Name(b));
+    }
+    // This will trigger trimming of excessive hash items.  It shouldn't cause
+    // any disruption.
+    EXPECT_NO_THROW(renderer.clear());
+}
 }

+ 23 - 19
src/lib/util/buffer.h

@@ -122,10 +122,10 @@ public:
     /// an exception of class \c isc::dns::InvalidBufferPosition will be thrown.
     /// \param position The new position (offset from the beginning of the
     /// buffer).
-    void setPosition(size_t position)
-    {
-        if (position > len_)
-            isc_throw(InvalidBufferPosition, "position is too large");
+    void setPosition(size_t position) {
+        if (position > len_) { // position == len_ is still accepted
+            throwError("position is too large");
+        }
         position_ = position;
     }
     //@}
@@ -137,10 +137,9 @@ public:
     ///
     /// If the remaining length of the buffer is smaller than 8-bit, an
     /// exception of class \c isc::dns::InvalidBufferPosition will be thrown.
-    uint8_t readUint8()
-    {
+    uint8_t readUint8() {
         if (position_ + sizeof(uint8_t) > len_) {
-            isc_throw(InvalidBufferPosition, "read beyond end of buffer");
+            throwError("read beyond end of buffer");
         }
 
         return (data_[position_++]);
@@ -150,13 +149,12 @@ public:
     ///
     /// If the remaining length of the buffer is smaller than 16-bit, an
     /// exception of class \c isc::dns::InvalidBufferPosition will be thrown.
-    uint16_t readUint16()
-    {
+    uint16_t readUint16() {
         uint16_t data;
         const uint8_t* cp;
 
         if (position_ + sizeof(data) > len_) {
-            isc_throw(InvalidBufferPosition, "read beyond end of buffer");
+            throwError("read beyond end of buffer");
         }
 
         cp = &data_[position_];
@@ -171,13 +169,12 @@ public:
     ///
     /// If the remaining length of the buffer is smaller than 32-bit, an
     /// exception of class \c isc::dns::InvalidBufferPosition will be thrown.
-    uint32_t readUint32()
-    {
+    uint32_t readUint32() {
         uint32_t data;
         const uint8_t* cp;
 
         if (position_ + sizeof(data) > len_) {
-            isc_throw(InvalidBufferPosition, "read beyond end of buffer");
+            throwError("read beyond end of buffer");
         }
 
         cp = &data_[position_];
@@ -196,10 +193,9 @@ public:
     /// If the remaining length of the buffer is smaller than the specified
     /// length, an exception of class \c isc::dns::InvalidBufferPosition will
     /// be thrown.
-    void readData(void* data, size_t len)
-    {
+    void readData(void* data, size_t len) {
         if (position_ + len > len_) {
-            isc_throw(InvalidBufferPosition, "read beyond end of buffer");
+            throwError("read beyond end of buffer");
         }
 
         memcpy(data, &data_[position_], len);
@@ -215,10 +211,9 @@ public:
     /// @param Reference to a buffer (data will be stored there).
     /// @param Size specified number of bytes to read in a vector.
     ///
-    void readVector(std::vector<uint8_t>& data, size_t len)
-    {
+    void readVector(std::vector<uint8_t>& data, size_t len) {
         if (position_ + len > len_) {
-            isc_throw(InvalidBufferPosition, "read beyond end of buffer");
+            throwError("read beyond end of buffer");
         }
 
         data.resize(len);
@@ -226,6 +221,15 @@ public:
     }
 
 private:
+    /// \brief A common helper to throw InvalidBufferPosition with \c msg.
+    ///
+    /// Experiments showed that throwing from each method makes the buffer
+    /// operations slower, so the throw is consolidated here and the methods
+    /// call this helper instead.
+    static void throwError(const char* msg) {
+        isc_throw(InvalidBufferPosition, msg);
+    }
+
     size_t position_;
 
     // XXX: The following must be private, but for a short term workaround with