Parcourir la source

[1603] implemented LabelSequence::getHash(), which is needed for new renderer.
maptolower hidden in name.cc is now exposed to the LabelSequence implementation
in a "private" namespace.

JINMEI Tatuya il y a 13 ans
Parent
commit
2903127df1

+ 21 - 1
src/lib/dns/labelsequence.cc

@@ -13,9 +13,11 @@
 // PERFORMANCE OF THIS SOFTWARE.
 
 #include <dns/labelsequence.h>
+#include <dns/name_internal.h>
 #include <exceptions/exceptions.h>
 
-#include <iostream>
+#include <boost/functional/hash.hpp>
+
 namespace isc {
 namespace dns {
 
@@ -79,5 +81,23 @@ LabelSequence::isAbsolute() const {
     return (last_label_ == name_->offsets_.size());
 }
 
+size_t
+LabelSequence::getHash(bool case_sensitive) const {
+    size_t length;
+    const char* s = getData(&length);
+    if (length > 16) {
+        length = 16;
+    }
+
+    size_t hash_val = 0;
+    while (length > 0) {
+        const unsigned char c = *s++;
+        boost::hash_combine(hash_val, case_sensitive ? c :
+                            isc::dns::name::internal::maptolower[c]);
+        --length;
+    }
+    return (hash_val);
+}
+
 } // end namespace dns
 } // end namespace isc

+ 25 - 0
src/lib/dns/labelsequence.h

@@ -134,6 +134,31 @@ public:
     /// \return Reference to the original Name object
     const Name& getName() const { return (*name_); }
 
+    /// \brief Calculate a simple hash for the label sequence.
+    ///
+    /// This method calculates a hash value for the label sequence as binary
+    /// data.  If \c case_sensitive is false, it ignores the case stored in
+    /// the labels; specifically, it normalizes the labels by converting all
+    /// upper case characters to lower case ones and calculates the hash value
+    /// for the result.
+    ///
+    /// This method is intended to provide a lightweight way to store a
+    /// relatively small number of label sequences in a hash table.
+    /// For this reason it only takes into account data up to 16 octets
+    /// (16 was derived from BIND 9's implementation).  Also, the function does
+    /// not provide any unpredictability; a specific sequence will always have
+    /// the same hash value.  It should therefore not be used in the context
+    /// where an untrusted third party can mount a denial of service attack by
+    /// forcing the application to create a very large number of label
+    /// sequences that have the same hash value and expected to be stored in
+    /// a hash table.
+    ///
+    /// \exception None
+    ///
+    /// \param case_sensitive
+    /// \return A hash value for this label sequence.
+    size_t getHash(bool case_sensitive) const;
+
     /// \brief Checks whether the label sequence is absolute
     ///
     /// \return true if the last label is the root label

+ 14 - 8
src/lib/dns/name.cc

@@ -23,11 +23,13 @@
 #include <util/buffer.h>
 #include <dns/exceptions.h>
 #include <dns/name.h>
+#include <dns/name_internal.h>
 #include <dns/messagerenderer.h>
 
 using namespace std;
 using namespace isc::util;
 using isc::dns::NameComparisonResult;
+using namespace isc::dns::name::internal;
 
 namespace isc {
 namespace dns {
@@ -46,12 +48,12 @@ namespace {
 /// we chose the naive but simple hardcoding approach.
 ///
 /// These definitions are derived from BIND 9's libdns module.
-/// Note: it was not clear why the maptolower array was needed rather than
-/// using the standard tolower() function.  It was perhaps due performance
-/// concern, but we were not sure.  Even if it was performance reasons, we
-/// should carefully assess the effect via benchmarking to avoid the pitfall of 
-/// premature optimization.  We should revisit this point later.
-static const char digitvalue[256] = {
+/// Note: we could use the standard tolower() function instead of the
+/// maptolower array, but a benchmark indicated that the private array could
+/// improve the performance of message rendering (which internally uses the
+/// array heavily) about 27%.  Since we want to achieve very good performance
+/// for message rendering in some cases, we'll keep using it.
+const char digitvalue[256] = {
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 16
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 32
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 48
@@ -69,8 +71,11 @@ static const char digitvalue[256] = {
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 256
 };
+}
 
-static const unsigned char maptolower[] = {
+namespace name {
+namespace internal {
+const unsigned char maptolower[] = {
     0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
     0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
     0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
@@ -104,7 +109,8 @@ static const unsigned char maptolower[] = {
     0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
     0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
 };
-}
+} // end of internal
+} // end of name
 
 namespace {
 ///

+ 9 - 0
src/lib/dns/name.h

@@ -166,6 +166,15 @@ private:
     NameRelation relation_;
 };
 
+// This is effectively a "private" namespace, but exposed publicly so the
+// definitions in it can be shared with the LabelSequence class.  It's not
+// expected to be used
+namespace name {
+namespace internal {
+extern const unsigned char maptolower[];
+}
+}
+
 ///
 /// The \c Name class encapsulates DNS names.
 ///

+ 36 - 0
src/lib/dns/name_internal.h

@@ -0,0 +1,36 @@
+// Copyright (C) 2012  Internet Systems Consortium, Inc. ("ISC")
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+// REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+// AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+// INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+// LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+// OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+// PERFORMANCE OF THIS SOFTWARE.
+
+#ifndef __NAME_INTERNAL_H
+#define __NAME_INTERNAL_H 1
+
+// This is effectively a "private" namespace for the Name class implementation,
+// but exposed publicly so the definitions in it can be shared with the
+// LabelSequence class.  It's not expected to be used even by other modules
+// of this library, much less by normal applications.  This header file is
+// therefore not expected to be installed as part of the library.
+namespace isc {
+namespace dns {
+namespace name {
+namespace internal {
+extern const unsigned char maptolower[];
+} // end of internal
+} // end of name
+} // end of dns
+} // end of isc
+#endif // __NAME_INTERNAL_H
+
+// Local Variables:
+// mode: c++
+// End:

+ 101 - 0
src/lib/dns/tests/labelsequence_unittest.cc

@@ -13,11 +13,18 @@
 // PERFORMANCE OF THIS SOFTWARE.
 
 #include <dns/labelsequence.h>
+#include <dns/name.h>
 #include <exceptions/exceptions.h>
 
 #include <gtest/gtest.h>
 
+#include <boost/functional/hash.hpp>
+
+#include <string>
+#include <set>
+
 using namespace isc::dns;
+using namespace std;
 
 namespace {
 
@@ -257,4 +264,98 @@ TEST_F(LabelSequenceTest, isAbsolute) {
     ASSERT_TRUE(ls3.isAbsolute());
 }
 
+// A helper function that constructs the textual representation of a name label
+// character for the given char value in the form of \DDD
+string
+getNumericLabel(char ch) {
+    string result;
+    result.push_back(0x5c);     // push '\'
+    result.push_back(0x30 + ((ch / 100) % 10)); // encode the 1st digit
+    result.push_back(0x30 + ((ch / 10) % 10)); // encode the 2nd digit
+    result.push_back(0x30 + (ch % 10));        // encode the 3rd digit
+
+    return (result);
+}
+
+// The following are test data used in the getHash test below.  Normally
+// we use example/documentation domain names for testing, but in this case
+// we'd specifically like to use more realistic data, and are intentionally
+// using real-world samples: They are the NS names of root and some top level
+// domains as of this test.
+const char* const root_servers[] = {
+    "a.root-servers.net", "b.root-servers.net", "c.root-servers.net",
+    "d.root-servers.net", "e.root-servers.net", "f.root-servers.net",
+    "g.root-servers.net", "h.root-servers.net", "i.root-servers.net",
+    "j.root-servers.net", "k.root-servers.net", "l.root-servers.net",
+    "m.root-servers.net", NULL
+};
+const char* const gtld_servers[] = {
+    "a.gtld-servers.net", "b.gtld-servers.net", "c.gtld-servers.net",
+    "d.gtld-servers.net", "e.gtld-servers.net", "f.gtld-servers.net",
+    "g.gtld-servers.net", "h.gtld-servers.net", "i.gtld-servers.net",
+    "j.gtld-servers.net", "k.gtld-servers.net", "l.gtld-servers.net",
+    "m.gtld-servers.net", NULL
+};
+const char* const jp_servers[] = {
+    "a.dns.jp", "b.dns.jp", "c.dns.jp", "d.dns.jp", "e.dns.jp",
+    "f.dns.jp", "g.dns.jp", NULL
+};
+const char* const cn_servers[] = {
+    "a.dns.cn", "b.dns.cn", "c.dns.cn", "d.dns.cn", "e.dns.cn",
+    "ns.cernet.net", NULL
+};
+const char* const ca_servers[] = {
+    "k.ca-servers.ca", "e.ca-servers.ca", "a.ca-servers.ca", "z.ca-servers.ca",
+    "tld.isc-sns.net", "c.ca-servers.ca", "j.ca-servers.ca", "l.ca-servers.ca",
+    "sns-pb.isc.org", "f.ca-servers.ca", NULL
+};
+
+// A helper function used in the getHash test below.
+void
+hashDistributionCheck(const char* const* servers) {
+    const size_t BUCKETS = 64;  // constant used in the MessageRenderer
+    set<Name> names;
+    vector<size_t> hash_counts(BUCKETS);
+
+    // Store all test names and their super domain names (excluding the
+    // "root" label) in the set, calculates their hash values, and increments
+    // the counter for the corresponding hash "bucket".
+    for (size_t i = 0; servers[i] != NULL; ++i) {
+        const Name name(servers[i]);
+        for (size_t l = 0; l < name.getLabelCount() - 1; ++l) {
+            pair<set<Name>::const_iterator, bool> ret =
+                names.insert(name.split(l));
+            if (ret.second) {
+                hash_counts[LabelSequence((*ret.first)).getHash(false) %
+                            BUCKETS]++;
+            }
+        }
+    }
+
+    // See how many conflicts we have in the buckets.  For the testing purpose
+    // we expect there's at most 2 conflicts in each set, which is an
+    // arbitrary choice (it should happen to succeed with the hash function
+    // and data we are using; if it's not the case, maybe with an update to
+    // the hash implementation, we should revise the test).
+    for (size_t i = 0; i < BUCKETS; ++i) {
+        EXPECT_GE(3, hash_counts[i]);
+    }
+}
+
+TEST_F(LabelSequenceTest, getHash) {
+    // Trivial case.  The same sequence should have the same hash.
+    EXPECT_EQ(ls1.getHash(true), ls1.getHash(true));
+
+    // Check the case-insensitive mode behavior.
+    EXPECT_EQ(ls1.getHash(false), ls5.getHash(false));
+
+    // Check that the distribution of hash values is "not too bad" (such as
+    // everything has the same hash value due to a stupid bug).  It's
+    // difficult to check such things reliably.  We do some ad hoc tests here.
+    hashDistributionCheck(root_servers);
+    hashDistributionCheck(jp_servers);
+    hashDistributionCheck(cn_servers);
+    hashDistributionCheck(ca_servers);
+}
+
 }