Browse Source

[1383] Produce better debug messages for erroneous upstream responses

Stephen Morris 13 years ago
parent
commit
982fc000a3
2 changed files with 197 additions and 52 deletions
  1. 111 41
      src/lib/resolve/recursive_query.cc
  2. 86 11
      src/lib/resolve/resolve_messages.mes

+ 111 - 41
src/lib/resolve/recursive_query.cc

@@ -28,9 +28,9 @@
 #include <dns/opcode.h>
 #include <dns/exceptions.h>
 #include <dns/rdataclass.h>
-
 #include <resolve/resolve.h>
 #include <resolve/resolve_log.h>
+#include <resolve/resolve_messages.h>
 #include <cache/resolver_cache.h>
 #include <nsas/address_request_callback.h>
 #include <nsas/nameserver_address.h>
@@ -39,6 +39,7 @@
 #include <asiodns/dns_service.h>
 #include <asiodns/io_fetch.h>
 #include <asiolink/io_service.h>
+#include <resolve/response_classifier.h>
 #include <resolve/recursive_query.h>
 
 using namespace isc::dns;
@@ -430,7 +431,7 @@ private:
                       .arg(questionText(question_));
             isc::resolve::copyResponseMessage(incoming, answer_message_);
             cache_.update(*answer_message_);
-            return true;
+            return (true);
             break;
 
         case isc::resolve::ResponseClassifier::CNAME:
@@ -444,7 +445,7 @@ private:
                 LOG_DEBUG(isc::resolve::logger, RESLIB_DBG_RESULTS, RESLIB_LONG_CHAIN)
                           .arg(questionText(question_));
                 makeSERVFAIL();
-                return true;
+                return (true);
             }
 
             LOG_DEBUG(isc::resolve::logger, RESLIB_DBG_RESULTS, RESLIB_CNAME)
@@ -460,7 +461,7 @@ private:
             LOG_DEBUG(isc::resolve::logger, RESLIB_DBG_RESULTS, RESLIB_FOLLOW_CNAME)
                       .arg(questionText(question_));
             doLookup();
-            return false;
+            return (false);
             break;
 
         case isc::resolve::ResponseClassifier::NXDOMAIN:
@@ -471,7 +472,7 @@ private:
             isc::resolve::copyResponseMessage(incoming, answer_message_);
             // no negcache yet
             //cache_.update(*answer_message_);
-            return true;
+            return (true);
             break;
 
         case isc::resolve::ResponseClassifier::REFERRAL:
@@ -520,7 +521,7 @@ private:
                 nsas_callback_out_ = true;
                 nsas_.lookup(cur_zone_, question_.getClass(),
                              nsas_callback_, ANY_OK, glue_hints);
-                return false;
+                return (false);
             } else {
                 // Referral was received but did not contain an NS RRset.
                 LOG_DEBUG(isc::resolve::logger, RESLIB_DBG_RESULTS, RESLIB_NO_NS_RRSET)
@@ -528,48 +529,122 @@ private:
 
                 // TODO this will result in answering with the delegation. oh well
                 isc::resolve::copyResponseMessage(incoming, answer_message_);
-                return true;
+                return (true);
             }
             break;
+
         case isc::resolve::ResponseClassifier::TRUNCATED:
             // Truncated packet.  If the protocol we used for the last one is
             // UDP, re-query using TCP.  Otherwise regard it as an error.
             if (protocol_ == IOFetch::UDP) {
-                LOG_DEBUG(isc::resolve::logger, RESLIB_DBG_RESULTS, RESLIB_TRUNCATED)
-                          .arg(questionText(question_));
+                LOG_DEBUG(isc::resolve::logger, RESLIB_DBG_RESULTS,
+                          RESLIB_TRUNCATED).arg(questionText(question_));
                 send(IOFetch::TCP);
-                return false;
+                return (false);
+            }
+
+            // Was a TCP query so we have received a packet over TCP with the
+            // TC bit set: report an error by dropping down to the common
+            // error code.
+
+        default:
+            // Some error in received packet it.  Report it and return SERVFAIL
+            // to the caller.
+            if (logger.isDebugEnabled()) {
+                reportResponseClassifierError(category, incoming.getRcode());
             }
-            // Was a TCP query so we have received a packet over TCP with the TC
-            // bit set: drop through to common error processing.
-            // TODO: Can we use what we have received instead of discarding it?
-
-        case isc::resolve::ResponseClassifier::EMPTY:
-        case isc::resolve::ResponseClassifier::EXTRADATA:
-        case isc::resolve::ResponseClassifier::INVNAMCLASS:
-        case isc::resolve::ResponseClassifier::INVTYPE:
-        case isc::resolve::ResponseClassifier::MISMATQUEST:
-        case isc::resolve::ResponseClassifier::MULTICLASS:
-        case isc::resolve::ResponseClassifier::NOTONEQUEST:
-        case isc::resolve::ResponseClassifier::NOTRESPONSE:
-        case isc::resolve::ResponseClassifier::NOTSINGLE:
-        case isc::resolve::ResponseClassifier::OPCODE:
-        case isc::resolve::ResponseClassifier::RCODE:
-            LOG_DEBUG(isc::resolve::logger, RESLIB_DBG_RESULTS, RESLIB_RCODE_ERR)
-                      .arg(questionText(question_));
-            // Should we try a different server rather than SERVFAIL?
             makeSERVFAIL();
-            return true;
-            break;
+            return (true);
         }
 
-        // Since we do not have a default in the switch above,
-        // the compiler should have errored on any missing case
-        // statements.
+        // If we get here, there is some serious logic error (or a missing
+        // "return").
         assert(false);
-        return true;
+        return (true);  // To keep the compiler happy
     }
-    
+
+    /// \brief Report classification-detected error
+    ///
+    /// When the response classifier has detected an error in the response from
+    /// an upstream query, this method is called to log a debug message giving
+    /// information about the problem.
+    ///
+    /// \param category Classification code for the packet
+    /// \param rcode RCODE value in the packet
+    void reportResponseClassifierError(ResponseClassifier::Category category,
+                                       const Rcode& rcode)
+    {
+        // We could set up a table of response classifications to message
+        // IDs here and index into that table.  But given that (a) C++ does
+        // not have C's named initializers, (b) the codes for the
+        // response classifier are in another module and (c) not all messages
+        // have the same number of arguments, the setup of the table would be
+        // almost as long as the code here: it would need to include a number
+        // of assertions to ensure that any change to the the response
+        // classifier codes was detected, and the checking logic would need to
+        // check that the numeric value of the code lay within the defined
+        // limits of the table.
+
+        if (category == ResponseClassifier::RCODE) {
+
+            // Special case as this message takes two arguments.
+            LOG_DEBUG(logger, RESLIB_DBG_RESULTS, RESLIB_RCODE_ERROR).
+                      arg(questionText(question_)).arg(rcode);
+
+        } else {
+
+            isc::log::MessageID message_id;
+            switch (category) {
+            case ResponseClassifier::TRUNCATED:
+                message_id = RESLIB_TCP_TRUNCATED;
+                break;
+
+            case ResponseClassifier::EMPTY:
+                message_id = RESLIB_EMPTY_RESPONSE;
+                break;
+
+            case ResponseClassifier::EXTRADATA:
+                message_id = RESLIB_EXTRADATA_RESPONSE;
+                break;
+
+            case ResponseClassifier::INVNAMCLASS:
+                message_id = RESLIB_INVALID_NAMECLASS_RESPONSE;
+                break;
+
+            case ResponseClassifier::INVTYPE:
+                message_id = RESLIB_INVALID_TYPE_RESPONSE;
+                break;
+
+            case ResponseClassifier::MISMATQUEST:
+                message_id = RESLIB_INVALID_QNAME_RESPONSE;
+                break;
+
+            case ResponseClassifier::MULTICLASS:
+                message_id = RESLIB_MULTIPLE_CLASS_RESPONSE;
+                break;
+
+            case ResponseClassifier::NOTONEQUEST:
+                message_id = RESLIB_NOT_ONE_QNAME_RESPONSE;
+                break;
+
+            case ResponseClassifier::NOTRESPONSE:
+                message_id = RESLIB_NOT_RESPONSE;
+                break;
+
+            case ResponseClassifier::NOTSINGLE:
+                message_id = RESLIB_NOTSINGLE_RESPONSE;
+
+            case ResponseClassifier::OPCODE:
+                message_id = RESLIB_OPCODE_RESPONSE;
+
+            default:
+                message_id = RESLIB_ERROR_RESPONSE;
+            }
+            LOG_DEBUG(logger, RESLIB_DBG_RESULTS, message_id).
+                      arg(questionText(question_));
+        }
+    }
+
 public:
     RunningQuery(IOService& io,
         const Question& question,
@@ -734,12 +809,7 @@ public:
                 incoming.fromWire(ibuf);
 
                 buffer_->clear();
-                if (incoming.getRcode() == Rcode::NOERROR()) {
-                    done_ = handleRecursiveAnswer(incoming);
-                } else {
-                    isc::resolve::copyResponseMessage(incoming, answer_message_);
-                    done_ = true;
-                }
+                done_ = handleRecursiveAnswer(incoming);
                 if (done_) {
                     callCallback(true);
                     stop();

+ 86 - 11
src/lib/resolve/resolve_messages.mes

@@ -15,22 +15,61 @@
 $NAMESPACE isc::resolve
 
 % RESLIB_ANSWER answer received in response to query for <%1>
-A debug message recording that an answer has been received to an upstream
-query for the specified question.  Previous debug messages will have indicated
-the server to which the question was sent.
+A debug message reporting that an answer has been received to an upstream
+query for the specified question.  Previous debug messages will have
+indicated the server to which the question was sent.
 
 % RESLIB_CNAME CNAME received in response to query for <%1>
-A debug message recording that CNAME response has been received to an upstream
-query for the specified question.  Previous debug messages will have indicated
-the server to which the question was sent.
+A debug message recording that CNAME response has been received to an
+upstream query for the specified question.  Previous debug messages will
+have indicated the server to which the question was sent.
 
 % RESLIB_DEEPEST did not find <%1> in cache, deepest delegation found is %2
-A debug message, a cache lookup did not find the specified <name, class,
-type> tuple in the cache; instead, the deepest delegation found is indicated.
+A debug message, a cache lookup did not find the specified <name,
+class, type> tuple in the cache; instead, the deepest delegation found
+is indicated.
+
+% RESLIB_EMPTY_RESPONSE empty response received to query for <%1>
+A debug message, the response to the specified query from an upstream
+nameserver did not contain anything in the answer or authority sections,
+although in all other respects it was a valid response.  A SERVFAIL will
+be returned to the system making the original query.
+
+% RESLIB_ERROR_RESPONSE unspecified error received in response to query for <%1>
+A debug message, the response to the specified query to an upstream
+nameserver indicated that the response was classified as an erroneous
+response, but that the nature of the error cannot be identified.
+A SERVFAIL will be returned to the system making the original query.
+
+% RESLIB_EXTRADATA_RESPONSE extra data in response to query for <%1>
+A debug message indicating that the response to the specified query
+from an upstream nameserver contained too much data.  This can happen if
+an ANY query was sent and the answer section in the response contained
+multiple RRs with different names.  A SERVFAIL will be returned to the
+system making the original query.
 
 % RESLIB_FOLLOW_CNAME following CNAME chain to <%1>
-A debug message, a CNAME response was received and another query is being issued
-for the <name, class, type> tuple.
+A debug message, a CNAME response was received and another query is
+being issued for the <name, class, type> tuple.
+
+% RESLIB_INVALID_NAMECLASS_RESPONSE invalid name or class in response to query for <%1>
+A debug message, the response to the specified query from an upstream
+nameserver (as identified by the ID of the response) contained either
+an answer not matching the query name or an answer having a different
+class to that queried for.  A SERVFAIL will be returned to the system
+making the original query.
+
+% RESLIB_INVALID_QNAME_RESPONSE invalid name or class in response to query for <%1>
+A debug message, the response to the specified query from an upstream
+nameserver (as identified by the ID of the response) contained a name
+in the question section that did not match that of the query. A SERVFAIL
+will be returned to the system making the original query.
+
+% RESLIB_INVALID_TYPE_RESPONSE invalid name or class in response to query for <%1>
+A debug message, the response to the specified query from an upstream
+nameserver (as identified by the ID of the response) contained an
+invalid type field. A SERVFAIL will be returned to the system making
+the original query.
 
 % RESLIB_LONG_CHAIN CNAME received in response to query for <%1>: CNAME chain length exceeded
 A debug message recording that a CNAME response has been received to an upstream
@@ -39,16 +78,47 @@ the server to which the question was sent).  However, receipt of this CNAME
 has meant that the resolver has exceeded the CNAME chain limit (a CNAME chain
 is where on CNAME points to another) and so an error is being returned.
 
+% RESLIB_MULTIPLE_CLASS_RESPONSE response to query for <%1> contained multiple RRsets with different classes
+A debug message reporting that the response to an upstream query for
+the specified name contained multiple RRsets in the answer and not all
+were of the same class.  This is a violation of the standard and so a
+SERVFAIL will be returned.
+
 % RESLIB_NO_NS_RRSET no NS RRSet in referral response received to query for <%1>
 A debug message, this indicates that a response was received for the specified
 query and was categorized as a referral.  However, the received message did
 not contain any NS RRsets.  This may indicate a programming error in the
 response classification code.
 
+% RESLIB_NOT_ONE_QNAME_RESPONSE not one question in response to query for <%1>
+A debug message, the response to the specified query from an upstream
+nameserver (as identified by the ID of the response) did not contain
+one name in the question section as required by the standard. A SERVFAIL
+will be returned to the system making the original query.
+
+% RESLIB_NOT_RESPONSE response to query for <%1> was not a response
+A debug message, the response to the specified query from an upstream
+nameserver (as identified by the ID of the response) did not have the QR
+bit set (thus indicating that the packet was a query, not a response).
+A SERVFAIL will be returned to the system making the original query.
+
+% RESLIB_NOTSINGLE_RESPONSE response to query for <%1> was not a response
+A debug message, the response to the specified query from an upstream
+nameserver was a CNAME that had mutiple RRs in the RRset.  This is
+an invalid response according to the standards so a SERVFAIL will be
+returned to the system making the original query.
+
 % RESLIB_NSAS_LOOKUP looking up nameserver for zone %1 in the NSAS
 A debug message, the RunningQuery object is querying the NSAS for the
 nameservers for the specified zone.
 
+% RESLIB_OPCODE_RESPONSE response to query for <%1> did not have query opcode
+A debug message, the response to the specified query from an upstream
+nameserver was a response that did not have the opcode set to that of
+a query.  According to the standards, this is an invalid response to
+the query that was made, so a SERVFAIL will be returned to the system
+making the original query.
+
 % RESLIB_NXDOM_NXRR NXDOMAIN/NXRRSET received in response to query for <%1>
 A debug message recording that either a NXDOMAIN or an NXRRSET response has
 been received to an upstream query for the specified question.  Previous debug
@@ -63,7 +133,7 @@ A debug message indicating that a protocol error was received and that
 the resolver is repeating the query to the same nameserver.  After this
 repeated query, there will be the indicated number of retries left.
 
-% RESLIB_RCODE_ERR RCODE indicates error in response to query for <%1>
+% RESLIB_RCODE_ERROR response to query for <%1> returns RCODE of %2
 A debug message, the response to the specified query indicated an error
 that is not covered by a specific code path.  A SERVFAIL will be returned.
 
@@ -122,6 +192,11 @@ A debug message indicating that a RunningQuery's success callback has been
 called because a nameserver has been found, and that a query is being sent
 to the specified nameserver.
 
+% RESLIB_TCP_TRUNCATED TCP response to query for %1 was truncated
+This is a debug message logged when a response to the specified  query to an
+upstream nameserver returned a response with the TC (truncation) bit set.  This
+is treated as an error by the code.
+
 % RESLIB_TEST_SERVER setting test server to %1(%2)
 This is a warning message only generated in unit tests.  It indicates
 that all upstream queries from the resolver are being routed to the