Browse Source

[master] Merge branch 'trac2562'

JINMEI Tatuya 12 years ago
parent
commit
119eed9938

+ 25 - 4
src/bin/auth/auth_messages.mes

@@ -374,12 +374,33 @@ XFRIN (Transfer-in) process.  It is issued during server startup is an
 indication that the initialization is proceeding normally.
 
 % AUTH_ZONEMGR_COMMS error communicating with zone manager: %1
-This is a debug message output during the processing of a NOTIFY request.
+This is an internal error during the processing of a NOTIFY request.
 An error (listed in the message) has been encountered whilst communicating
 with the zone manager. The NOTIFY request will not be honored.
+This may be some temporary failure, but is generally an unexpected
+event and is quite likely a bug.  It's probably worth filing a report.
 
 % AUTH_ZONEMGR_ERROR received error response from zone manager: %1
-This is a debug message output during the processing of a NOTIFY
-request. The zone manager component has been informed of the request,
+The zone manager component has been informed of the request,
 but has returned an error response (which is included in the message). The
-NOTIFY request will not be honored.
+NOTIFY request will not be honored.  As of this writing, this can only
+happen due to a bug inside the Zonemgr implementation.  Zonemgr itself
+may log more detailed cause of this, and these are probably worth
+filing a bug report.
+
+% AUTH_ZONEMGR_NOTEXIST received NOTIFY but Zonemgr does not exist
+This is a debug message produced by the authoritative server when it
+receives a NOTIFY message but the Zonemgr component is not running at
+that time.  Not running Zonemgr is completely valid for, e.g., primary
+only servers, so this is not necessarily a problem.  If this message
+is logged even if Zonemgr is supposed to be running, it's encouraged
+to check other logs to identify why that happens.  It may or may not
+be a real problem (for example, if it's immediately after the system
+startup, it's possible that Auth has started up and is running but
+Zonemgr is not yet).  Even if this is indeed an unexpected case,
+Zonemgr should normally be restarted by the Init process, so unless
+this repeats too often it may be negligible in practice (still it's
+worth filing a bug report).  In any case, the authoritative server
+simply drops the NOTIFY message; if it's a temporary failure or
+delayed startup, subsequently resent messages will eventually reach
+Zonemgr.

+ 21 - 11
src/bin/auth/auth_srv.cc

@@ -22,6 +22,7 @@
 #include <config/ccsession.h>
 
 #include <cc/data.h>
+#include <cc/proto_defs.h>
 
 #include <exceptions/exceptions.h>
 
@@ -789,18 +790,15 @@ AuthSrvImpl::processNotify(const IOMessage& io_message, Message& message,
         return (true);
     }
 
-    // In the code that follows, we simply ignore the notify if any internal
-    // error happens rather than returning (e.g.) SERVFAIL.  RFC 1996 is
-    // silent about such cases, but there doesn't seem to be anything we can
-    // improve at the primary server side by sending an error anyway.
-    if (xfrin_session_ == NULL) {
-        LOG_DEBUG(auth_logger, DBG_AUTH_DETAIL, AUTH_NO_XFRIN);
-        return (false);
-    }
-
     LOG_DEBUG(auth_logger, DBG_AUTH_DETAIL, AUTH_RECEIVED_NOTIFY)
         .arg(question->getName()).arg(question->getClass()).arg(remote_ep);
 
+    // xfrin_session_ should have been set and never be replaced except in
+    // tests; otherwise it's an internal bug.  assert() may be too strong,
+    // but processMessage() will catch all exceptions, so there's no better
+    // way.
+    assert(xfrin_session_);
+
     const string remote_ip_address = remote_ep.getAddress().toText();
     static const string command_template_start =
         "{\"command\": [\"notify\", {\"zone_name\" : \"";
@@ -816,12 +814,24 @@ AuthSrvImpl::processNotify(const IOMessage& io_message, Message& message,
                 command_template_end);
         const unsigned int seq =
             xfrin_session_->group_sendmsg(notify_command, "Zonemgr",
-                                          "*", "*");
+                                          CC_INSTANCE_WILDCARD,
+                                          CC_INSTANCE_WILDCARD, true);
         ConstElementPtr env, answer, parsed_answer;
         xfrin_session_->group_recvmsg(env, answer, false, seq);
         int rcode;
         parsed_answer = parseAnswer(rcode, answer);
-        if (rcode != 0) {
+        if (rcode == CC_REPLY_NO_RECPT) {
+            // This can happen when Zonemgr is not running.  When we support
+            // notification-based membership framework, we should check if it's
+            // supposed to be running and shouldn't even send the command if
+            // not.  Until then, we log this event at the debug level as we
+            // don't know whether it's a real trouble or intentional
+            // configuration.  (Also, when it's done, maybe we should simply
+            // propagate the exception and return SERVFAIL to suppress further
+            // NOTIFY).
+            LOG_DEBUG(auth_logger, DBG_AUTH_DETAIL, AUTH_ZONEMGR_NOTEXIST);
+            return (false);
+        } else if (rcode != CC_REPLY_SUCCESS) {
             LOG_ERROR(auth_logger, AUTH_ZONEMGR_ERROR)
                       .arg(parsed_answer->str());
             return (false);

+ 9 - 2
src/bin/auth/tests/auth_srv_unittest.cc

@@ -26,6 +26,8 @@
 #include <dns/rdataclass.h>
 #include <dns/tsig.h>
 
+#include <cc/proto_defs.h>
+
 #include <server_common/portconfig.h>
 #include <server_common/keyring.h>
 
@@ -868,10 +870,12 @@ TEST_F(AuthSrvTest, notifyWithErrorRcode) {
                 Opcode::NOTIFY().getCode(), QR_FLAG | AA_FLAG, 1, 0, 0, 0);
 }
 
-TEST_F(AuthSrvTest, notifyWithoutSession) {
+TEST_F(AuthSrvTest, notifyWithoutRecipient) {
     updateInMemory(server, "example.", CONFIG_INMEMORY_EXAMPLE, false);
 
-    server.setXfrinSession(NULL);
+    // Emulate the case where msgq tells auth there's no Zonemgr module.
+    notify_session.setMessage(isc::config::createAnswer(CC_REPLY_NO_RECPT,
+                                                        "no recipient"));
 
     UnitTestUtil::createRequestMessage(request_message, Opcode::NOTIFY(),
                                        default_qid, Name("example"),
@@ -883,6 +887,9 @@ TEST_F(AuthSrvTest, notifyWithoutSession) {
     // happens.
     server.processMessage(*io_message, *parse_message, *response_obuffer,
                           &dnsserv);
+    // want_answer should have been set to true so auth can catch it if zonemgr
+    // is not running.
+    EXPECT_TRUE(notify_session.wasAnswerWanted());
     EXPECT_FALSE(dnsserv.hasAnswer());
 }
 

+ 15 - 11
src/lib/config/ccsession.h

@@ -375,25 +375,29 @@ public:
         return (session_.group_sendmsg(msg, group, instance, to, want_answer));
     };
 
-    /**
-     * Receive a message from the underlying CC session.
-     * This has the same interface as isc::cc::Session::group_recvmsg()
-     *
-     * \param envelope see isc::cc::Session::group_recvmsg()
-     * \param msg see isc::cc::Session::group_recvmsg()
-     * \param nonblock see isc::cc::Session::group_recvmsg()
-     * \param seq see isc::cc::Session::group_recvmsg()
-     * \return see isc::cc::Session::group_recvmsg()
-     */
+    /// \brief Receive a message from the underlying CC session.
+    /// This has the same interface as isc::cc::Session::group_recvmsg()
+    ///
+    /// NOTE: until #2804 is resolved this method wouldn't work except in
+    /// very limited cases; don't try to use it until then.
+    ///
+    /// \param envelope see isc::cc::Session::group_recvmsg()
+    /// \param msg see isc::cc::Session::group_recvmsg()
+    /// \param nonblock see isc::cc::Session::group_recvmsg()
+    /// \param seq see isc::cc::Session::group_recvmsg()
+    /// \return see isc::cc::Session::group_recvmsg()
     bool groupRecvMsg(isc::data::ConstElementPtr& envelope,
                       isc::data::ConstElementPtr& msg,
                       bool nonblock = true,
                       int seq = -1) {
         return (session_.group_recvmsg(envelope, msg, nonblock, seq));
-    };
+    }
 
     /// \brief Send a command message and wait for the answer.
     ///
+    /// NOTE: until #2804 is resolved this method wouldn't work except in
+    /// very limited cases; don't try to use it until then.
+    ///
     /// This is mostly a convenience wrapper around groupSendMsg
     /// and groupRecvMsg, with some error handling.
     ///

+ 12 - 5
src/lib/testutils/mockups.h

@@ -40,15 +40,16 @@ public:
     MockSession() :
         // by default we return a simple "success" message.
         msg_(isc::data::Element::fromJSON("{\"result\": [0, \"SUCCESS\"]}")),
-        send_ok_(true), receive_ok_(true)
+        send_ok_(true), receive_ok_(true), answer_wanted_(false)
     {}
 
 
     virtual void establish(const char*) {}
     virtual void disconnect() {}
 
-    virtual int group_sendmsg(isc::data::ConstElementPtr msg, std::string group,
-                              std::string, std::string, bool)
+    virtual int group_sendmsg(isc::data::ConstElementPtr msg,
+                              std::string group,
+                              std::string, std::string, bool want_answer)
     {
         if (!send_ok_) {
             isc_throw(isc::cc::SessionError,
@@ -57,6 +58,7 @@ public:
 
         sent_msg_ = msg;
         msg_dest_ = group;
+        answer_wanted_ = want_answer;
         return (0);
     }
 
@@ -93,8 +95,12 @@ public:
     void disableSend() { send_ok_ = false; }
     void disableReceive() { receive_ok_ = false; }
 
-    isc::data::ConstElementPtr getSentMessage() { return (sent_msg_); }
-    std::string getMessageDest() { return (msg_dest_); }
+    isc::data::ConstElementPtr getSentMessage() const { return (sent_msg_); }
+    std::string getMessageDest() const { return (msg_dest_); }
+
+    /// \brief Return the value of want_answer parameter of the previous call
+    /// to group_sendmsg().
+    bool wasAnswerWanted() const { return (answer_wanted_); }
 
 private:
     isc::data::ConstElementPtr sent_msg_;
@@ -102,6 +108,7 @@ private:
     isc::data::ConstElementPtr msg_;
     bool send_ok_;
     bool receive_ok_;
+    bool answer_wanted_;
 };
 
 // This mock object does nothing except for recording passed parameters

+ 39 - 0
tests/lettuce/features/xfrin_notify_handling.feature

@@ -334,3 +334,42 @@ Feature: Xfrin incoming notify handling
     Then wait for new master stderr message NOTIFY_OUT_REPLY_RECEIVED
 
     A query for www.example.org to [::1]:47806 should have rcode NXDOMAIN
+
+    #
+    # Test for NOTIFY when zonemgr is not running
+    #
+    Scenario: Handle incoming notify while zonemgr is not running
+    Given I have bind10 running with configuration xfrin/retransfer_master.conf with cmdctl port 47804 as master
+    And wait for master stderr message BIND10_STARTED_CC
+    And wait for master stderr message CMDCTL_STARTED
+    And wait for master stderr message AUTH_SERVER_STARTED
+    And wait for master stderr message XFROUT_STARTED
+    And wait for master stderr message ZONEMGR_STARTED
+    And wait for master stderr message STATS_STARTING
+
+    And I have bind10 running with configuration xfrin/retransfer_slave_notify.conf
+    And wait for bind10 stderr message BIND10_STARTED_CC
+    And wait for bind10 stderr message CMDCTL_STARTED
+    And wait for bind10 stderr message AUTH_SERVER_STARTED
+    And wait for bind10 stderr message XFRIN_STARTED
+    And wait for bind10 stderr message ZONEMGR_STARTED
+
+    # remove zonemgr from the system.  a subsequent notify is ignored, but
+    # an error message shouldn't be logged at auth.
+    When I send bind10 the following commands with cmdctl
+    """
+    config remove Init/components b10-zonemgr
+    config commit
+    """
+    last bindctl output should not contain "error"
+    And wait for new bind10 stderr message BIND10_PROCESS_ENDED
+
+    A query for www.example.org to [::1]:47806 should have rcode NXDOMAIN
+
+    When I send bind10 with cmdctl port 47804 the command Xfrout notify example.org IN
+    Then wait for master stderr message XFROUT_NOTIFY_COMMAND
+    Then wait for new bind10 stderr message AUTH_RECEIVED_NOTIFY
+    Then wait for new bind10 stderr message AUTH_ZONEMGR_NOTEXIST not AUTH_ZONEMGR_ERROR
+    Then wait for master stderr message NOTIFY_OUT_TIMEOUT not NOTIFY_OUT_REPLY_RECEIVED
+
+    A query for www.example.org to [::1]:47806 should have rcode NXDOMAIN