Browse Source

[master] Merge branch 'trac2617'

JINMEI Tatuya 12 years ago
parent
commit
6dae0d1f01

+ 3 - 0
src/bin/auth/auth_messages.mes

@@ -312,6 +312,9 @@ been created and is initializing. The AUTH_SERVER_STARTED message will be
 output when initialization has successfully completed and the server starts
 accepting queries.
 
+% AUTH_SERVER_EXITING exiting
+The authoritative server is exiting.
+
 % AUTH_SERVER_FAILED server failed: %1
 The authoritative server has encountered a fatal error and is terminating. The
 reason for the failure is included in the message.

+ 16 - 16
src/bin/auth/main.cc

@@ -44,6 +44,7 @@
 #include <server_common/socket_request.h>
 
 #include <boost/bind.hpp>
+#include <boost/scoped_ptr.hpp>
 
 #include <sys/types.h>
 #include <sys/socket.h>
@@ -152,10 +153,11 @@ main(int argc, char* argv[]) {
     int ret = 0;
 
     // XXX: we should eventually pass io_service here.
-    Session* cc_session = NULL;
-    Session* xfrin_session = NULL;
+    boost::scoped_ptr<AuthSrv> auth_server_; // placeholder
+    boost::scoped_ptr<Session> cc_session;
+    boost::scoped_ptr<Session> xfrin_session;
     bool xfrin_session_established = false; // XXX (see Trac #287)
-    ModuleCCSession* config_session = NULL;
+    boost::scoped_ptr<ModuleCCSession> config_session;
     XfroutClient xfrout_client(getXfroutSocketPath());
     SocketSessionForwarder ddns_forwarder(getDDNSSocketPath());
     try {
@@ -167,7 +169,8 @@ main(int argc, char* argv[]) {
             specfile = string(AUTH_SPECFILE_LOCATION);
         }
 
-        auth_server = new AuthSrv(xfrout_client, ddns_forwarder);
+        auth_server_.reset(new AuthSrv(xfrout_client, ddns_forwarder));
+        auth_server = auth_server_.get();
         LOG_INFO(auth_logger, AUTH_SERVER_CREATED);
 
         SimpleCallback* checkin = auth_server->getCheckinProvider();
@@ -179,7 +182,7 @@ main(int argc, char* argv[]) {
         auth_server->setDNSService(dns_service);
         LOG_DEBUG(auth_logger, DBG_AUTH_START, AUTH_DNS_SERVICES_CREATED);
 
-        cc_session = new Session(io_service.get_io_service());
+        cc_session.reset(new Session(io_service.get_io_service()));
         LOG_DEBUG(auth_logger, DBG_AUTH_START, AUTH_CONFIG_CHANNEL_CREATED);
         // Initialize the Socket Requestor
         isc::server_common::initSocketRequestor(*cc_session, AUTH_NAME);
@@ -191,18 +194,18 @@ main(int argc, char* argv[]) {
         // updateConfig() for listen_on and in initializing TSIG keys below).
         // Until then all operations on the CC session will take place
         // synchronously.
-        config_session = new ModuleCCSession(specfile, *cc_session,
-                                             my_config_handler,
-                                             my_command_handler, false);
+        config_session.reset(new ModuleCCSession(specfile, *cc_session,
+                                                 my_config_handler,
+                                                 my_command_handler, false));
         LOG_DEBUG(auth_logger, DBG_AUTH_START, AUTH_CONFIG_CHANNEL_ESTABLISHED);
 
-        xfrin_session = new Session(io_service.get_io_service());
+        xfrin_session.reset(new Session(io_service.get_io_service()));
         LOG_DEBUG(auth_logger, DBG_AUTH_START, AUTH_XFRIN_CHANNEL_CREATED);
         xfrin_session->establish(NULL);
         xfrin_session_established = true;
         LOG_DEBUG(auth_logger, DBG_AUTH_START, AUTH_XFRIN_CHANNEL_ESTABLISHED);
 
-        auth_server->setXfrinSession(xfrin_session);
+        auth_server->setXfrinSession(xfrin_session.get());
 
         // Configure the server.  configureAuthServer() is expected to install
         // all initial configurations, but as a short term workaround we
@@ -210,7 +213,7 @@ main(int argc, char* argv[]) {
         // updateConfig().
         // if server load configure failed, we won't exit, give user second
         // chance to correct the configure.
-        auth_server->setConfigSession(config_session);
+        auth_server->setConfigSession(config_session.get());
         try {
             configureAuthServer(*auth_server, config_session->getFullConfig());
             auth_server->updateConfig(ElementPtr());
@@ -228,7 +231,7 @@ main(int argc, char* argv[]) {
         config_session->addRemoteConfig("data_sources",
                                         boost::bind(datasrcConfigHandler,
                                                     auth_server, &first_time,
-                                                    config_session,
+                                                    config_session.get(),
                                                     _1, _2, _3),
                                         false);
 
@@ -260,10 +263,7 @@ main(int argc, char* argv[]) {
         config_session->removeRemoteConfig("data_sources");
     }
 
-    delete xfrin_session;
-    delete config_session;
-    delete cc_session;
-    delete auth_server;
+    LOG_INFO(auth_logger, AUTH_SERVER_EXITING);
 
     return (ret);
 }

+ 2 - 0
src/bin/cmdctl/cmdctl.py.in

@@ -650,4 +650,6 @@ if __name__ == '__main__':
     if httpd:
         httpd.shutdown()
 
+    logger.info(CMDCTL_EXITING)
+
     sys.exit(result)

+ 3 - 0
src/bin/cmdctl/cmdctl_messages.mes

@@ -43,6 +43,9 @@ specific error is printed in the message.
 This debug message indicates that the given command has been sent to
 the given module.
 
+% CMDCTL_EXITING exiting
+The b10-cmdctl daemon is exiting.
+
 % CMDCTL_NO_SUCH_USER username not found in user database: %1
 A login attempt was made to b10-cmdctl, but the username was not known.
 Users can be added with the tool b10-cmdctl-usermgr.

+ 111 - 44
src/bin/msgq/msgq.py.in

@@ -70,6 +70,23 @@ SPECFILE_LOCATION = SPECFILE_PATH + "/msgq.spec"
 
 class MsgQReceiveError(Exception): pass
 
+class MsgQCloseOnReceive(Exception):
+    """Exception raised when reading data from a socket results in 'shutdown'.
+
+    This happens when msgq received 0-length data.  This class holds whether
+    it happens in the middle of reading (i.e. after reading some) via
+    partial_read parameter, which is set to True if and only if so.
+    This will be used by an upper layer catching the exception to distinguish
+    the severity of the event.
+
+    """
+    def __init__(self, reason, partial_read):
+        self.partial_read = partial_read
+        self.__reason = reason
+
+    def __str__(self):
+        return self.__reason
+
 class SubscriptionManager:
     def __init__(self, cfgmgr_ready):
         """
@@ -311,23 +328,22 @@ class MsgQ:
         lname = self.newlname()
         self.lnames[lname] = newsocket
 
+        logger.debug(TRACE_BASIC, MSGQ_SOCKET_REGISTERED, newsocket.fileno(),
+                     lname)
+
         if self.poller:
             self.poller.register(newsocket, select.POLLIN)
         else:
             self.add_kqueue_socket(newsocket)
 
-    def process_socket(self, fd):
-        """Process a read on a socket."""
-        if not fd in self.sockets:
-            logger.error(MSGQ_READ_UNKNOWN_FD, fd)
-            return
-        sock = self.sockets[fd]
-        self.process_packet(fd, sock)
-
     def kill_socket(self, fd, sock):
         """Fully close down the socket."""
+        # Unregister events on the socket.  Note that we don't have to do
+        # this for kqueue because the registered events are automatically
+        # deleted when the corresponding socket is closed.
         if self.poller:
             self.poller.unregister(sock)
+
         self.subs.unsubscribe_all(sock)
         lname = [ k for k, v in self.lnames.items() if v == sock ][0]
         del self.lnames[lname]
@@ -337,24 +353,35 @@ class MsgQ:
             del self.sendbuffs[fd]
         logger.debug(TRACE_BASIC, MSGQ_SOCK_CLOSE, fd)
 
-    def getbytes(self, fd, sock, length):
+    def __getbytes(self, fd, sock, length, continued):
         """Get exactly the requested bytes, or raise an exception if
-           EOF."""
+           EOF.
+
+           continued is set to True if this method is called to complete
+           already read data.
+           """
         received = b''
         while len(received) < length:
             try:
                 data = sock.recv(length - len(received))
-            except socket.error:
-                raise MsgQReceiveError(socket.error)
+
+            except socket.error as err:
+                # This case includes ECONNRESET, which seems to happen when
+                # the remote client has closed its socket at some subtle
+                # timing (it should normally result in receiving empty data).
+                # Since we didn't figure out how exactly that could happen,
+                # we treat it just like other really-unexpected socket errors.
+                raise MsgQReceiveError(str(err))
             if len(data) == 0:
-                raise MsgQReceiveError("EOF")
+                raise MsgQCloseOnReceive("EOF", continued)
             received += data
+            continued = True
         return received
 
     def read_packet(self, fd, sock):
         """Read a correctly formatted packet.  Will raise exceptions if
            something fails."""
-        lengths = self.getbytes(fd, sock, 6)
+        lengths = self.__getbytes(fd, sock, 6, False)
         overall_length, routing_length = struct.unpack(">IH", lengths)
         if overall_length < 2:
             raise MsgQReceiveError("overall_length < 2")
@@ -365,9 +392,9 @@ class MsgQ:
             raise MsgQReceiveError("routing_length == 0")
         data_length = overall_length - routing_length
         # probably need to sanity check lengths here...
-        routing = self.getbytes(fd, sock, routing_length)
+        routing = self.__getbytes(fd, sock, routing_length, True)
         if data_length > 0:
-            data = self.getbytes(fd, sock, data_length)
+            data = self.__getbytes(fd, sock, data_length, True)
         else:
             data = None
         return (routing, data)
@@ -376,8 +403,15 @@ class MsgQ:
         """Process one packet."""
         try:
             routing, data = self.read_packet(fd, sock)
-        except MsgQReceiveError as err:
-            logger.error(MSGQ_RECV_ERR, fd, err)
+        except (MsgQReceiveError, MsgQCloseOnReceive) as err:
+            # If it's MsgQCloseOnReceive and that happens without reading
+            # any data, it basically means the remote clinet has closed the
+            # socket, so we log it as debug information.  Otherwise, it's
+            # a somewhat unexpected event, so we consider it an "error".
+            if isinstance(err, MsgQCloseOnReceive) and not err.partial_read:
+                logger.debug(TRACE_BASIC, MSGQ_CLOSE_ON_RECV, fd)
+            else:
+                logger.error(MSGQ_RECV_ERR, fd, err)
             self.kill_socket(fd, sock)
             return
 
@@ -428,9 +462,12 @@ class MsgQ:
     def sendmsg(self, sock, env, msg = None):
         self.send_prepared_msg(sock, self.preparemsg(env, msg))
 
-    def __send_data(self, sock, data):
+    def _send_data(self, sock, data):
         """
-        Send a piece of data to the given socket.
+        Send a piece of data to the given socket.  This method is
+        essentially "private" to MsgQ, but defined as if it were "protected"
+        for easier access from tests.
+
         Parameters:
         sock: The socket to send to
         data: The list of bytes to send
@@ -446,15 +483,17 @@ class MsgQ:
             sock.setblocking(0)
             return sock.send(data)
         except socket.error as e:
-            if e.errno in [ errno.EAGAIN,
-                            errno.EWOULDBLOCK,
-                            errno.EINTR ]:
+            if e.errno in [ errno.EAGAIN, errno.EWOULDBLOCK, errno.EINTR ]:
                 return 0
-            elif e.errno in [ errno.EPIPE,
-                              errno.ECONNRESET,
-                              errno.ENOBUFS ]:
-                logger.error(MSGQ_SEND_ERR, sock.fileno(),
-                             errno.errorcode[e.errno])
+            elif e.errno in [ errno.EPIPE, errno.ECONNRESET, errno.ENOBUFS ]:
+                # EPIPE happens if the remote module has terminated by the time
+                # of this send; its severity can vary, but in many cases it
+                # shouldn't be critical, so we log it separately as a warning.
+                if e.errno == errno.EPIPE:
+                    logger.warn(MSGQ_CLOSE_ON_SEND, sock.fileno())
+                else:
+                    logger.error(MSGQ_SEND_ERR, sock.fileno(),
+                                 errno.errorcode[e.errno])
                 self.kill_socket(sock.fileno(), sock)
                 return None
             else:
@@ -469,7 +508,7 @@ class MsgQ:
         if fileno in self.sendbuffs:
             amount_sent = 0
         else:
-            amount_sent = self.__send_data(sock, msg)
+            amount_sent = self._send_data(sock, msg)
             if amount_sent is None:
                 # Socket has been killed, drop the send
                 return
@@ -489,7 +528,7 @@ class MsgQ:
                 last_sent = now
                 if self.poller:
                     self.poller.register(fileno, select.POLLIN |
-                        select.POLLOUT)
+                                         select.POLLOUT)
                 else:
                     self.add_kqueue_socket(sock, True)
             self.sendbuffs[fileno] = (last_sent, buff)
@@ -498,7 +537,7 @@ class MsgQ:
         # Try to send some data from the buffer
         (_, msg) = self.sendbuffs[fileno]
         sock = self.sockets[fileno]
-        amount_sent = self.__send_data(sock, msg)
+        amount_sent = self._send_data(sock, msg)
         if amount_sent is not None:
             # Keep the rest
             msg = msg[amount_sent:]
@@ -592,12 +631,17 @@ class MsgQ:
                         self.running = False
                         break
                     else:
-                        if event & select.POLLOUT:
-                            self.__process_write(fd)
-                        elif event & select.POLLIN:
-                            self.process_socket(fd)
-                        else:
+                        writable = event & select.POLLOUT
+                        # Note: it may be okay to read data if available
+                        # immediately after write some, but due to unexpected
+                        # regression (see comments on the kqueue version below)
+                        # we restrict one operation per iteration for now.
+                        # In future we may clarify the point and enable the
+                        # "read/write" mode.
+                        readable = not writable and (event & select.POLLIN)
+                        if not writable and not readable:
                             logger.error(MSGQ_POLL_UNKNOWN_EVENT, fd, event)
+                        self._process_fd(fd, writable, readable, False)
 
     def run_kqueue(self):
         while self.running:
@@ -616,14 +660,35 @@ class MsgQ:
                         self.running = False
                         break;
                     else:
-                        if event.filter == select.KQ_FILTER_WRITE:
-                            self.__process_write(event.ident)
-                        if event.filter == select.KQ_FILTER_READ and \
-                                event.data > 0:
-                            self.process_socket(event.ident)
-                        elif event.flags & select.KQ_EV_EOF:
-                            self.kill_socket(event.ident,
-                                             self.sockets[event.ident])
+                        fd = event.ident
+                        writable = event.filter == select.KQ_FILTER_WRITE
+                        readable = (event.filter == select.KQ_FILTER_READ and
+                                    event.data > 0)
+                        # It seems to break some of our test cases if we
+                        # immediately close the socket on EOF after reading
+                        # some data.  It may be possible to avoid by tweaking
+                        # the test, but unless we can be sure we'll hold off.
+                        closed = (not readable and
+                                  (event.flags & select.KQ_EV_EOF))
+                        self._process_fd(fd, writable, readable, closed)
+
+    def _process_fd(self, fd, writable, readable, closed):
+        '''Process a single FD: unified subroutine of run_kqueue/poller.
+
+        closed can be True only in the case of kqueue.  This is essentially
+        private but is defined as if it were "protected" so it's callable
+        from tests.
+
+        '''
+        # We need to check if FD is still in the sockets dict, because
+        # it's possible that the socket has been "killed" while processing
+        # other FDs; it's even possible it's killed within this method.
+        if writable and fd in self.sockets:
+            self.__process_write(fd)
+        if readable and fd in self.sockets:
+            self.process_packet(fd, self.sockets[fd])
+        if closed and fd in self.sockets:
+            self.kill_socket(fd, self.sockets[fd])
 
     def stop(self):
         # Signal it should terminate.
@@ -760,3 +825,5 @@ if __name__ == "__main__":
         pass
 
     msgq.shutdown()
+
+    logger.info(MSGQ_EXITING)

+ 55 - 11
src/bin/msgq/msgq_messages.mes

@@ -23,6 +23,31 @@
 This is a debug message. The message queue has little bit of special handling
 for the configuration manager. This special handling is happening now.
 
+% MSGQ_CLOSE_ON_RECV Reading from socket canceled as it's closed: FD=%1
+A debug message.  The msgq daemon was notified of a read event on a
+socket, but its initial read operation failed because the remote
+client has closed its socket.  This is possible in a normal operation
+when a module shuts down.
+
+% MSGQ_CLOSE_ON_SEND Sending to socket failed as already closed (okay to ignore on shutdown): FD=%1
+The msgq daemon tries to send some data to a client module, but it
+failed because the socket has been closed.  This normally means the
+client terminates (for some reason - either gracefully or as a crash)
+while other modules try to send a message to the terminated module.
+Since msgq doesn't keep track of the status of client modules, this
+can happen and is not really an error for msgq; however, it can still
+be an unexpected event for the BIND 10 system as a whole in that this
+particular message is lost, so it's logged as a warning.  If this
+message is logged for a running BIND 10 system, it's suggested to
+check other log messages; there may be an error from other modules
+reporting a missing response message.  One common, less critical case
+where this message is logged is during shutdown.  The ordering of
+process shutdown is basically arbitrary at this moment, so it's
+possible that some module tries to send a "quitting" message to some
+other module but the latter has already shut down.  Such cases are
+generally non critical, but you may want to check other possible error
+messages.
+
 % MSGQ_COMMAND Running command %1 with arguments %2
 Debug message. The message queue received a command and it is running it.
 
@@ -34,6 +59,9 @@ the message queue version and version of the module.
 % MSGQ_CONFIG_DATA Received configuration update for the msgq: %1
 Debug message. The message queue received a configuration update, handling it.
 
+% MSGQ_EXITING exiting
+The msgq daemon is exiting.
+
 % MSGQ_HDR_DECODE_ERR Error decoding header received from socket %1: %2
 The socket with mentioned file descriptor sent a packet. However, it was not
 possible to decode the routing header of the packet. The packet is ignored.
@@ -41,6 +69,11 @@ This may be caused by a programmer error (one of the components sending invalid
 data) or possibly by incompatible version of msgq and the component (but that's
 unlikely, as the protocol is not changed often).
 
+% MSGQ_INVALID_CMD Received invalid command: %1
+An unknown command listed in the log has been received. It is ignored. This
+indicates either a programmer error (eg. a typo in the command name) or
+incompatible version of a module and message queue daemon.
+
 % MSGQ_LISTENER_FAILED Failed to initialize listener on socket file '%1': %2
 The message queue daemon tried to listen on a file socket (the path is in the
 message), but it failed. The error from the operating system is logged.
@@ -63,23 +96,29 @@ happen and it is either a programmer error or OS bug. The event is ignored. The
 number noted as the event is the raw encoded value, which might be useful to
 the authors when figuring the problem out.
 
-% MSGQ_READ_UNKNOWN_FD Got read on strange socket %1
-The OS reported a file descriptor is ready to read. But the daemon doesn't know
-the mentioned file descriptor, which is either a programmer error or OS bug.
-The read event is ignored.
-
 % MSGQ_RECV_ERR Error reading from socket %1: %2
 There was a low-level error when reading from a socket. The error is logged and
-the corresponding socket is dropped.
+the corresponding socket is dropped.  The errors include receiving
+broken or (non empty but) incomplete data.  In either case it usually suggests
+something unexpected happens within the BIND 10 system; it's probably
+better to restart the system, and if it continues it should be
+reported as a bug.  One known, probably non critical case is
+the "connection reset by peer" (or its variants) socket error appearing
+on shutdown.  It's known this happens when the remote client closes the
+connection as part of shutdown process.  Such cases are normally expected
+to be reported as receiving empty data (which we log it at the debug level
+as the MSGQ_CLOSE_ON_RECV message), but for some (yet) unknown reason
+it can also be reported as the system error.  At shutdown time it's expected
+that connections are closed, so it's probably safe to ignore these messages
+in such a case.  We still log them as an error as we've not figured out
+how exactly that can happen.  In future, we may make the shutdown process
+more robust so the msgq daemon can explicitly know when a client shuts down
+more reliably.  If and when it's implemented this error message won't appear
+on shutdown unless there's really something unexpected.
 
 % MSGQ_RECV_HDR Received header: %1
 Debug message. This message includes the whole routing header of a packet.
 
-% MSGQ_INVALID_CMD Received invalid command: %1
-An unknown command listed in the log has been received. It is ignored. This
-indicates either a programmer error (eg. a typo in the command name) or
-incompatible version of a module and message queue daemon.
-
 % MSGQ_SEND_ERR Error while sending to socket %1: %2
 There was a low-level error when sending data to a socket. The error is logged
 and the corresponding socket is dropped.
@@ -87,6 +126,11 @@ and the corresponding socket is dropped.
 % MSGQ_SHUTDOWN Stopping Msgq
 Debug message. The message queue is shutting down.
 
+% MSGQ_SOCKET_REGISTERED Registered a socket descriptor %1 with lname %2
+Debug message. The msgq daemon accepted a session request on the
+shown descriptor of socket and assigned a unique identifier (lname)
+for the client on that socket.
+
 % MSGQ_SOCK_CLOSE Closing socket fd %1
 Debug message. Closing the mentioned socket.
 

+ 173 - 0
src/bin/msgq/tests/msgq_test.py

@@ -1,3 +1,4 @@
+import msgq
 from msgq import SubscriptionManager, MsgQ
 
 import unittest
@@ -559,6 +560,178 @@ class ThreadTests(unittest.TestCase):
         test_thread.join(60)
         self.assertTrue(self.__result)
 
+class SocketTests(unittest.TestCase):
+    '''Test cases for micro behaviors related to socket operations.
+
+    Some cases are covered as part of other tests, but in this fixture
+    we check more details of specific method related to socket operation,
+    with the help of mock classes to avoid expensive overhead.
+
+    '''
+    class MockSocket():
+        '''A mock socket used instead of standard socket objects.'''
+        def __init__(self):
+            self.ex_on_send = None # raised from send() if not None
+            self.recv_result = b'test' # dummy data or exception
+            self.blockings = [] # history of setblocking() params
+        def setblocking(self, on):
+            self.blockings.append(on)
+        def send(self, data):
+            if self.ex_on_send is not None:
+                raise self.ex_on_send
+            return 10           # arbitrary choice
+        def recv(self, len):
+            if isinstance(self.recv_result, Exception):
+                raise self.recv_result
+            ret = self.recv_result
+            self.recv_result = b'' # if called again, return empty data
+            return ret
+        def fileno(self):
+            return 42           # arbitrary choice
+
+    class LoggerWrapper():
+        '''A simple wrapper of logger to inspect log messages.'''
+        def __init__(self, logger):
+            self.error_called = 0
+            self.warn_called = 0
+            self.debug_called = 0
+            self.orig_logger = logger
+        def error(self, *args):
+            self.error_called += 1
+            self.orig_logger.error(*args)
+        def warn(self, *args):
+            self.warn_called += 1
+            self.orig_logger.warn(*args)
+        def debug(self, *args):
+            self.debug_called += 1
+            self.orig_logger.debug(*args)
+
+    def mock_kill_socket(self, fileno, sock):
+        '''A replacement of MsgQ.kill_socket method for inspection.'''
+        self.__killed_socket = (fileno, sock)
+        if fileno in self.__msgq.sockets:
+            del self.__msgq.sockets[fileno]
+
+    def setUp(self):
+        self.__msgq = MsgQ()
+        self.__msgq.kill_socket = self.mock_kill_socket
+        self.__sock = self.MockSocket()
+        self.__data = b'dummy'
+        self.__msgq.sockets[42] = self.__sock
+        self.__msgq.sendbuffs[42] = (None, b'testdata')
+        self.__sock_error = socket.error()
+        self.__killed_socket = None
+        self.__logger = self.LoggerWrapper(msgq.logger)
+        msgq.logger = self.__logger
+
+    def tearDown(self):
+        msgq.logger = self.__logger.orig_logger
+
+    def test_send_data(self):
+        # Successful case: _send_data() returns the hardcoded value, and
+        # setblocking() is called twice with the expected parameters
+        self.assertEqual(10, self.__msgq._send_data(self.__sock, self.__data))
+        self.assertEqual([0, 1], self.__sock.blockings)
+        self.assertIsNone(self.__killed_socket)
+
+    def test_send_data_interrupt(self):
+        '''send() is interruptted. send_data() returns 0, sock isn't killed.'''
+        expected_blockings = []
+        for eno in [errno.EAGAIN, errno.EWOULDBLOCK, errno.EINTR]:
+            self.__sock_error.errno = eno
+            self.__sock.ex_on_send = self.__sock_error
+            self.assertEqual(0, self.__msgq._send_data(self.__sock,
+                                                       self.__data))
+            expected_blockings.extend([0, 1])
+            self.assertEqual(expected_blockings, self.__sock.blockings)
+            self.assertIsNone(self.__killed_socket)
+
+    def test_send_data_error(self):
+        '''Unexpected error happens on send().  The socket is killed.
+
+        If the error is EPIPE, it's logged at the warn level; otherwise
+        an error message is logged.
+
+        '''
+        expected_blockings = []
+        expected_errors = 0
+        expected_warns = 0
+        for eno in [errno.EPIPE, errno.ECONNRESET, errno.ENOBUFS]:
+            self.__sock_error.errno = eno
+            self.__sock.ex_on_send = self.__sock_error
+            self.__killed_socket = None # clear any previuos value
+            self.assertEqual(None, self.__msgq._send_data(self.__sock,
+                                                          self.__data))
+            self.assertEqual((42, self.__sock), self.__killed_socket)
+            expected_blockings.extend([0, 1])
+            self.assertEqual(expected_blockings, self.__sock.blockings)
+
+            if eno == errno.EPIPE:
+                expected_warns += 1
+            else:
+                expected_errors += 1
+            self.assertEqual(expected_errors, self.__logger.error_called)
+            self.assertEqual(expected_warns, self.__logger.warn_called)
+
+    def test_process_fd_read_after_bad_write(self):
+        '''Check the specific case of write fail followed by read attempt.
+
+        The write failure results in kill_socket, then read shouldn't tried.
+
+        '''
+        self.__sock_error.errno = errno.EPIPE
+        self.__sock.ex_on_send = self.__sock_error
+        self.__msgq.process_socket = None # if called, trigger an exception
+        self.__msgq._process_fd(42, True, True, False) # shouldn't crash
+
+        # check the socket is deleted from the fileno=>sock dictionary
+        self.assertEqual({}, self.__msgq.sockets)
+
+    def test_process_fd_close_after_bad_write(self):
+        '''Similar to the previous, but for checking dup'ed kill attempt'''
+        self.__sock_error.errno = errno.EPIPE
+        self.__sock.ex_on_send = self.__sock_error
+        self.__msgq._process_fd(42, True, False, True) # shouldn't crash
+        self.assertEqual({}, self.__msgq.sockets)
+
+    def test_process_fd_writer_after_close(self):
+        '''Emulate a "writable" socket has been already closed and killed.'''
+        # This just shouldn't crash
+        self.__msgq._process_fd(4200, True, False, False)
+
+    def test_process_packet(self):
+        '''Check some failure cases in handling an incoming message.'''
+        expected_errors = 0
+        expected_debugs = 0
+
+        # if socket.recv() fails due to socket.error, it will be logged
+        # as error and the socket will be killed regardless of errno.
+        for eno in [errno.ENOBUFS, errno.ECONNRESET]:
+            self.__sock_error.errno = eno
+            self.__sock.recv_result = self.__sock_error
+            self.__killed_socket = None # clear any previuos value
+            self.__msgq.process_packet(42, self.__sock)
+            self.assertEqual((42, self.__sock), self.__killed_socket)
+            expected_errors += 1
+            self.assertEqual(expected_errors, self.__logger.error_called)
+            self.assertEqual(expected_debugs, self.__logger.debug_called)
+
+        # if socket.recv() returns empty data, the result depends on whether
+        # there's any preceding data; in the second case below, at least
+        # 6 bytes of data will be expected, and the second call to our faked
+        # recv() returns empty data.  In that case it will be logged as error.
+        for recv_data in [b'', b'short']:
+            self.__sock.recv_result = recv_data
+            self.__killed_socket = None
+            self.__msgq.process_packet(42, self.__sock)
+            self.assertEqual((42, self.__sock), self.__killed_socket)
+            if len(recv_data) == 0:
+                expected_debugs += 1
+            else:
+                expected_errors += 1
+            self.assertEqual(expected_errors, self.__logger.error_called)
+            self.assertEqual(expected_debugs, self.__logger.debug_called)
+
 if __name__ == '__main__':
     isc.log.resetUnitTestRootLogger()
     unittest.main()

+ 2 - 0
src/bin/stats/stats.py.in

@@ -696,3 +696,5 @@ if __name__ == "__main__":
         sys.exit(1)
     except KeyboardInterrupt as kie:
         logger.info(STATS_STOPPED_BY_KEYBOARD)
+
+    logger.info(STATS_EXITING)

+ 2 - 0
src/bin/stats/stats_httpd.py.in

@@ -631,3 +631,5 @@ if __name__ == "__main__":
         sys.exit(1)
     except KeyboardInterrupt as kie:
         logger.info(STATSHTTPD_STOPPED_BY_KEYBOARD)
+
+    logger.info(STATSHTTPD_EXITING)

+ 9 - 6
src/bin/stats/stats_httpd_messages.mes

@@ -32,10 +32,19 @@ address and port number.
 Debug message indicating that the stats-httpd module is disconnecting
 from the command and control bus.
 
+% STATSHTTPD_EXITING exiting
+The stats HTTP server is exiting.
+
 % STATSHTTPD_HANDLE_CONFIG reading configuration: %1
 The stats-httpd daemon has received new configuration data and will now
 process it. The (changed) data is printed.
 
+% STATSHTTPD_HTTPLOG %1 %2
+Debug HTTP log message. These are the messages logged by the http server
+instance. For most logs, the message shows HTTP client and query
+information like HTTP method, URI, and status code, but the http server
+can also log other information, such as extended status reports.
+
 % STATSHTTPD_RECEIVED_SHUTDOWN_COMMAND shutdown command received
 A shutdown command was sent to the stats-httpd module, and it will
 now shut down.
@@ -96,9 +105,3 @@ configuration is unknown. The new configuration is ignored, and an
 error is sent back. As possible cause is that there was an upgrade
 problem, and the stats-httpd version is out of sync with the rest of
 the system.
-
-% STATSHTTPD_HTTPLOG %1 %2
-Debug HTTP log message. These are the messages logged by the http server
-instance. For most logs, the message shows HTTP client and query
-information like HTTP method, URI, and status code, but the http server
-can also log other information, such as extended status reports.

+ 3 - 0
src/bin/stats/stats_messages.mes

@@ -24,6 +24,9 @@ The stats module was unable to connect to the BIND 10 command and
 control bus. A likely problem is that the message bus daemon
 (b10-msgq) is not running. The stats module will now shut down.
 
+% STATS_EXITING exiting
+The stats module process is exiting.
+
 % STATS_RECEIVED_INVALID_STATISTICS_DATA received invalid statistics data from %1
 Invalid statistics data has been received from the module while
 polling and it has been discarded.

+ 2 - 0
src/bin/xfrin/xfrin.py.in

@@ -1738,5 +1738,7 @@ def main(xfrin_class, use_signal=True):
     if xfrind:
         xfrind.shutdown()
 
+    logger.info(XFRIN_EXITING)
+
 if __name__ == '__main__':
     main(Xfrin)

+ 3 - 0
src/bin/xfrin/xfrin_messages.mes

@@ -60,6 +60,9 @@ error is given in the log message.
 There was an error opening a connection to the master. The error is
 shown in the log message.
 
+% XFRIN_EXITING exiting
+The xfrin daemon is exiting.
+
 % XFRIN_GOT_INCREMENTAL_RESP got incremental response for %1
 In an attempt of IXFR processing, the beginning SOA of the first difference
 (following the initial SOA that specified the final SOA for all the

+ 2 - 1
src/bin/xfrout/xfrout.py.in

@@ -1274,7 +1274,7 @@ if '__main__' == __name__:
         xfrout_server = XfroutServer()
         xfrout_server.run()
     except KeyboardInterrupt:
-        logger.INFO(XFROUT_STOPPED_BY_KEYBOARD)
+        logger.info(XFROUT_STOPPED_BY_KEYBOARD)
     except SessionError as e:
         logger.error(XFROUT_CC_SESSION_ERROR, str(e))
     except ModuleCCSessionError as e:
@@ -1287,3 +1287,4 @@ if '__main__' == __name__:
     if xfrout_server:
         xfrout_server.shutdown()
 
+    logger.info(XFROUT_EXITING)

+ 3 - 0
src/bin/xfrout/xfrout_messages.mes

@@ -32,6 +32,9 @@ configuration manager b10-cfgmgr is not running.
 The xfrout process encountered an error when installing the configuration at
 startup time.  Details of the error are included in the log message.
 
+% XFROUT_EXITING exiting
+The xfrout daemon is exiting.
+
 % XFROUT_FETCH_REQUEST_ERROR socket error while fetching a request from the auth daemon
 There was a socket error while contacting the b10-auth daemon to
 fetch a transfer request. The auth daemon may have shutdown.

+ 1 - 1
src/bin/zonemgr/zonemgr.py.in

@@ -714,4 +714,4 @@ if '__main__' == __name__:
     if zonemgrd and zonemgrd.running:
         zonemgrd.shutdown()
 
-    logger.debug(DBG_START_SHUT, ZONEMGR_SHUTDOWN)
+    logger.info(ZONEMGR_SHUTDOWN)

+ 1 - 1
src/bin/zonemgr/zonemgr_messages.mes

@@ -114,7 +114,7 @@ connecting to the command channel daemon.  The most usual cause of this
 problem is that the daemon is not running.
 
 % ZONEMGR_SHUTDOWN zone manager has shut down
-A debug message, output when the zone manager has shut down completely.
+The zone manager has shut down completely.
 
 % ZONEMGR_STARTED zonemgr started
 This informational message is output by zonemgr when all initialization

+ 4 - 0
src/lib/cc/cc_messages.mes

@@ -60,6 +60,10 @@ and its length (2 bytes) is counted in the total length.
 There should be data representing the length of message on the socket, but it
 is not there.
 
+% CC_LNAME_RECEIVED received local name: %1
+Debug message: the local module received its unique identifier (name)
+from msgq on completion of establishing the session with msgq.
+
 % CC_NO_MESSAGE no message ready to be received yet
 The program polled for incoming messages, but there was no message waiting.
 This is a debug message which may happen only after CC_GROUP_RECEIVE.

+ 1 - 0
src/lib/cc/session.cc

@@ -333,6 +333,7 @@ Session::establish(const char* socket_file) {
     recvmsg(routing, msg, false);
 
     impl_->lname_ = msg->get("lname")->stringValue();
+    LOG_DEBUG(logger, DBG_TRACE_DETAILED, CC_LNAME_RECEIVED).arg(impl_->lname_);
 
     // At this point there's no risk of resource leak.
     session_holder.clear();

+ 14 - 1
src/lib/python/isc/cc/Makefile.am

@@ -1,6 +1,19 @@
 SUBDIRS = . tests
 
-python_PYTHON =	__init__.py data.py session.py message.py
+python_PYTHON =	__init__.py data.py session.py message.py logger.py
+BUILT_SOURCES = $(PYTHON_LOGMSGPKG_DIR)/work/pycc_messages.py
+nodist_pylogmessage_PYTHON = $(PYTHON_LOGMSGPKG_DIR)/work/pycc_messages.py
+pylogmessagedir = $(pyexecdir)/isc/log_messages/
+
+CLEANFILES = $(PYTHON_LOGMSGPKG_DIR)/work/pycc_messages.py
+CLEANFILES += $(PYTHON_LOGMSGPKG_DIR)/work/pycc_messages.pyc
+
+EXTRA_DIST = pycc_messages.mes
+
+# Define rule to build logging source files from message file
+$(PYTHON_LOGMSGPKG_DIR)/work/pycc_messages.py: pycc_messages.mes
+	$(top_builddir)/src/lib/log/compiler/message \
+		-d $(PYTHON_LOGMSGPKG_DIR)/work -p $(srcdir)/pycc_messages.mes
 
 pythondir = $(pyexecdir)/isc/cc
 

+ 26 - 0
src/lib/python/isc/cc/logger.py

@@ -0,0 +1,26 @@
+# Copyright (C) 2013  Internet Systems Consortium.
+#
+# Permission to use, copy, modify, and distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SYSTEMS CONSORTIUM
+# DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
+# INTERNET SYSTEMS CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT,
+# INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
+# FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+# NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+# WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+""" This is a logging utility module for other modules of the cc library
+package.
+
+"""
+
+import isc.log
+
+# C++ version of the CC module uses 'cc'; using the same name does not
+# necessarily cause disruption, but we use a different name to minimize
+# possible confusion.
+logger = isc.log.Logger('pycc')

+ 20 - 0
src/lib/python/isc/cc/pycc_messages.mes

@@ -0,0 +1,20 @@
+# Copyright (C) 2013  Internet Systems Consortium, Inc. ("ISC")
+#
+# Permission to use, copy, modify, and/or distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+# AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+# INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+# LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+# PERFORMANCE OF THIS SOFTWARE.
+
+# No namespace declaration - these constants go in the global namespace
+# of the libddns_messages python module.
+
+% PYCC_LNAME_RECEIVED received local name: %1
+Debug message: the local module received its unique identifier (name)
+from msgq on completion of establishing the session with msgq.

+ 5 - 0
src/lib/python/isc/cc/session.py

@@ -22,6 +22,9 @@ import threading
 import bind10_config
 
 import isc.cc.message
+import isc.log
+from isc.cc.logger import logger
+from isc.log_messages.pycc_messages import *
 
 class ProtocolError(Exception): pass
 class NetworkError(Exception): pass
@@ -60,6 +63,8 @@ class Session:
             self._lname = msg["lname"]
             if not self._lname:
                 raise ProtocolError("Could not get local name")
+            logger.debug(logger.DBGLVL_TRACE_BASIC, PYCC_LNAME_RECEIVED,
+                         self._lname)
         except socket.error as se:
                 raise SessionError(se)
 

+ 2 - 1
src/lib/python/isc/config/cfgmgr.py

@@ -593,7 +593,7 @@ class ConfigManager:
     def run(self):
         """Runs the configuration manager."""
         self.running = True
-        while (self.running):
+        while self.running:
             # we just wait eternally for any command here, so disable
             # timeouts for this specific recv
             self.cc.set_timeout(0)
@@ -608,3 +608,4 @@ class ConfigManager:
                 # Only respond if there actually is something to respond with
                 if answer is not None:
                     self.cc.group_reply(env, answer)
+        logger.info(CFGMGR_STOPPED_BY_COMMAND)

+ 3 - 0
src/lib/python/isc/config/cfgmgr_messages.mes

@@ -71,6 +71,9 @@ error is given. The most likely cause is that the system does not have
 write access to the configuration database file. The updated
 configuration is not stored.
 
+% CFGMGR_STOPPED_BY_COMMAND received shutdown command, shutting down
+The configuration manager received a shutdown command, and is exiting.
+
 % CFGMGR_STOPPED_BY_KEYBOARD keyboard interrupt, shutting down
 There was a keyboard interrupt signal to stop the cfgmgr daemon. The
 daemon will now shut down.

+ 2 - 0
src/lib/python/isc/log_messages/Makefile.am

@@ -18,6 +18,7 @@ EXTRA_DIST += loadzone_messages.py
 EXTRA_DIST += server_common_messages.py
 EXTRA_DIST += dbutil_messages.py
 EXTRA_DIST += msgq_messages.py
+EXTRA_DIST += pycc_messages.py
 
 CLEANFILES = __init__.pyc
 CLEANFILES += init_messages.pyc
@@ -37,6 +38,7 @@ CLEANFILES += loadzone_messages.pyc
 CLEANFILES += server_common_messages.pyc
 CLEANFILES += dbutil_messages.pyc
 CLEANFILES += msgq_messages.pyc
+CLEANFILES += pycc_messages.pyc
 
 CLEANDIRS = __pycache__
 

+ 1 - 0
src/lib/python/isc/log_messages/pycc_messages.py

@@ -0,0 +1 @@
+from work.pycc_messages import *