Browse Source

[2617] log EPIPE at the warn level. added more detailed tests for send_data.

JINMEI Tatuya 12 years ago
parent
commit
73ca6b06d1
3 changed files with 134 additions and 12 deletions
  1. 17 12
      src/bin/msgq/msgq.py.in
  2. 19 0
      src/bin/msgq/msgq_messages.mes
  3. 98 0
      src/bin/msgq/tests/msgq_test.py

+ 17 - 12
src/bin/msgq/msgq.py.in

@@ -431,9 +431,12 @@ class MsgQ:
     def sendmsg(self, sock, env, msg = None):
         self.send_prepared_msg(sock, self.preparemsg(env, msg))
 
-    def __send_data(self, sock, data):
+    def _send_data(self, sock, data):
         """
-        Send a piece of data to the given socket.
+        Send a piece of data to the given socket.  This method is
+        essentially "private" to MsgQ, but defined as if it were "protected"
+        for easier access from tests.
+
         Parameters:
         sock: The socket to send to
         data: The list of bytes to send
@@ -449,15 +452,17 @@ class MsgQ:
             sock.setblocking(0)
             return sock.send(data)
         except socket.error as e:
-            if e.errno in [ errno.EAGAIN,
-                            errno.EWOULDBLOCK,
-                            errno.EINTR ]:
+            if e.errno in [ errno.EAGAIN, errno.EWOULDBLOCK, errno.EINTR ]:
                 return 0
-            elif e.errno in [ errno.EPIPE,
-                              errno.ECONNRESET,
-                              errno.ENOBUFS ]:
-                logger.error(MSGQ_SEND_ERR, sock.fileno(),
-                             errno.errorcode[e.errno])
+            elif e.errno in [ errno.EPIPE, errno.ECONNRESET, errno.ENOBUFS ]:
+                # EPIPE happens if the remote module has terminated by the time
+                # of this send; its severity can vary, but in many cases it
+                # shouldn't be critical, so we log it separately as a warning.
+                if e.errno == errno.EPIPE:
+                    logger.warn(MSGQ_CLOSE_ON_SEND, sock.fileno())
+                else:
+                    logger.error(MSGQ_SEND_ERR, sock.fileno(),
+                                 errno.errorcode[e.errno])
                 self.kill_socket(sock.fileno(), sock)
                 return None
             else:
@@ -472,7 +477,7 @@ class MsgQ:
         if fileno in self.sendbuffs:
             amount_sent = 0
         else:
-            amount_sent = self.__send_data(sock, msg)
+            amount_sent = self._send_data(sock, msg)
             if amount_sent is None:
                 # Socket has been killed, drop the send
                 return
@@ -501,7 +506,7 @@ class MsgQ:
         # Try to send some data from the buffer
         (_, msg) = self.sendbuffs[fileno]
         sock = self.sockets[fileno]
-        amount_sent = self.__send_data(sock, msg)
+        amount_sent = self._send_data(sock, msg)
         if amount_sent is not None:
             # Keep the rest
             msg = msg[amount_sent:]

+ 19 - 0
src/bin/msgq/msgq_messages.mes

@@ -84,6 +84,25 @@ Debug message. This message includes the whole routing header of a packet.
 There was a low-level error when sending data to a socket. The error is logged
 and the corresponding socket is dropped.
 
+% MSGQ_CLOSE_ON_SEND Sending to socket failed as already closed (okay to ignore on shutdown): FD=%1
+The msgq daemon tries to send some data to a client module, but it
+failed because the socket has been closed.  This normally means the
+client terminates (for some reason - either gracefully or as a crash)
+while other modules try to send a message to the terminated module.
+Since msgq doesn't keep track of the status of client modules, this
+can happen and is not really an error for msgq; however, it can still
+be an unexpected event for the BIND 10 system as a whole in that this
+particular message is lost, so it's logged as a warning.  If this
+message is logged for a running BIND 10 system, it's suggested to
+check other log messages; there may be an error from other modules
+reporting a missing response message.  One common, less critical case
+where this message is logged is during shutdown.  The ordering of
+process shutdown is basically arbitrary at this moment, so it's
+possible that some module tries to send a "quitting" message to some
+other module but the latter has already shut down.  Such cases are
+generally non critical, but you may want to check other possible error
+messages.
+
 % MSGQ_SHUTDOWN Stopping Msgq
 Debug message. The message queue is shutting down.
 

+ 98 - 0
src/bin/msgq/tests/msgq_test.py

@@ -1,3 +1,4 @@
+import msgq
 from msgq import SubscriptionManager, MsgQ
 
 import unittest
@@ -559,6 +560,103 @@ class ThreadTests(unittest.TestCase):
         test_thread.join(60)
         self.assertTrue(self.__result)
 
+class SocketTests(unittest.TestCase):
+    '''Test cases for micro behaviors related to socket operations.
+
+    Some cases are covered as part of other tests, but in this fixture
+    we check more details of specific method related socket operation,
+    with the help mock classes to avoid expensive overhead.
+
+    '''
+    class MockSocket():
+        '''A mock socket used instead of standard socket objects.'''
+        def __init__(self):
+            self.ex_on_send = None # raised from send() if not None
+            self.blockings = [] # history of setblocking() params
+        def setblocking(self, on):
+            self.blockings.append(on)
+        def send(self, data):
+            if self.ex_on_send is not None:
+                raise self.ex_on_send
+            return 10           # arbitrary choice
+        def fileno(self):
+            return 42           # arbitrary choice
+
+    class LoggerWrapper():
+        '''A simple wrapper of logger to inspect log messages.'''
+        def __init__(self, logger):
+            self.error_called = 0
+            self.warn_called = 0
+            self.orig_logger = logger
+        def error(self, id, fd_arg, sock_arg):
+            self.error_called += 1
+            self.orig_logger.error(id, fd_arg, sock_arg)
+        def warn(self, id, fd_arg):
+            self.warn_called += 1
+            self.orig_logger.warn(id, fd_arg)
+
+    def mock_kill_socket(self, fileno, sock):
+        '''A replacement of MsgQ.kill_socket method for inspection.'''
+        self.__killed_socket = (fileno, sock)
+
+    def setUp(self):
+        self.__msgq = MsgQ()
+        self.__msgq.kill_socket = self.mock_kill_socket
+        self.__sock = self.MockSocket()
+        self.__data = b'dummy'
+        self.__sock_error = socket.error()
+        self.__killed_socket = None
+        self.__logger = self.LoggerWrapper(msgq.logger)
+        msgq.logger = self.__logger
+
+    def tearDown(self):
+        msgq.logger = self.__logger.orig_logger
+
+    def test_send_data(self):
+        # Successful case: _send_data() returns the hardcoded value, and
+        # setblocking() is called twice with the expected parameters
+        self.assertEqual(10, self.__msgq._send_data(self.__sock, self.__data))
+        self.assertEqual([0, 1], self.__sock.blockings)
+        self.assertIsNone(self.__killed_socket)
+
+    def test_send_data_interrupt(self):
+        '''send() is interruptted. send_data() returns 0, sock isn't killed.'''
+        expected_blockings = []
+        for eno in [errno.EAGAIN, errno.EWOULDBLOCK, errno.EINTR]:
+            self.__sock_error.errno = eno
+            self.__sock.ex_on_send = self.__sock_error
+            self.assertEqual(0, self.__msgq._send_data(self.__sock,
+                                                       self.__data))
+            expected_blockings.extend([0, 1])
+            self.assertEqual(expected_blockings, self.__sock.blockings)
+            self.assertIsNone(self.__killed_socket)
+
+    def test_send_data_error(self):
+        '''Unexpected error happens on send().  The socket is killed.
+
+        If the error is EPIPE, it's logged at the warn level; otherwise
+        an error message is logged.
+
+        '''
+        expected_blockings = []
+        expected_errors = 0
+        expected_warns = 0
+        for eno in [errno.EPIPE, errno.ECONNRESET, errno.ENOBUFS]:
+            self.__sock_error.errno = eno
+            self.__sock.ex_on_send = self.__sock_error
+            self.assertEqual(None, self.__msgq._send_data(self.__sock,
+                                                          self.__data))
+            self.assertEqual((42, self.__sock), self.__killed_socket)
+            expected_blockings.extend([0, 1])
+            self.assertEqual(expected_blockings, self.__sock.blockings)
+
+            if eno == errno.EPIPE:
+                expected_warns += 1
+            else:
+                expected_errors += 1
+            self.assertEqual(expected_errors, self.__logger.error_called)
+            self.assertEqual(expected_warns, self.__logger.warn_called)
+
 if __name__ == '__main__':
     isc.log.resetUnitTestRootLogger()
     unittest.main()