Browse Source

[2617] reorder log message; some more comments and doc

JINMEI Tatuya 12 years ago
parent
commit
8431fb8b25
3 changed files with 62 additions and 26 deletions
  1. 27 1
      src/bin/msgq/msgq.py.in
  2. 25 25
      src/bin/msgq/msgq_messages.mes
  3. 10 0
      src/bin/msgq/tests/msgq_test.py

+ 27 - 1
src/bin/msgq/msgq.py.in

@@ -71,6 +71,15 @@ SPECFILE_LOCATION = SPECFILE_PATH + "/msgq.spec"
 class MsgQReceiveError(Exception): pass
 
 class MsgQCloseOnReceive(Exception):
+    '''Exception raised when reading data from a socket results in "shutdown.
+
+    This can be either getting 0-length data or via ECONNRESET socket.error
+    exception.  This class holds whether it happens in the middle of reading
+    (i.e. after reading some) via partial_read parameter, which is set to True
+    if and only if so.  This will be used by an upper layer cathing the
+    exception to distinguish severity of the event.
+
+    "'''
     def __init__(self, reason, partial_read):
         self.partial_read = partial_read
         self.__reason = reason
@@ -328,8 +337,12 @@ class MsgQ:
 
     def kill_socket(self, fd, sock):
         """Fully close down the socket."""
+        # Unregister events on the socket.  Note that we don't have to do
+        # this for kqueue because the registered events are automatically
+        # deleted when the corresponding socket is closed.
         if self.poller:
             self.poller.unregister(sock)
+
         self.subs.unsubscribe_all(sock)
         lname = [ k for k, v in self.lnames.items() if v == sock ][0]
         del self.lnames[lname]
@@ -341,11 +354,20 @@ class MsgQ:
 
     def __getbytes(self, fd, sock, length, continued):
         """Get exactly the requested bytes, or raise an exception if
-           EOF."""
+           EOF.
+
+           continued is set to True if this method is called to complete
+           already read data.
+           """
         received = b''
         while len(received) < length:
             try:
                 data = sock.recv(length - len(received))
+
+            # If the remote client has closed the socket there seems to be
+            # two possible cases: getting ECONNRESET or receiving empty data.
+            # These cases are possible in normal operation, so we report them
+            # using MsgQCloseOnReceive.
             except socket.error as err:
                 if err.errno == errno.ECONNRESET:
                     raise MsgQCloseOnReceive(str(err), continued)
@@ -382,6 +404,10 @@ class MsgQ:
         try:
             routing, data = self.read_packet(fd, sock)
         except (MsgQReceiveError, MsgQCloseOnReceive) as err:
+            # If it's MsgQCloseOnReceive and that happens without reading
+            # any data, it basically means the remote clinet has closed the
+            # socket, so we log it as debug information.  Otherwise, it's
+            # a somewhat unexpected event, so we consider it an "error".
             if isinstance(err, MsgQCloseOnReceive) and not err.partial_read:
                 logger.debug(TRACE_BASIC, MSGQ_CLOSE_ON_RECV, fd)
             else:

+ 25 - 25
src/bin/msgq/msgq_messages.mes

@@ -23,6 +23,31 @@
 This is a debug message. The message queue has little bit of special handling
 for the configuration manager. This special handling is happening now.
 
+% MSGQ_CLOSE_ON_RECV Reading from socket canceled as it's closed: FD=%1
+A debug message.  The msgq daemon was notified of a read event on a
+socket, but its initial read operation failed because the remote
+client has closed its socket.  This is possible in a normal operation
+when a module shuts down.
+
+% MSGQ_CLOSE_ON_SEND Sending to socket failed as already closed (okay to ignore on shutdown): FD=%1
+The msgq daemon tries to send some data to a client module, but it
+failed because the socket has been closed.  This normally means the
+client terminates (for some reason - either gracefully or as a crash)
+while other modules try to send a message to the terminated module.
+Since msgq doesn't keep track of the status of client modules, this
+can happen and is not really an error for msgq; however, it can still
+be an unexpected event for the BIND 10 system as a whole in that this
+particular message is lost, so it's logged as a warning.  If this
+message is logged for a running BIND 10 system, it's suggested to
+check other log messages; there may be an error from other modules
+reporting a missing response message.  One common, less critical case
+where this message is logged is during shutdown.  The ordering of
+process shutdown is basically arbitrary at this moment, so it's
+possible that some module tries to send a "quitting" message to some
+other module but the latter has already shut down.  Such cases are
+generally non critical, but you may want to check other possible error
+messages.
+
 % MSGQ_COMMAND Running command %1 with arguments %2
 Debug message. The message queue received a command and it is running it.
 
@@ -83,25 +108,6 @@ Debug message. This message includes the whole routing header of a packet.
 There was a low-level error when sending data to a socket. The error is logged
 and the corresponding socket is dropped.
 
-% MSGQ_CLOSE_ON_SEND Sending to socket failed as already closed (okay to ignore on shutdown): FD=%1
-The msgq daemon tries to send some data to a client module, but it
-failed because the socket has been closed.  This normally means the
-client terminates (for some reason - either gracefully or as a crash)
-while other modules try to send a message to the terminated module.
-Since msgq doesn't keep track of the status of client modules, this
-can happen and is not really an error for msgq; however, it can still
-be an unexpected event for the BIND 10 system as a whole in that this
-particular message is lost, so it's logged as a warning.  If this
-message is logged for a running BIND 10 system, it's suggested to
-check other log messages; there may be an error from other modules
-reporting a missing response message.  One common, less critical case
-where this message is logged is during shutdown.  The ordering of
-process shutdown is basically arbitrary at this moment, so it's
-possible that some module tries to send a "quitting" message to some
-other module but the latter has already shut down.  Such cases are
-generally non critical, but you may want to check other possible error
-messages.
-
 % MSGQ_SHUTDOWN Stopping Msgq
 Debug message. The message queue is shutting down.
 
@@ -127,9 +133,3 @@ data structure.
 % MSGQ_SUBS_NEW_TARGET Creating new target for subscription to group '%1' for instance '%2'
 Debug message. Creating a new subscription. Also creating a new data structure
 to hold it.
-
-% MSGQ_CLOSE_ON_RECV Reading from socket canceled as it's closed: FD=%1
-A debug message.  The msgq daemon was notified of a read event on a
-socket, but its initial read operation failed because the remote
-client has closed its socket.  This is possible in a normal operation
-when a module shuts down.

+ 10 - 0
src/bin/msgq/tests/msgq_test.py

@@ -659,6 +659,7 @@ class SocketTests(unittest.TestCase):
         for eno in [errno.EPIPE, errno.ECONNRESET, errno.ENOBUFS]:
             self.__sock_error.errno = eno
             self.__sock.ex_on_send = self.__sock_error
+            self.__killed_socket = None # clear any previuos value
             self.assertEqual(None, self.__msgq._send_data(self.__sock,
                                                           self.__data))
             self.assertEqual((42, self.__sock), self.__killed_socket)
@@ -703,9 +704,13 @@ class SocketTests(unittest.TestCase):
         expected_errors = 0
         expected_debugs = 0
 
+        # if socket.recv() fails due to socket.error, it will be logged
+        # as error or debug depending on the errno.  In either case the socket
+        # will be killed.
         for eno in [errno.ENOBUFS, errno.ECONNRESET]:
             self.__sock_error.errno = eno
             self.__sock.recv_result = self.__sock_error
+            self.__killed_socket = None # clear any previuos value
             self.__msgq.process_packet(42, self.__sock)
             self.assertEqual((42, self.__sock), self.__killed_socket)
             if eno == errno.ENOBUFS:
@@ -715,8 +720,13 @@ class SocketTests(unittest.TestCase):
             self.assertEqual(expected_errors, self.__logger.error_called)
             self.assertEqual(expected_debugs, self.__logger.debug_called)
 
+        # if socket.recv() returns empty data, the result depends on whether
+        # there's any preceding data; in the second case below, at least
+        # 6 bytes of data will be expected, and the second call to our faked
+        # recv() returns empty data.  In that case it will logged as error.
         for recv_data in [b'', b'short']:
             self.__sock.recv_result = recv_data
+            self.__killed_socket = None
             self.__msgq.process_packet(42, self.__sock)
             self.assertEqual((42, self.__sock), self.__killed_socket)
             if len(recv_data) == 0: