Browse Source

[master] Merge branch 'trac988'

JINMEI Tatuya 13 years ago
parent
commit
95a03bbefb
3 changed files with 101 additions and 25 deletions
  1. 48 0
      src/bin/xfrout/tests/xfrout_test.py.in
  2. 34 17
      src/bin/xfrout/xfrout.py.in
  3. 19 8
      src/bin/xfrout/xfrout_messages.mes

+ 48 - 0
src/bin/xfrout/tests/xfrout_test.py.in

@@ -60,6 +60,9 @@ class MySocket():
         self.sendqueue.extend(data);
         return len(data)
 
+    def fileno(self):
+        return 42               # simply return a constant dummy value
+
     def readsent(self):
         if len(self.sendqueue) >= 2:
             size = 2 + struct.unpack("!H", self.sendqueue[:2])[0]
@@ -1155,6 +1158,15 @@ class TestUnixSockServer(unittest.TestCase):
     def setUp(self):
         self.write_sock, self.read_sock = socket.socketpair()
         self.unix = MyUnixSockServer()
+        # Some tests below modify these module-wide attributes.  We'll need
+        # to restore them at the end of each test, so we remember them here.
+        self.__select_bak = xfrout.select.select
+        self.__recv_fd_back = xfrout.recv_fd
+
+    def tearDown(self):
+        # Restore possibly faked module-wide attributes.
+        xfrout.select.select = self.__select_bak
+        xfrout.recv_fd = self.__recv_fd_back
 
     def test_tsig_keyring(self):
         """
@@ -1410,6 +1422,42 @@ class TestUnixSockServer(unittest.TestCase):
         sys.stdout = old_stdout
         os.rmdir(dir_name)
 
+    def __fake_select(self, r, w, e):
+        '''select emulator used in select_loop_fail test.'''
+        # This simplified fake function expects to be called at most once,
+        # and in that case just returns the pre-configured "readable" sockets.
+        if self.__select_count > 0:
+            raise RuntimeError('select called unexpected number of times')
+        self.__select_count += 1
+        return (self.__select_return_redable, [], [])
+
+    def test_select_loop_fail(self):
+        '''Check failure events in the main loop.'''
+        # setup faked select() environments
+        self.unix._read_sock = MySocket(socket.AF_INET6, socket.SOCK_STREAM)
+        xfrout.select.select = self.__fake_select
+        self.__select_return_redable = [MySocket(socket.AF_INET6,
+                                                 socket.SOCK_STREAM)]
+
+        # Check that loop terminates if recv_fd() fails.
+        for ret_code in [-1, FD_SYSTEM_ERROR]:
+            # fake recv_fd so it returns the faked failure code.
+            xfrout.recv_fd = lambda fileno: ret_code
+
+            # reset the counter, go to the loop.
+            self.__select_count = 0
+            self.unix._select_loop(self.__select_return_redable[0])
+            # select should have been called exactly once.
+            self.assertEqual(1, self.__select_count)
+
+        # Next, we test the case where recv_fd succeeds but receiving the
+        # request fails.
+        self.__select_count = 0
+        xfrout.recv_fd = lambda fileno: 1
+        self.unix._receive_query_message = lambda fd: None
+        self.unix._select_loop(self.__select_return_redable[0])
+        self.assertEqual(1, self.__select_count)
+
 class TestInitialization(unittest.TestCase):
     def setEnv(self, name, value):
         if value is None:

+ 34 - 17
src/bin/xfrout/xfrout.py.in

@@ -678,30 +678,40 @@ class UnixSockServer(socketserver_mixin.NoPollMixIn,
         except socket.error:
             logger.error(XFROUT_FETCH_REQUEST_ERROR)
             return
+        self._select_loop(request)
+
+    def _select_loop(self, request_sock):
+        '''Main loop for a single session between xfrout and auth.
+
+        This is a dedicated subroutine of handle_request(), but is defined
+        as a separate "protected" method for the convenience of tests.
+        '''
 
         # Check self._shutdown_event to ensure the real shutdown comes.
         # Linux could trigger a spurious readable event on the _read_sock
         # due to a bug, so we need perform a double check.
         while not self._shutdown_event.is_set(): # Check if xfrout is shutdown
             try:
-                (rlist, wlist, xlist) = select.select([self._read_sock, request], [], [])
+                (rlist, wlist, xlist) = select.select([self._read_sock,
+                                                       request_sock], [], [])
             except select.error as e:
                 if e.args[0] == errno.EINTR:
                     (rlist, wlist, xlist) = ([], [], [])
                     continue
                 else:
-                    logger.error(XFROUT_SOCKET_SELECT_ERROR, str(e))
+                    logger.error(XFROUT_SOCKET_SELECT_ERROR, e)
                     break
 
-            # self.server._shutdown_event will be set by now, if it is not a false
-            # alarm
+            # self.server._shutdown_event will be set by now, if it is not a
+            # false alarm
             if self._read_sock in rlist:
                 continue
 
             try:
-                self.process_request(request)
+                if not self.process_request(request_sock):
+                    break
             except Exception as pre:
-                logger.error(XFROUT_PROCESS_REQUEST_ERROR, str(pre))
+                logger.error(XFROUT_PROCESS_REQUEST_ERROR, pre)
                 break
 
     def _handle_request_noblock(self):
@@ -713,26 +723,33 @@ class UnixSockServer(socketserver_mixin.NoPollMixIn,
 
     def process_request(self, request):
         """Receive socket fd and query message from auth, then
-        start a new thread to process the request."""
+        start a new thread to process the request.
+
+        Return: True if everything is okay; otherwise False, in which case
+        the calling thread will terminate.
+
+        """
         sock_fd = recv_fd(request.fileno())
         if sock_fd < 0:
-            # This may happen when one xfrout process try to connect to
-            # xfrout unix socket server, to check whether there is another
-            # xfrout running.
-            if sock_fd == FD_SYSTEM_ERROR:
-                logger.error(XFROUT_RECEIVE_FILE_DESCRIPTOR_ERROR)
-            return
+            logger.warn(XFROUT_RECEIVE_FILE_DESCRIPTOR_ERROR)
+            return False
 
-        # receive request msg
+        # receive request msg.  If it fails we simply terminate the thread;
+        # it might be possible to recover from this state, but it's more likely
+        # that auth and xfrout are in inconsistent states.  So it will make
+        # more sense to restart in a new session.
         request_data = self._receive_query_message(request)
-        if not request_data:
-            return
+        if request_data is None:
+            # The specific exception type doesn't matter so we use session
+            # error.
+            raise XfroutSessionError('Failed to get complete xfr request')
 
         t = threading.Thread(target=self.finish_request,
-                             args = (sock_fd, request_data))
+                             args=(sock_fd, request_data))
         if self.daemon_threads:
             t.daemon = True
         t.start()
+        return True
 
     def _guess_remote(self, sock_fd):
         """Guess remote address and port of the socket.

+ 19 - 8
src/bin/xfrout/xfrout_messages.mes

@@ -115,11 +115,15 @@ In general, this should only occur for unexpected problems like
 memory allocation failures, as the query should already have been
 parsed by the b10-auth daemon, before it was passed here.
 
-% XFROUT_PROCESS_REQUEST_ERROR error processing transfer request: %2
-There was an error processing a transfer request. The error is included
-in the log message, but at this point no specific information other
-than that could be given. This points to incomplete exception handling
-in the code.
+% XFROUT_PROCESS_REQUEST_ERROR error processing transfer request: %1
+There was an error in receiving a transfer request from b10-auth.
+This is generally an unexpected event, but is possible when, for
+example, b10-auth terminates in the middle of forwarding the request.
+When this happens it's unlikely to be recoverable with the same
+communication session with b10-auth, so b10-xfrout drops it and
+waits for a new session.  In any case, this error indicates that
+there's something very wrong in the system, so it's advisable to check
+the overall status of the BIND 10 system.
 
 % XFROUT_QUERY_DROPPED %1 client %2: request to transfer %3 dropped
 The xfrout process silently dropped a request to transfer zone to
@@ -149,9 +153,16 @@ and will now shut down.
 
 % XFROUT_RECEIVE_FILE_DESCRIPTOR_ERROR error receiving the file descriptor for an XFR connection
 There was an error receiving the file descriptor for the transfer
-request. Normally, the request is received by b10-auth, and passed on
-to the xfrout daemon, so it can answer directly. However, there was a
-problem receiving this file descriptor. The request will be ignored.
+request from b10-auth.  There can be several reasons for this, but
+the most likely cause is that b10-auth terminates for some reason
+(maybe it's a bug in b10-auth, maybe it's an intentional restart by
+the administrator), so depending on how this happens it may or may not
+be a serious error.  But in any case this is not expected to happen
+frequently, and it's advisable to figure out how this happened if
+this message is logged.  Even if this error happens xfrout will reset
+its internal state and will keep receiving further requests.  So
+if it's just a temporary restart of b10-auth the administrator does
+not have to do anything.
 
 % XFROUT_REMOVE_OLD_UNIX_SOCKET_FILE_ERROR error removing unix socket file %1: %2
 The unix socket file xfrout needs for contact with the auth daemon