Browse Source

[master] Merge branch 'trac988'

JINMEI Tatuya 13 years ago
parent
commit
95a03bbefb

+ 48 - 0
src/bin/xfrout/tests/xfrout_test.py.in

@@ -60,6 +60,9 @@ class MySocket():
         self.sendqueue.extend(data);
         self.sendqueue.extend(data);
         return len(data)
         return len(data)
 
 
+    def fileno(self):
+        return 42               # simply return a constant dummy value
+
     def readsent(self):
     def readsent(self):
         if len(self.sendqueue) >= 2:
         if len(self.sendqueue) >= 2:
             size = 2 + struct.unpack("!H", self.sendqueue[:2])[0]
             size = 2 + struct.unpack("!H", self.sendqueue[:2])[0]
@@ -1155,6 +1158,15 @@ class TestUnixSockServer(unittest.TestCase):
     def setUp(self):
     def setUp(self):
         self.write_sock, self.read_sock = socket.socketpair()
         self.write_sock, self.read_sock = socket.socketpair()
         self.unix = MyUnixSockServer()
         self.unix = MyUnixSockServer()
+        # Some test below modify these module-wide attributes.  We'll need
+        # to restore them at the end of each test, so we remember them here.
+        self.__select_bak = xfrout.select.select
+        self.__recv_fd_back = xfrout.recv_fd
+
+    def tearDown(self):
+        # Restore possibly faked module-wide attributes.
+        xfrout.select.select = self.__select_bak
+        xfrout.recv_fd = self.__recv_fd_back
 
 
     def test_tsig_keyring(self):
     def test_tsig_keyring(self):
         """
         """
@@ -1410,6 +1422,42 @@ class TestUnixSockServer(unittest.TestCase):
         sys.stdout = old_stdout
         sys.stdout = old_stdout
         os.rmdir(dir_name)
         os.rmdir(dir_name)
 
 
+    def __fake_select(self, r, w, e):
+        '''select emulator used in select_loop_fail test.'''
+        # This simplified faked function assumes to be called at most once,
+        # and in that case just return a pre-configured "readable" sockets.
+        if self.__select_count > 0:
+            raise RuntimeError('select called unexpected number of times')
+        self.__select_count += 1
+        return (self.__select_return_redable, [], [])
+
+    def test_select_loop_fail(self):
+        '''Check failure events in the main loop.'''
+        # setup faked select() environments
+        self.unix._read_sock = MySocket(socket.AF_INET6, socket.SOCK_STREAM)
+        xfrout.select.select = self.__fake_select
+        self.__select_return_redable = [MySocket(socket.AF_INET6,
+                                                 socket.SOCK_STREAM)]
+
+        # Check that loop terminates if recv_fd() fails.
+        for ret_code in [-1, FD_SYSTEM_ERROR]:
+            # fake recv_fd so it returns the faked failure code.
+            xfrout.recv_fd = lambda fileno: ret_code
+
+            # reset the counter, go to the loop.
+            self.__select_count = 0
+            self.unix._select_loop(self.__select_return_redable[0])
+            # select should have been called exactly once.
+            self.assertEqual(1, self.__select_count)
+
+        # Next, we test the case where recf_fd succeeds but receiving the
+        # request fails.
+        self.__select_count = 0
+        xfrout.recv_fd = lambda fileno: 1
+        self.unix._receive_query_message = lambda fd: None
+        self.unix._select_loop(self.__select_return_redable[0])
+        self.assertEqual(1, self.__select_count)
+
 class TestInitialization(unittest.TestCase):
 class TestInitialization(unittest.TestCase):
     def setEnv(self, name, value):
     def setEnv(self, name, value):
         if value is None:
         if value is None:

+ 34 - 17
src/bin/xfrout/xfrout.py.in

@@ -678,30 +678,40 @@ class UnixSockServer(socketserver_mixin.NoPollMixIn,
         except socket.error:
         except socket.error:
             logger.error(XFROUT_FETCH_REQUEST_ERROR)
             logger.error(XFROUT_FETCH_REQUEST_ERROR)
             return
             return
+        self._select_loop(request)
+
+    def _select_loop(self, request_sock):
+        '''Main loop for a single session between xfrout and auth.
+
+        This is a dedicated subroutine of handle_request(), but is defined
+        as a separate "protected" method for the convenience of tests.
+        '''
 
 
         # Check self._shutdown_event to ensure the real shutdown comes.
         # Check self._shutdown_event to ensure the real shutdown comes.
         # Linux could trigger a spurious readable event on the _read_sock
         # Linux could trigger a spurious readable event on the _read_sock
         # due to a bug, so we need perform a double check.
         # due to a bug, so we need perform a double check.
         while not self._shutdown_event.is_set(): # Check if xfrout is shutdown
         while not self._shutdown_event.is_set(): # Check if xfrout is shutdown
             try:
             try:
-                (rlist, wlist, xlist) = select.select([self._read_sock, request], [], [])
+                (rlist, wlist, xlist) = select.select([self._read_sock,
+                                                       request_sock], [], [])
             except select.error as e:
             except select.error as e:
                 if e.args[0] == errno.EINTR:
                 if e.args[0] == errno.EINTR:
                     (rlist, wlist, xlist) = ([], [], [])
                     (rlist, wlist, xlist) = ([], [], [])
                     continue
                     continue
                 else:
                 else:
-                    logger.error(XFROUT_SOCKET_SELECT_ERROR, str(e))
+                    logger.error(XFROUT_SOCKET_SELECT_ERROR, e)
                     break
                     break
 
 
-            # self.server._shutdown_event will be set by now, if it is not a false
-            # alarm
+            # self.server._shutdown_event will be set by now, if it is not a
+            # false alarm
             if self._read_sock in rlist:
             if self._read_sock in rlist:
                 continue
                 continue
 
 
             try:
             try:
-                self.process_request(request)
+                if not self.process_request(request_sock):
+                    break
             except Exception as pre:
             except Exception as pre:
-                logger.error(XFROUT_PROCESS_REQUEST_ERROR, str(pre))
+                logger.error(XFROUT_PROCESS_REQUEST_ERROR, pre)
                 break
                 break
 
 
     def _handle_request_noblock(self):
     def _handle_request_noblock(self):
@@ -713,26 +723,33 @@ class UnixSockServer(socketserver_mixin.NoPollMixIn,
 
 
     def process_request(self, request):
     def process_request(self, request):
         """Receive socket fd and query message from auth, then
         """Receive socket fd and query message from auth, then
-        start a new thread to process the request."""
+        start a new thread to process the request.
+
+        Return: True if everything is okay; otherwise False, in which case
+        the calling thread will terminate.
+
+        """
         sock_fd = recv_fd(request.fileno())
         sock_fd = recv_fd(request.fileno())
         if sock_fd < 0:
         if sock_fd < 0:
-            # This may happen when one xfrout process try to connect to
-            # xfrout unix socket server, to check whether there is another
-            # xfrout running.
-            if sock_fd == FD_SYSTEM_ERROR:
-                logger.error(XFROUT_RECEIVE_FILE_DESCRIPTOR_ERROR)
-            return
+            logger.warn(XFROUT_RECEIVE_FILE_DESCRIPTOR_ERROR)
+            return False
 
 
-        # receive request msg
+        # receive request msg.  If it fails we simply terminate the thread;
+        # it might be possible to recover from this state, but it's more likely
+        # that auth and xfrout are in inconsistent states.  So it will make
+        # more sense to restart in a new session.
         request_data = self._receive_query_message(request)
         request_data = self._receive_query_message(request)
-        if not request_data:
-            return
+        if request_data is None:
+            # The specific exception type doesn't matter so we use session
+            # error.
+            raise XfroutSessionError('Failed to get complete xfr request')
 
 
         t = threading.Thread(target=self.finish_request,
         t = threading.Thread(target=self.finish_request,
-                             args = (sock_fd, request_data))
+                             args=(sock_fd, request_data))
         if self.daemon_threads:
         if self.daemon_threads:
             t.daemon = True
             t.daemon = True
         t.start()
         t.start()
+        return True
 
 
     def _guess_remote(self, sock_fd):
     def _guess_remote(self, sock_fd):
         """Guess remote address and port of the socket.
         """Guess remote address and port of the socket.

+ 19 - 8
src/bin/xfrout/xfrout_messages.mes

@@ -115,11 +115,15 @@ In general, this should only occur for unexpected problems like
 memory allocation failures, as the query should already have been
 memory allocation failures, as the query should already have been
 parsed by the b10-auth daemon, before it was passed here.
 parsed by the b10-auth daemon, before it was passed here.
 
 
-% XFROUT_PROCESS_REQUEST_ERROR error processing transfer request: %2
-There was an error processing a transfer request. The error is included
-in the log message, but at this point no specific information other
-than that could be given. This points to incomplete exception handling
-in the code.
+% XFROUT_PROCESS_REQUEST_ERROR error processing transfer request: %1
+There was an error in receiving a transfer request from b10-auth.
+This is generally an unexpected event, but is possible when, for
+example, b10-auth terminates in the middle of forwarding the request.
+When this happens it's unlikely to be recoverable with the same
+communication session with b10-auth, so b10-xfrout drops it and
+waits for a new session.  In any case, this error indicates that
+there's something very wrong in the system, so it's advisable to check
+the over all status of the BIND 10 system.
 
 
 % XFROUT_QUERY_DROPPED %1 client %2: request to transfer %3 dropped
 % XFROUT_QUERY_DROPPED %1 client %2: request to transfer %3 dropped
 The xfrout process silently dropped a request to transfer zone to
 The xfrout process silently dropped a request to transfer zone to
@@ -149,9 +153,16 @@ and will now shut down.
 
 
 % XFROUT_RECEIVE_FILE_DESCRIPTOR_ERROR error receiving the file descriptor for an XFR connection
 % XFROUT_RECEIVE_FILE_DESCRIPTOR_ERROR error receiving the file descriptor for an XFR connection
 There was an error receiving the file descriptor for the transfer
 There was an error receiving the file descriptor for the transfer
-request. Normally, the request is received by b10-auth, and passed on
-to the xfrout daemon, so it can answer directly. However, there was a
-problem receiving this file descriptor. The request will be ignored.
+request from b10-auth.  There can be several reasons for this, but
+the most likely cause is that b10-auth terminates for some reason
+(maybe it's a bug of b10-auth, maybe it's an intentional restart by
+the administrator), so depending on how this happens it may or may not
+be a serious error.  But in any case this is not expected to happen
+frequently, and it's advisable to figure out how this happened if
+this message is logged.  Even if this error happens xfrout will reset
+its internal state and will keep receiving further requests.  So
+if it's just a temporary restart of b10-auth the administrator does
+not have to do anything.
 
 
 % XFROUT_REMOVE_OLD_UNIX_SOCKET_FILE_ERROR error removing unix socket file %1: %2
 % XFROUT_REMOVE_OLD_UNIX_SOCKET_FILE_ERROR error removing unix socket file %1: %2
 The unix socket file xfrout needs for contact with the auth daemon
 The unix socket file xfrout needs for contact with the auth daemon