Browse Source

[2244] make sure restarting components only when they are in config.

this should fix the main problem of this ticket.
JINMEI Tatuya 12 years ago
parent
commit
1645116db4

+ 10 - 0
src/bin/bind10/bind10_messages.mes

@@ -295,3 +295,13 @@ the configuration manager to start up.  The total length of time Boss
 will wait for the configuration manager before reporting an error is
 set with the command line --wait switch, which has a default value of
 ten seconds.
+
+% BIND10_RESTART_COMPONENT_SKIPPED Skipped restarting a component %1
+The boss module tried to restart a component after it failed (crashed)
+unexpectedly, but the boss then found that the component had been removed
+from its local configuration of components to run.  This is an unusal
+situation but can happen if the administrator removes the component from
+the configuration after the component's crash and before the restart time.
+The boss module simply skipped restarting that module, and the whole syste
+went back to the expected state (except that the crash itself is likely
+to be a bug).

+ 6 - 1
src/bin/bind10/bind10_src.py.in

@@ -771,7 +771,12 @@ class BoB:
         next_restart_time = None
         now = time.time()
         for component in self.components_to_restart:
-            if not component.restart(now):
+            # If the component was removed from the configurator between since
+            # scheduled to restart, just ignore it.  The object will just be
+            # dropped here.
+            if not self._component_configurator.has_component(component):
+                logger.info(BIND10_RESTART_COMPONENT_SKIPPED, component.name())
+            elif not component.restart(now):
                 still_dead.append(component)
                 if next_restart_time is None or\
                    next_restart_time > component.get_restart_time():

+ 35 - 0
src/bin/bind10/tests/bind10_test.py.in

@@ -929,7 +929,14 @@ class MockComponent:
         self.name = lambda: name
         self.pid = lambda: pid
         self.address = lambda: address
+        self.restarted = False
 
+    def get_restart_time(self):
+        return 0                # arbitrary dummy value
+
+    def restart(self, now):
+        self.restarted = True
+        return True
 
 class TestBossCmd(unittest.TestCase):
     def test_ping(self):
@@ -1266,6 +1273,34 @@ class TestBossComponents(unittest.TestCase):
         bob.start_all_components()
         self.__check_extended(self.__param)
 
+    def __setup_restart(self, bob, component):
+        '''Common procedure for restarting a component used below.'''
+        bob.components_to_restart = { component }
+        component.restarted = False
+        bob.restart_processes()
+
+    def test_restart_processes(self):
+        '''Check some behavior on restarting processes.'''
+        bob = MockBob()
+        bob.runnable = True
+        component = MockComponent('test', 53)
+
+        # A component to be restarted will actually be restarted iff it's
+        # in the configurator's configuration.
+        # We bruteforce the configurator internal below; ugly, but the easiest
+        # way for the test.
+        bob._component_configurator._components['test'] = (None, component)
+        self.__setup_restart(bob, component)
+        self.assertTrue(component.restarted)
+        self.assertFalse(component in bob.components_to_restart)
+
+        # Remove the component from the configuration.  It won't be restarted
+        # even if scheduled, nor will remain in the to-be-restarted list.
+        del bob._component_configurator._components['test']
+        self.__setup_restart(bob, component)
+        self.assertFalse(component.restarted)
+        self.assertFalse(component in bob.components_to_restart)
+
 class SocketSrvTest(unittest.TestCase):
     """
     This tests some methods of boss related to the unix domain sockets used