Browse Source

[2244] make sure restarting components only when they are in config.

this should fix the main problem of this ticket.
JINMEI Tatuya 12 years ago
parent
commit
1645116db4

+ 10 - 0
src/bin/bind10/bind10_messages.mes

@@ -295,3 +295,13 @@ the configuration manager to start up.  The total length of time Boss
 will wait for the configuration manager before reporting an error is
 will wait for the configuration manager before reporting an error is
 set with the command line --wait switch, which has a default value of
 set with the command line --wait switch, which has a default value of
 ten seconds.
 ten seconds.
+
+% BIND10_RESTART_COMPONENT_SKIPPED Skipped restarting a component %1
+The boss module tried to restart a component after it failed (crashed)
+unexpectedly, but the boss then found that the component had been removed
from its local configuration of components to run.  This is an unusual
+situation but can happen if the administrator removes the component from
+the configuration after the component's crash and before the restart time.
The boss module simply skipped restarting that module, and the whole system
+went back to the expected state (except that the crash itself is likely
+to be a bug).

+ 6 - 1
src/bin/bind10/bind10_src.py.in

@@ -771,7 +771,12 @@ class BoB:
         next_restart_time = None
         next_restart_time = None
         now = time.time()
         now = time.time()
         for component in self.components_to_restart:
         for component in self.components_to_restart:
-            if not component.restart(now):
+            # If the component was removed from the configurator since it was
+            # scheduled to restart, just ignore it.  The object will just be
+            # dropped here.
+            if not self._component_configurator.has_component(component):
+                logger.info(BIND10_RESTART_COMPONENT_SKIPPED, component.name())
+            elif not component.restart(now):
                 still_dead.append(component)
                 still_dead.append(component)
                 if next_restart_time is None or\
                 if next_restart_time is None or\
                    next_restart_time > component.get_restart_time():
                    next_restart_time > component.get_restart_time():

+ 35 - 0
src/bin/bind10/tests/bind10_test.py.in

@@ -929,7 +929,14 @@ class MockComponent:
         self.name = lambda: name
         self.name = lambda: name
         self.pid = lambda: pid
         self.pid = lambda: pid
         self.address = lambda: address
         self.address = lambda: address
+        self.restarted = False
 
 
+    def get_restart_time(self):
+        return 0                # arbitrary dummy value
+
+    def restart(self, now):
+        self.restarted = True
+        return True
 
 
 class TestBossCmd(unittest.TestCase):
 class TestBossCmd(unittest.TestCase):
     def test_ping(self):
     def test_ping(self):
@@ -1266,6 +1273,34 @@ class TestBossComponents(unittest.TestCase):
         bob.start_all_components()
         bob.start_all_components()
         self.__check_extended(self.__param)
         self.__check_extended(self.__param)
 
 
+    def __setup_restart(self, bob, component):
+        '''Common procedure for restarting a component used below.'''
+        bob.components_to_restart = { component }
+        component.restarted = False
+        bob.restart_processes()
+
+    def test_restart_processes(self):
+        '''Check some behavior on restarting processes.'''
+        bob = MockBob()
+        bob.runnable = True
+        component = MockComponent('test', 53)
+
+        # A component to be restarted will actually be restarted iff it's
+        # in the configurator's configuration.
+        # We bruteforce the configurator internal below; ugly, but the easiest
+        # way for the test.
+        bob._component_configurator._components['test'] = (None, component)
+        self.__setup_restart(bob, component)
+        self.assertTrue(component.restarted)
+        self.assertFalse(component in bob.components_to_restart)
+
+        # Remove the component from the configuration.  It won't be restarted
+        # even if scheduled, nor will remain in the to-be-restarted list.
+        del bob._component_configurator._components['test']
+        self.__setup_restart(bob, component)
+        self.assertFalse(component.restarted)
+        self.assertFalse(component in bob.components_to_restart)
+
 class SocketSrvTest(unittest.TestCase):
 class SocketSrvTest(unittest.TestCase):
     """
     """
     This tests some methods of boss related to the unix domain sockets used
     This tests some methods of boss related to the unix domain sockets used