Browse Source

[213] Failures of core components

Michal 'vorner' Vaner 13 years ago
parent
commit
81bb03bbb0

+ 22 - 1
src/lib/python/isc/bind10/component.py

@@ -14,6 +14,7 @@
 # WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 
 from isc.log_messages.bind10_messages import *
+import time
 
 logger = isc.log.Logger("boss")
 
@@ -56,6 +57,11 @@ class Component:
             to restart it and keeps running.
         """
         self.__running = False
+        # Dead like really dead. No resurrection possible.
+        self.__dead = False
+        self.__kind = kind
+        self.__boss = boss
+
     def start(self):
         """
         Start the component for the first time or restart it. If you need to
@@ -64,16 +70,20 @@ class Component:
 
         If you try to start an already running component, it raises ValueError.
         """
+        if self.__dead:
+            raise ValueError("Can't resurrect already dead component")
         if self.running():
             raise ValueError("Can't start already running component")
         self.start_internal()
         self.__running = True
+
     def start_internal(self):
         """
         This method does the actual starting of a process. If you need to
         change the way the component is started, replace this method.
         """
         pass
+
     def stop(self):
         """
         Stop the component. If you need to modify the way a component is
@@ -87,12 +97,14 @@ class Component:
             raise ValueError("Can't stop a component which is not running")
         self.stop_internal()
         self.__running = False
+
     def stop_internal(self):
         """
         This is the method that does the actual stopping of a component.
         You can replace this method if you want a different way to do it.
         """
         pass
+
     def failed(self):
         """
         Notify the component it crashed. This will be called from boss object.
@@ -100,14 +112,23 @@ class Component:
         If you try to call failed on a component that is not running,
         a ValueError is raised.
         """
-        pass
+        self.failed_internal()
+        self.__running = False
+        if self.__kind == 'core':
+            self.__dead = True
+            self.__boss.shutdown(1)
+
     def failed_internal(self):
         """
         This method is called from failed. You can replace it if you need
         some specific behaviour when the component crashes. The default
         implementation is empty.
+
+        Do not raise exceptions from here, please. The proper shutdown
+        would not happen if you did.
         """
         pass
+
     def running(self):
         """
         Informs if the component is currently running. It assumes the failed

+ 82 - 10
src/lib/python/isc/bind10/tests/component_test.py

@@ -19,6 +19,7 @@ Tests for the bind10.component module
 
 import unittest
 import isc.log
+import time
 from isc.bind10.component import Component
 
 class ComponentTests(unittest.TestCase):
@@ -34,6 +35,8 @@ class ComponentTests(unittest.TestCase):
         self.__start_called = False
         self.__stop_called = False
         self.__failed_called = False
+        # Back up the time function, we may want to replace it with something
+        self.__orig_time = isc.bind10.component.time.time
 
     def shutdown(self, exitcode=0):
         """
@@ -41,6 +44,17 @@ class ComponentTests(unittest.TestCase):
         """
         self.__shutdown = True
         self.__exitcode = None
+        # Return the original time function
+        isc.bind10.component.time.time = self.__orig_time
+
+    def timeskip(self):
+        """
+        Skip some 30s into the future. Implemented by replacing the
+        time.time function in the tested module with a function that returns
+        the current time increased by 30.
+        """
+        tm = time.time()
+        isc.bind10.component.time.time = lambda: tm + 30
 
     def start(self):
         """
@@ -78,28 +92,56 @@ class ComponentTests(unittest.TestCase):
         component.failed_internal = self.fail
         return component
 
-    def do_start_stop(self, kind):
+    def check_startup(self, component):
         """
-        This is a body of a test. It creates a componend of given kind,
-        then starts it and stops it. It checks correct functions are called
-        and the component's status is correct.
-
-        It also checks the component can't be started/stopped twice.
+        Check that nothing was called yet. A newly created component should
+        not get started right away, so this should pass after the creation.
         """
-        # Create it and check it did not do any funny stuff yet
-        component = self.create_component(kind)
         self.assertFalse(self.__shutdown)
         self.assertFalse(self.__start_called)
         self.assertFalse(self.__stop_called)
         self.assertFalse(self.__failed_called)
         self.assertFalse(component.running())
-        # Start it and check it called the correct starting functions
-        component.start()
+
+    def check_started(self, component):
+        """
+        Check the component was started, but not stopped in any way yet.
+        """
         self.assertFalse(self.__shutdown)
         self.assertTrue(self.__start_called)
         self.assertFalse(self.__stop_called)
         self.assertFalse(self.__failed_called)
         self.assertTrue(component.running())
+
+    def check_dead(self, component):
+        """
+        Check the component is completely dead, and the server too.
+        """
+        self.assertTrue(self.__shutdown)
+        self.assertTrue(self.__start_called)
+        self.assertFalse(self.__stop_called)
+        self.assertTrue(self.__failed_called)
+        self.assertNotEqual(0, self.__exitcode)
+        self.assertFalse(component.running())
+        # Surely it can't be stopped again
+        self.assertRaises(ValueError, component.stop)
+        # Nor started
+        self.assertRaises(ValueError, component.start)
+
+    def do_start_stop(self, kind):
+        """
+        This is a body of a test. It creates a component of the given kind,
+        then starts it and stops it. It checks correct functions are called
+        and the component's status is correct.
+
+        It also checks the component can't be started/stopped twice.
+        """
+        # Create it and check it did not do any funny stuff yet
+        component = self.create_component(kind)
+        self.check_startup(component)
+        # Start it and check it called the correct starting functions
+        component.start()
+        self.check_started(component)
         # Check it can't be started twice
         self.assertRaises(ValueError, component.start)
         # Stop it again and check
@@ -131,6 +173,36 @@ class ComponentTests(unittest.TestCase):
         """
         self.do_start_stop('dispensable')
 
+    def test_start_fail_core(self):
+        """
+        Start and then fail a core component. It should stop the whole server.
+        """
+        # Just ordinary startup
+        component = self.create_component('core')
+        self.check_startup(component)
+        component.start()
+        self.check_started(component)
+        # Pretend the component died
+        component.failed()
+        # It should bring down the whole server
+        self.check_dead(component)
+
+    def test_start_fail_core_later(self):
+        """
+        Start and then fail a core component, but let it run for a longer time.
+        It should still stop the whole server.
+        """
+        # Just ordinary startup
+        component = self.create_component('core')
+        self.check_startup(component)
+        component.start()
+        self.check_started(component)
+        self.timeskip()
+        # Pretend the component died some time later
+        component.failed()
+        # Check the component is still dead
+        self.check_dead(component)
+
 if __name__ == '__main__':
     isc.log.init("bind10") # FIXME Should this be needed?
     isc.log.resetUnitTestRootLogger()