Browse Source

[213] Failures of core components

Michal 'vorner' Vaner 13 years ago
parent
commit
81bb03bbb0

+ 22 - 1
src/lib/python/isc/bind10/component.py

@@ -14,6 +14,7 @@
 # WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 # WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 
 
 from isc.log_messages.bind10_messages import *
 from isc.log_messages.bind10_messages import *
+import time
 
 
 logger = isc.log.Logger("boss")
 logger = isc.log.Logger("boss")
 
 
@@ -56,6 +57,11 @@ class Component:
             to restart it and keeps running.
             to restart it and keeps running.
         """
         """
         self.__running = False
         self.__running = False
+        # Dead like really dead. No resurrection possible.
+        self.__dead = False
+        self.__kind = kind
+        self.__boss = boss
+
     def start(self):
     def start(self):
         """
         """
         Start the component for the first time or restart it. If you need to
         Start the component for the first time or restart it. If you need to
@@ -64,16 +70,20 @@ class Component:
 
 
         If you try to start an already running component, it raises ValueError.
         If you try to start an already running component, it raises ValueError.
         """
         """
+        if self.__dead:
+            raise ValueError("Can't resurrect already dead component")
         if self.running():
         if self.running():
             raise ValueError("Can't start already running component")
             raise ValueError("Can't start already running component")
         self.start_internal()
         self.start_internal()
         self.__running = True
         self.__running = True
+
     def start_internal(self):
     def start_internal(self):
         """
         """
         This method does the actual starting of a process. If you need to
         This method does the actual starting of a process. If you need to
         change the way the component is started, replace this method.
         change the way the component is started, replace this method.
         """
         """
         pass
         pass
+
     def stop(self):
     def stop(self):
         """
         """
         Stop the component. If you need to modify the way a component is
         Stop the component. If you need to modify the way a component is
@@ -87,12 +97,14 @@ class Component:
             raise ValueError("Can't stop a component which is not running")
             raise ValueError("Can't stop a component which is not running")
         self.stop_internal()
         self.stop_internal()
         self.__running = False
         self.__running = False
+
     def stop_internal(self):
     def stop_internal(self):
         """
         """
         This is the method that does the actual stopping of a component.
         This is the method that does the actual stopping of a component.
         You can replace this method if you want a different way to do it.
         You can replace this method if you want a different way to do it.
         """
         """
         pass
         pass
+
     def failed(self):
     def failed(self):
         """
         """
         Notify the component it crashed. This will be called from boss object.
         Notify the component it crashed. This will be called from boss object.
@@ -100,14 +112,23 @@ class Component:
         If you try to call failed on a component that is not running,
         If you try to call failed on a component that is not running,
         a ValueError is raised.
         a ValueError is raised.
         """
         """
-        pass
+        self.failed_internal()
+        self.__running = False
+        if self.__kind == 'core':
+            self.__dead = True
+            self.__boss.shutdown(1)
+
     def failed_internal(self):
     def failed_internal(self):
         """
         """
         This method is called from failed. You can replace it if you need
         This method is called from failed. You can replace it if you need
         some specific behaviour when the component crashes. The default
         some specific behaviour when the component crashes. The default
         implementation is empty.
         implementation is empty.
+
+        Do not raise exceptions from here, please. Otherwise the proper
+        shutdown would not happen.
         """
         """
         pass
         pass
+
     def running(self):
     def running(self):
         """
         """
         Informs if the component is currently running. It assumes the failed
         Informs if the component is currently running. It assumes the failed

+ 82 - 10
src/lib/python/isc/bind10/tests/component_test.py

@@ -19,6 +19,7 @@ Tests for the bind10.component module
 
 
 import unittest
 import unittest
 import isc.log
 import isc.log
+import time
 from isc.bind10.component import Component
 from isc.bind10.component import Component
 
 
 class ComponentTests(unittest.TestCase):
 class ComponentTests(unittest.TestCase):
@@ -34,6 +35,8 @@ class ComponentTests(unittest.TestCase):
         self.__start_called = False
         self.__start_called = False
         self.__stop_called = False
         self.__stop_called = False
         self.__failed_called = False
         self.__failed_called = False
+        # Back up the time function, we may want to replace it with something
+        self.__orig_time = isc.bind10.component.time.time
 
 
     def shutdown(self, exitcode=0):
     def shutdown(self, exitcode=0):
         """
         """
@@ -41,6 +44,17 @@ class ComponentTests(unittest.TestCase):
         """
         """
         self.__shutdown = True
         self.__shutdown = True
         self.__exitcode = None
         self.__exitcode = None
+        # Return the original time function
+        isc.bind10.component.time.time = self.__orig_time
+
+    def timeskip(self):
+        """
+        Skip some 30s into the future. Implemented by replacing the
+        time.time function in the tested module with a function that returns
+        the time of this call increased by 30.
+        """
+        tm = time.time()
+        isc.bind10.component.time.time = lambda: tm + 30
 
 
     def start(self):
     def start(self):
         """
         """
@@ -78,28 +92,56 @@ class ComponentTests(unittest.TestCase):
         component.failed_internal = self.fail
         component.failed_internal = self.fail
         return component
         return component
 
 
-    def do_start_stop(self, kind):
+    def check_startup(self, component):
         """
         """
-        This is a body of a test. It creates a componend of given kind,
-        then starts it and stops it. It checks correct functions are called
-        and the component's status is correct.
-
-        It also checks the component can't be started/stopped twice.
+        Check that nothing was called yet. A newly created component should
+        not get started right away, so this should pass after the creation.
         """
         """
-        # Create it and check it did not do any funny stuff yet
-        component = self.create_component(kind)
         self.assertFalse(self.__shutdown)
         self.assertFalse(self.__shutdown)
         self.assertFalse(self.__start_called)
         self.assertFalse(self.__start_called)
         self.assertFalse(self.__stop_called)
         self.assertFalse(self.__stop_called)
         self.assertFalse(self.__failed_called)
         self.assertFalse(self.__failed_called)
         self.assertFalse(component.running())
         self.assertFalse(component.running())
-        # Start it and check it called the correct starting functions
-        component.start()
+
+    def check_started(self, component):
+        """
+        Check the component was started, but not stopped anyhow yet.
+        """
         self.assertFalse(self.__shutdown)
         self.assertFalse(self.__shutdown)
         self.assertTrue(self.__start_called)
         self.assertTrue(self.__start_called)
         self.assertFalse(self.__stop_called)
         self.assertFalse(self.__stop_called)
         self.assertFalse(self.__failed_called)
         self.assertFalse(self.__failed_called)
         self.assertTrue(component.running())
         self.assertTrue(component.running())
+
+    def check_dead(self, component):
+        """
+        Check the component is completely dead, and the server too.
+        """
+        self.assertTrue(self.__shutdown)
+        self.assertTrue(self.__start_called)
+        self.assertFalse(self.__stop_called)
+        self.assertTrue(self.__failed_called)
+        self.assertNotEqual(0, self.__exitcode)
+        self.assertFalse(component.running())
+        # Surely it can't be stopped again
+        self.assertRaises(ValueError, component.stop)
+        # Nor started
+        self.assertRaises(ValueError, component.start)
+
+    def do_start_stop(self, kind):
+        """
+        This is a body of a test. It creates a component of given kind,
+        then starts it and stops it. It checks correct functions are called
+        and the component's status is correct.
+
+        It also checks the component can't be started/stopped twice.
+        """
+        # Create it and check it did not do any funny stuff yet
+        component = self.create_component(kind)
+        self.check_startup(component)
+        # Start it and check it called the correct starting functions
+        component.start()
+        self.check_started(component)
         # Check it can't be started twice
         # Check it can't be started twice
         self.assertRaises(ValueError, component.start)
         self.assertRaises(ValueError, component.start)
         # Stop it again and check
         # Stop it again and check
@@ -131,6 +173,36 @@ class ComponentTests(unittest.TestCase):
         """
         """
         self.do_start_stop('dispensable')
         self.do_start_stop('dispensable')
 
 
+    def test_start_fail_core(self):
+        """
+        Start and then fail a core component. It should stop the whole server.
+        """
+        # Just ordinary startup
+        component = self.create_component('core')
+        self.check_startup(component)
+        component.start()
+        self.check_started(component)
+        # Pretend the component died
+        component.failed()
+        # It should bring down the whole server
+        self.check_dead(component)
+
+    def test_start_fail_core_later(self):
+        """
+        Start and then fail a core component, but let it be running for longer time.
+        It should still stop the whole server.
+        """
+        # Just ordinary startup
+        component = self.create_component('core')
+        self.check_startup(component)
+        component.start()
+        self.check_started(component)
+        self.timeskip()
+        # Pretend the component died some time later
+        component.failed()
+        # Check the component is still dead
+        self.check_dead(component)
+
 if __name__ == '__main__':
 if __name__ == '__main__':
     isc.log.init("bind10") # FIXME Should this be needed?
     isc.log.init("bind10") # FIXME Should this be needed?
     isc.log.resetUnitTestRootLogger()
     isc.log.resetUnitTestRootLogger()