Parcourir la source

[213] Pass and log exit code

Michal 'vorner' Vaner il y a 13 ans
Parent
commit
c3a5acc657

+ 4 - 0
src/bin/bind10/bind10_messages.mes

@@ -20,6 +20,10 @@ The boss process is starting up and will now check if the message bus
 daemon is already running. If so, it will not be able to start, as it
 needs a dedicated message bus.
 
+% BIND10_COMPONENT_FAILED component %1 (pid %2) failed with %3 exit status
+The process terminated, but the bind10 boss didn't expect it to, which means
+it must have failed.
+
 % BIND10_COMPONENT_RESTART component %1 is about to restart
 The named component failed previously and we will try to restart it to provide
 as flawless service as possible, but it should be investigated what happened,

+ 1 - 1
src/bin/bind10/bind10_src.py.in

@@ -713,7 +713,7 @@ class BoB:
                 # Tell it that it failed, but only if it matters at all (e.g. it is
                 # running and we are running - if not, it should stop anyway)
                 if component.running() and self.runnable:
-                    component.failed()
+                    component.failed(exit_status)
             else:
                 logger.info(BIND10_UNKNOWN_CHILD_PROCESS_ENDED, pid)
 

+ 6 - 2
src/lib/python/isc/bind10/component.py

@@ -137,7 +137,7 @@ class Component:
             self._start_internal()
         except Exception as e:
             logger.error(BIND10_COMPONENT_START_EXCEPTION, self.name(), e)
-            self.failed()
+            self.failed(None)
             raise
 
     def _start_internal(self):
@@ -199,7 +199,7 @@ class Component:
         # TODO Some way to wait for the process that doesn't want to
         # terminate and kill it would prove nice (or add it to boss somewhere?)
 
-    def failed(self):
+    def failed(self, exit_code):
         """
         Notify the component it crashed. This will be called from boss object.
 
@@ -211,7 +211,11 @@ class Component:
         down with error exit status. A dead component can't be started again.
 
         Otherwise the component will try to restart.
+
+        The exit code is used for logging. It might be None (for example
+        when the component failed to start), in which case it is logged
+        as "unknown".
         """
+        logger.error(BIND10_COMPONENT_FAILED, self.name(), self.pid(),
+                     exit_code if exit_code is not None else "unknown")
         if not self.running():
             raise ValueError("Can't fail component that isn't running")
         self.__state = STATE_STOPPED

+ 9 - 9
src/lib/python/isc/bind10/tests/component_test.py

@@ -179,7 +179,7 @@ class ComponentTests(BossUtils, unittest.TestCase):
         self.assertFalse(component.running())
         # We can't stop or fail the component yet
         self.assertRaises(ValueError, component.stop)
-        self.assertRaises(ValueError, component.failed)
+        self.assertRaises(ValueError, component.failed, 1)
 
     def __check_started(self, component):
         """
@@ -206,7 +206,7 @@ class ComponentTests(BossUtils, unittest.TestCase):
         # Nor started
         self.assertRaises(ValueError, component.start)
         # Nor can it fail again
-        self.assertRaises(ValueError, component.failed)
+        self.assertRaises(ValueError, component.failed, 1)
 
     def __check_restarted(self, component):
         """
@@ -254,7 +254,7 @@ class ComponentTests(BossUtils, unittest.TestCase):
         # Check it can't be stopped twice
         self.assertRaises(ValueError, component.stop)
         # Or failed
-        self.assertRaises(ValueError, component.failed)
+        self.assertRaises(ValueError, component.failed, 1)
         # But it can be started again if it is stopped
         # (no more checking here, just it doesn't crash)
         component.start()
@@ -287,7 +287,7 @@ class ComponentTests(BossUtils, unittest.TestCase):
         component.start()
         self.__check_started(component)
         # Pretend the component died
-        component.failed()
+        component.failed(1)
         # It should bring down the whole server
         self.__check_dead(component)
 
@@ -303,7 +303,7 @@ class ComponentTests(BossUtils, unittest.TestCase):
         self.__check_started(component)
         self._timeskip()
         # Pretend the component died some time later
-        component.failed()
+        component.failed(1)
         # Check the component is still dead
         self.__check_dead(component)
 
@@ -319,7 +319,7 @@ class ComponentTests(BossUtils, unittest.TestCase):
         component.start()
         self.__check_started(component)
         # Make it fail right away.
-        component.failed()
+        component.failed(1)
         self.__check_dead(component)
 
     def test_start_fail_needed_later(self):
@@ -335,7 +335,7 @@ class ComponentTests(BossUtils, unittest.TestCase):
         # Make it fail later on
         self.__start_called = False
         self._timeskip()
-        component.failed()
+        component.failed(1)
         self.__check_restarted(component)
 
     def test_start_fail_dispensable(self):
@@ -349,7 +349,7 @@ class ComponentTests(BossUtils, unittest.TestCase):
         self.__check_started(component)
         # Make it fail right away
         self.__start_called = False
-        component.failed()
+        component.failed(1)
         self.__check_restarted(component)
 
     def test_start_fail_dispensable(self):
@@ -365,7 +365,7 @@ class ComponentTests(BossUtils, unittest.TestCase):
         # Make it fail later on
         self.__start_called = False
         self._timeskip()
-        component.failed()
+        component.failed(1)
         self.__check_restarted(component)
 
     def test_fail_core(self):