Browse Source

[1673] Decode exit status in boss

Mukund Sivaraman 13 years ago
parent
commit
1cd0d0e4fc

+ 1 - 1
doc/guide/bind10-messages.xml

@@ -598,7 +598,7 @@ needs a dedicated message bus.
 </varlistentry>
 
 <varlistentry id="BIND10_COMPONENT_FAILED">
-<term>BIND10_COMPONENT_FAILED component %1 (pid %2) failed with %3 exit status</term>
+<term>BIND10_COMPONENT_FAILED component %1 (pid %2) failed: %3</term>
 <listitem><para>
 The process terminated, but the bind10 boss didn't expect it to, which means
 it must have failed.

+ 1 - 1
src/bin/bind10/bind10_messages.mes

@@ -24,7 +24,7 @@ needs a dedicated message bus.
 An error was encountered when the boss module specified
 statistics data which is invalid for the boss specification file.
 
-% BIND10_COMPONENT_FAILED component %1 (pid %2) failed with %3 exit status
+% BIND10_COMPONENT_FAILED component %1 (pid %2) failed: %3
 The process terminated, but the bind10 boss didn't expect it to, which means
 it must have failed.
 

+ 33 - 1
src/lib/python/isc/bind10/component.py

@@ -30,6 +30,8 @@ configuration). This is yet to be designed.
 import isc.log
 from isc.log_messages.bind10_messages import *
 import time
+import os
+import signal
 
 logger = isc.log.Logger("boss")
 DBG_TRACE_DATA = 20
@@ -45,6 +47,14 @@ STATE_DEAD = 'dead'
 STATE_STOPPED = 'stopped'
 STATE_RUNNING = 'running'
 
+def get_signame(signal_number):
+    """Return the symbolic name for a signal."""
+    for sig in dir(signal):  # dir() is sorted, so for aliased numbers the alphabetically first name is returned
+        if sig.startswith("SIG") and sig[3].isalnum():  # 4th char must be alphanumeric: skips names such as SIG_DFL / SIG_IGN
+            if getattr(signal, sig) == signal_number:
+                return sig
+    return "unknown signal"  # fallback when no matching SIG* attribute exists in the signal module
+
 class BaseComponent:
     """
     This represents a single component. This one is an abstract base class.
@@ -206,8 +216,30 @@ class BaseComponent:
                 it is considered a core or needed component, or because
                 the component is to be restarted later.
         """
+
+        if exit_code is not None:
+            if os.WIFEXITED(exit_code):
+                exit_str = "process exited normally with exit status %d" % (exit_code)
+            elif os.WIFCONTINUED(exit_code):
+                exit_str = "process continued with exit status %d" % (exit_code)
+            elif os.WIFSTOPPED(exit_code):
+                sig = os.WSTOPSIG(exit_code)
+                signame = get_signame(sig)
+                exit_str = "process stopped with exit status %d (killed by signal %d: %s)" % (exit_code, sig, signame)
+            elif os.WIFSIGNALED(exit_code):
+                if os.WCOREDUMP(exit_code):
+                    exit_str = "process dumped core with exit status %d" % (exit_code)
+                else:
+                    sig = os.WTERMSIG(exit_code)
+                    signame = get_signame(sig)
+                    exit_str = "process terminated with exit status %d (killed by signal %d: %s)" % (exit_code, sig, signame)
+            else:
+                exit_str = "unknown condition with exit status %d" % (exit_code)
+        else:
+            exit_str = "unknown condition"
+
         logger.error(BIND10_COMPONENT_FAILED, self.name(), self.pid(),
-                     exit_code if exit_code is not None else "unknown")
+                     exit_str)
         if not self.running():
             raise ValueError("Can't fail component that isn't running")
         self.__state = STATE_STOPPED