Browse Source

[1673] Decode exit status in boss

Mukund Sivaraman 13 years ago
parent
commit
1cd0d0e4fc

+ 1 - 1
doc/guide/bind10-messages.xml

@@ -598,7 +598,7 @@ needs a dedicated message bus.
 </varlistentry>
 </varlistentry>
 
 
 <varlistentry id="BIND10_COMPONENT_FAILED">
 <varlistentry id="BIND10_COMPONENT_FAILED">
-<term>BIND10_COMPONENT_FAILED component %1 (pid %2) failed with %3 exit status</term>
+<term>BIND10_COMPONENT_FAILED component %1 (pid %2) failed: %3</term>
 <listitem><para>
 <listitem><para>
 The process terminated, but the bind10 boss didn't expect it to, which means
 The process terminated, but the bind10 boss didn't expect it to, which means
 it must have failed.
 it must have failed.

+ 1 - 1
src/bin/bind10/bind10_messages.mes

@@ -24,7 +24,7 @@ needs a dedicated message bus.
 An error was encountered when the boss module specified
 An error was encountered when the boss module specified
 statistics data which is invalid for the boss specification file.
 statistics data which is invalid for the boss specification file.
 
 
-% BIND10_COMPONENT_FAILED component %1 (pid %2) failed with %3 exit status
+% BIND10_COMPONENT_FAILED component %1 (pid %2) failed: %3
 The process terminated, but the bind10 boss didn't expect it to, which means
 The process terminated, but the bind10 boss didn't expect it to, which means
 it must have failed.
 it must have failed.
 
 

+ 33 - 1
src/lib/python/isc/bind10/component.py

@@ -30,6 +30,8 @@ configuration). This is yet to be designed.
 import isc.log
 import isc.log
 from isc.log_messages.bind10_messages import *
 from isc.log_messages.bind10_messages import *
 import time
 import time
+import os
+import signal
 
 
 logger = isc.log.Logger("boss")
 logger = isc.log.Logger("boss")
 DBG_TRACE_DATA = 20
 DBG_TRACE_DATA = 20
@@ -45,6 +47,14 @@ STATE_DEAD = 'dead'
 STATE_STOPPED = 'stopped'
 STATE_STOPPED = 'stopped'
 STATE_RUNNING = 'running'
 STATE_RUNNING = 'running'
 
 
+def get_signame(signal_number):
+    """Return the SIG* name for signal_number, or "unknown signal" if none."""
+    for sig in dir(signal):
+        if sig.startswith("SIG") and sig[3].isalnum():
+            if getattr(signal, sig) == signal_number:
+                return sig
+    return "unknown signal"
+
 class BaseComponent:
 class BaseComponent:
     """
     """
     This represents a single component. This one is an abstract base class.
     This represents a single component. This one is an abstract base class.
@@ -206,8 +216,30 @@ class BaseComponent:
                 it is considered a core or needed component, or because
                 it is considered a core or needed component, or because
                 the component is to be restarted later.
                 the component is to be restarted later.
         """
         """
+
+        if exit_code is not None:
+            if os.WIFEXITED(exit_code):
+                exit_str = "process exited normally with exit status %d" % (exit_code)
+            elif os.WIFCONTINUED(exit_code):
+                exit_str = "process continued with exit status %d" % (exit_code)
+            elif os.WIFSTOPPED(exit_code):
+                sig = os.WSTOPSIG(exit_code)
+                signame = get_signame(sig)
+                exit_str = "process stopped with exit status %d (killed by signal %d: %s)" % (exit_code, sig, signame)
+            elif os.WIFSIGNALED(exit_code):
+                if os.WCOREDUMP(exit_code):
+                    exit_str = "process dumped core with exit status %d" % (exit_code)
+                else:
+                    sig = os.WTERMSIG(exit_code)
+                    signame = get_signame(sig)
+                    exit_str = "process terminated with exit status %d (killed by signal %d: %s)" % (exit_code, sig, signame)
+            else:
+                exit_str = "unknown condition with exit status %d" % (exit_code)
+        else:
+            exit_str = "unknown condition"
+
         logger.error(BIND10_COMPONENT_FAILED, self.name(), self.pid(),
         logger.error(BIND10_COMPONENT_FAILED, self.name(), self.pid(),
-                     exit_code if exit_code is not None else "unknown")
+                     exit_str)
         if not self.running():
         if not self.running():
             raise ValueError("Can't fail component that isn't running")
             raise ValueError("Can't fail component that isn't running")
         self.__state = STATE_STOPPED
         self.__state = STATE_STOPPED