Browse Source

[213] Logging for components

Michal 'vorner' Vaner 13 years ago
parent
commit
1e465d5417
2 changed files with 69 additions and 3 deletions
  1. 53 0
      src/bin/bind10/bind10_messages.mes
  2. 16 3
      src/lib/python/isc/bind10/component.py

+ 53 - 0
src/bin/bind10/bind10_messages.mes

@@ -20,6 +20,27 @@ The boss process is starting up and will now check if the message bus
 daemon is already running. If so, it will not be able to start, as it
 daemon is already running. If so, it will not be able to start, as it
 needs a dedicated message bus.
 needs a dedicated message bus.
 
 
+% BIND10_COMPONENT_RESTART component %1 is about to restart
+The named component failed previously and we will try to restart it to provide
+as flawless service as possible, but it should be investigated what happened,
+as it could happen again.
+
+% BIND10_COMPONENT_START component %1 is starting
+The named component is about to be started by the boss process.
+
+% BIND10_COMPONENT_START_EXCEPTION component %1 failed to start: %2
+An exception (mentioned in the message) happened during the startup of the
+named component. The componet is not considered started and further actions
+will be taken about it.
+
+% BIND10_COMPONENT_STOP component %1 is being stopped
+A component is about to be asked to stop willingly by the boss.
+
+% BIND10_COMPONENT_UNSATISFIED component %1 is required to run and failed
+A component failed for some reason (see previous messages). It is either a core
+component or needed component that was just started. In any case, the system
+can't continue without it and will terminate.
+
 % BIND10_CONFIGURATION_START_AUTH start authoritative server: %1
 % BIND10_CONFIGURATION_START_AUTH start authoritative server: %1
 This message shows whether or not the authoritative server should be
 This message shows whether or not the authoritative server should be
 started according to the configuration.
 started according to the configuration.
@@ -28,6 +49,38 @@ started according to the configuration.
 This message shows whether or not the resolver should be
 This message shows whether or not the resolver should be
 started according to the configuration.
 started according to the configuration.
 
 
+% BIND10_CONFIGURATOR_BUILD building plan '%1' -> '%2'
+A debug message. This indicates that the configurator is building a plan
+how to change configuration from the older one to newer one. This does no
+real work yet, it just does the planning what needs to be done.
+
+% BIND10_CONFIGURATOR_RECONFIGURE reconfiguring running components
+A different configuration of which components should be running is being
+installed. All components that are no longer needed will be stopped and
+newly introduced ones started. This happens at startup, when the configuration
+is read the first time, or when an operator changes configuration of the boss.
+
+% BIND10_CONFIGURATOR_RUN running plan of %1 tasks
+A debug message. The configurator is about to execute a plan of actions it
+computed previously.
+
+% BIND10_CONFIGURATOR_START bind10 component configurator is starting up
+The part that cares about starting and stopping the right component from the
+boss process is starting up. This happens only once at the startup of the
+boss process. It will start the basic set of processes now (the ones boss
+needs to read the configuration), the rest will be started after the
+configuration is known.
+
+% BIND10_CONFIGURATOR_STOP bind10 component configurator is shutting down
+The part that cares about starting and stopping processes in the boss is
+shutting down. All started components will be shut down now (more precisely,
+asked to terminate by their own, if they fail to comply, other parts of
+the boss process will try to force them).
+
+% BIND10_CONFIGURATOR_TASK performing task %1 on %2
+A debug message. The configurator is about to perform one task of the plan it
+is currently executing on the named component.
+
 % BIND10_INVALID_USER invalid user: %1
 % BIND10_INVALID_USER invalid user: %1
 The boss process was started with the -u option, to drop root privileges
 The boss process was started with the -u option, to drop root privileges
 and continue running as the specified user, but the user is unknown.
 and continue running as the specified user, but the user is unknown.

+ 16 - 3
src/lib/python/isc/bind10/component.py

@@ -14,12 +14,15 @@
 # WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 # WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 
 
 import isc.bind10.sockcreator
 import isc.bind10.sockcreator
+import isc.log
 from isc.log_messages.bind10_messages import *
 from isc.log_messages.bind10_messages import *
 import time
 import time
 from bind10_config import LIBEXECDIR
 from bind10_config import LIBEXECDIR
 import os
 import os
 
 
 logger = isc.log.Logger("boss")
 logger = isc.log.Logger("boss")
+DBG_TRACE_DATA = 20
+DBG_TRACE_DETAILED = 80
 
 
 """
 """
 Module for managing components (abstraction of process). It allows starting
 Module for managing components (abstraction of process). It allows starting
@@ -89,11 +92,13 @@ class Component:
             raise ValueError("Can't resurrect already dead component")
             raise ValueError("Can't resurrect already dead component")
         if self.running():
         if self.running():
             raise ValueError("Can't start already running component")
             raise ValueError("Can't start already running component")
+        logger.info(BIND10_COMPONENT_START, self.name())
         self.__running = True
         self.__running = True
         self.__start_time = time.time()
         self.__start_time = time.time()
         try:
         try:
             self.start_internal()
             self.start_internal()
-        except:
+        except Exception as e:
+            logger.error(BIND10_COMPONENT_START_EXCEPTION, self.name(), e)
             self.failed()
             self.failed()
             raise
             raise
 
 
@@ -129,6 +134,7 @@ class Component:
         if not self.running():
         if not self.running():
             raise ValueError("Can't stop a component which is not running")
             raise ValueError("Can't stop a component which is not running")
         self.__running = False
         self.__running = False
+        logger.info(BIND10_COMPONENT_STOP, self.name())
         self.stop_internal()
         self.stop_internal()
 
 
     def stop_internal(self):
     def stop_internal(self):
@@ -147,7 +153,6 @@ class Component:
         """
         """
         if not self.running():
         if not self.running():
             raise ValueError("Can't fail component that isn't running")
             raise ValueError("Can't fail component that isn't running")
-        self.failed_internal()
         self.__running = False
         self.__running = False
         self.failed_internal()
         self.failed_internal()
         # If it is a core component or the needed component failed to start
         # If it is a core component or the needed component failed to start
@@ -155,9 +160,11 @@ class Component:
         if self.__kind == 'core' or \
         if self.__kind == 'core' or \
             (self.__kind == 'needed' and time.time() - 10 < self.__start_time):
             (self.__kind == 'needed' and time.time() - 10 < self.__start_time):
             self.__dead = True
             self.__dead = True
+            logger.fatal(BIND10_COMPONENT_UNSATISFIED, self.name())
             self._boss.component_shutdown(1)
             self._boss.component_shutdown(1)
         # This means we want to restart
         # This means we want to restart
         else:
         else:
+            logger.warn(BIND10_COMPONENT_RESTART, self.name())
             self.start()
             self.start()
 
 
     def failed_internal(self):
     def failed_internal(self):
@@ -225,7 +232,6 @@ class SockCreator(Component):
         return self.__creator.pid()
         return self.__creator.pid()
 
 
 class Msgq(Component):
 class Msgq(Component):
-<<<<<<< HEAD
     """
     """
     The message queue. Starting is passed to boss, stopping is not supported
     The message queue. Starting is passed to boss, stopping is not supported
     and we leave the boss kill it by signal.
     and we leave the boss kill it by signal.
@@ -316,6 +322,7 @@ class Configurator:
         if self._running:
         if self._running:
             raise ValueError("Trying to start the component configurator " +
             raise ValueError("Trying to start the component configurator " +
                              "twice")
                              "twice")
+        logger.info(BIND10_CONFIGURATOR_START)
         self.__reconfigure_internal({}, configuration)
         self.__reconfigure_internal({}, configuration)
         self._running = True
         self._running = True
 
 
@@ -326,6 +333,7 @@ class Configurator:
         if not self._running:
         if not self._running:
             raise ValueError("Trying to shutdown the component " +
             raise ValueError("Trying to shutdown the component " +
                              "configurator while it's not yet running")
                              "configurator while it's not yet running")
+        logger.info(BIND10_CONFIGURATOR_STOP)
         self.__reconfigure_internal(self._old_config, {})
         self.__reconfigure_internal(self._old_config, {})
         self._running = False
         self._running = False
 
 
@@ -337,6 +345,7 @@ class Configurator:
         if not self._running:
         if not self._running:
             raise ValueError("Trying to reconfigure the component " +
             raise ValueError("Trying to reconfigure the component " +
                              "configurator while it's not yet running")
                              "configurator while it's not yet running")
+        logger.info(BIND10_CONFIGURATOR_RECONFIGURE)
         self.__reconfigure_internal(self._old_config, configuration)
         self.__reconfigure_internal(self._old_config, configuration)
 
 
     def _build_plan(self, old, new):
     def _build_plan(self, old, new):
@@ -350,6 +359,7 @@ class Configurator:
         Any configuration problems are expected to be handled here, so the
         Any configuration problems are expected to be handled here, so the
         plan is not yet run.
         plan is not yet run.
         """
         """
+        logger.debug(DBG_TRACE_DATA, BIND10_CONFIGURATOR_BUILD, old, new)
         plan = []
         plan = []
         # Handle removals of old components
         # Handle removals of old components
         for cname in old.keys():
         for cname in old.keys():
@@ -409,9 +419,12 @@ class Configurator:
         * start
         * start
         * stop
         * stop
         """
         """
+        logger.debug(DBG_TRACE_DATA, BIND10_CONFIGURATOR_RUN, len(plan))
         for task in plan:
         for task in plan:
             component = task['component']
             component = task['component']
             command = task['command']
             command = task['command']
+            logger.debug(DBG_TRACE_DETAILED, BIND10_CONFIGURATOR_TASK, command,
+                         component.name())
             if command == 'start':
             if command == 'start':
                 component.start()
                 component.start()
                 self._components[task['name']] = component
                 self._components[task['name']] = component