Browse Source

[trac758] logging conversion for bob

Jelte Jansen 14 years ago
parent
commit
cae4ced003
3 changed files with 227 additions and 73 deletions
  1. 6 2
      src/bin/bind10/Makefile.am
  2. 60 71
      src/bin/bind10/bind10.py.in
  3. 161 0
      src/bin/bind10/bind10_messages.mes

+ 6 - 2
src/bin/bind10/Makefile.am

@@ -1,16 +1,17 @@
 SUBDIRS = . tests
 
 sbin_SCRIPTS = bind10
-CLEANFILES = bind10 bind10.pyc
+CLEANFILES = bind10 bind10.pyc bind10_messages.py bind10_messages.pyc
 
 pkglibexecdir = $(libexecdir)/@PACKAGE@
+pyexec_DATA = bind10_messages.py
 
 bind10dir = $(pkgdatadir)
 bind10_DATA = bob.spec
 EXTRA_DIST = bob.spec
 
 man_MANS = bind10.8
-EXTRA_DIST += $(man_MANS) bind10.xml
+EXTRA_DIST += $(man_MANS) bind10.xml bind10_messages.mes
 
 if ENABLE_MAN
 
@@ -19,6 +20,9 @@ bind10.8: bind10.xml
 
 endif
 
+bind10_messages.py: bind10_messages.mes
+	$(top_builddir)/src/lib/log/compiler/message -p $(top_srcdir)/src/bin/bind10/bind10_messages.mes
+
 # this is done here since configure.ac AC_OUTPUT doesn't expand exec_prefix
 bind10: bind10.py
 	$(SED) -e "s|@@PYTHONPATH@@|@pyexecdir@|" \

+ 60 - 71
src/bin/bind10/bind10.py.in

@@ -65,6 +65,16 @@ import posix
 import isc.cc
 import isc.util.process
 import isc.net.parse
+import isc.log
+from bind10_messages import *
+
+isc.log.init("b10-bind10")
+logger = isc.log.Logger("bind10")
+
+# Pending system-wide debug level definitions, the ones we
+# use here are hardcoded for now
+DBG_PROCESS = 10
+DBG_COMMANDS = 30
 
 # Assign this process some longer name
 isc.util.process.rename(sys.argv[0])
@@ -252,8 +262,7 @@ class BoB:
             if new_config['start_' + name]:
                 if not started:
                     if self.uid is not None:
-                        sys.stderr.write("[bind10] Starting " + name + " as " +
-                            "a user, not root. This might fail.\n")
+                        logger.warn(BIND10_START_AS_NON_ROOT, name)
                     start()
             else:
                 stop()
@@ -279,9 +288,8 @@ class BoB:
             self.started_auth_family = False
 
         # The real code of the config handler function follows here
-        if self.verbose:
-            sys.stdout.write("[bind10] Handling new configuration: " +
-                str(new_config) + "\n")
+        logger.debug(DBG_COMMANDS, BIND10_RECEIVED_NEW_CONFIGURATION,
+                     new_config)
         start_stop('resolver', self.started_resolver_family, resolver_on,
             resolver_off)
         start_stop('auth', self.started_auth_family, auth_on, auth_off)
@@ -298,8 +306,7 @@ class BoB:
         return process_list
 
     def command_handler(self, command, args):
-        if self.verbose:
-            sys.stdout.write("[bind10] Boss got command: " + str(command) + "\n")
+        logger.debug(DBG_COMMANDS, BIND10_RECEIVED_COMMAND, command)
         answer = isc.config.ccsession.create_answer(1, "command not implemented")
         if type(command) != str:
             answer = isc.config.ccsession.create_answer(1, "bad command")
@@ -332,12 +339,10 @@ class BoB:
             start, this runs through the list of started processes, killing
             each one.  It then clears that list.
         """
-        if self.verbose:
-            sys.stdout.write("[bind10] killing started processes:\n")
+        logger.info(BIND10_KILLING_ALL_PROCESSES)
 
         for pid in self.processes:
-            if self.verbose:
-                sys.stdout.write("[bind10] - %s\n" % self.processes[pid].name)
+            logger.info(BIND10_KILL_PROCESS, self.processes[pid].name)
             self.processes[pid].process.kill()
         self.processes = {}
 
@@ -351,18 +356,14 @@ class BoB:
             xfrin/xfrout and zone manager as we don't need to start those if we
             are not running the authoritative server.)
         """
-        if self.verbose:
-            sys.stdout.write("[bind10] Reading Boss configuration:\n")
+        logger.info(BIND10_READING_BOSS_CONFIGURATION)
 
         config_data = self.ccs.get_full_config()
         self.cfg_start_auth = config_data.get("start_auth")
         self.cfg_start_resolver = config_data.get("start_resolver")
 
-        if self.verbose:
-            sys.stdout.write("[bind10] - start_auth: %s\n" %
-                str(self.cfg_start_auth))
-            sys.stdout.write("[bind10] - start_resolver: %s\n" %
-                str(self.cfg_start_resolver))
+        logger.info(BIND10_CONFIGURATION_START_AUTH, self.cfg_start_auth)
+        logger.info(BIND10_CONFIGURATION_START_RESOLVER, self.cfg_start_resolver)
 
     def log_starting(self, process, port = None, address = None):
         """
@@ -377,13 +378,14 @@ class BoB:
             appended to the message (if present).
         """
         self.curproc = process
-        if self.verbose:
-            sys.stdout.write("[bind10] Starting %s" % self.curproc)
-            if port is not None:
-                sys.stdout.write(" on port %d" % port)
-                if address is not None:
-                    sys.stdout.write(" (address %s)" % str(address))
-            sys.stdout.write("\n")
+        if port is None and address is None:
+            logger.debug(DBG_PROCESS, BIND10_STARTING_PROCESS, self.curproc)
+        elif address is None:
+            logger.debug(DBG_PROCESS, BIND10_STARTING_PROCESS_PORT, self.curproc,
+                        port)
+        else:
+            logger.debug(DBG_PROCESS, BIND10_STARTING_PROCESS_PORT_ADDRESS,
+                        self.curproc, address, port)
 
     def log_started(self, pid = None):
         """
@@ -391,11 +393,10 @@ class BoB:
             message.  As with starting_message(), this ensures a consistent
             format.
         """
-        if self.verbose:
-            sys.stdout.write("[bind10] Started %s" % self.curproc)
-            if pid is not None:
-                sys.stdout.write(" (PID %d)" % pid)
-            sys.stdout.write("\n")
+        if pid is None:
+            logger.debug(DBG_PROCESS, BIND10_STARTED_PROCESS, self.curproc)
+        else:
+            logger.debug(DBG_PROCESS, BIND10_STARTED_PROCESS_PID, self.curproc, pid)
 
     # The next few methods start the individual processes of BIND-10.  They
     # are called via start_all_processes().  If any fail, an exception is
@@ -459,7 +460,9 @@ class BoB:
         """
         self.log_starting("ccsession")
         self.ccs = isc.config.ModuleCCSession(SPECFILE_LOCATION, 
-                                      self.config_handler, self.command_handler)
+                                      self.config_handler,
+                                      self.command_handler,
+                                      None, True)
         self.ccs.start()
         self.log_started()
 
@@ -620,12 +623,12 @@ class BoB:
         # running
         c_channel_env = {}
         if self.msgq_socket_file is not None:
-             c_channel_env["BIND10_MSGQ_SOCKET_FILE"] = self.msgq_socket_file 
-        if self.verbose:
-           sys.stdout.write("[bind10] Checking for already running b10-msgq\n")
+             c_channel_env["BIND10_MSGQ_SOCKET_FILE"] = self.msgq_socket_file
+        logger.debug(DBG_PROCESS, BIND10_CHECK_MSGQ_ALREADY_RUNNING)
         # try to connect, and if we can't wait a short while
         try:
             self.cc_session = isc.cc.Session(self.msgq_socket_file)
+            logger.fatal(BIND10_MSGQ_ALREADY_RUNNING);
             return "b10-msgq already running, or socket file not cleaned , cannot start"
         except isc.cc.session.SessionError:
             # this is the case we want, where the msgq is not running
@@ -663,8 +666,7 @@ class BoB:
         Stop the given process, friendly-like. The process is the name it has
         (in logs, etc), the recipient is the address on msgq.
         """
-        if self.verbose:
-            sys.stdout.write("[bind10] Asking %s to terminate\n" % process)
+        logger.info(BIND10_STOP_PROCESS, process)
         # TODO: Some timeout to solve processes that don't want to die would
         # help. We can even store it in the dict, it is used only as a set
         self.expected_shutdowns[process] = 1
@@ -690,8 +692,7 @@ class BoB:
 
     def shutdown(self):
         """Stop the BoB instance."""
-        if self.verbose:
-            sys.stdout.write("[bind10] Stopping the server.\n")
+        logger.info(BIND10_SHUTDOWN)
         # first try using the BIND 10 request to stop
         try:
             self.stop_all_processes()
@@ -705,9 +706,8 @@ class BoB:
         # next try sending a SIGTERM
         processes_to_stop = list(self.processes.values())
         for proc_info in processes_to_stop:
-            if self.verbose:
-                sys.stdout.write("[bind10] Sending SIGTERM to %s (PID %d).\n" % 
-                                 (proc_info.name, proc_info.pid))
+            logger.info(BIND10_SEND_SIGTERM, proc_info.name,
+                        proc_info.pid)
             try:
                 proc_info.process.terminate()
             except OSError:
@@ -721,17 +721,15 @@ class BoB:
             self.reap_children()
             processes_to_stop = list(self.processes.values())
             for proc_info in processes_to_stop:
-                if self.verbose:
-                    sys.stdout.write("[bind10] Sending SIGKILL to %s (PID %d).\n" % 
-                                     (proc_info.name, proc_info.pid))
+                logger.info(BIND10_SEND_SIGKILL, proc_info.name,
+                            proc_info.pid)
                 try:
                     proc_info.process.kill()
                 except OSError:
                     # ignore these (usually ESRCH because the child
                     # finally exited)
                     pass
-        if self.verbose:
-            sys.stdout.write("[bind10] All processes ended, server done.\n")
+        logger.info(BIND10_SHUTDOWN_COMPLETE)
 
     def _get_process_exit_status(self):
         return os.waitpid(-1, os.WNOHANG)
@@ -759,18 +757,16 @@ class BoB:
                 # elsewhere.
                 if self.runnable:
                     if exit_status is None:
-                        sys.stdout.write(
-                            "[bind10] Process %s (PID %d) died: exit status not available" % 
-                            (proc_info.name, proc_info.pid))
+                        logger.info(BIND10_PROCESS_ENDED_NO_EXIT_STATUS,
+                                    proc_info.name, proc_info.pid)
                     else:
-                        sys.stdout.write(
-                            "[bind10] Process %s (PID %d) terminated, exit status = %d\n" % 
-                            (proc_info.name, proc_info.pid, exit_status))
+                        logger.info(BIND10_PROCESS_ENDED_WITH_EXIT_STATUS,
+                                    proc_info.name, proc_info.pid,
+                                    exit_status)
 
                     # Was it a special process?
                     if proc_info.name == "b10-msgq":
-                        sys.stdout.write(
-                                 "[bind10] The b10-msgq process died, shutting down.\n")
+                        logger.fatal(BIND10_MSGQ_DAEMON_ENDED)
                         self.runnable = False
 
                 # If we're in 'brittle' mode, we want to shutdown after
@@ -778,7 +774,7 @@ class BoB:
                 if self.brittle:
                     self.runnable = False
             else:
-                sys.stdout.write("[bind10] Unknown child pid %d exited.\n" % pid)
+                logger.info(BIND10_UNKNOWN_CHILD_PROCESS_ENDED, pid)
 
     def restart_processes(self):
         """
@@ -809,14 +805,11 @@ class BoB:
                     next_restart = restart_time
                 still_dead[proc_info.pid] = proc_info
             else:
-                if self.verbose:
-                    sys.stdout.write("[bind10] Resurrecting dead %s process...\n" % 
-                        proc_info.name)
+                logger.info(BIND10_RESURRECTING_PROCESS, proc_info.name)
                 try:
                     proc_info.respawn()
                     self.processes[proc_info.pid] = proc_info
-                    sys.stdout.write("[bind10] Resurrected %s (PID %d)\n" %
-                                     (proc_info.name, proc_info.pid))
+                    logger.info(BIND10_RESURRECTED_PROCESS, proc_info.name, proc_info.pid)
                 except:
                     still_dead[proc_info.pid] = proc_info
         # remember any processes that refuse to be resurrected
@@ -848,8 +841,7 @@ def fatal_signal(signal_number, stack_frame):
     """We need to exit (SIGINT or SIGTERM received)."""
     global options
     global boss_of_bind
-    if options.verbose:
-        sys.stdout.write("[bind10] Received %s.\n" % get_signame(signal_number))
+    logger.info(BIND10_RECEIVED_SIGNAL, get_signame(signal_number))
     signal.signal(signal.SIGCHLD, signal.SIG_DFL)
     boss_of_bind.runnable = False
 
@@ -967,12 +959,11 @@ def main():
             pass
 
         if setuid is None:
-            sys.stderr.write("bind10: invalid user: '%s'\n" % options.user)
+            logger.fatal(BIND10_INVALID_USER, options.user)
             sys.exit(1)
 
     # Announce startup.
-    if options.verbose:
-        sys.stdout.write("%s\n" % VERSION)
+    logger.info(BIND10_STARTING, VERSION)
 
     # Create wakeup pipe for signal handlers
     wakeup_pipe = os.pipe()
@@ -994,9 +985,9 @@ def main():
                        setuid, username, options.cmdctl_port, options.brittle)
     startup_result = boss_of_bind.startup()
     if startup_result:
-        sys.stderr.write("[bind10] Error on startup: %s\n" % startup_result)
+        logger.fatal(BIND10_STARTUP_ERROR, startup_result)
         sys.exit(1)
-    sys.stdout.write("[bind10] BIND 10 started\n")
+    logger.info(BIND10_STARTUP_COMPLETE)
     dump_pid(options.pid_file)
 
     # In our main loop, we check for dead processes or messages 
@@ -1022,7 +1013,7 @@ def main():
             if err.args[0] == errno.EINTR:
                 (rlist, wlist, xlist) = ([], [], [])
             else:
-                sys.stderr.write("[bind10] Error with select(); %s\n" % err)
+                logger.fatal(BIND10_SELECT_ERROR, err)
                 break
 
         for fd in rlist + xlist:
@@ -1030,8 +1021,7 @@ def main():
                 try:
                     boss_of_bind.ccs.check_command()
                 except isc.cc.session.ProtocolError:
-                    if options.verbose:
-                        sys.stderr.write("[bind10] msgq channel disappeared.\n")
+                    logger.fatal(BIND10_MSGQ_DISAPPEARED)
                     break
             elif fd == wakeup_fd:
                 os.read(wakeup_fd, 32)
@@ -1039,7 +1029,6 @@ def main():
     # shutdown
     signal.signal(signal.SIGCHLD, signal.SIG_DFL)
     boss_of_bind.shutdown()
-    sys.stdout.write("[bind10] BIND 10 exiting\n");
     unlink_pid_file(options.pid_file)
     sys.exit(0)
 

+ 161 - 0
src/bin/bind10/bind10_messages.mes

@@ -0,0 +1,161 @@
+# Copyright (C) 2011  Internet Systems Consortium, Inc. ("ISC")
+#
+# Permission to use, copy, modify, and/or distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+# AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+# INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+# LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+# PERFORMANCE OF THIS SOFTWARE.
+
+# No namespace declaration - these constants go in the global namespace
+# of the xfrin messages python module.
+
+% BIND10_CHECK_MSGQ_ALREADY_RUNNING checking if msgq is already running
+The boss process is starting up and will now check if the message bus
+daemon is already running. If so, it will not be able to start, as it
+needs a dedicated message bus.
+
+% BIND10_CONFIGURATION_START_AUTH start authoritative server: %1
+This message shows whether or not the authoritative server should be
+started according to the configuration.
+
+% BIND10_CONFIGURATION_START_RESOLVER start resolver: %1
+This message shows whether or not the resolver should be
+started according to the configuration.
+
+% BIND10_INVALID_USER invalid user: '%1'
+The boss process was started with the -u option, to drop root priveleges
+and continue running as the specified user, but the user is unknown.
+
+% BIND10_KILL_PROCESS killing process %1
+The boss module is sending a kill signal to the given process, as part
+of the process of killing all started processes during a failed startup,
+as described for BIND10_KILLING_ALL_PROCESSES
+
+% BIND10_KILLING_ALL_PROCESSES killing all started processes
+The boss module was not able to start every process it needed to start
+during startup, and will now kill the processes that did get started.
+
+% BIND10_MSGQ_ALREADY_RUNNING msgq daemon already running, cannot start
+There already appears to me a message bus daemon running. Either an
+old process was not shut down correctly, and needs to be killed, or
+another instance of BIND10, with the same msgq domain socket, is
+running, which needs to be stopped.
+
+% BIND10_MSGQ_DAEMON_ENDED b10-msgq process died, shutting down
+The message bus daemon has died. This is a fatal error, since it may
+leave the system in an inconsistent state. BIND10 will now shut down
+
+% BIND10_MSGQ_DISAPPEARED msgq channel disappeared
+While listening on the message bus channel for messages, it suddenly
+disappeared. The msgq daemon may have died. This might lead to an
+inconsistend state of the system, and BIND 10 will now shut down.
+
+% BIND10_PROCESS_ENDED_NO_EXIT_STATUS Process %1 (PID %2) died: exit status not available
+The given process ended unexpectedly, but no exit status is
+available. Depending on which module it was, it may simply be
+restarted, or it may be a problem that will cause the boss module to
+shut down too. The latter happens if it was the message bus daemon,
+which, if it has died suddenly, may leave the system in an
+inconsistent state. BIND10 will also shut down now if it has been run
+with --brittle.
+
+% BIND10_PROCESS_ENDED_WITH_EXIT_STATUS Process %1 (PID %2) terminated, exit status = %3
+The given process ended unexpectedly with the given exit status.
+Depending on which module it was, it may simply be restarted, or it
+may be a problem that will cause the boss module to shut down too.
+The latter happens if it was the message bus daemon, which, if it has
+died suddenly, may leave the system in an inconsistent state. BIND10
+will also shut down now if it has been run with --brittle.
+
+% BIND10_READING_BOSS_CONFIGURATION reading boss configuration
+The boss process is starting up, and will now process the initial
+configuration, as received from the configuration manager.
+
+% BIND10_RECEIVED_COMMAND received command: %1
+The boss module received a command and shall now process it. The command
+is printed.
+
+% BIND10_RECEIVED_NEW_CONFIGURATION received new configuration: %1
+The boss module received a configuration update and is going to apply
+it now. The new configuration is printed.
+
+% BIND10_RECEIVED_SIGNAL received signal %1
+The boss module received the given signal.
+
+% BIND10_RESURRECTED_PROCESS resurrected %1 (PID %2)
+The given process has been restarted succesfully, and is now running
+with the given process id.
+
+% BIND10_RESURRECTING_PROCESS resurrecting dead %1 process...
+The given process has ended unexpectedly, and is now restarted.
+
+% BIND10_SELECT_ERROR error in select() call: %1
+There was a fatal error in the call to select(), used to see if a child
+process has ended or if there is a message on the message bus. This
+should not happen under normal circumstances and is considered fatal,
+so BIND 10 will now shut down. The specific error is printed.
+
+% BIND10_SEND_SIGKILL sending SIGKILL to %1 (PID %2)
+The boss module is sending a SIGKILL signal to the given process.
+
+% BIND10_SEND_SIGTERM sending SIGTERM to %1 (PID %2)
+The boss module is sending a SIGTERM signal to the given process.
+
+% BIND10_SHUTDOWN stopping the server
+The boss process received a command or signal telling it to shut down.
+It will send shutdown commands to each process. The processes that do
+not shut down will then receive a SIGTERM signal. If that doesn't work,
+it shall send SIGKILL signals to the processes still alive.
+
+% BIND10_SHUTDOWN_COMPLETE all processes ended, shutdown complete
+All child processes have been stopped, and the boss process will now
+stop itself.
+
+% BIND10_START_AS_NON_ROOT starting %1 as a user, not root. This might fail.
+The given module is being started or restarted without root privileges.
+If the module needs these privileges, it may have problems starting.
+Note that this issue should be resolved by the pending 'socket-creator'
+process; once that has been implemented, modules should not need root
+privileges anymore.
+
+% BIND10_STARTED_PROCESS started %1
+The given process has successfully been started
+
+% BIND10_STARTED_PROCESS_PID started %1 (PID %2)
+The given process has successfully been started, and has the given PID.
+
+% BIND10_STARTING starting BIND10: %1
+Informational message on startup that show the full version.
+
+% BIND10_STARTING_PROCESS starting process %1
+The boss module is starting the given process.
+
+% BIND10_STARTING_PROCESS_PORT starting process %1 (to listen on port %2)
+The boss module is starting the given process, which will listen on the
+given port number.
+
+% BIND10_STARTING_PROCESS_PORT_ADDRESS starting process %1 (to listen on %2:%3)
+The boss module is starting the given process, which will listen on the
+given address and port number.
+
+% BIND10_STARTUP_COMPLETE BIND 10 started
+All modules have been successfully started, and BIND 10 is now running.
+
+% BIND10_STARTUP_ERROR error during startup: %1
+There was a fatal error when BIND10 was trying to start. The error is
+shown, and BIND10 will now shut down.
+
+% BIND10_STOP_PROCESS asking %1 to shut down
+The boss module is sending a shutdown command to the given module over
+the message channel.
+
+% BIND10_UNKNOWN_CHILD_PROCESS_ENDED unknown child pid %1 exited
+An unknown child process has exited. The PID is printed, but no further
+action will be taken by the boss process.
+