12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192 |
- #!@PYTHON@
- # Copyright (C) 2010,2011 Internet Systems Consortium.
- #
- # Permission to use, copy, modify, and distribute this software for any
- # purpose with or without fee is hereby granted, provided that the above
- # copyright notice and this permission notice appear in all copies.
- #
- # THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SYSTEMS CONSORTIUM
- # DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL
- # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
- # INTERNET SYSTEMS CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT,
- # INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
- # FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
- # NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
- # WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- """
- This file implements the Boss of Bind (BoB, or bob) program.
- Its purpose is to start up the BIND 10 system, and then manage the
- processes, by starting and stopping processes, plus restarting
- processes that exit.
- To start the system, it first runs the c-channel program (msgq), then
- connects to that. It then runs the configuration manager, and reads
- its own configuration. Then it proceeds to starting other modules.
- The Python subprocess module is used for starting processes, but
- because this is not efficient for managing groups of processes,
- SIGCHLD signals are caught and processed using the signal module.
- Most of the logic is contained in the BoB class. However, since Python
- requires that signal processing happen in the main thread, we do
- signal handling outside of that class, in the code running for
- __main__.
- """
- import sys; sys.path.append ('@@PYTHONPATH@@')
- import os
# Decide where the boss specification file lives.  When B10_FROM_SOURCE is
# present in the environment the file is taken from that source tree and
# child processes are run with an unmodified PATH; otherwise the path
# substituted in at build time is used and the libexec directory has to be
# put on PATH for child processes.
ADD_LIBEXEC_PATH = "B10_FROM_SOURCE" not in os.environ
if ADD_LIBEXEC_PATH:
    PREFIX = "@prefix@"
    DATAROOTDIR = "@datarootdir@"
    SPECFILE_LOCATION = "@datadir@/@PACKAGE@/bob.spec"
    # The substituted path may itself still contain these two shell-style
    # references; expand them in this order.
    SPECFILE_LOCATION = SPECFILE_LOCATION.replace("${datarootdir}",
                                                  DATAROOTDIR)
    SPECFILE_LOCATION = SPECFILE_LOCATION.replace("${prefix}", PREFIX)
else:
    SPECFILE_LOCATION = (os.environ["B10_FROM_SOURCE"] +
                         "/src/bin/bind10/bob.spec")
-
- import subprocess
- import signal
- import re
- import errno
- import time
- import select
- import random
- import socket
- from optparse import OptionParser, OptionValueError
- import io
- import pwd
- import posix
- import copy
- import isc.cc
- import isc.util.process
- import isc.net.parse
- import isc.log
- from isc.log_messages.bind10_messages import *
- import isc.bind10.component
- import isc.bind10.special_component
# Version shown to the user: module name, module version number and the
# overall BIND 10 version number (set in configure.ac).
VERSION = "bind10 20110223 (BIND 10 @PACKAGE_VERSION@)"

# Logging comes up before anything else that may want to report something.
isc.log.init("b10-boss")
logger = isc.log.Logger("boss")

# Debug levels used by this module.  They are hardcoded for now, pending
# system-wide debug level definitions.
DBG_PROCESS = logger.DBGLVL_TRACE_BASIC
DBG_COMMANDS = logger.DBGLVL_TRACE_DETAIL

# Assign this process some longer name.
isc.util.process.rename(sys.argv[0])

# Reference time reported as the boot_time statistic of the Boss.
_BASETIME = time.gmtime()
class RestartSchedule:
    """
    Keeps state when restarting something (in this case, a process).

    When a process dies unexpectedly, we need to restart it. However, if
    it fails to restart for some reason, then we should not simply keep
    restarting it at high speed.

    A more sophisticated algorithm can be developed, but for now we choose
    a simple set of rules:

      * If a process has been running for >=10 seconds, we restart it
        right away.
      * If a process was running for <10 seconds, we wait until 10 seconds
        after it was started.

    To avoid programs getting into lockstep, we use a normal distribution
    to avoid being restarted at exactly 10 seconds.
    """

    def __init__(self, restart_frequency=10.0):
        """Create a schedule; restart_frequency is the nominal number of
        seconds between a start and the earliest following restart."""
        self.restart_frequency = restart_frequency
        self.run_start_time = None
        self.run_stop_time = None
        self.restart_time = None

    def set_run_start_time(self, when=None):
        """Record that the process started at 'when' (default: now) and
        compute the earliest time at which it may be restarted."""
        if when is None:
            when = time.time()
        self.run_start_time = when
        # Jitter the delay (standard deviation: 5% of the frequency) so
        # several processes do not restart in lockstep.
        sigma = self.restart_frequency * 0.05
        self.restart_time = when + random.normalvariate(self.restart_frequency,
                                                        sigma)

    def set_run_stop_time(self, when=None):
        """We don't actually do anything with stop time now, but it
        might be useful for future algorithms."""
        if when is None:
            when = time.time()
        self.run_stop_time = when

    def get_restart_time(self, when=None):
        """Return the time at which the process should be restarted.

        This is 'when' (default: now) or the scheduled restart time,
        whichever is later.  If no start time has been recorded yet there
        is nothing to wait for, so 'when' itself is returned.
        """
        if when is None:
            when = time.time()
        if self.restart_time is None:
            # set_run_start_time() was never called; comparing a number
            # with None would raise TypeError.
            return when
        return max(when, self.restart_time)
class ProcessInfoError(Exception):
    """Error raised by ProcessInfo, e.g. when it cannot switch to the
    requested user before running a program."""
class ProcessInfo:
    """Information about a process"""

    # Shared sink for children whose stdout/stderr should be discarded.
    dev_null = open(os.devnull, "w")

    def __init__(self, name, args, env=None, dev_null_stdout=False,
                 dev_null_stderr=False, uid=None, username=None):
        """
        Describe a process; nothing is started until spawn() is called.

        name is the process name, args the argument list handed to
        subprocess.Popen, env a dictionary of extra environment variables
        for the child (default: none).  dev_null_stdout/dev_null_stderr
        redirect the respective stream to the null device.  uid, if not
        None, is the user ID the child switches to; username is only used
        in the error message when that switch is not permitted.
        """
        self.name = name
        self.args = args
        # A fresh dictionary per instance: a mutable default argument
        # would be shared by every ProcessInfo created without 'env'.
        self.env = {} if env is None else env
        self.dev_null_stdout = dev_null_stdout
        self.dev_null_stderr = dev_null_stderr
        self.restart_schedule = RestartSchedule()
        self.uid = uid
        self.username = username
        self.process = None
        self.pid = None

    def _preexec_work(self):
        """Function used before running a program that needs to run as a
        different user.

        Raises ProcessInfoError if changing the user is not permitted; any
        other OSError from setuid() is passed on unchanged.
        """
        # First, put us into a separate process group so we don't get
        # SIGINT signals on Ctrl-C (the boss will shut everything down by
        # other means).
        os.setpgrp()
        # Second, set the user ID if one has been specified
        if self.uid is not None:
            try:
                posix.setuid(self.uid)
            except OSError as e:
                if e.errno == errno.EPERM:
                    # if we failed to change user due to permission report that
                    raise ProcessInfoError("Unable to change to user %s (uid %d)" % (self.username, self.uid))
                else:
                    # otherwise simply re-raise whatever error we found
                    raise

    def _spawn(self):
        """Start the process and record its Popen object, its PID and its
        start time."""
        if self.dev_null_stdout:
            spawn_stdout = self.dev_null
        else:
            spawn_stdout = None
        if self.dev_null_stderr:
            spawn_stderr = self.dev_null
        else:
            spawn_stderr = None
        # Environment variables for the child process will be a copy of those
        # of the boss process with any additional specific variables given
        # on construction (self.env).  The copy must be a plain dict: a
        # (deep) copy of os.environ is still an os.environ-like mapping,
        # and assigning to it would change the boss's own environment.
        spawn_env = dict(os.environ)
        spawn_env.update(self.env)
        if ADD_LIBEXEC_PATH:
            inherited_path = spawn_env.get('PATH')
            if inherited_path:
                spawn_env['PATH'] = "@@LIBEXECDIR@@:" + inherited_path
            else:
                # PATH unset or empty: do not fail, and do not leave a
                # trailing ':' (which would mean "current directory").
                spawn_env['PATH'] = "@@LIBEXECDIR@@"
        self.process = subprocess.Popen(self.args,
                                        stdin=subprocess.PIPE,
                                        stdout=spawn_stdout,
                                        stderr=spawn_stderr,
                                        close_fds=True,
                                        env=spawn_env,
                                        preexec_fn=self._preexec_work)
        self.pid = self.process.pid
        self.restart_schedule.set_run_start_time()

    # spawn() and respawn() are the same for now, but in the future they
    # may have different functionality
    def spawn(self):
        """Start the process for the first time."""
        self._spawn()

    def respawn(self):
        """Start the process again."""
        self._spawn()
class CChannelConnectError(Exception):
    """Raised when the boss cannot connect to the command channel."""
class ProcessStartError(Exception):
    """Raised when a process the boss depends on has not started."""
- class BoB:
- """Boss of BIND class."""
-
def __init__(self, msgq_socket_file=None, data_path=None,
             config_filename=None, nocache=False, verbose=False, setuid=None,
             username=None, cmdctl_port=None, brittle=False, wait_time=10):
    """
    Initialize the Boss of BIND. This is a singleton (only one can run).

    msgq_socket_file is the UNIX domain socket file the msgq process
    listens on.  data_path and config_filename, if provided, are passed
    through to the config manager and specify the config file to be used.
    nocache is stored for use when starting the authoritative server.
    verbose makes the boss report what it is doing (full debug logging).
    setuid/username give the user that processes should run as.
    cmdctl_port is passed to cmdctl and specifies the port it listens on.
    brittle is a debug option that controls whether the Boss shuts down
    after any process dies.  wait_time is the amount of time (in seconds)
    the Boss waits for selected processes to initialize before continuing
    with the initialization; currently this is only the configuration
    manager.
    """
    # Settings taken from the arguments.
    self.msgq_socket_file = msgq_socket_file
    self.data_path = data_path
    self.config_filename = config_filename
    self.nocache = nocache
    self.verbose = verbose
    self.uid = setuid
    self.username = username
    self.cmdctl_port = cmdctl_port
    self.brittle = brittle
    self.wait_time = wait_time

    # Sessions; neither exists before start-up.
    self.cc_session = None
    self.ccs = None

    # What should be started, and what has been started.
    self.cfg_start_auth = True
    self.cfg_start_resolver = False
    self.cfg_start_dhcp6 = False
    self.cfg_start_dhcp4 = False
    self.started_auth_family = False
    self.started_resolver_family = False

    # Bookkeeping about processes and components.
    self.curproc = None
    # XXX: Not used now, waits for reintroduction of restarts.
    self.dead_processes = {}
    self.component_config = {}
    self.processes = {}
    self.runnable = False
    self.sockcreator = None

    self._component_configurator = isc.bind10.component.Configurator(self,
        isc.bind10.special_component.get_specials())

    # The priorities here make the core components start in the correct
    # order: first the socket creator (which would drop root privileges
    # by then), then the message queue and after that the config manager
    # (which uses the message queue).
    self.__core_components = {
        special: {'kind': 'core', 'special': special, 'priority': priority}
        for special, priority in (('sockcreator', 200),
                                  ('msgq', 199),
                                  ('cfgmgr', 198))
    }
    self.__started = False
    self.exitcode = 0

    # If -v was set, enable full debug logging.
    if self.verbose:
        logger.set_severity("DEBUG", 99)
def __propagate_component_config(self, config):
    """
    Hand a component configuration to the component configurator.

    The core components are added to a copy of 'config' (which itself is
    left untouched) so that they stay alive.  An Exception is raised if
    'config' tries to define one of the core components.
    """
    merged = dict(config)
    for core_name, core_spec in self.__core_components.items():
        if core_name in merged:
            raise Exception(core_name + " is core component managed by " +
                            "bind10 boss, do not set it")
        merged[core_name] = core_spec
    # Update the configuration
    self._component_configurator.reconfigure(merged)
def config_handler(self, new_config):
    """
    React to a new boss configuration.

    Only the 'start_resolver' and 'start_auth' items are looked at, in
    that order.  A true value starts the corresponding family of
    components unless it is already marked as started; a false value
    stops it.  Returns a success answer, or None if the boss is not
    runnable yet.
    """
    # If this is initial update, don't do anything now, leave it to startup
    if not self.runnable:
        return

    # The components making up each family that can be switched on or off.
    families = (
        ('resolver', {
            'b10-resolver': {'kind': 'needed', 'special': 'resolver'},
        }),
        ('auth', {
            'b10-auth': {'kind': 'needed', 'special': 'auth'},
            'b10-xfrout': {'kind': 'dispensable', 'address': 'Xfrout'},
            'b10-xfrin': {'kind': 'dispensable', 'special': 'xfrin'},
            'b10-zonemgr': {'kind': 'dispensable', 'address': 'Zonemgr'},
        }),
    )

    def switch(family, components, enable):
        # Add or remove the family's components, push the result to the
        # configurator and only then record the new state.
        if enable:
            self.component_config.update(components)
        else:
            for component_name in components:
                self.component_config.pop(component_name, None)
        self.__propagate_component_config(self.component_config)
        setattr(self, 'started_' + family + '_family', enable)

    # The real code of the config handler function follows here
    logger.debug(DBG_COMMANDS, BIND10_RECEIVED_NEW_CONFIGURATION,
                 new_config)
    for family, components in families:
        option = 'start_' + family
        if option not in new_config:
            continue
        if not new_config[option]:
            switch(family, components, False)
        elif not getattr(self, 'started_' + family + '_family'):
            if self.uid is not None:
                logger.info(BIND10_START_AS_NON_ROOT, family)
            switch(family, components, True)
    return isc.config.ccsession.create_answer(0)
def get_processes(self):
    """Return a list of [pid, name] pairs, ordered by PID, for the
    entries in self.processes."""
    return [[pid, self.processes[pid].name()]
            for pid in sorted(self.processes.keys())]
def _get_stats_data(self):
    """Return the statistics of the Boss: a dictionary naming the owner
    and carrying the boot time formatted as an ISO 8601 UTC timestamp."""
    boot_time = time.strftime('%Y-%m-%dT%H:%M:%SZ', _BASETIME)
    return {"owner": "Boss", "data": {'boot_time': boot_time}}
def command_handler(self, command, args):
    """
    Handle a command sent to the boss and return the answer to it.

    Known commands are "shutdown", "getstats", "sendstats", "ping" and
    "show_processes"; 'args' is not used by any of them.  A command that
    is not a string is answered with "bad command", an unknown one with
    "Unknown command" (both with result code 1).
    """
    logger.debug(DBG_COMMANDS, BIND10_RECEIVED_COMMAND, command)
    create_answer = isc.config.ccsession.create_answer

    if not isinstance(command, str):
        return create_answer(1, "bad command")

    if command == "shutdown":
        self.runnable = False
        return create_answer(0)

    if command == "getstats":
        return create_answer(0, self._get_stats_data())

    if command == "sendstats":
        # send statistics data to the stats daemon immediately
        stats_data = self._get_stats_data()
        valid = self.ccs.get_module_spec().validate_statistics(
            True, stats_data["data"])
        if not valid:
            logger.fatal(BIND10_INVALID_STATISTICS_DATA)
            return create_answer(1, "specified statistics data is invalid")
        cmd = isc.config.ccsession.create_command('set', stats_data)
        seq = self.cc_session.group_sendmsg(cmd, 'Stats')
        # Consume the answer, in case it becomes a orphan message.
        try:
            self.cc_session.group_recvmsg(False, seq)
        except isc.cc.session.SessionTimeout:
            pass
        return create_answer(0)

    if command == "ping":
        return create_answer(0, "pong")

    if command == "show_processes":
        return create_answer(0, self.get_processes())

    return create_answer(1, "Unknown command")
def kill_started_processes(self):
    """
    Called as part of the exception handling when a process fails to
    start.  Stops the socket creator, then forcefully kills every
    registered process and finally clears the list of processes.
    """
    logger.info(BIND10_KILLING_ALL_PROCESSES)
    self.stop_creator(True)
    for component in self.processes.values():
        logger.info(BIND10_KILL_PROCESS, component.name())
        component.kill(True)
    self.processes = {}
def read_bind10_config(self):
    """
    Reads the parameters associated with the BoB module itself.

    At present these are the components to start although arguably this
    information should be in the configuration for the appropriate
    module itself. (However, this would cause difficulty in the case of
    xfrin/xfrout and zone manager as we don't need to start those if we
    are not running the authoritative server.)

    The values read are stored in self.cfg_start_auth and
    self.cfg_start_resolver and then logged.
    """
    logger.info(BIND10_READING_BOSS_CONFIGURATION)

    settings = self.ccs.get_full_config()
    self.cfg_start_auth = settings.get("start_auth")
    self.cfg_start_resolver = settings.get("start_resolver")

    logger.info(BIND10_CONFIGURATION_START_AUTH, self.cfg_start_auth)
    logger.info(BIND10_CONFIGURATION_START_RESOLVER,
                self.cfg_start_resolver)
def log_starting(self, process, port = None, address = None):
    """
    A convenience function to log a "starting process" message.
    Putting this into a separate method ensures that the output form is
    consistent across all processes.

    The process name (passed as the first argument) is put into
    self.curproc, and is used to indicate which process failed to
    start if there is an error (and is used in the "Started" message
    on success).  The optional port and address are included in the
    message if present; the port on its own is only reported when no
    address is given.
    """
    self.curproc = process
    if address is not None:
        logger.info(BIND10_STARTING_PROCESS_PORT_ADDRESS,
                    self.curproc, address, port)
    elif port is not None:
        logger.info(BIND10_STARTING_PROCESS_PORT, self.curproc,
                    port)
    else:
        logger.info(BIND10_STARTING_PROCESS, self.curproc)
def log_started(self, pid = None):
    """
    A convenience function to log, at debug level DBG_PROCESS, that the
    process named in self.curproc has started; the PID is included when
    one is given.  As with log_starting(), this ensures a consistent
    format.
    """
    if pid is not None:
        logger.debug(DBG_PROCESS, BIND10_STARTED_PROCESS_PID, self.curproc, pid)
        return
    logger.debug(DBG_PROCESS, BIND10_STARTED_PROCESS, self.curproc)
def process_running(self, msg, who):
    """
    Some processes return a message to the Boss after they have
    started to indicate that they are running.  The form of the
    message is a dictionary with contents {"running": "<process>"}.

    This method checks the passed message and returns True if the
    "who" process is named in the message (so is presumably
    running).  It returns False for all other conditions: silently
    when there is no message (None), and after logging an error when
    the message has no "running" item or names another process.
    """
    if msg is None:
        return False
    try:
        reported = msg["running"]
    except (KeyError, IndexError, TypeError):
        # Not the kind of message we are waiting for.  Only the lookup is
        # guarded, so that e.g. KeyboardInterrupt is not swallowed here.
        logger.error(BIND10_STARTUP_UNRECOGNISED_MESSAGE, msg)
        return False
    if reported == who:
        return True
    logger.error(BIND10_STARTUP_UNEXPECTED_MESSAGE, msg)
    return False
- # The next few methods start the individual processes of BIND-10. They
- # are called via start_all_processes(). If any fail, an exception is
- # raised which is caught by the caller of start_all_processes(); this kills
- # processes started up to that point before terminating the program.
def start_msgq(self):
    """
    Start the message queue and connect to the command channel.

    Returns the ProcessInfo of the message queue.  Raises
    CChannelConnectError if no connection could be made within 5
    seconds.
    """
    self.log_starting("b10-msgq")
    msgq_proc = ProcessInfo("b10-msgq", ["b10-msgq"], self.c_channel_env,
                            dev_null_stdout=True,
                            dev_null_stderr=not self.verbose,
                            uid=self.uid, username=self.username)
    msgq_proc.spawn()
    self.log_started(msgq_proc.pid)

    # Now connect to the c-channel, retrying every 0.1 seconds as the
    # message queue may need a moment before it accepts connections.
    began = time.time()
    while self.cc_session is None:
        # if we have been trying for "a while" give up
        if (time.time() - began) > 5:
            raise CChannelConnectError("Unable to connect to c-channel after 5 seconds")
        try:
            self.cc_session = isc.cc.Session(self.msgq_socket_file)
        except isc.cc.session.SessionError:
            time.sleep(0.1)

    # Subscribe to the message queue.  The only messages we expect to
    # receive on this channel are ones relating to process startup.
    self.cc_session.group_subscribe("Boss")
    return msgq_proc
def start_cfgmgr(self):
    """
    Starts the configuration manager process and waits until it reports
    that it is running.

    Returns the ProcessInfo of the configuration manager.  Raises
    ProcessStartError if it has not reported in after self.wait_time
    one-second waits.
    """
    self.log_starting("b10-cfgmgr")
    args = ["b10-cfgmgr"]
    for option, value in (("--data-path=", self.data_path),
                          ("--config-filename=", self.config_filename)):
        if value is not None:
            args.append(option + value)
    bind_cfgd = ProcessInfo("b10-cfgmgr", args, self.c_channel_env,
                            uid=self.uid, username=self.username)
    bind_cfgd.spawn()
    self.log_started(bind_cfgd.pid)

    # Wait for the configuration manager to start up as subsequent
    # initialization cannot proceed without it.  The time to wait can be
    # set on the command line.
    waits_left = self.wait_time
    msg, _ = self.cc_session.group_recvmsg()
    while waits_left > 0 and not self.process_running(msg, "ConfigManager"):
        logger.debug(DBG_PROCESS, BIND10_WAIT_CFGMGR)
        time.sleep(1)
        waits_left -= 1
        msg, _ = self.cc_session.group_recvmsg()

    if not self.process_running(msg, "ConfigManager"):
        raise ProcessStartError("Configuration manager process has not started")
    return bind_cfgd
def start_ccsession(self, c_channel_env):
    """
    Start the CC Session and keep it in self.ccs.

    The argument c_channel_env is unused but is supplied to keep the
    argument list the same for all start_xxx methods.

    With regards to logging, note that as the CC session is not a
    process, the log_starting/log_started methods are not used.
    """
    logger.info(BIND10_STARTING_CC)
    session = isc.config.ModuleCCSession(SPECFILE_LOCATION,
                                         self.config_handler,
                                         self.command_handler,
                                         socket_file=self.msgq_socket_file)
    self.ccs = session
    self.ccs.start()
    logger.debug(DBG_PROCESS, BIND10_STARTED_CC)
- # A couple of utility methods for starting processes...
def start_process(self, name, args, c_channel_env, port=None, address=None):
    """
    Given a set of command arguments, start the process and output
    appropriate log messages.  The ProcessInfo of the new process is
    returned.

    The port and address arguments are for log messages only.
    """
    self.log_starting(name, port, address)
    process_info = ProcessInfo(name, args, c_channel_env)
    process_info.spawn()
    self.log_started(process_info.pid)
    return process_info
def register_process(self, pid, component):
    """
    Put another process into boss to watch over it: 'component' is
    stored in self.processes under the key 'pid', replacing any entry
    already stored for that PID.
    """
    self.processes[pid] = component
def start_simple(self, name):
    """
    Most of the BIND-10 processes are started with the command:

        <process-name> [-v]

    ... where -v is appended if verbose is enabled.  This method
    generates the arguments from the name, starts the process with the
    command channel environment and returns what start_process returns.
    """
    args = [name, '-v'] if self.verbose else [name]
    return self.start_process(name, args, self.c_channel_env)
- # The next few methods start up the rest of the BIND-10 processes.
- # Although many of these methods are little more than a call to
- # start_simple, they are retained (a) for testing reasons and (b) as a place
- # where modifications can be made if the process start-up sequence changes
- # for a given process.
def start_auth(self):
    """
    Start the Authoritative server.

    The command line gets '-n' if nocache is set, '-u <uid>' if a
    (non-zero) uid is set and '-v' if verbose is set, in that order.
    """
    authargs = ['b10-auth']
    if self.nocache:
        authargs.append('-n')
    if self.uid:
        authargs.extend(['-u', str(self.uid)])
    if self.verbose:
        authargs.append('-v')
    # ... and start
    return self.start_process("b10-auth", authargs, self.c_channel_env)
def start_resolver(self):
    """
    Start the Resolver.  At present, all these arguments and switches
    are pure speculation.  As with the auth daemon, they should be
    read from the configuration database.

    The command line gets '-u <uid>' if a (non-zero) uid is set and '-v'
    if verbose is set, in that order.
    """
    self.curproc = "b10-resolver"
    # XXX: this must be read from the configuration manager in the future
    resargs = ['b10-resolver']
    if self.uid:
        resargs.extend(['-u', str(self.uid)])
    if self.verbose:
        resargs.append('-v')
    # ... and start
    return self.start_process("b10-resolver", resargs, self.c_channel_env)
def start_cmdctl(self):
    """
    Starts the command control process.  If a port was configured it is
    passed on the command line as '--port=<port>'; the port is also
    handed to start_process for the log message.
    """
    port = self.cmdctl_port
    args = ["b10-cmdctl"]
    if port is not None:
        args.append("--port=" + str(port))
    return self.start_process("b10-cmdctl", args, self.c_channel_env,
                              port)
def start_xfrin(self):
    """
    Start b10-xfrin with a copy of the command channel environment that,
    for installed runs, also points the dynamic loader at the library
    directory.
    """
    # XXX: a quick-hack workaround.  xfrin will implicitly use dynamically
    # loadable data source modules, which will be installed in $(libdir).
    # On some OSes (including MacOS X and *BSDs) the main process (python)
    # cannot find the modules unless they are located in a common shared
    # object path or a path in the (DY)LD_LIBRARY_PATH.  We should seek
    # a cleaner solution, but for a short term workaround we specify the
    # path here, unconditionally, and without even bothering which
    # environment variable should be used.
    #
    # We reuse the ADD_LIBEXEC_PATH variable to see whether we need to
    # do this, as the conditions that make this workaround needed are
    # the same as for the libexec path addition
    # TODO: Once #1292 is finished, remove this method and the special
    # component, use it as normal component.
    c_channel_env = dict(self.c_channel_env)
    if ADD_LIBEXEC_PATH:
        for variable in ('DYLD_LIBRARY_PATH', 'LD_LIBRARY_PATH'):
            inherited = os.getenv(variable)
            suffix = '' if inherited is None else ':' + inherited
            c_channel_env[variable] = "@@LIBDIR@@" + suffix

    # Set up the command arguments.
    args = ['b10-xfrin', '-v'] if self.verbose else ['b10-xfrin']
    return self.start_process("b10-xfrin", args, c_channel_env)
def start_all_processes(self):
    """
    Starts up all the processes.  Any exception generated during the
    starting of the processes is handled by the caller.

    The component configuration is built up step by step and handed to
    the configurator after each step, so the components come up in a
    defined order.  The final configuration is kept in
    self.component_config.
    """
    # Start the real core (sockcreator, msgq, cfgmgr)
    self._component_configurator.startup(self.__core_components)

    # Connect to the msgq.  This is not a process, so it's not handled
    # inside the configurator.
    self.start_ccsession(self.c_channel_env)

    # Extract the parameters associated with Bob.  This can only be
    # done after the CC Session is started.  Note that the logging
    # configuration may override the "-v" switch set on the command line.
    self.read_bind10_config()

    component_config = {}

    # Continue starting the processes.  The authoritative server (if
    # selected):
    if self.cfg_start_auth:
        component_config.update({
            'b10-auth': {'kind': 'needed', 'special': 'auth'},
        })
        self.__propagate_component_config(component_config)

    # ... and resolver (if selected):
    if self.cfg_start_resolver:
        component_config.update({
            'b10-resolver': {'kind': 'needed', 'special': 'resolver'},
        })
        self.started_resolver_family = True
        self.__propagate_component_config(component_config)

    # Everything after the main components can run as non-root.
    # TODO: this is only temporary - once the privileged socket creator is
    # fully working, nothing else will run as root.
    if self.uid is not None:
        posix.setuid(self.uid)

    # xfrin/xfrout and the zone manager are only meaningful if the
    # authoritative server has been started.
    if self.cfg_start_auth:
        component_config.update({
            'b10-xfrout': {'kind': 'dispensable', 'address': 'Xfrout'},
            'b10-xfrin': {'kind': 'dispensable', 'special': 'xfrin'},
            'b10-zonemgr': {'kind': 'dispensable', 'address': 'Zonemgr'},
        })
        self.__propagate_component_config(component_config)
        self.started_auth_family = True

    # ... and finally start the remaining processes
    component_config.update({
        'b10-stats': {'kind': 'dispensable', 'address': 'Stats'},
        'b10-stats-httpd': {'kind': 'dispensable',
                            'address': 'StatsHttpd'},
        'b10-cmdctl': {'kind': 'needed', 'special': 'cmdctl'},
    })
    if self.cfg_start_dhcp6:
        component_config.update({
            'b10-dhcp6': {'kind': 'dispensable', 'address': 'DHCP6'},
        })
    self.__propagate_component_config(component_config)

    self.component_config = component_config
def startup(self):
    """
    Start the BoB instance.

    Returns None if successful, otherwise a string describing the
    problem.
    """
    c_channel_env = {}
    if self.msgq_socket_file is not None:
        c_channel_env["BIND10_MSGQ_SOCKET_FILE"] = self.msgq_socket_file

    # Try to connect to the c-channel daemon, to see if it is already
    # running.  Being able to connect is the error case here.
    logger.debug(DBG_PROCESS, BIND10_CHECK_MSGQ_ALREADY_RUNNING)
    try:
        self.cc_session = isc.cc.Session(self.msgq_socket_file)
        logger.fatal(BIND10_MSGQ_ALREADY_RUNNING)
        return "b10-msgq already running, or socket file not cleaned , cannot start"
    except isc.cc.session.SessionError:
        # this is the case we want, where the msgq is not running
        pass

    # Start all processes.  If any one fails to start, kill all started
    # processes and exit with an error indication.
    try:
        self.c_channel_env = c_channel_env
        self.start_all_processes()
    except Exception as e:
        self.kill_started_processes()
        # self.curproc is only set once log_starting() has been called;
        # a failure before that must not turn into a TypeError here and
        # hide the real problem.
        if self.curproc is None:
            failed = "unknown component"
        else:
            failed = str(self.curproc)
        return "Unable to start " + failed + ": " + str(e)

    # Started successfully
    self.runnable = True
    self.__started = True
    return None
def stop_process(self, process, recipient):
    """
    Stop the given process, friendly-like, by sending it a 'shutdown'
    command over the command channel.  The process is the name it has
    (in logs, etc), the recipient is the address on msgq.
    """
    logger.info(BIND10_STOP_PROCESS, process)
    shutdown_command = {'command': ['shutdown']}
    self.cc_session.group_sendmsg(shutdown_command, recipient, recipient)
def component_shutdown(self, exitcode=0):
    """
    Stop the Boss instance from a components' request.  The exitcode
    indicates the desired exit code.

    If we did not start yet, it raises an exception, which is meant
    to propagate through the component and configurator to the startup
    routine and abort the startup immediately.  If it is started up
    already, we just mark it so we terminate soon.

    It does set the exit code in both cases.
    """
    self.exitcode = exitcode
    if self.__started:
        self.runnable = False
        return
    raise Exception("Component failed during startup")
- # Series of stop_process wrappers
def stop_resolver(self):
    """Ask 'b10-resolver' (msgq address 'Resolver') to shut down."""
    name, address = 'b10-resolver', 'Resolver'
    self.stop_process(name, address)
def stop_auth(self):
    """Ask 'b10-auth' (msgq address 'Auth') to shut down."""
    name, address = 'b10-auth', 'Auth'
    self.stop_process(name, address)
def stop_xfrout(self):
    """Ask 'b10-xfrout' (msgq address 'Xfrout') to shut down."""
    name, address = 'b10-xfrout', 'Xfrout'
    self.stop_process(name, address)
def stop_xfrin(self):
    """Ask 'b10-xfrin' (msgq address 'Xfrin') to shut down."""
    name, address = 'b10-xfrin', 'Xfrin'
    self.stop_process(name, address)
def stop_zonemgr(self):
    """Ask 'b10-zonemgr' (msgq address 'Zonemgr') to shut down."""
    name, address = 'b10-zonemgr', 'Zonemgr'
    self.stop_process(name, address)
def shutdown(self):
    """Stop the BoB instance.

    Escalates in three stages: ask the component configurator to shut
    everything down, then call kill() on every component still tracked
    in self.processes (logged as SIGTERM), then repeatedly call
    kill(True) (logged as SIGKILL) until self.processes is empty.
    """

    def signal_remaining(log_id, forceful):
        # Work on a snapshot of the components currently tracked.
        for comp in list(self.processes.values()):
            logger.info(log_id, comp.name(), comp.pid())
            try:
                if forceful:
                    comp.kill(True)
                else:
                    comp.kill()
            except OSError:
                # ignore these (usually ESRCH because the child
                # finally exited)
                pass

    logger.info(BIND10_SHUTDOWN)

    # Stage 1: the BIND 10 way; best effort, any failure is ignored.
    try:
        self._component_configurator.shutdown()
    except:
        pass

    # XXX: some delay probably useful... how much is uncertain.
    # The delay was raised from 0.5 to 1, but sometimes that is
    # still not enough.
    time.sleep(1)
    self.reap_children()

    # Stage 2: SIGTERM for whatever is left.
    signal_remaining(BIND10_SEND_SIGTERM, False)

    # Stage 3: SIGKILL (unmaskable termination) until everybody dies.
    while self.processes:
        # XXX: some delay probably useful... how much is uncertain
        time.sleep(0.1)
        self.reap_children()
        signal_remaining(BIND10_SEND_SIGKILL, True)

    logger.info(BIND10_SHUTDOWN_COMPLETE)
def _get_process_exit_status(self):
    """Poll, without blocking, for the exit status of any child process.

    Returns whatever os.waitpid(-1, os.WNOHANG) returns.
    """
    any_child = -1
    return os.waitpid(any_child, os.WNOHANG)
def reap_children(self):
    """Collect exited child processes and note them for later handling.

    Keeps polling until no more exited children are reported.  Known
    components are removed from self.processes and told they failed,
    unknown pids are only logged.
    """
    while True:
        try:
            child_pid, status = self._get_process_exit_status()
        except OSError as err:
            # XXX: should be impossible to get any error but ECHILD here
            if err.errno != errno.ECHILD:
                raise
            break
        if child_pid == 0:
            break
        if child_pid not in self.processes:
            logger.info(BIND10_UNKNOWN_CHILD_PROCESS_ENDED, child_pid)
            continue
        # One of the processes we know about.
        component = self.processes.pop(child_pid)
        # Report the failure only if it matters: we are not shutting
        # down and the component considers itself to be running.
        if component.running() and self.runnable:
            component.failed(status)
def restart_processes(self):
    """
    Restart any dead processes whose restart time has come.

    Return value:
      * 0 if the server is shutting down (nothing is restarted).
      * The earliest restart time among processes that are not yet due.
      * None if no process is waiting for a later restart time.
    The value is meant to be usable for computing a select() timeout.
    """
    # if we're shutting down, then don't restart
    if not self.runnable:
        return 0

    earliest = None
    remaining = {}
    now = time.time()
    for info in self.dead_processes.values():
        due = info.restart_schedule.get_restart_time(now)
        if due > now:
            # Too early for this one; keep it and track the soonest time.
            if (earliest is None) or (earliest > due):
                earliest = due
            remaining[info.pid] = info
            continue
        logger.info(BIND10_RESURRECTING_PROCESS, info.name)
        try:
            info.respawn()
            self.processes[info.pid] = info
            logger.info(BIND10_RESURRECTED_PROCESS, info.name, info.pid)
        except:
            # It refused to be resurrected; keep it on the dead list.
            remaining[info.pid] = info

    self.dead_processes = remaining
    return earliest
# Module-level state shared with the signal handlers; both names are
# assigned for real in main().
options = None
boss_of_bind = None
def reaper(signal_number, stack_frame):
    """Handle SIGCHLD (a child process has died) by doing nothing.

    No work is needed here: main() registers a wakeup file descriptor
    with the signal module, so the arrival of the signal already wakes
    up the select() call in the main loop.
    """
    return None
def get_signame(signal_number):
    """Return the symbolic name for a signal.

    Scans the attributes of the signal module whose names look like
    "SIGxxx" (skipping names such as SIG_DFL) and returns the first one
    whose value equals signal_number.  If none matches, a string of the
    form "Unknown signal N" is returned.
    """
    candidates = (name for name in dir(signal)
                  if name.startswith("SIG") and name[3].isalnum())
    for name in candidates:
        if getattr(signal, name) == signal_number:
            return name
    return "Unknown signal %d" % signal_number
- # XXX: perhaps register atexit() function and invoke that instead
def fatal_signal(signal_number, stack_frame):
    """Handle SIGINT or SIGTERM: we need to exit.

    Logs the signal, restores the default SIGCHLD disposition and marks
    the global boss instance as no longer runnable.
    """
    signame = get_signame(signal_number)
    logger.info(BIND10_RECEIVED_SIGNAL, signame)
    signal.signal(signal.SIGCHLD, signal.SIG_DFL)
    # Only an attribute of the global object is changed, so no `global`
    # declaration is required.
    boss_of_bind.runnable = False
def process_rename(option, opt_str, value, parser):
    """Option-parser callback: rename the process to the option's value.

    Only `value` is used; the other parameters are there to match the
    callback signature.
    """
    new_name = value
    isc.util.process.rename(new_name)
def parse_args(args=sys.argv[1:], Parser=OptionParser):
    """
    Parse the command line arguments.

    `args` is the argument list to parse and `Parser` the option parser
    class to instantiate.  Returns the options object produced by the
    parser.  An invalid --cmdctl-port is reported through the parser's
    error(); any leftover positional argument prints the help text and
    exits with status 1.
    """
    opt_parser = Parser(version=VERSION)
    add = opt_parser.add_option

    add("-m", "--msgq-socket-file", dest="msgq_socket_file",
        type="string", default=None,
        help="UNIX domain socket file the b10-msgq daemon will use")
    add("-n", "--no-cache", action="store_true", dest="nocache",
        default=False,
        help="disable hot-spot cache in authoritative DNS server")
    add("-u", "--user", dest="user", type="string", default=None,
        help="Change user after startup (must run as root)")
    add("-v", "--verbose", dest="verbose", action="store_true",
        help="display more about what is going on")
    add("--pretty-name", type="string", action="callback",
        callback=process_rename,
        help="Set the process name (displayed in ps, top, ...)")
    add("-c", "--config-file", action="store", dest="config_file",
        default=None,
        help="Configuration database filename")
    add("-p", "--data-path", dest="data_path",
        help="Directory to search for configuration files",
        default=None)
    add("--cmdctl-port", dest="cmdctl_port", type="int", default=None,
        help="Port of command control")
    add("--pid-file", dest="pid_file", type="string", default=None,
        help="file to dump the PID of the BIND 10 process")
    add("--brittle", dest="brittle", action="store_true",
        help="debugging flag: exit if any component dies")
    add("-w", "--wait", dest="wait_time", type="int", default=10,
        help="Time (in seconds) to wait for config manager to start up")

    parsed, leftover = opt_parser.parse_args(args)

    # Validate the command control port, if one was given.
    if parsed.cmdctl_port is not None:
        try:
            isc.net.parse.port_parse(parsed.cmdctl_port)
        except ValueError as e:
            opt_parser.error(e)

    # Positional arguments are not accepted.
    if leftover:
        opt_parser.print_help()
        sys.exit(1)

    return parsed
def dump_pid(pid_file):
    """
    Dump the PID of the current process to the specified file.

    If the given file is None this function does nothing.  If the file
    already exists, the existing content will be removed.  If a system
    error happens in creating or writing to the file, the corresponding
    exception will be propagated to the caller.
    """
    if pid_file is None:
        return
    # The context manager closes the file even when write() raises.
    with open(pid_file, "w") as f:
        f.write('%d\n' % os.getpid())
def unlink_pid_file(pid_file):
    """
    Remove the given file, which is basically expected to be the PID file
    created by dump_pid().

    The specified file may or may not exist; if it doesn't, this function
    does nothing.  If pid_file is None this function does nothing either.
    Other system level errors in removing the file will be propagated as
    the corresponding exception.
    """
    if pid_file is None:
        return
    try:
        os.unlink(pid_file)
    except OSError as error:
        # Compare errno values by equality; identity ("is not") on
        # integers only works by accident of small-int caching.
        if error.errno != errno.ENOENT:
            raise
def main():
    """Entry point of the boss process.

    Parses the command line, resolves the optional user to switch to,
    installs the signal handlers, starts the BoB instance and then runs
    the main loop until the boss is no longer runnable.  Finally shuts
    everything down, removes the PID file and exits with status 0.
    Exits with status 1 if the user is invalid or startup fails.
    """
    global options
    global boss_of_bind

    # Enforce line buffering on stdout, even when not a TTY
    sys.stdout = io.TextIOWrapper(sys.stdout.detach(), line_buffering=True)

    options = parse_args()

    # Check user ID.
    setuid = None
    username = None
    if options.user:
        # Try getting information about the user, assuming UID passed.
        try:
            pw_ent = pwd.getpwuid(int(options.user))
            setuid = pw_ent.pw_uid
            username = pw_ent.pw_name
        except ValueError:
            pass
        except KeyError:
            pass

        # Next try getting information about the user, assuming user name
        # passed.
        # If the information is both a valid user name and user number, we
        # prefer the name because we try it second. A minor point, hopefully.
        try:
            pw_ent = pwd.getpwnam(options.user)
            setuid = pw_ent.pw_uid
            username = pw_ent.pw_name
        except KeyError:
            pass

        if setuid is None:
            logger.fatal(BIND10_INVALID_USER, options.user)
            sys.exit(1)

    # Announce startup.
    logger.info(BIND10_STARTING, VERSION)

    # Create wakeup pipe for signal handlers
    wakeup_pipe = os.pipe()
    signal.set_wakeup_fd(wakeup_pipe[1])

    # Set signal handlers for catching child termination, as well
    # as our own demise.
    signal.signal(signal.SIGCHLD, reaper)
    signal.siginterrupt(signal.SIGCHLD, False)
    signal.signal(signal.SIGINT, fatal_signal)
    signal.signal(signal.SIGTERM, fatal_signal)

    # Block SIGPIPE, as we don't want it to end this process
    signal.signal(signal.SIGPIPE, signal.SIG_IGN)

    # Go bob!
    boss_of_bind = BoB(options.msgq_socket_file, options.data_path,
                       options.config_file, options.nocache, options.verbose,
                       setuid, username, options.cmdctl_port, options.brittle,
                       options.wait_time)
    startup_result = boss_of_bind.startup()
    if startup_result:
        logger.fatal(BIND10_STARTUP_ERROR, startup_result)
        sys.exit(1)
    logger.info(BIND10_STARTUP_COMPLETE)
    dump_pid(options.pid_file)

    # In our main loop, we check for dead processes or messages
    # on the c-channel.
    wakeup_fd = wakeup_pipe[0]
    ccs_fd = boss_of_bind.ccs.get_socket().fileno()
    while boss_of_bind.runnable:
        # clean up any processes that exited
        boss_of_bind.reap_children()
        # XXX: As we don't put anything into the processes to be restarted,
        # this is really a complicated NOP. But we will try to reintroduce
        # delayed restarts, so it stays here for now, until we find out if
        # it's useful.
        next_restart = boss_of_bind.restart_processes()
        if next_restart is None:
            wait_time = None
        else:
            wait_time = max(next_restart - time.time(), 0)

        # select() can raise EINTR when a signal arrives,
        # even if they are resumable, so we have to catch
        # the exception
        try:
            (rlist, wlist, xlist) = select.select([wakeup_fd, ccs_fd], [], [],
                                                  wait_time)
        except select.error as err:
            if err.args[0] == errno.EINTR:
                (rlist, wlist, xlist) = ([], [], [])
            else:
                logger.fatal(BIND10_SELECT_ERROR, err)
                break

        for fd in rlist + xlist:
            if fd == ccs_fd:
                try:
                    boss_of_bind.ccs.check_command()
                except isc.cc.session.ProtocolError:
                    logger.fatal(BIND10_MSGQ_DISAPPEARED)
                    # main() is a plain function, so there is no `self`;
                    # the flag that ends the main loop lives on the
                    # global boss instance.
                    boss_of_bind.runnable = False
                    break
            elif fd == wakeup_fd:
                # Drain the bytes written to the wakeup pipe.
                os.read(wakeup_fd, 32)

    # shutdown
    signal.signal(signal.SIGCHLD, signal.SIG_DFL)
    boss_of_bind.shutdown()
    unlink_pid_file(options.pid_file)
    sys.exit(0)
# Start the boss only when this file is executed as a script.
if __name__ == "__main__":
    main()
|