|
@@ -44,10 +44,12 @@ import os
|
|
# installed on the system
|
|
# installed on the system
|
|
if "B10_FROM_SOURCE" in os.environ:
|
|
if "B10_FROM_SOURCE" in os.environ:
|
|
SPECFILE_LOCATION = os.environ["B10_FROM_SOURCE"] + "/src/bin/bind10/bob.spec"
|
|
SPECFILE_LOCATION = os.environ["B10_FROM_SOURCE"] + "/src/bin/bind10/bob.spec"
|
|
|
|
+ ADD_LIBEXEC_PATH = False
|
|
else:
|
|
else:
|
|
PREFIX = "@prefix@"
|
|
PREFIX = "@prefix@"
|
|
DATAROOTDIR = "@datarootdir@"
|
|
DATAROOTDIR = "@datarootdir@"
|
|
SPECFILE_LOCATION = "@datadir@/@PACKAGE@/bob.spec".replace("${datarootdir}", DATAROOTDIR).replace("${prefix}", PREFIX)
|
|
SPECFILE_LOCATION = "@datadir@/@PACKAGE@/bob.spec".replace("${datarootdir}", DATAROOTDIR).replace("${prefix}", PREFIX)
|
|
|
|
+ ADD_LIBEXEC_PATH = True
|
|
|
|
|
|
import subprocess
|
|
import subprocess
|
|
import signal
|
|
import signal
|
|
@@ -61,20 +63,23 @@ from optparse import OptionParser, OptionValueError
|
|
import io
|
|
import io
|
|
import pwd
|
|
import pwd
|
|
import posix
|
|
import posix
|
|
|
|
+import copy
|
|
|
|
|
|
import isc.cc
|
|
import isc.cc
|
|
import isc.util.process
|
|
import isc.util.process
|
|
import isc.net.parse
|
|
import isc.net.parse
|
|
import isc.log
|
|
import isc.log
|
|
-from bind10_messages import *
|
|
|
|
|
|
+from isc.log_messages.bind10_messages import *
|
|
|
|
+import isc.bind10.component
|
|
|
|
+import isc.bind10.special_component
|
|
|
|
|
|
isc.log.init("b10-boss")
|
|
isc.log.init("b10-boss")
|
|
logger = isc.log.Logger("boss")
|
|
logger = isc.log.Logger("boss")
|
|
|
|
|
|
# Pending system-wide debug level definitions, the ones we
|
|
# Pending system-wide debug level definitions, the ones we
|
|
# use here are hardcoded for now
|
|
# use here are hardcoded for now
|
|
-DBG_PROCESS = 10
|
|
|
|
-DBG_COMMANDS = 30
|
|
|
|
|
|
+DBG_PROCESS = logger.DBGLVL_TRACE_BASIC
|
|
|
|
+DBG_COMMANDS = logger.DBGLVL_TRACE_DETAIL
|
|
|
|
|
|
# Assign this process some longer name
|
|
# Assign this process some longer name
|
|
isc.util.process.rename(sys.argv[0])
|
|
isc.util.process.rename(sys.argv[0])
|
|
@@ -84,54 +89,9 @@ isc.util.process.rename(sys.argv[0])
|
|
# number, and the overall BIND 10 version number (set in configure.ac).
|
|
# number, and the overall BIND 10 version number (set in configure.ac).
|
|
VERSION = "bind10 20110223 (BIND 10 @PACKAGE_VERSION@)"
|
|
VERSION = "bind10 20110223 (BIND 10 @PACKAGE_VERSION@)"
|
|
|
|
|
|
-# This is for bind10.boottime of stats module
|
|
|
|
|
|
+# This is for boot_time of Boss
|
|
_BASETIME = time.gmtime()
|
|
_BASETIME = time.gmtime()
|
|
|
|
|
|
-class RestartSchedule:
|
|
|
|
- """
|
|
|
|
-Keeps state when restarting something (in this case, a process).
|
|
|
|
-
|
|
|
|
-When a process dies unexpectedly, we need to restart it. However, if
|
|
|
|
-it fails to restart for some reason, then we should not simply keep
|
|
|
|
-restarting it at high speed.
|
|
|
|
-
|
|
|
|
-A more sophisticated algorithm can be developed, but for now we choose
|
|
|
|
-a simple set of rules:
|
|
|
|
-
|
|
|
|
- * If a process was been running for >=10 seconds, we restart it
|
|
|
|
- right away.
|
|
|
|
- * If a process was running for <10 seconds, we wait until 10 seconds
|
|
|
|
- after it was started.
|
|
|
|
-
|
|
|
|
-To avoid programs getting into lockstep, we use a normal distribution
|
|
|
|
-to avoid being restarted at exactly 10 seconds."""
|
|
|
|
-
|
|
|
|
- def __init__(self, restart_frequency=10.0):
|
|
|
|
- self.restart_frequency = restart_frequency
|
|
|
|
- self.run_start_time = None
|
|
|
|
- self.run_stop_time = None
|
|
|
|
- self.restart_time = None
|
|
|
|
-
|
|
|
|
- def set_run_start_time(self, when=None):
|
|
|
|
- if when is None:
|
|
|
|
- when = time.time()
|
|
|
|
- self.run_start_time = when
|
|
|
|
- sigma = self.restart_frequency * 0.05
|
|
|
|
- self.restart_time = when + random.normalvariate(self.restart_frequency,
|
|
|
|
- sigma)
|
|
|
|
-
|
|
|
|
- def set_run_stop_time(self, when=None):
|
|
|
|
- """We don't actually do anything with stop time now, but it
|
|
|
|
- might be useful for future algorithms."""
|
|
|
|
- if when is None:
|
|
|
|
- when = time.time()
|
|
|
|
- self.run_stop_time = when
|
|
|
|
-
|
|
|
|
- def get_restart_time(self, when=None):
|
|
|
|
- if when is None:
|
|
|
|
- when = time.time()
|
|
|
|
- return max(when, self.restart_time)
|
|
|
|
-
|
|
|
|
class ProcessInfoError(Exception): pass
|
|
class ProcessInfoError(Exception): pass
|
|
|
|
|
|
class ProcessInfo:
|
|
class ProcessInfo:
|
|
@@ -146,7 +106,6 @@ class ProcessInfo:
|
|
self.env = env
|
|
self.env = env
|
|
self.dev_null_stdout = dev_null_stdout
|
|
self.dev_null_stdout = dev_null_stdout
|
|
self.dev_null_stderr = dev_null_stderr
|
|
self.dev_null_stderr = dev_null_stderr
|
|
- self.restart_schedule = RestartSchedule()
|
|
|
|
self.uid = uid
|
|
self.uid = uid
|
|
self.username = username
|
|
self.username = username
|
|
self.process = None
|
|
self.process = None
|
|
@@ -183,9 +142,9 @@ class ProcessInfo:
|
|
# Environment variables for the child process will be a copy of those
|
|
# Environment variables for the child process will be a copy of those
|
|
# of the boss process with any additional specific variables given
|
|
# of the boss process with any additional specific variables given
|
|
# on construction (self.env).
|
|
# on construction (self.env).
|
|
- spawn_env = os.environ
|
|
|
|
|
|
+ spawn_env = copy.deepcopy(os.environ)
|
|
spawn_env.update(self.env)
|
|
spawn_env.update(self.env)
|
|
- if 'B10_FROM_SOURCE' not in os.environ:
|
|
|
|
|
|
+ if ADD_LIBEXEC_PATH:
|
|
spawn_env['PATH'] = "@@LIBEXECDIR@@:" + spawn_env['PATH']
|
|
spawn_env['PATH'] = "@@LIBEXECDIR@@:" + spawn_env['PATH']
|
|
self.process = subprocess.Popen(self.args,
|
|
self.process = subprocess.Popen(self.args,
|
|
stdin=subprocess.PIPE,
|
|
stdin=subprocess.PIPE,
|
|
@@ -195,7 +154,6 @@ class ProcessInfo:
|
|
env=spawn_env,
|
|
env=spawn_env,
|
|
preexec_fn=self._preexec_work)
|
|
preexec_fn=self._preexec_work)
|
|
self.pid = self.process.pid
|
|
self.pid = self.process.pid
|
|
- self.restart_schedule.set_run_start_time()
|
|
|
|
|
|
|
|
# spawn() and respawn() are the same for now, but in the future they
|
|
# spawn() and respawn() are the same for now, but in the future they
|
|
# may have different functionality
|
|
# may have different functionality
|
|
@@ -207,12 +165,14 @@ class ProcessInfo:
|
|
|
|
|
|
class CChannelConnectError(Exception): pass
|
|
class CChannelConnectError(Exception): pass
|
|
|
|
|
|
|
|
+class ProcessStartError(Exception): pass
|
|
|
|
+
|
|
class BoB:
|
|
class BoB:
|
|
"""Boss of BIND class."""
|
|
"""Boss of BIND class."""
|
|
|
|
|
|
def __init__(self, msgq_socket_file=None, data_path=None,
|
|
def __init__(self, msgq_socket_file=None, data_path=None,
|
|
config_filename=None, nocache=False, verbose=False, setuid=None,
|
|
config_filename=None, nocache=False, verbose=False, setuid=None,
|
|
- username=None, cmdctl_port=None, brittle=False):
|
|
|
|
|
|
+ username=None, cmdctl_port=None, wait_time=10):
|
|
"""
|
|
"""
|
|
Initialize the Boss of BIND. This is a singleton (only one can run).
|
|
Initialize the Boss of BIND. This is a singleton (only one can run).
|
|
|
|
|
|
@@ -220,26 +180,30 @@ class BoB:
|
|
msgq process listens on. If verbose is True, then the boss reports
|
|
msgq process listens on. If verbose is True, then the boss reports
|
|
what it is doing.
|
|
what it is doing.
|
|
|
|
|
|
- Data path and config filename are passed trough to config manager
|
|
|
|
|
|
+ Data path and config filename are passed through to config manager
|
|
(if provided) and specify the config file to be used.
|
|
(if provided) and specify the config file to be used.
|
|
|
|
|
|
The cmdctl_port is passed to cmdctl and specify on which port it
|
|
The cmdctl_port is passed to cmdctl and specify on which port it
|
|
should listen.
|
|
should listen.
|
|
|
|
+
|
|
|
|
+ wait_time controls the amount of time (in seconds) that Boss waits
|
|
|
|
+ for selected processes to initialize before continuing with the
|
|
|
|
+ initialization. Currently this is only the configuration manager.
|
|
"""
|
|
"""
|
|
self.cc_session = None
|
|
self.cc_session = None
|
|
self.ccs = None
|
|
self.ccs = None
|
|
- self.cfg_start_auth = True
|
|
|
|
- self.cfg_start_resolver = False
|
|
|
|
- self.cfg_start_dhcp6 = False
|
|
|
|
- self.cfg_start_dhcp4 = False
|
|
|
|
- self.started_auth_family = False
|
|
|
|
- self.started_resolver_family = False
|
|
|
|
self.curproc = None
|
|
self.curproc = None
|
|
- self.dead_processes = {}
|
|
|
|
self.msgq_socket_file = msgq_socket_file
|
|
self.msgq_socket_file = msgq_socket_file
|
|
self.nocache = nocache
|
|
self.nocache = nocache
|
|
- self.processes = {}
|
|
|
|
- self.expected_shutdowns = {}
|
|
|
|
|
|
+ self.component_config = {}
|
|
|
|
+ # Some time in future, it may happen that a single component has
|
|
|
|
+ # multiple processes. If so happens, name "components" may be
|
|
|
|
+ # inappropriate. But as the code probably isn't completely ready
|
|
|
|
+ # for it, we leave it at components for now.
|
|
|
|
+ self.components = {}
|
|
|
|
+ # Simple list of components that died and need to wait for a
|
|
|
|
+ # restart. Components manage their own restart schedule now
|
|
|
|
+ self.components_to_restart = []
|
|
self.runnable = False
|
|
self.runnable = False
|
|
self.uid = setuid
|
|
self.uid = setuid
|
|
self.username = username
|
|
self.username = username
|
|
@@ -247,64 +211,76 @@ class BoB:
|
|
self.data_path = data_path
|
|
self.data_path = data_path
|
|
self.config_filename = config_filename
|
|
self.config_filename = config_filename
|
|
self.cmdctl_port = cmdctl_port
|
|
self.cmdctl_port = cmdctl_port
|
|
- self.brittle = brittle
|
|
|
|
|
|
+ self.wait_time = wait_time
|
|
|
|
+ self._component_configurator = isc.bind10.component.Configurator(self,
|
|
|
|
+ isc.bind10.special_component.get_specials())
|
|
|
|
+ # The priorities here make them start in the correct order. First
|
|
|
|
+ # the socket creator (which would drop root privileges by then),
|
|
|
|
+ # then message queue and after that the config manager (which uses
|
|
|
|
+ # the message queue)
|
|
|
|
+ self.__core_components = {
|
|
|
|
+ 'sockcreator': {
|
|
|
|
+ 'kind': 'core',
|
|
|
|
+ 'special': 'sockcreator',
|
|
|
|
+ 'priority': 200
|
|
|
|
+ },
|
|
|
|
+ 'msgq': {
|
|
|
|
+ 'kind': 'core',
|
|
|
|
+ 'special': 'msgq',
|
|
|
|
+ 'priority': 199
|
|
|
|
+ },
|
|
|
|
+ 'cfgmgr': {
|
|
|
|
+ 'kind': 'core',
|
|
|
|
+ 'special': 'cfgmgr',
|
|
|
|
+ 'priority': 198
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ self.__started = False
|
|
|
|
+ self.exitcode = 0
|
|
|
|
+
|
|
|
|
+ # If -v was set, enable full debug logging.
|
|
|
|
+ if self.verbose:
|
|
|
|
+ logger.set_severity("DEBUG", 99)
|
|
|
|
+
|
|
|
|
+ def __propagate_component_config(self, config):
|
|
|
|
+ comps = dict(config)
|
|
|
|
+ # Fill in the core components, so they stay alive
|
|
|
|
+ for comp in self.__core_components:
|
|
|
|
+ if comp in comps:
|
|
|
|
+ raise Exception(comp + " is core component managed by " +
|
|
|
|
+ "bind10 boss, do not set it")
|
|
|
|
+ comps[comp] = self.__core_components[comp]
|
|
|
|
+ # Update the configuration
|
|
|
|
+ self._component_configurator.reconfigure(comps)
|
|
|
|
|
|
def config_handler(self, new_config):
|
|
def config_handler(self, new_config):
|
|
# If this is initial update, don't do anything now, leave it to startup
|
|
# If this is initial update, don't do anything now, leave it to startup
|
|
if not self.runnable:
|
|
if not self.runnable:
|
|
return
|
|
return
|
|
- # Now we declare few functions used only internally here. Besides the
|
|
|
|
- # benefit of not polluting the name space, they are closures, so we
|
|
|
|
- # don't need to pass some variables
|
|
|
|
- def start_stop(name, started, start, stop):
|
|
|
|
- if not'start_' + name in new_config:
|
|
|
|
- return
|
|
|
|
- if new_config['start_' + name]:
|
|
|
|
- if not started:
|
|
|
|
- if self.uid is not None:
|
|
|
|
- logger.info(BIND10_START_AS_NON_ROOT, name)
|
|
|
|
- start()
|
|
|
|
- else:
|
|
|
|
- stop()
|
|
|
|
- # These four functions are passed to start_stop (smells like functional
|
|
|
|
- # programming little bit)
|
|
|
|
- def resolver_on():
|
|
|
|
- self.start_resolver(self.c_channel_env)
|
|
|
|
- self.started_resolver_family = True
|
|
|
|
- def resolver_off():
|
|
|
|
- self.stop_resolver()
|
|
|
|
- self.started_resolver_family = False
|
|
|
|
- def auth_on():
|
|
|
|
- self.start_auth(self.c_channel_env)
|
|
|
|
- self.start_xfrout(self.c_channel_env)
|
|
|
|
- self.start_xfrin(self.c_channel_env)
|
|
|
|
- self.start_zonemgr(self.c_channel_env)
|
|
|
|
- self.started_auth_family = True
|
|
|
|
- def auth_off():
|
|
|
|
- self.stop_zonemgr()
|
|
|
|
- self.stop_xfrin()
|
|
|
|
- self.stop_xfrout()
|
|
|
|
- self.stop_auth()
|
|
|
|
- self.started_auth_family = False
|
|
|
|
-
|
|
|
|
- # The real code of the config handler function follows here
|
|
|
|
logger.debug(DBG_COMMANDS, BIND10_RECEIVED_NEW_CONFIGURATION,
|
|
logger.debug(DBG_COMMANDS, BIND10_RECEIVED_NEW_CONFIGURATION,
|
|
new_config)
|
|
new_config)
|
|
- start_stop('resolver', self.started_resolver_family, resolver_on,
|
|
|
|
- resolver_off)
|
|
|
|
- start_stop('auth', self.started_auth_family, auth_on, auth_off)
|
|
|
|
-
|
|
|
|
- answer = isc.config.ccsession.create_answer(0)
|
|
|
|
- return answer
|
|
|
|
|
|
+ try:
|
|
|
|
+ if 'components' in new_config:
|
|
|
|
+ self.__propagate_component_config(new_config['components'])
|
|
|
|
+ return isc.config.ccsession.create_answer(0)
|
|
|
|
+ except Exception as e:
|
|
|
|
+ return isc.config.ccsession.create_answer(1, str(e))
|
|
|
|
|
|
def get_processes(self):
|
|
def get_processes(self):
|
|
- pids = list(self.processes.keys())
|
|
|
|
|
|
+ pids = list(self.components.keys())
|
|
pids.sort()
|
|
pids.sort()
|
|
process_list = [ ]
|
|
process_list = [ ]
|
|
for pid in pids:
|
|
for pid in pids:
|
|
- process_list.append([pid, self.processes[pid].name])
|
|
|
|
|
|
+ process_list.append([pid, self.components[pid].name()])
|
|
return process_list
|
|
return process_list
|
|
|
|
|
|
|
|
+ def _get_stats_data(self):
|
|
|
|
+ return { "owner": "Boss",
|
|
|
|
+ "data": { 'boot_time':
|
|
|
|
+ time.strftime('%Y-%m-%dT%H:%M:%SZ', _BASETIME)
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
def command_handler(self, command, args):
|
|
def command_handler(self, command, args):
|
|
logger.debug(DBG_COMMANDS, BIND10_RECEIVED_COMMAND, command)
|
|
logger.debug(DBG_COMMANDS, BIND10_RECEIVED_COMMAND, command)
|
|
answer = isc.config.ccsession.create_answer(1, "command not implemented")
|
|
answer = isc.config.ccsession.create_answer(1, "command not implemented")
|
|
@@ -314,15 +290,26 @@ class BoB:
|
|
if command == "shutdown":
|
|
if command == "shutdown":
|
|
self.runnable = False
|
|
self.runnable = False
|
|
answer = isc.config.ccsession.create_answer(0)
|
|
answer = isc.config.ccsession.create_answer(0)
|
|
|
|
+ elif command == "getstats":
|
|
|
|
+ answer = isc.config.ccsession.create_answer(0, self._get_stats_data())
|
|
elif command == "sendstats":
|
|
elif command == "sendstats":
|
|
# send statistics data to the stats daemon immediately
|
|
# send statistics data to the stats daemon immediately
|
|
- cmd = isc.config.ccsession.create_command(
|
|
|
|
- 'set', { "stats_data": {
|
|
|
|
- 'bind10.boot_time': time.strftime('%Y-%m-%dT%H:%M:%SZ', _BASETIME)
|
|
|
|
- }})
|
|
|
|
- seq = self.cc_session.group_sendmsg(cmd, 'Stats')
|
|
|
|
- self.cc_session.group_recvmsg(True, seq)
|
|
|
|
- answer = isc.config.ccsession.create_answer(0)
|
|
|
|
|
|
+ stats_data = self._get_stats_data()
|
|
|
|
+ valid = self.ccs.get_module_spec().validate_statistics(
|
|
|
|
+ True, stats_data["data"])
|
|
|
|
+ if valid:
|
|
|
|
+ cmd = isc.config.ccsession.create_command('set', stats_data)
|
|
|
|
+ seq = self.cc_session.group_sendmsg(cmd, 'Stats')
|
|
|
|
+ # Consume the answer, in case it becomes an orphan message.
|
|
|
|
+ try:
|
|
|
|
+ self.cc_session.group_recvmsg(False, seq)
|
|
|
|
+ except isc.cc.session.SessionTimeout:
|
|
|
|
+ pass
|
|
|
|
+ answer = isc.config.ccsession.create_answer(0)
|
|
|
|
+ else:
|
|
|
|
+ logger.fatal(BIND10_INVALID_STATISTICS_DATA);
|
|
|
|
+ answer = isc.config.ccsession.create_answer(
|
|
|
|
+ 1, "specified statistics data is invalid")
|
|
elif command == "ping":
|
|
elif command == "ping":
|
|
answer = isc.config.ccsession.create_answer(0, "pong")
|
|
answer = isc.config.ccsession.create_answer(0, "pong")
|
|
elif command == "show_processes":
|
|
elif command == "show_processes":
|
|
@@ -333,7 +320,7 @@ class BoB:
|
|
"Unknown command")
|
|
"Unknown command")
|
|
return answer
|
|
return answer
|
|
|
|
|
|
- def kill_started_processes(self):
|
|
|
|
|
|
+ def kill_started_components(self):
|
|
"""
|
|
"""
|
|
Called as part of the exception handling when a process fails to
|
|
Called as part of the exception handling when a process fails to
|
|
start, this runs through the list of started processes, killing
|
|
start, this runs through the list of started processes, killing
|
|
@@ -341,29 +328,25 @@ class BoB:
|
|
"""
|
|
"""
|
|
logger.info(BIND10_KILLING_ALL_PROCESSES)
|
|
logger.info(BIND10_KILLING_ALL_PROCESSES)
|
|
|
|
|
|
- for pid in self.processes:
|
|
|
|
- logger.info(BIND10_KILL_PROCESS, self.processes[pid].name)
|
|
|
|
- self.processes[pid].process.kill()
|
|
|
|
- self.processes = {}
|
|
|
|
|
|
+ for pid in self.components:
|
|
|
|
+ logger.info(BIND10_KILL_PROCESS, self.components[pid].name())
|
|
|
|
+ self.components[pid].kill(True)
|
|
|
|
+ self.components = {}
|
|
|
|
|
|
- def read_bind10_config(self):
|
|
|
|
|
|
+ def _read_bind10_config(self):
|
|
"""
|
|
"""
|
|
Reads the parameters associated with the BoB module itself.
|
|
Reads the parameters associated with the BoB module itself.
|
|
|
|
|
|
- At present these are the components to start although arguably this
|
|
|
|
- information should be in the configuration for the appropriate
|
|
|
|
- module itself. (However, this would cause difficulty in the case of
|
|
|
|
- xfrin/xfrout and zone manager as we don't need to start those if we
|
|
|
|
- are not running the authoritative server.)
|
|
|
|
|
|
+ This means the list of components we should start now.
|
|
|
|
+
|
|
|
|
+ This could easily be combined into start_all_processes, but
|
|
|
|
+ it stays because of historical reasons and because the tests
|
|
|
|
+ replace the method sometimes.
|
|
"""
|
|
"""
|
|
logger.info(BIND10_READING_BOSS_CONFIGURATION)
|
|
logger.info(BIND10_READING_BOSS_CONFIGURATION)
|
|
|
|
|
|
config_data = self.ccs.get_full_config()
|
|
config_data = self.ccs.get_full_config()
|
|
- self.cfg_start_auth = config_data.get("start_auth")
|
|
|
|
- self.cfg_start_resolver = config_data.get("start_resolver")
|
|
|
|
-
|
|
|
|
- logger.info(BIND10_CONFIGURATION_START_AUTH, self.cfg_start_auth)
|
|
|
|
- logger.info(BIND10_CONFIGURATION_START_RESOLVER, self.cfg_start_resolver)
|
|
|
|
|
|
+ self.__propagate_component_config(config_data['components'])
|
|
|
|
|
|
def log_starting(self, process, port = None, address = None):
|
|
def log_starting(self, process, port = None, address = None):
|
|
"""
|
|
"""
|
|
@@ -399,22 +382,42 @@ class BoB:
|
|
else:
|
|
else:
|
|
logger.debug(DBG_PROCESS, BIND10_STARTED_PROCESS_PID, self.curproc, pid)
|
|
logger.debug(DBG_PROCESS, BIND10_STARTED_PROCESS_PID, self.curproc, pid)
|
|
|
|
|
|
|
|
+ def process_running(self, msg, who):
|
|
|
|
+ """
|
|
|
|
+ Some processes return a message to the Boss after they have
|
|
|
|
+ started to indicate that they are running. The form of the
|
|
|
|
+ message is a dictionary with contents {"running": "<process>"}.
|
|
|
|
+ This method checks the passed message and returns True if the
|
|
|
|
+ "who" process is contained in the message (so is presumably
|
|
|
|
+ running). It returns False for all other conditions and will
|
|
|
|
+ log an error if appropriate.
|
|
|
|
+ """
|
|
|
|
+ if msg is not None:
|
|
|
|
+ try:
|
|
|
|
+ if msg["running"] == who:
|
|
|
|
+ return True
|
|
|
|
+ else:
|
|
|
|
+ logger.error(BIND10_STARTUP_UNEXPECTED_MESSAGE, msg)
|
|
|
|
+ except:
|
|
|
|
+ logger.error(BIND10_STARTUP_UNRECOGNISED_MESSAGE, msg)
|
|
|
|
+
|
|
|
|
+ return False
|
|
|
|
+
|
|
# The next few methods start the individual processes of BIND-10. They
|
|
# The next few methods start the individual processes of BIND-10. They
|
|
# are called via start_all_processes(). If any fail, an exception is
|
|
# are called via start_all_processes(). If any fail, an exception is
|
|
# raised which is caught by the caller of start_all_processes(); this kills
|
|
# raised which is caught by the caller of start_all_processes(); this kills
|
|
# processes started up to that point before terminating the program.
|
|
# processes started up to that point before terminating the program.
|
|
|
|
|
|
- def start_msgq(self, c_channel_env):
|
|
|
|
|
|
+ def start_msgq(self):
|
|
"""
|
|
"""
|
|
Start the message queue and connect to the command channel.
|
|
Start the message queue and connect to the command channel.
|
|
"""
|
|
"""
|
|
self.log_starting("b10-msgq")
|
|
self.log_starting("b10-msgq")
|
|
- c_channel = ProcessInfo("b10-msgq", ["b10-msgq"], c_channel_env,
|
|
|
|
|
|
+ msgq_proc = ProcessInfo("b10-msgq", ["b10-msgq"], self.c_channel_env,
|
|
True, not self.verbose, uid=self.uid,
|
|
True, not self.verbose, uid=self.uid,
|
|
username=self.username)
|
|
username=self.username)
|
|
- c_channel.spawn()
|
|
|
|
- self.processes[c_channel.pid] = c_channel
|
|
|
|
- self.log_started(c_channel.pid)
|
|
|
|
|
|
+ msgq_proc.spawn()
|
|
|
|
+ self.log_started(msgq_proc.pid)
|
|
|
|
|
|
# Now connect to the c-channel
|
|
# Now connect to the c-channel
|
|
cc_connect_start = time.time()
|
|
cc_connect_start = time.time()
|
|
@@ -429,7 +432,13 @@ class BoB:
|
|
except isc.cc.session.SessionError:
|
|
except isc.cc.session.SessionError:
|
|
time.sleep(0.1)
|
|
time.sleep(0.1)
|
|
|
|
|
|
- def start_cfgmgr(self, c_channel_env):
|
|
|
|
|
|
+ # Subscribe to the message queue. The only messages we expect to receive
|
|
|
|
+ # on this channel are ones relating to process startup.
|
|
|
|
+ self.cc_session.group_subscribe("Boss")
|
|
|
|
+
|
|
|
|
+ return msgq_proc
|
|
|
|
+
|
|
|
|
+ def start_cfgmgr(self):
|
|
"""
|
|
"""
|
|
Starts the configuration manager process
|
|
Starts the configuration manager process
|
|
"""
|
|
"""
|
|
@@ -440,17 +449,25 @@ class BoB:
|
|
if self.config_filename is not None:
|
|
if self.config_filename is not None:
|
|
args.append("--config-filename=" + self.config_filename)
|
|
args.append("--config-filename=" + self.config_filename)
|
|
bind_cfgd = ProcessInfo("b10-cfgmgr", args,
|
|
bind_cfgd = ProcessInfo("b10-cfgmgr", args,
|
|
- c_channel_env, uid=self.uid,
|
|
|
|
|
|
+ self.c_channel_env, uid=self.uid,
|
|
username=self.username)
|
|
username=self.username)
|
|
bind_cfgd.spawn()
|
|
bind_cfgd.spawn()
|
|
- self.processes[bind_cfgd.pid] = bind_cfgd
|
|
|
|
self.log_started(bind_cfgd.pid)
|
|
self.log_started(bind_cfgd.pid)
|
|
|
|
|
|
- # sleep until b10-cfgmgr is fully up and running, this is a good place
|
|
|
|
- # to have a (short) timeout on synchronized groupsend/receive
|
|
|
|
- # TODO: replace the sleep by a listen for ConfigManager started
|
|
|
|
- # message
|
|
|
|
- time.sleep(1)
|
|
|
|
|
|
+ # Wait for the configuration manager to start up as subsequent initialization
|
|
|
|
+ # cannot proceed without it. The time to wait can be set on the command line.
|
|
|
|
+ time_remaining = self.wait_time
|
|
|
|
+ msg, env = self.cc_session.group_recvmsg()
|
|
|
|
+ while time_remaining > 0 and not self.process_running(msg, "ConfigManager"):
|
|
|
|
+ logger.debug(DBG_PROCESS, BIND10_WAIT_CFGMGR)
|
|
|
|
+ time.sleep(1)
|
|
|
|
+ time_remaining = time_remaining - 1
|
|
|
|
+ msg, env = self.cc_session.group_recvmsg()
|
|
|
|
+
|
|
|
|
+ if not self.process_running(msg, "ConfigManager"):
|
|
|
|
+ raise ProcessStartError("Configuration manager process has not started")
|
|
|
|
+
|
|
|
|
+ return bind_cfgd
|
|
|
|
|
|
def start_ccsession(self, c_channel_env):
|
|
def start_ccsession(self, c_channel_env):
|
|
"""
|
|
"""
|
|
@@ -458,13 +475,17 @@ class BoB:
|
|
|
|
|
|
The argument c_channel_env is unused but is supplied to keep the
|
|
The argument c_channel_env is unused but is supplied to keep the
|
|
argument list the same for all start_xxx methods.
|
|
argument list the same for all start_xxx methods.
|
|
|
|
+
|
|
|
|
+ With regards to logging, note that as the CC session is not a
|
|
|
|
+ process, the log_starting/log_started methods are not used.
|
|
"""
|
|
"""
|
|
- self.log_starting("ccsession")
|
|
|
|
|
|
+ logger.info(BIND10_STARTING_CC)
|
|
self.ccs = isc.config.ModuleCCSession(SPECFILE_LOCATION,
|
|
self.ccs = isc.config.ModuleCCSession(SPECFILE_LOCATION,
|
|
self.config_handler,
|
|
self.config_handler,
|
|
- self.command_handler)
|
|
|
|
|
|
+ self.command_handler,
|
|
|
|
+ socket_file = self.msgq_socket_file)
|
|
self.ccs.start()
|
|
self.ccs.start()
|
|
- self.log_started()
|
|
|
|
|
|
+ logger.debug(DBG_PROCESS, BIND10_STARTED_CC)
|
|
|
|
|
|
# A couple of utility methods for starting processes...
|
|
# A couple of utility methods for starting processes...
|
|
|
|
|
|
@@ -479,10 +500,20 @@ class BoB:
|
|
self.log_starting(name, port, address)
|
|
self.log_starting(name, port, address)
|
|
newproc = ProcessInfo(name, args, c_channel_env)
|
|
newproc = ProcessInfo(name, args, c_channel_env)
|
|
newproc.spawn()
|
|
newproc.spawn()
|
|
- self.processes[newproc.pid] = newproc
|
|
|
|
self.log_started(newproc.pid)
|
|
self.log_started(newproc.pid)
|
|
|
|
+ return newproc
|
|
|
|
+
|
|
|
|
+ def register_process(self, pid, component):
|
|
|
|
+ """
|
|
|
|
+ Put another process into boss to watch over it. When the process
|
|
|
|
+ dies, the component.failed() is called with the exit code.
|
|
|
|
|
|
- def start_simple(self, name, c_channel_env, port=None, address=None):
|
|
|
|
|
|
+ It is expected the component is an isc.bind10.component.BaseComponent
|
|
|
|
+ subclass (or anything having the same interface).
|
|
|
|
+ """
|
|
|
|
+ self.components[pid] = component
|
|
|
|
+
|
|
|
|
+ def start_simple(self, name):
|
|
"""
|
|
"""
|
|
Most of the BIND-10 processes are started with the command:
|
|
Most of the BIND-10 processes are started with the command:
|
|
|
|
|
|
@@ -499,7 +530,7 @@ class BoB:
|
|
args += ['-v']
|
|
args += ['-v']
|
|
|
|
|
|
# ... and start the process
|
|
# ... and start the process
|
|
- self.start_process(name, args, c_channel_env, port, address)
|
|
|
|
|
|
+ return self.start_process(name, args, self.c_channel_env)
|
|
|
|
|
|
# The next few methods start up the rest of the BIND-10 processes.
|
|
# The next few methods start up the rest of the BIND-10 processes.
|
|
# Although many of these methods are little more than a call to
|
|
# Although many of these methods are little more than a call to
|
|
@@ -507,10 +538,12 @@ class BoB:
|
|
# where modifications can be made if the process start-up sequence changes
|
|
# where modifications can be made if the process start-up sequence changes
|
|
# for a given process.
|
|
# for a given process.
|
|
|
|
|
|
- def start_auth(self, c_channel_env):
|
|
|
|
|
|
+ def start_auth(self):
|
|
"""
|
|
"""
|
|
Start the Authoritative server
|
|
Start the Authoritative server
|
|
"""
|
|
"""
|
|
|
|
+ if self.uid is not None and self.__started:
|
|
|
|
+ logger.warn(BIND10_START_AS_NON_ROOT_AUTH)
|
|
authargs = ['b10-auth']
|
|
authargs = ['b10-auth']
|
|
if self.nocache:
|
|
if self.nocache:
|
|
authargs += ['-n']
|
|
authargs += ['-n']
|
|
@@ -520,14 +553,16 @@ class BoB:
|
|
authargs += ['-v']
|
|
authargs += ['-v']
|
|
|
|
|
|
# ... and start
|
|
# ... and start
|
|
- self.start_process("b10-auth", authargs, c_channel_env)
|
|
|
|
|
|
+ return self.start_process("b10-auth", authargs, self.c_channel_env)
|
|
|
|
|
|
- def start_resolver(self, c_channel_env):
|
|
|
|
|
|
+ def start_resolver(self):
|
|
"""
|
|
"""
|
|
Start the Resolver. At present, all these arguments and switches
|
|
Start the Resolver. At present, all these arguments and switches
|
|
are pure speculation. As with the auth daemon, they should be
|
|
are pure speculation. As with the auth daemon, they should be
|
|
read from the configuration database.
|
|
read from the configuration database.
|
|
"""
|
|
"""
|
|
|
|
+ if self.uid is not None and self.__started:
|
|
|
|
+ logger.warn(BIND10_START_AS_NON_ROOT_RESOLVER)
|
|
self.curproc = "b10-resolver"
|
|
self.curproc = "b10-resolver"
|
|
# XXX: this must be read from the configuration manager in the future
|
|
# XXX: this must be read from the configuration manager in the future
|
|
resargs = ['b10-resolver']
|
|
resargs = ['b10-resolver']
|
|
@@ -537,80 +572,38 @@ class BoB:
|
|
resargs += ['-v']
|
|
resargs += ['-v']
|
|
|
|
|
|
# ... and start
|
|
# ... and start
|
|
- self.start_process("b10-resolver", resargs, c_channel_env)
|
|
|
|
-
|
|
|
|
- def start_xfrout(self, c_channel_env):
|
|
|
|
- self.start_simple("b10-xfrout", c_channel_env)
|
|
|
|
-
|
|
|
|
- def start_xfrin(self, c_channel_env):
|
|
|
|
- self.start_simple("b10-xfrin", c_channel_env)
|
|
|
|
|
|
+ return self.start_process("b10-resolver", resargs, self.c_channel_env)
|
|
|
|
|
|
- def start_zonemgr(self, c_channel_env):
|
|
|
|
- self.start_simple("b10-zonemgr", c_channel_env)
|
|
|
|
-
|
|
|
|
- def start_stats(self, c_channel_env):
|
|
|
|
- self.start_simple("b10-stats", c_channel_env)
|
|
|
|
-
|
|
|
|
- def start_stats_httpd(self, c_channel_env):
|
|
|
|
- self.start_simple("b10-stats-httpd", c_channel_env)
|
|
|
|
-
|
|
|
|
- def start_dhcp6(self, c_channel_env):
|
|
|
|
- self.start_simple("b10-dhcp6", c_channel_env)
|
|
|
|
-
|
|
|
|
- def start_cmdctl(self, c_channel_env):
|
|
|
|
|
|
+ def start_cmdctl(self):
|
|
"""
|
|
"""
|
|
Starts the command control process
|
|
Starts the command control process
|
|
"""
|
|
"""
|
|
args = ["b10-cmdctl"]
|
|
args = ["b10-cmdctl"]
|
|
if self.cmdctl_port is not None:
|
|
if self.cmdctl_port is not None:
|
|
args.append("--port=" + str(self.cmdctl_port))
|
|
args.append("--port=" + str(self.cmdctl_port))
|
|
- self.start_process("b10-cmdctl", args, c_channel_env, self.cmdctl_port)
|
|
|
|
|
|
+ if self.verbose:
|
|
|
|
+ args.append("-v")
|
|
|
|
+ return self.start_process("b10-cmdctl", args, self.c_channel_env,
|
|
|
|
+ self.cmdctl_port)
|
|
|
|
|
|
- def start_all_processes(self):
|
|
|
|
|
|
+ def start_all_components(self):
|
|
"""
|
|
"""
|
|
- Starts up all the processes. Any exception generated during the
|
|
|
|
- starting of the processes is handled by the caller.
|
|
|
|
|
|
+ Starts up all the components. Any exception generated during the
|
|
|
|
+ starting of the components is handled by the caller.
|
|
"""
|
|
"""
|
|
- c_channel_env = self.c_channel_env
|
|
|
|
- self.start_msgq(c_channel_env)
|
|
|
|
- self.start_cfgmgr(c_channel_env)
|
|
|
|
- self.start_ccsession(c_channel_env)
|
|
|
|
-
|
|
|
|
- # Extract the parameters associated with Bob. This can only be
|
|
|
|
- # done after the CC Session is started.
|
|
|
|
- self.read_bind10_config()
|
|
|
|
-
|
|
|
|
- # Continue starting the processes. The authoritative server (if
|
|
|
|
- # selected):
|
|
|
|
- if self.cfg_start_auth:
|
|
|
|
- self.start_auth(c_channel_env)
|
|
|
|
-
|
|
|
|
- # ... and resolver (if selected):
|
|
|
|
- if self.cfg_start_resolver:
|
|
|
|
- self.start_resolver(c_channel_env)
|
|
|
|
- self.started_resolver_family = True
|
|
|
|
-
|
|
|
|
- # Everything after the main components can run as non-root.
|
|
|
|
- # TODO: this is only temporary - once the privileged socket creator is
|
|
|
|
- # fully working, nothing else will run as root.
|
|
|
|
- if self.uid is not None:
|
|
|
|
- posix.setuid(self.uid)
|
|
|
|
|
|
+ # Start the real core (sockcreator, msgq, cfgmgr)
|
|
|
|
+ self._component_configurator.startup(self.__core_components)
|
|
|
|
|
|
- # xfrin/xfrout and the zone manager are only meaningful if the
|
|
|
|
- # authoritative server has been started.
|
|
|
|
- if self.cfg_start_auth:
|
|
|
|
- self.start_xfrout(c_channel_env)
|
|
|
|
- self.start_xfrin(c_channel_env)
|
|
|
|
- self.start_zonemgr(c_channel_env)
|
|
|
|
- self.started_auth_family = True
|
|
|
|
|
|
+ # Connect to the msgq. This is not a process, so it's not handled
|
|
|
|
+ # inside the configurator.
|
|
|
|
+ self.start_ccsession(self.c_channel_env)
|
|
|
|
|
|
- # ... and finally start the remaining processes
|
|
|
|
- self.start_stats(c_channel_env)
|
|
|
|
- self.start_stats_httpd(c_channel_env)
|
|
|
|
- self.start_cmdctl(c_channel_env)
|
|
|
|
|
|
+ # Extract the parameters associated with Bob. This can only be
|
|
|
|
+ # done after the CC Session is started. Note that the logging
|
|
|
|
+ # configuration may override the "-v" switch set on the command line.
|
|
|
|
+ self._read_bind10_config()
|
|
|
|
|
|
- if self.cfg_start_dhcp6:
|
|
|
|
- self.start_dhcp6(c_channel_env)
|
|
|
|
|
|
+ # TODO: Restore the dropping of privileges
|
|
|
|
|
|
def startup(self):
|
|
def startup(self):
|
|
"""
|
|
"""
|
|
@@ -634,97 +627,81 @@ class BoB:
|
|
# this is the case we want, where the msgq is not running
|
|
# this is the case we want, where the msgq is not running
|
|
pass
|
|
pass
|
|
|
|
|
|
- # Start all processes. If any one fails to start, kill all started
|
|
|
|
- # processes and exit with an error indication.
|
|
|
|
|
|
+ # Start all components. If any one fails to start, kill all started
|
|
|
|
+ # components and exit with an error indication.
|
|
try:
|
|
try:
|
|
self.c_channel_env = c_channel_env
|
|
self.c_channel_env = c_channel_env
|
|
- self.start_all_processes()
|
|
|
|
|
|
+ self.start_all_components()
|
|
except Exception as e:
|
|
except Exception as e:
|
|
- self.kill_started_processes()
|
|
|
|
|
|
+ self.kill_started_components()
|
|
return "Unable to start " + self.curproc + ": " + str(e)
|
|
return "Unable to start " + self.curproc + ": " + str(e)
|
|
|
|
|
|
# Started successfully
|
|
# Started successfully
|
|
self.runnable = True
|
|
self.runnable = True
|
|
|
|
+ self.__started = True
|
|
return None
|
|
return None
|
|
|
|
|
|
- def stop_all_processes(self):
|
|
|
|
- """Stop all processes."""
|
|
|
|
- cmd = { "command": ['shutdown']}
|
|
|
|
-
|
|
|
|
- self.cc_session.group_sendmsg(cmd, 'Cmdctl', 'Cmdctl')
|
|
|
|
- self.cc_session.group_sendmsg(cmd, "ConfigManager", "ConfigManager")
|
|
|
|
- self.cc_session.group_sendmsg(cmd, "Auth", "Auth")
|
|
|
|
- self.cc_session.group_sendmsg(cmd, "Resolver", "Resolver")
|
|
|
|
- self.cc_session.group_sendmsg(cmd, "Xfrout", "Xfrout")
|
|
|
|
- self.cc_session.group_sendmsg(cmd, "Xfrin", "Xfrin")
|
|
|
|
- self.cc_session.group_sendmsg(cmd, "Zonemgr", "Zonemgr")
|
|
|
|
- self.cc_session.group_sendmsg(cmd, "Stats", "Stats")
|
|
|
|
- self.cc_session.group_sendmsg(cmd, "StatsHttpd", "StatsHttpd")
|
|
|
|
-
|
|
|
|
def stop_process(self, process, recipient):
|
|
def stop_process(self, process, recipient):
|
|
"""
|
|
"""
|
|
Stop the given process, friendly-like. The process is the name it has
|
|
Stop the given process, friendly-like. The process is the name it has
|
|
(in logs, etc), the recipient is the address on msgq.
|
|
(in logs, etc), the recipient is the address on msgq.
|
|
"""
|
|
"""
|
|
logger.info(BIND10_STOP_PROCESS, process)
|
|
logger.info(BIND10_STOP_PROCESS, process)
|
|
- # TODO: Some timeout to solve processes that don't want to die would
|
|
|
|
- # help. We can even store it in the dict, it is used only as a set
|
|
|
|
- self.expected_shutdowns[process] = 1
|
|
|
|
- # Ask the process to die willingly
|
|
|
|
self.cc_session.group_sendmsg({'command': ['shutdown']}, recipient,
|
|
self.cc_session.group_sendmsg({'command': ['shutdown']}, recipient,
|
|
recipient)
|
|
recipient)
|
|
|
|
|
|
- # Series of stop_process wrappers
|
|
|
|
- def stop_resolver(self):
|
|
|
|
- self.stop_process('b10-resolver', 'Resolver')
|
|
|
|
-
|
|
|
|
- def stop_auth(self):
|
|
|
|
- self.stop_process('b10-auth', 'Auth')
|
|
|
|
-
|
|
|
|
- def stop_xfrout(self):
|
|
|
|
- self.stop_process('b10-xfrout', 'Xfrout')
|
|
|
|
|
|
+ def component_shutdown(self, exitcode=0):
|
|
|
|
+ """
|
|
|
|
+ Stop the Boss instance at a component's request. The exitcode
|
|
|
|
+ indicates the desired exit code.
|
|
|
|
|
|
- def stop_xfrin(self):
|
|
|
|
- self.stop_process('b10-xfrin', 'Xfrin')
|
|
|
|
|
|
+ If we did not start yet, it raises an exception, which is meant
|
|
|
|
+ to propagate through the component and configurator to the startup
|
|
|
|
+ routine and abort the startup immediately. If it is started up already,
|
|
|
|
+ we just mark it so we terminate soon.
|
|
|
|
|
|
- def stop_zonemgr(self):
|
|
|
|
- self.stop_process('b10-zonemgr', 'Zonemgr')
|
|
|
|
|
|
+ It does set the exit code in both cases.
|
|
|
|
+ """
|
|
|
|
+ self.exitcode = exitcode
|
|
|
|
+ if not self.__started:
|
|
|
|
+ raise Exception("Component failed during startup");
|
|
|
|
+ else:
|
|
|
|
+ self.runnable = False
|
|
|
|
|
|
def shutdown(self):
|
|
def shutdown(self):
|
|
"""Stop the BoB instance."""
|
|
"""Stop the BoB instance."""
|
|
logger.info(BIND10_SHUTDOWN)
|
|
logger.info(BIND10_SHUTDOWN)
|
|
# first try using the BIND 10 request to stop
|
|
# first try using the BIND 10 request to stop
|
|
try:
|
|
try:
|
|
- self.stop_all_processes()
|
|
|
|
|
|
+ self._component_configurator.shutdown()
|
|
except:
|
|
except:
|
|
pass
|
|
pass
|
|
# XXX: some delay probably useful... how much is uncertain
|
|
# XXX: some delay probably useful... how much is uncertain
|
|
# I have changed the delay from 0.5 to 1, but sometimes it's
|
|
# I have changed the delay from 0.5 to 1, but sometimes it's
|
|
# still not enough.
|
|
# still not enough.
|
|
- time.sleep(1)
|
|
|
|
|
|
+ time.sleep(1)
|
|
self.reap_children()
|
|
self.reap_children()
|
|
# next try sending a SIGTERM
|
|
# next try sending a SIGTERM
|
|
- processes_to_stop = list(self.processes.values())
|
|
|
|
- for proc_info in processes_to_stop:
|
|
|
|
- logger.info(BIND10_SEND_SIGTERM, proc_info.name,
|
|
|
|
- proc_info.pid)
|
|
|
|
|
|
+ components_to_stop = list(self.components.values())
|
|
|
|
+ for component in components_to_stop:
|
|
|
|
+ logger.info(BIND10_SEND_SIGTERM, component.name(), component.pid())
|
|
try:
|
|
try:
|
|
- proc_info.process.terminate()
|
|
|
|
|
|
+ component.kill()
|
|
except OSError:
|
|
except OSError:
|
|
# ignore these (usually ESRCH because the child
|
|
# ignore these (usually ESRCH because the child
|
|
# finally exited)
|
|
# finally exited)
|
|
pass
|
|
pass
|
|
# finally, send SIGKILL (unmaskable termination) until everybody dies
|
|
# finally, send SIGKILL (unmaskable termination) until everybody dies
|
|
- while self.processes:
|
|
|
|
|
|
+ while self.components:
|
|
# XXX: some delay probably useful... how much is uncertain
|
|
# XXX: some delay probably useful... how much is uncertain
|
|
time.sleep(0.1)
|
|
time.sleep(0.1)
|
|
self.reap_children()
|
|
self.reap_children()
|
|
- processes_to_stop = list(self.processes.values())
|
|
|
|
- for proc_info in processes_to_stop:
|
|
|
|
- logger.info(BIND10_SEND_SIGKILL, proc_info.name,
|
|
|
|
- proc_info.pid)
|
|
|
|
|
|
+ components_to_stop = list(self.components.values())
|
|
|
|
+ for component in components_to_stop:
|
|
|
|
+ logger.info(BIND10_SEND_SIGKILL, component.name(),
|
|
|
|
+ component.pid())
|
|
try:
|
|
try:
|
|
- proc_info.process.kill()
|
|
|
|
|
|
+ component.kill(True)
|
|
except OSError:
|
|
except OSError:
|
|
# ignore these (usually ESRCH because the child
|
|
# ignore these (usually ESRCH because the child
|
|
# finally exited)
|
|
# finally exited)
|
|
@@ -746,33 +723,20 @@ class BoB:
|
|
# XXX: should be impossible to get any other error here
|
|
# XXX: should be impossible to get any other error here
|
|
raise
|
|
raise
|
|
if pid == 0: break
|
|
if pid == 0: break
|
|
- if pid in self.processes:
|
|
|
|
- # One of the processes we know about. Get information on it.
|
|
|
|
- proc_info = self.processes.pop(pid)
|
|
|
|
- proc_info.restart_schedule.set_run_stop_time()
|
|
|
|
- self.dead_processes[proc_info.pid] = proc_info
|
|
|
|
-
|
|
|
|
- # Write out message, but only if in the running state:
|
|
|
|
- # During startup and shutdown, these messages are handled
|
|
|
|
- # elsewhere.
|
|
|
|
- if self.runnable:
|
|
|
|
- if exit_status is None:
|
|
|
|
- logger.warn(BIND10_PROCESS_ENDED_NO_EXIT_STATUS,
|
|
|
|
- proc_info.name, proc_info.pid)
|
|
|
|
- else:
|
|
|
|
- logger.warn(BIND10_PROCESS_ENDED_WITH_EXIT_STATUS,
|
|
|
|
- proc_info.name, proc_info.pid,
|
|
|
|
- exit_status)
|
|
|
|
-
|
|
|
|
- # Was it a special process?
|
|
|
|
- if proc_info.name == "b10-msgq":
|
|
|
|
- logger.fatal(BIND10_MSGQ_DAEMON_ENDED)
|
|
|
|
- self.runnable = False
|
|
|
|
-
|
|
|
|
- # If we're in 'brittle' mode, we want to shutdown after
|
|
|
|
- # any process dies.
|
|
|
|
- if self.brittle:
|
|
|
|
- self.runnable = False
|
|
|
|
|
|
+ if pid in self.components:
|
|
|
|
+ # One of the components we know about. Get information on it.
|
|
|
|
+ component = self.components.pop(pid)
|
|
|
|
+ logger.info(BIND10_PROCESS_ENDED, component.name(), pid,
|
|
|
|
+ exit_status)
|
|
|
|
+ if component.running() and self.runnable:
|
|
|
|
+ # Tell it it failed. But only if it matters (we are
|
|
|
|
+ # not shutting down and the component considers itself
|
|
|
|
+ # to be running).
|
|
|
|
+ component_restarted = component.failed(exit_status);
|
|
|
|
+ # if the process wants to be restarted, but not just yet,
|
|
|
|
+ # it returns False
|
|
|
|
+ if not component_restarted:
|
|
|
|
+ self.components_to_restart.append(component)
|
|
else:
|
|
else:
|
|
logger.info(BIND10_UNKNOWN_CHILD_PROCESS_ENDED, pid)
|
|
logger.info(BIND10_UNKNOWN_CHILD_PROCESS_ENDED, pid)
|
|
|
|
|
|
@@ -786,36 +750,24 @@ class BoB:
|
|
|
|
|
|
The values returned can be safely passed into select() as the
|
|
The values returned can be safely passed into select() as the
|
|
timeout value.
|
|
timeout value.
|
|
|
|
+
|
|
"""
|
|
"""
|
|
- next_restart = None
|
|
|
|
- # if we're shutting down, then don't restart
|
|
|
|
if not self.runnable:
|
|
if not self.runnable:
|
|
return 0
|
|
return 0
|
|
- # otherwise look through each dead process and try to restart
|
|
|
|
- still_dead = {}
|
|
|
|
|
|
+ still_dead = []
|
|
|
|
+ # keep track of the first time we need to check this queue again,
|
|
|
|
+ # if at all
|
|
|
|
+ next_restart_time = None
|
|
now = time.time()
|
|
now = time.time()
|
|
- for proc_info in self.dead_processes.values():
|
|
|
|
- if proc_info.name in self.expected_shutdowns:
|
|
|
|
- # We don't restart, we wanted it to die
|
|
|
|
- del self.expected_shutdowns[proc_info.name]
|
|
|
|
- continue
|
|
|
|
- restart_time = proc_info.restart_schedule.get_restart_time(now)
|
|
|
|
- if restart_time > now:
|
|
|
|
- if (next_restart is None) or (next_restart > restart_time):
|
|
|
|
- next_restart = restart_time
|
|
|
|
- still_dead[proc_info.pid] = proc_info
|
|
|
|
- else:
|
|
|
|
- logger.info(BIND10_RESURRECTING_PROCESS, proc_info.name)
|
|
|
|
- try:
|
|
|
|
- proc_info.respawn()
|
|
|
|
- self.processes[proc_info.pid] = proc_info
|
|
|
|
- logger.info(BIND10_RESURRECTED_PROCESS, proc_info.name, proc_info.pid)
|
|
|
|
- except:
|
|
|
|
- still_dead[proc_info.pid] = proc_info
|
|
|
|
- # remember any processes that refuse to be resurrected
|
|
|
|
- self.dead_processes = still_dead
|
|
|
|
- # return the time when the next process is ready to be restarted
|
|
|
|
- return next_restart
|
|
|
|
|
|
+ for component in self.components_to_restart:
|
|
|
|
+ if not component.restart(now):
|
|
|
|
+ still_dead.append(component)
|
|
|
|
+ if next_restart_time is None or\
|
|
|
|
+ next_restart_time > component.get_restart_time():
|
|
|
|
+ next_restart_time = component.get_restart_time()
|
|
|
|
+ self.components_to_restart = still_dead
|
|
|
|
+
|
|
|
|
+ return next_restart_time
|
|
|
|
|
|
# global variables, needed for signal handlers
|
|
# global variables, needed for signal handlers
|
|
options = None
|
|
options = None
|
|
@@ -878,8 +830,8 @@ def parse_args(args=sys.argv[1:], Parser=OptionParser):
|
|
parser.add_option("--pid-file", dest="pid_file", type="string",
|
|
parser.add_option("--pid-file", dest="pid_file", type="string",
|
|
default=None,
|
|
default=None,
|
|
help="file to dump the PID of the BIND 10 process")
|
|
help="file to dump the PID of the BIND 10 process")
|
|
- parser.add_option("--brittle", dest="brittle", action="store_true",
|
|
|
|
- help="debugging flag: exit if any component dies")
|
|
|
|
|
|
+ parser.add_option("-w", "--wait", dest="wait_time", type="int",
|
|
|
|
+ default=10, help="Time (in seconds) to wait for config manager to start up")
|
|
|
|
|
|
(options, args) = parser.parse_args(args)
|
|
(options, args) = parser.parse_args(args)
|
|
|
|
|
|
@@ -982,7 +934,8 @@ def main():
|
|
# Go bob!
|
|
# Go bob!
|
|
boss_of_bind = BoB(options.msgq_socket_file, options.data_path,
|
|
boss_of_bind = BoB(options.msgq_socket_file, options.data_path,
|
|
options.config_file, options.nocache, options.verbose,
|
|
options.config_file, options.nocache, options.verbose,
|
|
- setuid, username, options.cmdctl_port, options.brittle)
|
|
|
|
|
|
+ setuid, username, options.cmdctl_port,
|
|
|
|
+ options.wait_time)
|
|
startup_result = boss_of_bind.startup()
|
|
startup_result = boss_of_bind.startup()
|
|
if startup_result:
|
|
if startup_result:
|
|
logger.fatal(BIND10_STARTUP_ERROR, startup_result)
|
|
logger.fatal(BIND10_STARTUP_ERROR, startup_result)
|