12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385 |
- #!@PYTHON@
- # Copyright (C) 2010,2011 Internet Systems Consortium.
- #
- # Permission to use, copy, modify, and distribute this software for any
- # purpose with or without fee is hereby granted, provided that the above
- # copyright notice and this permission notice appear in all copies.
- #
- # THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SYSTEMS CONSORTIUM
- # DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL
- # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
- # INTERNET SYSTEMS CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT,
- # INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
- # FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
- # NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
- # WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- """
- This file implements the b10-init program.
- Its purpose is to start up the BIND 10 system, and then manage the
- processes, by starting and stopping processes, plus restarting
- processes that exit.
- To start the system, it first runs the c-channel program (msgq), then
- connects to that. It then runs the configuration manager, and reads
- its own configuration. Then it proceeds to starting other modules.
- The Python subprocess module is used for starting processes, but
- because this is not efficient for managing groups of processes,
- SIGCHLD signals are caught and processed using the signal module.
- Most of the logic is contained in the Init class. However, since Python
- requires that signal processing happen in the main thread, we do
- signal handling outside of that class, in the code running for
- __main__.
- """
- import sys; sys.path.append ('@@PYTHONPATH@@')
- import os
- from isc.util.address_formatter import AddressFormatter
# Locate init.spec.  When B10_FROM_SOURCE is set in the environment the
# copy inside that source tree is used; otherwise the installed one under
# the configured data directory is used.
try:
    SPECFILE_LOCATION = (os.environ["B10_FROM_SOURCE"]
                         + "/src/bin/bind10/init.spec")
except KeyError:
    PREFIX = "@prefix@"
    DATAROOTDIR = "@datarootdir@"
    # The configured path may still carry ${datarootdir} and ${prefix}
    # placeholders; expand them in that order.
    SPECFILE_LOCATION = "@datadir@/@PACKAGE@/init.spec"
    SPECFILE_LOCATION = SPECFILE_LOCATION.replace("${datarootdir}",
                                                  DATAROOTDIR)
    SPECFILE_LOCATION = SPECFILE_LOCATION.replace("${prefix}", PREFIX)
- import subprocess
- import signal
- import re
- import errno
- import time
- import select
- import random
- import socket
- from optparse import OptionParser, OptionValueError
- import io
- import pwd
- import posix
- import copy
- from bind10_config import LIBEXECPATH
- import bind10_config
- import isc.cc
- import isc.util.process
- import isc.net.parse
- import isc.log
- import isc.config
- from isc.log_messages.init_messages import *
- import isc.bind10.component
- import isc.bind10.special_component
- import isc.bind10.socket_cache
- import isc.util.traceback_handler
- import libutil_io_python
- import tempfile
# Logging is initialised first so that the rest of the module can log.
isc.log.init("b10-init", buffer=True)
logger = isc.log.Logger("init")

# Debug levels used in this file.  System-wide definitions are still
# pending, so the values are hardcoded for now.
DBG_PROCESS, DBG_COMMANDS = (logger.DBGLVL_TRACE_BASIC,
                             logger.DBGLVL_TRACE_DETAIL)

# Markers sent over the unix domain socket to tell the peer whether a real
# socket follows.
CREATOR_SOCKET_OK, CREATOR_SOCKET_UNAVAILABLE = b"1\n", b"0\n"

# RCodes of known exceptions for the get_token command
CREATOR_SOCKET_ERROR, CREATOR_SHARE_ERROR = 2, 3

# Assign this process some longer name
isc.util.process.rename()

# Version string displayed to the user: the module name, the module version
# number, and the overall BIND 10 version number (set in configure.ac).
VERSION = "bind10 20110223 (BIND 10 @PACKAGE_VERSION@)"

# Reference time reported as boot_time of Init
_BASETIME = time.gmtime()
# Detailed hint appended to the fatal message when startup fails, possibly
# because of a permission problem with the log lock file.  It is verbose on
# purpose: a bare "failed to open <filename>: permission denied" does not
# tell the operator what to do about it.
NOTE_ON_LOCK_FILE = """\
TIP: if this is about permission error for a lock file, check if the directory
of the file is writable for the user of the bind10 process; often you need
to start bind10 as a super user. Also, if you specify the -u option to
change the user and group, the directory must be writable for the group,
and the created lock file must be writable for that user. Finally, make sure
the lock file is not left in the directory before restarting.
"""
class ProcessInfoError(Exception):
    """Exception type declared by this module; adds nothing to Exception."""
class ChangeUserError(Exception):
    '''Raised when switching the user (setuid) or group (setgid) fails.

    The exception is meant to travel up through the component management
    modules to the top level, where it is turned into a helpful fatal
    error message.
    '''
class ProcessInfo:
    """Information about a process, plus the means to (re)spawn it.

    Attributes:
        name: label of the process.
        args: argument list handed to subprocess.Popen.
        env: extra environment variables layered over those of b10-init.
        dev_null_stdout, dev_null_stderr: when true, the corresponding
            stream of the child is redirected to os.devnull.
        process: the subprocess.Popen object once spawned, otherwise None.
        pid: PID of the child once spawned, otherwise None.
    """

    # Single write handle on the null device, shared by every child stream
    # that is redirected.
    dev_null = open(os.devnull, "w")

    def __init__(self, name, args, env=None, dev_null_stdout=False,
                 dev_null_stderr=False):
        self.name = name
        self.args = args
        # Use a fresh dict per instance; a mutable default argument would be
        # shared by every ProcessInfo created without an explicit env.
        self.env = {} if env is None else env
        self.dev_null_stdout = dev_null_stdout
        self.dev_null_stderr = dev_null_stderr
        self.process = None
        self.pid = None

    def _preexec_work(self):
        """Function used before running a program that needs to run as a
        different user."""
        # First, put us into a separate process group so we don't get
        # SIGINT signals on Ctrl-C (b10-init will shut everything down by
        # other means).
        os.setpgrp()

    def _spawn(self):
        """Start the child process and record its Popen object and PID."""
        spawn_stdout = self.dev_null if self.dev_null_stdout else None
        spawn_stderr = self.dev_null if self.dev_null_stderr else None
        # Environment variables for the child process will be a copy of those
        # of the b10-init process with any additional specific variables given
        # on construction (self.env).  A plain dict is used on purpose:
        # copying os.environ with the copy module produces another
        # os.environ-like mapping whose writes can still reach the real
        # environment of this process.
        spawn_env = dict(os.environ)
        spawn_env.update(self.env)
        # Put LIBEXECPATH in front of the search path.  With no (or an empty)
        # PATH, use LIBEXECPATH alone rather than failing with KeyError or
        # producing a trailing ':' entry.
        inherited_path = spawn_env.get('PATH')
        if inherited_path:
            spawn_env['PATH'] = LIBEXECPATH + ':' + inherited_path
        else:
            spawn_env['PATH'] = LIBEXECPATH
        self.process = subprocess.Popen(self.args,
                                        stdin=subprocess.PIPE,
                                        stdout=spawn_stdout,
                                        stderr=spawn_stderr,
                                        close_fds=True,
                                        env=spawn_env,
                                        preexec_fn=self._preexec_work)
        self.pid = self.process.pid

    # spawn() and respawn() are the same for now, but in the future they
    # may have different functionality
    def spawn(self):
        """Start the process."""
        self._spawn()

    def respawn(self):
        """Start the process again."""
        self._spawn()
class CChannelConnectError(Exception):
    """Raised when no connection to the c-channel (msgq) could be made."""
class ProcessStartError(Exception):
    """Raised when a process did not complete its startup."""
- class Init:
- """Init of BIND class."""
def __init__(self, msgq_socket_file=None, data_path=None,
             config_filename=None, clear_config=False,
             verbose=False, nokill=False, setuid=None, setgid=None,
             username=None, cmdctl_port=None, wait_time=10):
    """
    Set up the Init of BIND.  This is a singleton (only one can run).

    msgq_socket_file is the UNIX domain socket file the msgq process
    listens on.  With verbose set, b10-init reports what it is doing.

    data_path and config_filename, when given, are handed on to the
    config manager and select the configuration file to use;
    clear_config adds its --clear-config switch.

    cmdctl_port is handed on to cmdctl and selects the port it listens
    on.

    setuid and setgid are the user and group IDs that change_user()
    switches to; nokill stops shutdown() from signalling children.

    wait_time is the number of seconds Init waits for selected processes
    to initialize before it continues.  Currently this is only the
    configuration manager.
    """
    # Values taken straight from the caller.
    self.msgq_socket_file = msgq_socket_file
    self.data_path = data_path
    self.config_filename = config_filename
    self.clear_config = clear_config
    self.verbose = verbose
    self.nokill = nokill
    self.__uid = setuid
    self.__gid = setgid
    self.username = username
    self.cmdctl_port = cmdctl_port
    self.wait_time = wait_time

    # Sessions on the message queue; both are created during startup.
    self.cc_session = None
    self.ccs = None
    # Name of the process being started (maintained by log_starting()).
    self.curproc = None
    self.component_config = {}
    # Some time in the future a single component may have multiple
    # processes (like a pipeline-like component).  If that happens, the
    # name "components" may be inappropriate, but as the code is probably
    # not completely ready for it, it stays for now.  Multiple instances
    # of a single component should be supported too.  Whether that means
    # one component with several identical processes, or several
    # components started with the same configuration (what is done now,
    # but might change), is an open question.
    self.components = {}
    # Components that died and are waiting for a restart.  Components
    # manage their own restart schedule now.
    self.components_to_restart = []
    self.runnable = False
    # Seconds start_msgq() keeps trying to connect to the msgq.
    self.msgq_timeout = 5
    # _run_under_unittests is only meant to be used when testing.  It
    # bypasses execution of some code to help with testing.
    self._run_under_unittests = False
    self._component_configurator = isc.bind10.component.Configurator(
        self, isc.bind10.special_component.get_specials())
    # The priorities make the core start in the correct order: first the
    # socket creator (which would drop root privileges by then), then the
    # message queue and after that the config manager (which uses the
    # message queue).
    self.__core_components = {
        core_name: {
            'kind': 'core',
            'special': core_name,
            'priority': priority
        }
        for core_name, priority in (('sockcreator', 200),
                                    ('msgq', 199),
                                    ('cfgmgr', 198))
    }
    self.__started = False
    self.exitcode = 0
    # If -v was set, enable full debug logging.
    if self.verbose:
        logger.set_severity("DEBUG", 99)
    # This is set in init_socket_srv
    self._socket_path = None
    self._socket_cache = None
    self._tmpdir = None
    self._srv_socket = None
    self._unix_sockets = {}
def __propagate_component_config(self, config):
    """
    Hand a component configuration to the configurator.

    A copy of config is extended with the core components (so they stay
    alive) and then passed to the configurator's reconfigure().  An
    Exception is raised if config itself names a core component.
    """
    merged = dict(config)
    for core_name, core_spec in self.__core_components.items():
        if core_name in merged:
            raise Exception(core_name + " is core component managed by " +
                            "b10-init, do not set it")
        merged[core_name] = core_spec
    # Update the configuration
    self._component_configurator.reconfigure(merged)
def change_user(self):
    '''Switch to the group and user given on construction.

    A component is expected to call this on initial startup, once the
    system is ready for the switch (i.e., once all components that need
    the privilege of the original user have started).

    The group is changed first, then the user; either step is skipped
    when the corresponding ID is None.  Any failure is reported as
    ChangeUserError.
    '''
    try:
        gid = self.__gid
        if gid is not None:
            logger.info(BIND10_SETGID, gid)
            posix.setgid(gid)
    except Exception as ex:
        raise ChangeUserError('failed to change group: ' + str(ex))

    try:
        uid = self.__uid
        if uid is not None:
            posix.setuid(uid)
            # A one-shot logger is used after setuid.  This detects any
            # permission issue regarding logging caused by the setuid at
            # the earliest opportunity.
            isc.log.Logger("b10-init").info(BIND10_SETUID, uid)
    except Exception as ex:
        raise ChangeUserError('failed to change user: ' + str(ex))
def config_handler(self, new_config):
    """
    Handle a configuration update.

    While Init is not runnable (the initial update) nothing is done and
    None is returned; that case is left to startup.  Otherwise a
    'components' entry, if present, is propagated to the configurator.
    Returns answer 0 on success, or answer 1 carrying the error text when
    an exception was raised.
    """
    if self.runnable:
        logger.debug(DBG_COMMANDS, BIND10_RECEIVED_NEW_CONFIGURATION,
                     new_config)
        try:
            if 'components' in new_config:
                wanted = new_config['components']
                self.__propagate_component_config(wanted)
            result = isc.config.ccsession.create_answer(0)
        except Exception as e:
            logger.error(BIND10_RECONFIGURE_ERROR, e)
            result = isc.config.ccsession.create_answer(1, str(e))
        return result
    return None
def get_processes(self):
    """
    Return one [pid, name, address] entry per registered component,
    ordered by PID.
    """
    return [[pid,
             self.components[pid].name(),
             self.components[pid].address()]
            for pid in sorted(self.components.keys())]
def _get_stats_data(self):
    """
    Return the statistics of Init: a dictionary whose 'boot_time' entry
    is _BASETIME formatted as YYYY-MM-DDTHH:MM:SSZ.
    """
    boot_time = time.strftime('%Y-%m-%dT%H:%M:%SZ', _BASETIME)
    return {'boot_time': boot_time}
def command_handler(self, command, args):
    """
    Execute a command sent to Init and return the answer.

    Commands handled: "shutdown" (clears self.runnable), "getstats",
    "ping", "show_processes", "get_socket" and "drop_socket".  A command
    that is not a string yields the answer (1, "bad command"); any other
    string yields (1, "Unknown command").

    For "drop_socket", args must contain "token"; missing args (None) or
    a missing token yields (1, "Missing token parameter"), and an error
    raised while dropping the socket is reported as answer 1 with the
    error text.
    """
    logger.debug(DBG_COMMANDS, BIND10_RECEIVED_COMMAND, command)
    create_answer = isc.config.ccsession.create_answer
    if not isinstance(command, str):
        return create_answer(1, "bad command")

    if command == "shutdown":
        self.runnable = False
        return create_answer(0)
    if command == "getstats":
        return create_answer(0, self._get_stats_data())
    if command == "ping":
        return create_answer(0, "pong")
    if command == "show_processes":
        return create_answer(0, self.get_processes())
    if command == "get_socket":
        return self._get_socket(args)
    if command == "drop_socket":
        # args may be None when the command was sent without parameters;
        # report that like a missing token instead of raising TypeError.
        if args is None or "token" not in args:
            return create_answer(1, "Missing token parameter")
        try:
            self._socket_cache.drop_socket(args["token"])
            return create_answer(0)
        except Exception as e:
            return create_answer(1, str(e))
    return create_answer(1, "Unknown command")
def kill_started_components(self):
    """
    Forcefully kill every started component and forget about all of them.

    Used as part of the exception handling when a process fails to start.
    """
    logger.info(BIND10_KILLING_ALL_PROCESSES)
    forceful = True
    self.__kill_children(forceful)
    self.components = dict()
def _read_bind10_config(self):
    """
    Read the parameters of the Init module itself, i.e. the list of
    components to start now, and propagate them to the configurator.

    This could easily be folded into the start-up sequence, but it stays
    a separate method for historical reasons and because the tests
    sometimes replace it.
    """
    logger.info(BIND10_READING_INIT_CONFIGURATION)
    wanted_components = self.ccs.get_full_config()['components']
    self.__propagate_component_config(wanted_components)
def log_starting(self, process, port = None, address = None):
    """
    Log a "Starting xxx" message at info level.

    Keeping this in one method ensures that the output form is consistent
    across all processes.

    The process name is stored in self.curproc; it identifies the process
    that failed to start if there is an error, and it is used in the
    "Started" message on success.  The port is added to the message when
    given; when an address is given, address and port are added together.
    """
    self.curproc = process
    if address is not None:
        logger.info(BIND10_STARTING_PROCESS_PORT_ADDRESS,
                    self.curproc, AddressFormatter((address, port)))
    elif port is not None:
        logger.info(BIND10_STARTING_PROCESS_PORT, self.curproc,
                    port)
    else:
        logger.info(BIND10_STARTING_PROCESS, self.curproc)
def log_started(self, pid = None):
    """
    Log a 'Started xxxx' debug message for the process named in
    self.curproc, including the PID when one is given.  As with
    log_starting(), this ensures a consistent format.
    """
    if pid is not None:
        logger.debug(DBG_PROCESS, BIND10_STARTED_PROCESS_PID, self.curproc,
                     pid)
    else:
        logger.debug(DBG_PROCESS, BIND10_STARTED_PROCESS, self.curproc)
def process_running(self, msg, who):
    """
    Check whether msg announces that the "who" process is running.

    Some processes send a message to Init after they have started; it is
    a dictionary of the form {"running": "<process>"}.

    Returns True only if msg["running"] equals who.  Returns False when
    msg is None, when msg names another process (logged as an unexpected
    message) or when msg cannot be interpreted (logged as an unrecognised
    message).
    """
    if msg is None:
        return False
    try:
        matched = msg["running"] == who
    except Exception:
        # Only ordinary errors mean "malformed message"; KeyboardInterrupt
        # and SystemExit must not be swallowed here.
        logger.error(BIND10_STARTUP_UNRECOGNISED_MESSAGE, msg)
        return False
    if matched:
        return True
    logger.error(BIND10_STARTUP_UNEXPECTED_MESSAGE, msg)
    return False
- # The next few methods start the individual processes of BIND-10. They
- # are called via start_all_processes(). If any fail, an exception is
- # raised which is caught by the caller of start_all_processes(); this kills
- # processes started up to that point before terminating the program.
def _make_process_info(self, name, args, env,
                       dev_null_stdout=False, dev_null_stderr=False):
    """
    Build the ProcessInfo for a process.  Going through this wrapper
    makes it possible to override ProcessInfo() creation during testing.
    """
    info = ProcessInfo(name, args, env,
                       dev_null_stdout=dev_null_stdout,
                       dev_null_stderr=dev_null_stderr)
    return info
def start_msgq(self):
    """
    Start the message queue and connect to the command channel.

    The b10-msgq process is spawned with its stdout sent to the null
    device (and its stderr too unless verbose).  Connecting is retried
    every 0.1 seconds for up to self.msgq_timeout seconds; after that the
    msgq process is killed and CChannelConnectError is raised.  On
    success the session subscribes to the "Init" group.

    Returns the process information of the msgq.
    """
    self.log_starting("b10-msgq")
    msgq_proc = self._make_process_info("b10-msgq", ["b10-msgq"],
                                        self.c_channel_env,
                                        True, not self.verbose)
    msgq_proc.spawn()
    self.log_started(msgq_proc.pid)

    # Now connect to the c-channel
    cc_connect_start = time.time()
    while self.cc_session is None:
        # if we are run under unittests, break
        if self._run_under_unittests:
            break

        # if we have been trying for "a while" give up
        if (time.time() - cc_connect_start) > self.msgq_timeout:
            if msgq_proc.process:
                msgq_proc.process.kill()
            logger.error(BIND10_CONNECTING_TO_CC_FAIL)
            # Report the limit actually in effect, not a fixed number.
            raise CChannelConnectError("Unable to connect to c-channel " +
                                       "after " + str(self.msgq_timeout) +
                                       " seconds")

        # try to connect, and if we can't wait a short while
        try:
            self.cc_session = isc.cc.Session(self.msgq_socket_file)
        except isc.cc.session.SessionError:
            time.sleep(0.1)

    # Subscribe to the message queue.  The only messages we expect to
    # receive on this channel are ones relating to process startup.
    if self.cc_session is not None:
        self.cc_session.group_subscribe("Init")

    return msgq_proc
def wait_msgq(self):
    """
    Wait until the message queue has fully started.  That happens only
    after the config manager connects to it, and it is known to be ready
    once it starts answering commands.

    No dedicated command exists for this check: an error response is as
    good as a positive one to know the msgq is alive.

    Raises ProcessStartError if the msgq is still not answering when the
    attempts run out.
    """
    # The sleep is 10 times shorter here (the start should be fast now),
    # so there are 10 times more attempts.
    attempts_left = self.wait_time * 10
    alive = False
    while attempts_left > 0 and not alive:
        try:
            self.ccs.rpc_call('AreYouThere?', 'Msgq')
        except isc.config.RPCRecipientMissing:
            # Not there yet
            time.sleep(0.1)
            attempts_left -= 1
        except isc.config.RPCError:
            # It doesn't like the RPC, so it's alive now
            alive = True
        else:
            # The call is not expected to succeed.  If it does, it is a
            # programmer error.
            raise Exception("Non-existing RPC call succeeded")

    if not alive:  # Still not started
        raise ProcessStartError("Msgq didn't complete the second stage " +
                                "of startup")
def start_cfgmgr(self):
    """
    Start the configuration manager process and wait until it reports
    that it is running.

    Returns the process information of b10-cfgmgr; raises
    ProcessStartError if it did not report in time.
    """
    self.log_starting("b10-cfgmgr")
    cmdline = ["b10-cfgmgr"]
    if self.data_path is not None:
        cmdline += ["--data-path=" + self.data_path]
    if self.config_filename is not None:
        cmdline += ["--config-filename=" + self.config_filename]
    if self.clear_config:
        cmdline += ["--clear-config"]
    cfgmgr_proc = self._make_process_info("b10-cfgmgr", cmdline,
                                          self.c_channel_env)
    cfgmgr_proc.spawn()
    self.log_started(cfgmgr_proc.pid)

    # Subsequent initialization cannot proceed without the configuration
    # manager, so wait for it.  The time to wait can be set on the
    # command line.
    seconds_left = self.wait_time
    msg, _ = self.cc_session.group_recvmsg()
    while seconds_left > 0 and not self.process_running(msg,
                                                        "ConfigManager"):
        logger.debug(DBG_PROCESS, BIND10_WAIT_CFGMGR)
        time.sleep(1)
        seconds_left -= 1
        msg, _ = self.cc_session.group_recvmsg()

    if not self.process_running(msg, "ConfigManager"):
        raise ProcessStartError("Configuration manager process has not " +
                                "started")

    return cfgmgr_proc
def start_ccsession(self, c_channel_env):
    """
    Start the CC Session.

    c_channel_env is unused; it is accepted only to keep the argument
    list the same for all start_xxx methods.

    As the CC session is not a process, the log_starting/log_started
    methods are not used for logging here.
    """
    logger.info(BIND10_STARTING_CC)
    # Unsubscribe from the other CC session first: only one is monitored,
    # and msgq expects all data sent to us to be read, or it will close
    # its side of the socket.
    startup_session = self.cc_session
    if startup_session is not None:
        startup_session.group_unsubscribe("Init")
    self.ccs = isc.config.ModuleCCSession(
        SPECFILE_LOCATION,
        self.config_handler,
        self.command_handler,
        socket_file = self.msgq_socket_file)
    self.ccs.start()
    logger.debug(DBG_PROCESS, BIND10_STARTED_CC)
- # A couple of utility methods for starting processes...
def start_process(self, name, args, c_channel_env, port=None,
                  address=None):
    """
    Start a process from the given command arguments, logging the
    appropriate messages before and after, and return its process
    information.

    The port and address arguments are for log messages only.
    """
    self.log_starting(name, port, address)
    started = self._make_process_info(name, args, c_channel_env)
    started.spawn()
    self.log_started(started.pid)
    return started
def register_process(self, pid, component):
    """
    Put another process under the watch of b10-init.  When the process
    dies, component.failed() is called with the exit code.

    The component is expected to be an isc.bind10.component.BaseComponent
    subclass (or anything having the same interface).
    """
    self.components.update({pid: component})
def start_simple(self, name):
    """
    Most of the BIND-10 processes are started with the command:

        <process-name> [-v]

    ... where -v is appended if verbose is enabled.  This method builds
    those arguments from the name, starts the process and returns its
    process information.
    """
    cmdline = [name, '-v'] if self.verbose else [name]
    return self.start_process(name, cmdline, self.c_channel_env)
- # The next few methods start up the rest of the BIND-10 processes.
- # Although many of these methods are little more than a call to
- # start_simple, they are retained (a) for testing reasons and (b) as a
- # place where modifications can be made if the process start-up sequence
- # changes for a given process.
def start_auth(self):
    """
    Start the Authoritative server (with -v when verbose) and return its
    process information.
    """
    cmdline = ['b10-auth', '-v'] if self.verbose else ['b10-auth']
    return self.start_process("b10-auth", cmdline, self.c_channel_env)
def start_resolver(self):
    """
    Start the Resolver (with -v when verbose) and return its process
    information.  At present the arguments and switches are pure
    speculation; as with the auth daemon, they should be read from the
    configuration database.
    """
    self.curproc = "b10-resolver"
    # XXX: this must be read from the configuration manager in the future
    cmdline = ['b10-resolver']
    if self.verbose:
        cmdline.append('-v')
    return self.start_process("b10-resolver", cmdline, self.c_channel_env)
def start_cmdctl(self):
    """
    Start the command control process and return its process
    information.  --port is passed when a cmdctl port was configured and
    -v when verbose; the port is also handed on for the log message.
    """
    cmdline = ["b10-cmdctl"]
    if self.cmdctl_port is not None:
        cmdline += ["--port=" + str(self.cmdctl_port)]
    if self.verbose:
        cmdline += ["-v"]
    return self.start_process("b10-cmdctl", cmdline, self.c_channel_env,
                              self.cmdctl_port)
def start_all_components(self):
    """
    Start up all the components.  Exceptions raised while doing so are
    left for the caller to handle.
    """
    # The real core first (sockcreator, msgq, cfgmgr)
    core = self.__core_components
    self._component_configurator.startup(core)

    # Connecting to the msgq is not a process, so the configurator does
    # not handle it.
    channel_env = self.c_channel_env
    self.start_ccsession(channel_env)

    # The msgq must be fully started before the rest of the components.
    self.wait_msgq()

    # The parameters associated with Init can only be extracted once the
    # CC Session is started.  Note that the logging configuration may
    # override the "-v" switch set on the command line.
    self._read_bind10_config()

    # TODO: Return the dropping of privileges
def startup(self):
    """
    Start the Init instance.

    Refuses to start if a msgq is already answering on the socket file.
    Otherwise all components are started; if any of them fails, the ones
    started so far are killed.

    Returns None if successful, otherwise a string describing the
    problem.
    """
    # Try to connect to the c-channel daemon, to see if it is already
    # running
    c_channel_env = {}
    if self.msgq_socket_file is not None:
        c_channel_env["BIND10_MSGQ_SOCKET_FILE"] = self.msgq_socket_file
    logger.debug(DBG_PROCESS, BIND10_CHECK_MSGQ_ALREADY_RUNNING)
    try:
        self.cc_session = isc.cc.Session(self.msgq_socket_file)
        logger.fatal(BIND10_MSGQ_ALREADY_RUNNING)
        return "b10-msgq already running, or socket file not cleaned , " +\
            "cannot start"
    except isc.cc.session.SessionError:
        # this is the case we want, where the msgq is not running
        pass

    # Start all components.  If any one fails to start, kill all started
    # components and exit with an error indication.
    try:
        self.c_channel_env = c_channel_env
        self.start_all_components()
    except ChangeUserError as e:
        self.kill_started_components()
        return str(e) + '; ' + NOTE_ON_LOCK_FILE.replace('\n', ' ')
    except Exception as e:
        self.kill_started_components()
        # curproc is still None when the failure happened before any
        # log_starting() call; concatenating None would raise TypeError
        # and hide the real error.
        failed = self.curproc
        if failed is None:
            failed = "(unknown component)"
        return "Unable to start " + str(failed) + ": " + str(e)

    # Started successfully
    self.runnable = True
    self.__started = True
    return None
def stop_process(self, process, recipient, pid):
    """
    Stop the given process, friendly-like, by sending it a 'shutdown'
    command over the message queue.

    process is the name it has (in logs, etc), recipient is its address
    on msgq, and pid is the pid of the process (with multiple processes
    of the same name, a receiver might want to check whether the command
    is meant for it).

    A failure to send is logged and the exception is re-raised.
    """
    logger.info(BIND10_STOP_PROCESS, process)
    try:
        send = self.cc_session.group_sendmsg
        shutdown_cmd = isc.config.ccsession.create_command('shutdown',
                                                           {'pid': pid})
        send(shutdown_cmd, recipient, recipient)
    except BaseException:
        logger.error(BIND10_COMPONENT_SHUTDOWN_ERROR, process)
        raise
def component_shutdown(self, exitcode=0):
    """
    Stop the Init instance on request of a component.  The exitcode is
    the desired exit code and is recorded in every case.

    Before startup has completed an exception is raised; it is meant to
    propagate through the component and configurator to the startup
    routine and abort the startup immediately.  Once started, Init is
    only marked as no longer runnable so that it terminates soon.
    """
    self.exitcode = exitcode
    if self.__started:
        self.runnable = False
        return
    raise Exception("Component failed during startup")
def shutdown(self):
    """
    Stop the Init instance.

    The rest of the system is told that this module is stopping, the
    configurator is asked to shut the components down, and exited
    children are reaped.  Unless nokill is set, remaining components are
    then sent SIGTERM once and SIGKILL repeatedly until none is left.
    """
    logger.info(BIND10_SHUTDOWN)
    # If ccsession is still there, inform rest of the system this module
    # is stopping.  Since everything will be stopped shortly, this is not
    # really necessary, but this is done to reflect that b10-init is also
    # 'just' a module.
    if self.ccs is not None:
        self.ccs.send_stopping()

    # try using the BIND 10 request to stop
    try:
        self._component_configurator.shutdown()
    except Exception:
        # Best effort only: whatever is still alive afterwards is dealt
        # with by the signals below.
        pass
    # XXX: some delay probably useful... how much is uncertain
    # I have changed the delay from 0.5 to 1, but sometime it's
    # still not enough.
    time.sleep(1)
    self.reap_children()

    # Send TERM and KILL signals to modules if we're not prevented
    # from doing so
    if not self.nokill:
        # next try sending a SIGTERM
        self.__kill_children(False)
        # finally, send SIGKILL (unmaskable termination) until everybody
        # dies
        while self.components:
            # XXX: some delay probably useful... how much is uncertain
            time.sleep(0.1)
            self.reap_children()
            self.__kill_children(True)
    logger.info(BIND10_SHUTDOWN_COMPLETE)
def __kill_children(self, forceful):
    '''Terminate the remaining subprocesses by sending them a signal.

    The forceful parameter is passed on to the kill() method of each
    component.  This is a dedicated subroutine of shutdown(), just to
    unify two similar cases.
    '''
    if forceful:
        logmsg, signame = BIND10_SEND_SIGKILL, "SIGKILL"
    else:
        logmsg, signame = BIND10_SEND_SIGTERM, "SIGTERM"
    # Iterate over a snapshot of the values, as the components may be
    # modified in the loop.
    for component in tuple(self.components.values()):
        logger.info(logmsg, component.name(), component.pid())
        try:
            component.kill(forceful)
        except OSError as ex:
            # If kill() failed due to EPERM, it doesn't make sense to
            # keep trying, so the fact is logged and the component is
            # forgotten.  Other OSErrors are ignored (usually ESRCH
            # because the child finally exited).
            logger.info(BIND10_SEND_SIGNAL_FAIL, signame,
                        component.name(), component.pid(), ex)
            if ex.errno == errno.EPERM:
                del self.components[component.pid()]
def _get_process_exit_status(self):
    """Poll, without blocking, for any child process that has exited.

    Returns whatever os.waitpid(-1, os.WNOHANG) returns.
    """
    any_child = -1
    return os.waitpid(any_child, os.WNOHANG)
def reap_children(self):
    """Collect exited child processes and note them for later handling.

    Exit statuses are fetched until none is pending (pid 0) or there
    are no children at all (ECHILD). Any other OSError is re-raised.

    A known component is removed from self.components. If it still
    considers itself running and we are not shutting down, it is told
    that it failed; when it does not restart right away it is queued in
    self.components_to_restart.
    """
    while True:
        try:
            (pid, exit_status) = self._get_process_exit_status()
        except OSError as o:
            # XXX: should be impossible to get any other error here
            if o.errno != errno.ECHILD:
                raise
            return
        if pid == 0:
            return

        if pid not in self.components:
            logger.info(BIND10_UNKNOWN_CHILD_PROCESS_ENDED, pid)
            continue

        # One of the components we know about. Get information on it.
        component = self.components.pop(pid)
        logger.info(BIND10_PROCESS_ENDED, component.name(), pid,
                    exit_status)
        # Report the failure only if it matters: the component thinks it
        # is running and we are not shutting down.
        if not (component.is_running() and self.runnable):
            continue
        # failed() returns False when the component wants to be
        # restarted, but not just yet.
        if not component.failed(exit_status):
            self.components_to_restart.append(component)
def restart_processes(self):
    """
    Try to restart the components queued in components_to_restart.

    Return value:
    * 0 if the server is shutting down (nothing is attempted).
    * None if no component is left waiting for a restart.
    * Otherwise the earliest restart time among the components that
      could not be restarted yet.

    The values returned can be safely passed into select() as the
    timeout value.
    """
    if not self.runnable:
        return 0

    waiting = []
    # Earliest time at which the queue needs to be looked at again,
    # if at all.
    earliest = None
    now = time.time()
    for component in self.components_to_restart:
        if not self._component_configurator.has_component(component):
            # Removed from the configurator since it was scheduled for a
            # restart; the object is simply dropped here.
            logger.info(BIND10_RESTART_COMPONENT_SKIPPED, component.name())
            continue
        if component.restart(now):
            continue
        waiting.append(component)
        if earliest is None or earliest > component.get_restart_time():
            earliest = component.get_restart_time()

    self.components_to_restart = waiting
    return earliest
def _get_socket(self, args):
    """
    Implementation of the get_socket CC command.

    The request parameters are taken from args, the socket cache is
    asked for a token, and an answer holding the token and the path of
    our unix-domain socket is returned. Every failure is turned into an
    error answer; nothing is raised to the caller.
    """
    make_answer = isc.config.ccsession.create_answer
    try:
        try:
            address = isc.net.parse.addr_parse(args['address'])
            port_number = isc.net.parse.port_parse(args['port'])
            proto = args['protocol']
            if proto not in ('UDP', 'TCP'):
                raise ValueError("Protocol must be either UDP or TCP")
            mode = args['share_mode']
            if mode not in ('ANY', 'SAMEAPP', 'NO'):
                raise ValueError(
                    "Share mode must be one of ANY, SAMEAPP or NO")
            name = args['share_name']
        except KeyError as ke:
            # Only a key missing from args is reported this way; a
            # KeyError raised further down gets the generic answer.
            return make_answer(1, "Missing parameter " + str(ke))

        # FIXME: This call contains blocking IPC. It is expected to be
        # short, but if it turns out to be problem, we'll need to do
        # something about it.
        token = self._socket_cache.get_token(proto, address, port_number,
                                             mode, name)
        reply = {
            'token': token,
            'path': self._socket_path
        }
        return make_answer(0, reply)
    except isc.bind10.socket_cache.SocketError as e:
        return make_answer(CREATOR_SOCKET_ERROR, str(e))
    except isc.bind10.socket_cache.ShareError as e:
        return make_answer(CREATOR_SHARE_ERROR, str(e))
    except Exception as e:
        return make_answer(1, str(e))
def socket_request_handler(self, token, unix_socket):
    """
    Handle a token that came over a unix-domain socket.

    The token is looked up in the _socket_cache and the socket it
    identifies is sent back over unix_socket, announced by
    CREATOR_SOCKET_OK. If any step raises, the problem is logged and
    CREATOR_SOCKET_UNAVAILABLE is sent instead.

    Parameters:
    token -- the request as bytes; it is decoded as ASCII here.
    unix_socket -- the connection the request arrived on.
    """
    try:
        # token is rebound on purpose: the handler below logs whichever
        # form (bytes or str) it had when the failure happened.
        token = str(token, 'ASCII') # Convert from bytes to str
        fd = self._socket_cache.get_socket(token, unix_socket.fileno())
        # FIXME: These two calls are blocking in their nature. An OS-level
        # buffer is likely to be large enough to hold all these data, but
        # if it wasn't and the remote application got stuck, we would have
        # a problem. If there appear such problems, we should do something
        # about it.
        unix_socket.sendall(CREATOR_SOCKET_OK)
        libutil_io_python.send_fd(unix_socket.fileno(), fd)
    except Exception as e:
        # NOTE(review): if send_fd() fails after CREATOR_SOCKET_OK went
        # out, the peer receives both status markers. An error raised by
        # this sendall() itself is not caught here.
        logger.info(BIND10_NO_SOCKET, token, e)
        unix_socket.sendall(CREATOR_SOCKET_UNAVAILABLE)
def socket_consumer_dead(self, unix_socket):
    """
    React to a unix_socket having been closed by its peer.

    All sockets sent over it are to be considered closed, so the
    _socket_cache is told to drop the application identified by the
    socket's file descriptor.
    """
    consumer_fd = unix_socket.fileno()
    logger.info(BIND10_LOST_SOCKET_CONSUMER, consumer_fd)
    try:
        self._socket_cache.drop_application(consumer_fd)
    except ValueError:
        # The application holds no sockets. This is harmless and can
        # happen in real life - for example, it requests a socket, but
        # get_socket doesn't find it, so the application dies. It should
        # be rare, though.
        return
def set_creator(self, creator):
    """
    Register a socket creator with b10-init.

    The creator is not used directly, but through a cache, which is
    created by this method and stored in self._socket_cache.

    Raises ValueError if a creator has been registered before.
    """
    if self._socket_cache is None:
        self._socket_cache = isc.bind10.socket_cache.Cache(creator)
        return
    raise ValueError("A creator was inserted previously")
def init_socket_srv(self):
    """
    Create the unix-domain socket used to hand out sockets and start
    listening on it.

    Sets self._srv_socket, self._tmpdir and self._socket_path.

    This method should be called after switching user, or the switched
    applications won't be able to access the socket.
    """
    server = socket.socket(socket.AF_UNIX)
    self._srv_socket = server
    # A fresh temporary directory gives us a safe and unique place, so
    # neither we nor the users have to pick one. Also, this secures the
    # socket on some platforms, as it creates a private directory.
    self._tmpdir = tempfile.mkdtemp(prefix='sockcreator-')
    # The socket lives under a fixed name inside that directory.
    self._socket_path = os.path.join(self._tmpdir, "sockcreator")
    server.bind(self._socket_path)
    server.listen(5)
def remove_socket_srv(self):
    """
    Close the listening socket and remove both the socket file and the
    directory it lives in, as we created all of them.

    Nothing is done when _srv_socket is not set (eg. it was not yet
    initialized).
    """
    if self._srv_socket is None:
        return
    self._srv_socket.close()
    # Only remove what is really there.
    socket_file_present = os.path.exists(self._socket_path)
    if socket_file_present:
        os.remove(self._socket_path)
    directory_present = os.path.isdir(self._tmpdir)
    if directory_present:
        os.rmdir(self._tmpdir)
def _srv_accept(self):
    """
    Accept a connection on the unix-domain server socket and add it to
    the sockets we watch.

    The connection is stored in self._unix_sockets under its file
    descriptor, paired with an empty buffer of received bytes.
    """
    (connection, _peer) = self._srv_socket.accept()
    self._unix_sockets[connection.fileno()] = (connection, b'')
def _socket_data(self, socket_fileno):
    """
    Serve the socket identified by socket_fileno, which needs attention.

    Data is read one byte at a time without blocking. Every newline ends
    a token, which is passed to socket_request_handler(). When no more
    data is available, the unfinished token is stored for the next call.
    When the socket is closed (or fails), it is removed and
    socket_consumer_dead() is called for it.
    """
    (sock, pending) = self._unix_sockets[socket_fileno]
    # These two might be different on some systems
    no_data_yet = (errno.EAGAIN, errno.EWOULDBLOCK)
    while True:
        try:
            chunk = sock.recv(1, socket.MSG_DONTWAIT)
        except socket.error as se:
            if se.errno in no_data_yet:
                # No more data now. Oh, well, just store what we have.
                self._unix_sockets[socket_fileno] = (sock, pending)
                return
            chunk = b'' # Pretend it got closed

        if not chunk: # The socket got to its end
            del self._unix_sockets[socket_fileno]
            self.socket_consumer_dead(sock)
            sock.close()
            return

        if chunk == b"\n":
            # A complete token; handle it and start a new one.
            self.socket_request_handler(pending, sock)
            pending = b''
        else:
            pending += chunk
def run(self, wakeup_fd):
    """
    The main loop, waiting for sockets, commands and dead processes.
    Runs as long as the runnable is true.

    The wakeup_fd descriptor is the read end of pipe where CHLD signal
    handler writes.

    Each iteration reaps exited children, retries pending restarts and
    then waits in select() on the wakeup pipe, the command channel
    socket, the unix-domain server socket and all accepted unix-domain
    connections. The loop also ends on a select() error other than
    EINTR.
    """
    ccs_fd = self.ccs.get_socket().fileno()
    while self.runnable:
        # clean up any processes that exited
        self.reap_children()
        next_restart = self.restart_processes()
        # None means there is nothing to restart, so select() may wait
        # without a timeout. Otherwise wait until the next restart is
        # due, but never a negative amount of time.
        if next_restart is None:
            wait_time = None
        else:
            wait_time = max(next_restart - time.time(), 0)

        # select() can raise EINTR when a signal arrives,
        # even if they are resumable, so we have to catch
        # the exception
        try:
            (rlist, wlist, xlist) = \
                select.select([wakeup_fd, ccs_fd,
                               self._srv_socket.fileno()] +
                              list(self._unix_sockets.keys()), [], [],
                              wait_time)
        except select.error as err:
            if err.args[0] == errno.EINTR:
                # Interrupted by a signal: treat it as "nothing ready"
                # and go around the loop again.
                (rlist, wlist, xlist) = ([], [], [])
            else:
                logger.fatal(BIND10_SELECT_ERROR, err)
                # This break leaves the main while loop.
                break

        for fd in rlist + xlist:
            if fd == ccs_fd:
                try:
                    self.ccs.check_command()
                except isc.cc.session.ProtocolError:
                    logger.fatal(BIND10_MSGQ_DISAPPEARED)
                    self.runnable = False
                    # This break only leaves the for loop; the while
                    # loop ends because runnable is now False.
                    break
            elif fd == wakeup_fd:
                # Drain what the signal machinery wrote to the pipe; the
                # content itself is not used.
                os.read(wakeup_fd, 32)
            elif fd == self._srv_socket.fileno():
                self._srv_accept()
            elif fd in self._unix_sockets:
                self._socket_data(fd)
# Global variables, needed for signal handlers.
# Both are assigned in main(): options holds the parsed command line
# options and b10_init the Init instance. fatal_signal() uses b10_init
# to request the shutdown.
options = None
b10_init = None
def reaper(signal_number, stack_frame):
    """A child process has died (SIGCHLD received).

    Intentionally does nothing: the Python signal handler has been set
    up to write down a pipe, and that is what wakes up our select().
    """
    return None
def get_signame(signal_number):
    """Return the symbolic name for a signal.

    The attributes of the signal module are searched in the order given
    by dir() for a name of the form SIGxxx (names such as SIG_DFL are
    skipped) whose value equals signal_number. If none matches, the
    text "Unknown signal <number>" is returned.
    """
    signal_names = (name for name in dir(signal)
                    if name.startswith("SIG") and name[3].isalnum())
    for name in signal_names:
        if getattr(signal, name) == signal_number:
            return name
    return "Unknown signal %d" % signal_number
- # XXX: perhaps register atexit() function and invoke that instead
def fatal_signal(signal_number, stack_frame):
    """We need to exit (SIGINT or SIGTERM received).

    The signal is logged, SIGCHLD is put back to its default handling
    and the global b10_init is marked as no longer runnable.
    """
    # The globals are only read here (an attribute is set on b10_init),
    # so no "global" declaration is needed.
    signame = get_signame(signal_number)
    logger.info(BIND10_RECEIVED_SIGNAL, signame)
    signal.signal(signal.SIGCHLD, signal.SIG_DFL)
    b10_init.runnable = False
def process_rename(option, opt_str, value, parser):
    """Option callback that renames the process when requested.

    The signature is the one optparse uses for callbacks; only value,
    the new process name, is used.
    """
    new_name = value
    isc.util.process.rename(new_name)
def parse_args(args=None, Parser=OptionParser):
    """
    Parse the command line arguments and return the options object
    produced by the parser.

    Parameters:
    args -- list of arguments to parse. When None (the default),
        sys.argv[1:] is read at call time, not at import time.
    Parser -- parser class to instantiate (OptionParser by default).

    A --cmdctl-port value that port_parse() rejects is reported through
    parser.error(). If positional arguments are left over, the help is
    printed and the process exits with status 1.
    """
    if args is None:
        # Late binding: a default of sys.argv[1:] in the signature would
        # be evaluated only once, when the function is defined.
        args = sys.argv[1:]

    parser = Parser(version=VERSION)
    parser.add_option("-m", "--msgq-socket-file", dest="msgq_socket_file",
                      type="string", default=None,
                      help="UNIX domain socket file the b10-msgq daemon " +
                      "will use")
    parser.add_option("-i", "--no-kill", action="store_true", dest="nokill",
                      default=False,
                      help="do not send SIGTERM and SIGKILL signals to " +
                      "modules during shutdown")
    parser.add_option("-u", "--user", dest="user", type="string", default=None,
                      help="Change user after startup (must run as root)")
    parser.add_option("-v", "--verbose", dest="verbose", action="store_true",
                      help="display more about what is going on")
    parser.add_option("--pretty-name", type="string", action="callback",
                      callback=process_rename,
                      help="Set the process name (displayed in ps, top, ...)")
    parser.add_option("-c", "--config-file", action="store",
                      dest="config_file", default=None,
                      help="Configuration database filename")
    parser.add_option("--clear-config", action="store_true",
                      dest="clear_config", default=False,
                      help="Create backup of the configuration file and " +
                      "start with a clean configuration")
    parser.add_option("-p", "--data-path", dest="data_path",
                      help="Directory to search for configuration files",
                      default=None)
    parser.add_option("--cmdctl-port", dest="cmdctl_port", type="int",
                      default=None, help="Port of command control")
    parser.add_option("--pid-file", dest="pid_file", type="string",
                      default=None,
                      help="file to dump the PID of the BIND 10 process")
    parser.add_option("-w", "--wait", dest="wait_time", type="int",
                      default=10,
                      help="Time (in seconds) to wait for config manager to "
                      "start up")

    (options, args) = parser.parse_args(args)

    # The option type already guarantees an integer; port_parse() is
    # used to validate it further.
    if options.cmdctl_port is not None:
        try:
            isc.net.parse.port_parse(options.cmdctl_port)
        except ValueError as e:
            parser.error(e)

    # Positional arguments are not accepted.
    if args:
        parser.print_help()
        sys.exit(1)

    return options
def dump_pid(pid_file):
    """
    Dump the PID of the current process to the specified file.

    If the given file is None this function does nothing. If the file
    already exists, the existing content will be removed. If a system
    error happens in creating or writing to the file, the corresponding
    exception will be propagated to the caller.

    The PID is written as a decimal number followed by a newline.
    """
    if pid_file is None:
        return
    # The context manager closes the file even when write() fails.
    with open(pid_file, "w") as f:
        f.write('%d\n' % os.getpid())
def unlink_pid_file(pid_file):
    """
    Remove the given file, which is basically expected to be the PID
    file created by dump_pid().

    If pid_file is None this function does nothing. The specified file
    may or may not exist; if it doesn't, this function does nothing
    either. Other system level errors in removing the file will be
    propagated as the corresponding exception.
    """
    if pid_file is None:
        return
    try:
        os.unlink(pid_file)
    except OSError as error:
        # Compare error numbers by value; an identity test on integers
        # only works by accident of the interpreter.
        if error.errno != errno.ENOENT:
            raise
def remove_lock_files():
    """
    Remove various lock files which were created by code such as in the
    logger. This function should be called after BIND 10 shutdown.

    The files are looked for in bind10_config.DATA_PATH, unless one of
    the environment variables checked below is set. An unlink failure
    caused by missing permissions is ignored; any other error is
    propagated.
    """
    lock_dir = bind10_config.DATA_PATH
    # Every variable that is set replaces the directory, so the last
    # one listed here has the final say.
    for variable in ("B10_FROM_BUILD",
                     "B10_FROM_SOURCE_LOCALSTATEDIR",
                     "B10_LOCKFILE_DIR_FROM_BUILD"):
        if variable in os.environ:
            lock_dir = os.environ[variable]

    permission_errors = (errno.EPERM, errno.EACCES)
    for lock_name in ["logger_lockfile"]:
        lock_path = lock_dir + '/' + lock_name
        if not os.path.isfile(lock_path):
            continue
        try:
            os.unlink(lock_path)
        except OSError as e:
            # We catch and ignore permission related error on unlink.
            # This can happen if bind10 started with -u, created a lock
            # file as a privileged user, but the directory is not writable
            # for the changed user. This setup will cause immediate
            # start failure, and we leave verbose error message including
            # the leftover lock file, so it should be acceptable to ignore
            # it (note that it doesn't make sense to log this event at
            # this point)
            if e.errno not in permission_errors:
                raise
def main():
    """
    Entry point of the program.

    Parses the command line, resolves the --user option to a uid, gid
    and user name, installs the signal handlers, creates the Init
    instance and starts it, runs its main loop and shuts it down
    afterwards. The PID file, the lock files and the socket server are
    cleaned up whether or not that succeeded. Finally the process exits
    with b10_init.exitcode.

    The process exits with status 1 when the initial log message cannot
    be written, when the requested user is unknown or when startup()
    reports an error.
    """
    global options
    global b10_init
    # Enforce line buffering on stdout, even when not a TTY
    sys.stdout = io.TextIOWrapper(sys.stdout.detach(), line_buffering=True)

    options = parse_args()

    # Announce startup. Making this is the first log message.
    try:
        logger.info(BIND10_STARTING, VERSION)
    except RuntimeError as e:
        sys.stderr.write('ERROR: failed to write the initial log: %s\n' %
                         str(e))
        sys.stderr.write(NOTE_ON_LOCK_FILE)
        sys.exit(1)

    # Check user ID.
    setuid = None
    setgid = None
    username = None
    if options.user:
        # Try getting information about the user, assuming UID passed.
        try:
            pw_ent = pwd.getpwuid(int(options.user))
            setuid = pw_ent.pw_uid
            setgid = pw_ent.pw_gid
            username = pw_ent.pw_name
        except ValueError:
            # Not a number; the lookup by name below may still succeed.
            pass
        except KeyError:
            # No user with this UID.
            pass

        # Next try getting information about the user, assuming user name
        # passed.
        # If the information is both a valid user name and user number, we
        # prefer the name because we try it second. A minor point, hopefully.
        try:
            pw_ent = pwd.getpwnam(options.user)
            setuid = pw_ent.pw_uid
            setgid = pw_ent.pw_gid
            username = pw_ent.pw_name
        except KeyError:
            pass

        # Neither lookup found the user.
        if setuid is None:
            logger.fatal(BIND10_INVALID_USER, options.user)
            sys.exit(1)

    # Create wakeup pipe for signal handlers
    wakeup_pipe = os.pipe()
    # The write end goes to the signal machinery; the read end is handed
    # to run() below.
    signal.set_wakeup_fd(wakeup_pipe[1])

    # Set signal handlers for catching child termination, as well
    # as our own demise.
    signal.signal(signal.SIGCHLD, reaper)
    signal.siginterrupt(signal.SIGCHLD, False)
    signal.signal(signal.SIGINT, fatal_signal)
    signal.signal(signal.SIGTERM, fatal_signal)

    # Block SIGPIPE, as we don't want it to end this process
    signal.signal(signal.SIGPIPE, signal.SIG_IGN)

    try:
        # NOTE(review): setuid/setgid/username are only passed on here;
        # presumably Init performs the actual user switch - confirm.
        b10_init = Init(options.msgq_socket_file, options.data_path,
                        options.config_file, options.clear_config,
                        options.verbose, options.nokill,
                        setuid, setgid, username, options.cmdctl_port,
                        options.wait_time)
        # A true result from startup() is treated as an error
        # description.
        startup_result = b10_init.startup()
        if startup_result:
            logger.fatal(BIND10_STARTUP_ERROR, startup_result)
            sys.exit(1)
        b10_init.init_socket_srv()
        logger.info(BIND10_STARTUP_COMPLETE)
        dump_pid(options.pid_file)

        # Let it run
        b10_init.run(wakeup_pipe[0])

        # shutdown
        signal.signal(signal.SIGCHLD, signal.SIG_DFL)
        b10_init.shutdown()
    finally:
        # Clean up the filesystem
        unlink_pid_file(options.pid_file)
        remove_lock_files()
        # b10_init is not assigned if the Init constructor raised.
        if b10_init is not None:
            b10_init.remove_socket_srv()
    sys.exit(b10_init.exitcode)
# Script entry point. main() is not called directly but handed to the
# project's traceback handler.
# NOTE(review): presumably the handler reports uncaught exceptions
# raised by main() - confirm in isc.util.traceback_handler.
if __name__ == "__main__":
    isc.util.traceback_handler.traceback_handler(main)
|