init.py.in 54 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385
  1. #!@PYTHON@
  2. # Copyright (C) 2010,2011 Internet Systems Consortium.
  3. #
  4. # Permission to use, copy, modify, and distribute this software for any
  5. # purpose with or without fee is hereby granted, provided that the above
  6. # copyright notice and this permission notice appear in all copies.
  7. #
  8. # THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SYSTEMS CONSORTIUM
  9. # DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL
  10. # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
  11. # INTERNET SYSTEMS CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT,
  12. # INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
  13. # FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
  14. # NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
  15. # WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  16. """
  17. This file implements the b10-init program.
  18. Its purpose is to start up the BIND 10 system, and then manage the
  19. processes, by starting and stopping processes, plus restarting
  20. processes that exit.
  21. To start the system, it first runs the c-channel program (msgq), then
  22. connects to that. It then runs the configuration manager, and reads
  23. its own configuration. Then it proceeds to starting other modules.
  24. The Python subprocess module is used for starting processes, but
  25. because this is not efficient for managing groups of processes,
  26. SIGCHLD signals are caught and processed using the signal module.
  27. Most of the logic is contained in the Init class. However, since Python
  28. requires that signal processing happen in the main thread, we do
  29. signal handling outside of that class, in the code running for
  30. __main__.
  31. """
  32. import sys; sys.path.append ('@@PYTHONPATH@@')
  33. import os
  34. from isc.util.address_formatter import AddressFormatter
  # Choose where init.spec is read from: a source tree named by the
  # B10_FROM_SOURCE environment variable wins over the installed copy.
  if "B10_FROM_SOURCE" not in os.environ:
      # Installed layout; the @...@ placeholders are substituted when this
      # template is configured, and may themselves contain ${...} references.
      PREFIX = "@prefix@"
      DATAROOTDIR = "@datarootdir@"
      SPECFILE_LOCATION = (
          "@datadir@/@PACKAGE@/init.spec"
          .replace("${datarootdir}", DATAROOTDIR)
          .replace("${prefix}", PREFIX)
      )
  else:
      SPECFILE_LOCATION = (os.environ["B10_FROM_SOURCE"] +
                           "/src/bin/bind10/init.spec")
  47. import subprocess
  48. import signal
  49. import re
  50. import errno
  51. import time
  52. import select
  53. import random
  54. import socket
  55. from optparse import OptionParser, OptionValueError
  56. import io
  57. import pwd
  58. import posix
  59. import copy
  60. from bind10_config import LIBEXECPATH
  61. import bind10_config
  62. import isc.cc
  63. import isc.util.process
  64. import isc.net.parse
  65. import isc.log
  66. import isc.config
  67. from isc.log_messages.init_messages import *
  68. import isc.bind10.component
  69. import isc.bind10.special_component
  70. import isc.bind10.socket_cache
  71. import isc.util.traceback_handler
  72. import libutil_io_python
  73. import tempfile
  # Initialise logging for this program before the first Logger is created.
  isc.log.init("b10-init", buffer=True)
  logger = isc.log.Logger("init")
  # Pending system-wide debug level definitions, the ones we
  # use here are hardcoded for now
  DBG_PROCESS = logger.DBGLVL_TRACE_BASIC
  DBG_COMMANDS = logger.DBGLVL_TRACE_DETAIL
  # Messages sent over the unix domain socket to indicate if it is followed by a
  # real socket
  CREATOR_SOCKET_OK = b"1\n"
  CREATOR_SOCKET_UNAVAILABLE = b"0\n"
  # RCodes of known exceptions for the get_token command
  CREATOR_SOCKET_ERROR = 2
  CREATOR_SHARE_ERROR = 3
  # Assign this process some longer name
  isc.util.process.rename()
  # This is the version that gets displayed to the user.
  # The VERSION string consists of the module name, the module version
  # number, and the overall BIND 10 version number (set in configure.ac).
  VERSION = "bind10 20110223 (BIND 10 @PACKAGE_VERSION@)"
  # Captured once at import time (UTC); reported as Init's 'boot_time'
  # by Init._get_stats_data().
  _BASETIME = time.gmtime()
  # Detailed hint appended to the fatal message when startup fails, possibly
  # because of a permission problem with the log lock file. It is spelled
  # out because a bare "failed to open <filename>: permission denied" does
  # not tell the operator what to do about it.
  NOTE_ON_LOCK_FILE = (
      "TIP: if this is about permission error for a lock file, check if the directory\n"
      "of the file is writable for the user of the bind10 process; often you need\n"
      "to start bind10 as a super user. Also, if you specify the -u option to\n"
      "change the user and group, the directory must be writable for the group,\n"
      "and the created lock file must be writable for that user. Finally, make sure\n"
      "the lock file is not left in the directory before restarting.\n"
  )
  class ProcessInfoError(Exception):
      """Exception type declared for ProcessInfo-related failures."""
  class ChangeUserError(Exception):
      """Signals that switching the group or user ID (setgid/setuid) failed.

      The exception is meant to travel up through the component management
      modules to the top level, where it is turned into a helpful fatal
      error message.
      """
  class ProcessInfo:
      """Description of, and handle on, one child process.

      Attributes:
          name -- name of the process
          args -- argument list handed to subprocess.Popen
          env -- extra environment variables for the child
          dev_null_stdout / dev_null_stderr -- when true, the corresponding
              stream of the child is sent to the null device
          process -- the subprocess.Popen object, None until spawned
          pid -- PID of the child, None until spawned
      """

      # One write handle on the null device, shared by every instance.
      dev_null = open(os.devnull, "w")

      def __init__(self, name, args, env=None, dev_null_stdout=False,
                   dev_null_stderr=False):
          """Record how the process is to be started; nothing is run yet.

          env defaults to an empty mapping. A None sentinel is used rather
          than a literal {} default so that instances never share (and
          cannot accidentally mutate) one dictionary.
          """
          self.name = name
          self.args = args
          self.env = {} if env is None else env
          self.dev_null_stdout = dev_null_stdout
          self.dev_null_stderr = dev_null_stderr
          self.process = None
          self.pid = None

      def _preexec_work(self):
          """Function used before running a program that needs to run as a
          different user."""
          # First, put us into a separate process group so we don't get
          # SIGINT signals on Ctrl-C (b10-init will shut everything down by
          # other means).
          os.setpgrp()

      def _spawn(self):
          """Start the child with subprocess.Popen and record its PID."""
          spawn_stdout = self.dev_null if self.dev_null_stdout else None
          spawn_stderr = self.dev_null if self.dev_null_stderr else None
          # The child's environment is b10-init's own plus the variables given
          # on construction (self.env). A plain dict snapshot is used on
          # purpose: a (deep)copy of os.environ is still an os._Environ
          # object, and assigning to it calls putenv(), which would leak the
          # child's settings into this process's real environment.
          spawn_env = dict(os.environ)
          spawn_env.update(self.env)
          # Put LIBEXECPATH first on the search path; cope with PATH being
          # unset instead of failing with KeyError.
          inherited_path = spawn_env.get('PATH')
          if inherited_path is None:
              spawn_env['PATH'] = LIBEXECPATH
          else:
              spawn_env['PATH'] = LIBEXECPATH + ':' + inherited_path
          self.process = subprocess.Popen(self.args,
                                          stdin=subprocess.PIPE,
                                          stdout=spawn_stdout,
                                          stderr=spawn_stderr,
                                          close_fds=True,
                                          env=spawn_env,
                                          preexec_fn=self._preexec_work)
          self.pid = self.process.pid

      # spawn() and respawn() are the same for now, but in the future they
      # may have different functionality
      def spawn(self):
          """Start the process."""
          self._spawn()

      def respawn(self):
          """Start the process again."""
          self._spawn()
  class CChannelConnectError(Exception):
      """Raised when no connection to the c-channel (msgq) could be made."""
  class ProcessStartError(Exception):
      """Raised when a process did not complete its startup."""
  165. class Init:
  166. """Init of BIND class."""
  def __init__(self, msgq_socket_file=None, data_path=None,
               config_filename=None, clear_config=False,
               verbose=False, nokill=False, setuid=None, setgid=None,
               username=None, cmdctl_port=None, wait_time=10):
      """
      Prepare the state of the Init object; no process is started here.
      Only one Init is meant to run at a time.

      msgq_socket_file -- UNIX domain socket file the msgq process listens
          on.
      data_path, config_filename, clear_config -- passed through to the
          configuration manager when it is started.
      verbose -- if true, full debug logging is enabled and started
          processes are told to be verbose as well.
      nokill -- stored as self.nokill.
      setuid, setgid -- user and group ID that change_user() switches to;
          None leaves the respective ID alone.
      username -- stored as self.username.
      cmdctl_port -- port passed to cmdctl to listen on.
      wait_time -- how long (in seconds) start_cfgmgr() waits for the
          configuration manager; wait_msgq() derives its number of
          attempts from it too.
      """
      # Values taken straight from the caller.
      self.msgq_socket_file = msgq_socket_file
      self.data_path = data_path
      self.config_filename = config_filename
      self.clear_config = clear_config
      self.verbose = verbose
      self.nokill = nokill
      self.__uid = setuid
      self.__gid = setgid
      self.username = username
      self.cmdctl_port = cmdctl_port
      self.wait_time = wait_time

      # Command channel sessions; both are created during startup.
      self.cc_session = None
      self.ccs = None
      # Name of the process being started, used in log and error messages.
      self.curproc = None
      self.component_config = {}
      # Components being watched, keyed by PID. The name may become a
      # misnomer if one component ever owns several processes, but the
      # code is not ready for that yet, so "components" it stays.
      self.components = {}
      # Components that died and wait for a restart; each component
      # manages its own restart schedule.
      self.components_to_restart = []
      self.runnable = False
      # Seconds start_msgq() keeps trying to connect to msgq.
      self.msgq_timeout = 5
      # Testing aid only: lets tests bypass some code.
      self._run_under_unittests = False

      self._component_configurator = isc.bind10.component.Configurator(
          self, isc.bind10.special_component.get_specials())
      # The priorities make the core start in the right order: socket
      # creator first, then the message queue, then the config manager.
      self.__core_components = {
          name: {'kind': 'core', 'special': name, 'priority': priority}
          for name, priority in (('sockcreator', 200),
                                 ('msgq', 199),
                                 ('cfgmgr', 198))
      }
      self.__started = False
      self.exitcode = 0
      # If -v was set, enable full debug logging.
      if self.verbose:
          logger.set_severity("DEBUG", 99)
      # This is set in init_socket_srv
      self._socket_path = None
      self._socket_cache = None
      self._tmpdir = None
      self._srv_socket = None
      self._unix_sockets = {}
  252. def __propagate_component_config(self, config):
  253. comps = dict(config)
  254. # Fill in the core components, so they stay alive
  255. for comp in self.__core_components:
  256. if comp in comps:
  257. raise Exception(comp + " is core component managed by " +
  258. "b10-init, do not set it")
  259. comps[comp] = self.__core_components[comp]
  260. # Update the configuration
  261. self._component_configurator.reconfigure(comps)
  262. def change_user(self):
  263. '''Change the user and group to those specified on construction.
  264. This method is expected to be called by a component on initial
  265. startup when the system is ready to switch the user and group
  266. (i.e., once all components that need the privilege of the original
  267. user have started).
  268. '''
  269. try:
  270. if self.__gid is not None:
  271. logger.info(BIND10_SETGID, self.__gid)
  272. posix.setgid(self.__gid)
  273. except Exception as ex:
  274. raise ChangeUserError('failed to change group: ' + str(ex))
  275. try:
  276. if self.__uid is not None:
  277. posix.setuid(self.__uid)
  278. # We use one-shot logger after setuid here. This will
  279. # detect any permission issue regarding logging due to the
  280. # result of setuid at the earliest opportunity.
  281. isc.log.Logger("b10-init").info(BIND10_SETUID, self.__uid)
  282. except Exception as ex:
  283. raise ChangeUserError('failed to change user: ' + str(ex))
  284. def config_handler(self, new_config):
  285. # If this is initial update, don't do anything now, leave it to startup
  286. if not self.runnable:
  287. return
  288. logger.debug(DBG_COMMANDS, BIND10_RECEIVED_NEW_CONFIGURATION,
  289. new_config)
  290. try:
  291. if 'components' in new_config:
  292. self.__propagate_component_config(new_config['components'])
  293. return isc.config.ccsession.create_answer(0)
  294. except Exception as e:
  295. logger.error(BIND10_RECONFIGURE_ERROR, e)
  296. return isc.config.ccsession.create_answer(1, str(e))
  297. def get_processes(self):
  298. pids = list(self.components.keys())
  299. pids.sort()
  300. process_list = [ ]
  301. for pid in pids:
  302. process_list.append([pid, self.components[pid].name(),
  303. self.components[pid].address()])
  304. return process_list
  def _get_stats_data(self):
      """
      Return the statistics of Init: a dictionary whose only item,
      'boot_time', is _BASETIME formatted as YYYY-MM-DDTHH:MM:SSZ.
      """
      boot_time = time.strftime('%Y-%m-%dT%H:%M:%SZ', _BASETIME)
      return {'boot_time': boot_time}
  def command_handler(self, command, args):
      """
      Handle one command and return the answer for it.

      Known commands:
        shutdown       -- clear self.runnable; answer code 0
        getstats       -- answer code 0 with the data of _get_stats_data()
        ping           -- answer code 0 with "pong"
        show_processes -- answer code 0 with the list of get_processes()
        get_socket     -- whatever self._get_socket(args) returns
        drop_socket    -- drop the cached socket named by args["token"];
                          code 0 on success, code 1 with the error text
                          otherwise
      Anything else is answered with code 1: "bad command" if the command
      is not a string, "Unknown command" if it is not in the list above.
      """
      logger.debug(DBG_COMMANDS, BIND10_RECEIVED_COMMAND, command)
      create_answer = isc.config.ccsession.create_answer
      if not isinstance(command, str):
          return create_answer(1, "bad command")
      if command == "shutdown":
          self.runnable = False
          return create_answer(0)
      if command == "getstats":
          return create_answer(0, self._get_stats_data())
      if command == "ping":
          return create_answer(0, "pong")
      if command == "show_processes":
          return create_answer(0, self.get_processes())
      if command == "get_socket":
          return self._get_socket(args)
      if command == "drop_socket":
          # The command may arrive without any arguments (args is None) or
          # with arguments that are not a mapping; answer with an error in
          # both cases instead of letting a TypeError escape the handler.
          try:
              token = args["token"]
          except (TypeError, KeyError):
              return create_answer(1, "Missing token parameter")
          try:
              self._socket_cache.drop_socket(token)
              return create_answer(0)
          except Exception as e:
              return create_answer(1, str(e))
      return create_answer(1, "Unknown command")
  def kill_started_components(self):
      """
      Clean up after a failed start: kill every process that was started
      so far and then forget all of them.
      """
      logger.info(BIND10_KILLING_ALL_PROCESSES)
      forceful = True
      self.__kill_children(forceful)
      self.components = dict()
  def _read_bind10_config(self):
      """
      Read the configuration of the Init module itself, i.e. the list of
      components to start now, and propagate it.

      This could be folded into the method that starts everything; it is
      kept separate for historical reasons and because tests sometimes
      replace it.
      """
      logger.info(BIND10_READING_INIT_CONFIGURATION)
      components = self.ccs.get_full_config()['components']
      self.__propagate_component_config(components)
  def log_starting(self, process, port = None, address = None):
      """
      Log a consistent "starting <process>" message at info level.

      The process name is remembered in self.curproc; it identifies the
      process in the error report if the start fails, and in the message
      of log_started(). If an address is given, address and port are
      logged together; if only a port is given, the port is logged.
      """
      self.curproc = process
      if address is not None:
          endpoint = AddressFormatter((address, port))
          logger.info(BIND10_STARTING_PROCESS_PORT_ADDRESS,
                      self.curproc, endpoint)
      elif port is not None:
          logger.info(BIND10_STARTING_PROCESS_PORT, self.curproc, port)
      else:
          logger.info(BIND10_STARTING_PROCESS, self.curproc)
  def log_started(self, pid = None):
      """
      Log a consistent "started <process>" debug message for the process
      named in self.curproc, with its PID when one is given.
      """
      if pid is not None:
          logger.debug(DBG_PROCESS, BIND10_STARTED_PROCESS_PID,
                       self.curproc, pid)
          return
      logger.debug(DBG_PROCESS, BIND10_STARTED_PROCESS, self.curproc)
  395. def process_running(self, msg, who):
  396. """
  397. Some processes return a message to the Init after they have
  398. started to indicate that they are running. The form of the
  399. message is a dictionary with contents {"running:", "<process>"}.
  400. This method checks the passed message and returns True if the
  401. "who" process is contained in the message (so is presumably
  402. running). It returns False for all other conditions and will
  403. log an error if appropriate.
  404. """
  405. if msg is not None:
  406. try:
  407. if msg["running"] == who:
  408. return True
  409. else:
  410. logger.error(BIND10_STARTUP_UNEXPECTED_MESSAGE, msg)
  411. except:
  412. logger.error(BIND10_STARTUP_UNRECOGNISED_MESSAGE, msg)
  413. return False
  414. # The next few methods start the individual processes of BIND-10. They
  415. # are called via start_all_processes(). If any fail, an exception is
  416. # raised which is caught by the caller of start_all_processes(); this kills
  417. # processes started up to that point before terminating the program.
  def _make_process_info(self, name, args, env,
                         dev_null_stdout=False, dev_null_stderr=False):
      """
      Create the ProcessInfo for a process. All ProcessInfo objects are
      built through this single method so that tests can override it.
      """
      return ProcessInfo(name, args, env,
                         dev_null_stdout=dev_null_stdout,
                         dev_null_stderr=dev_null_stderr)
  425. def start_msgq(self):
  426. """
  427. Start the message queue and connect to the command channel.
  428. """
  429. self.log_starting("b10-msgq")
  430. msgq_proc = self._make_process_info("b10-msgq", ["b10-msgq"],
  431. self.c_channel_env,
  432. True, not self.verbose)
  433. msgq_proc.spawn()
  434. self.log_started(msgq_proc.pid)
  435. # Now connect to the c-channel
  436. cc_connect_start = time.time()
  437. while self.cc_session is None:
  438. # if we are run under unittests, break
  439. if self._run_under_unittests:
  440. break
  441. # if we have been trying for "a while" give up
  442. if (time.time() - cc_connect_start) > self.msgq_timeout:
  443. if msgq_proc.process:
  444. msgq_proc.process.kill()
  445. logger.error(BIND10_CONNECTING_TO_CC_FAIL)
  446. raise CChannelConnectError("Unable to connect to c-channel " +
  447. "after 5 seconds")
  448. # try to connect, and if we can't wait a short while
  449. try:
  450. self.cc_session = isc.cc.Session(self.msgq_socket_file)
  451. except isc.cc.session.SessionError:
  452. time.sleep(0.1)
  453. # Subscribe to the message queue. The only messages we expect to
  454. # receive on this channel are once relating to process startup.
  455. if self.cc_session is not None:
  456. self.cc_session.group_subscribe("Init")
  457. return msgq_proc
  def wait_msgq(self):
      """
      Block until the message queue has fully started, which happens only
      after the config manager has connected to it. Msgq counts as ready
      as soon as it answers commands.

      No dedicated command exists for this probe; a command msgq does not
      know is sent, since an error answer proves it is alive just as well.

      Raises ProcessStartError if msgq never answered.
      """
      # Each attempt sleeps a tenth of a second only (the start should be
      # fast by now), hence ten attempts per second of wait_time.
      attempts_left = self.wait_time * 10
      while attempts_left > 0:
          try:
              self.ccs.rpc_call('AreYouThere?', 'Msgq')
              # We don't expect this to succeed. If it does, it's programmer
              # error
              raise Exception("Non-existing RPC call succeeded")
          except isc.config.RPCRecipientMissing:
              # Not there yet
              time.sleep(0.1)
              attempts_left -= 1
          except isc.config.RPCError:
              # It doesn't like the RPC, so it's alive now
              return
      # Still not started
      raise ProcessStartError("Msgq didn't complete the second stage " +
                              "of startup")
  485. def start_cfgmgr(self):
  486. """
  487. Starts the configuration manager process
  488. """
  489. self.log_starting("b10-cfgmgr")
  490. args = ["b10-cfgmgr"]
  491. if self.data_path is not None:
  492. args.append("--data-path=" + self.data_path)
  493. if self.config_filename is not None:
  494. args.append("--config-filename=" + self.config_filename)
  495. if self.clear_config:
  496. args.append("--clear-config")
  497. bind_cfgd = self._make_process_info("b10-cfgmgr", args,
  498. self.c_channel_env)
  499. bind_cfgd.spawn()
  500. self.log_started(bind_cfgd.pid)
  501. # Wait for the configuration manager to start up as
  502. # subsequent initialization cannot proceed without it. The
  503. # time to wait can be set on the command line.
  504. time_remaining = self.wait_time
  505. msg, env = self.cc_session.group_recvmsg()
  506. while time_remaining > 0 and not self.process_running(msg,
  507. "ConfigManager"):
  508. logger.debug(DBG_PROCESS, BIND10_WAIT_CFGMGR)
  509. time.sleep(1)
  510. time_remaining = time_remaining - 1
  511. msg, env = self.cc_session.group_recvmsg()
  512. if not self.process_running(msg, "ConfigManager"):
  513. raise ProcessStartError("Configuration manager process has not " +
  514. "started")
  515. return bind_cfgd
  def start_ccsession(self, c_channel_env):
      """
      Create and start the module CC session (self.ccs).

      c_channel_env is not used; the parameter exists only so that all
      start_xxx methods take the same arguments. The CC session is not a
      process, so log_starting()/log_started() are not used here.
      """
      logger.info(BIND10_STARTING_CC)
      # Only one session is monitored, and msgq expects everything it
      # sends us to be read (otherwise it closes its side of the socket),
      # so leave the group on the other CC session first.
      early_session = self.cc_session
      if early_session is not None:
          early_session.group_unsubscribe("Init")
      session = isc.config.ModuleCCSession(
          SPECFILE_LOCATION,
          self.config_handler,
          self.command_handler,
          socket_file = self.msgq_socket_file)
      self.ccs = session
      self.ccs.start()
      logger.debug(DBG_PROCESS, BIND10_STARTED_CC)
  536. # A couple of utility methods for starting processes...
  537. def start_process(self, name, args, c_channel_env, port=None,
  538. address=None):
  539. """
  540. Given a set of command arguments, start the process and output
  541. appropriate log messages. If the start is successful, the process
  542. is added to the list of started processes.
  543. The port and address arguments are for log messages only.
  544. """
  545. self.log_starting(name, port, address)
  546. newproc = self._make_process_info(name, args, c_channel_env)
  547. newproc.spawn()
  548. self.log_started(newproc.pid)
  549. return newproc
  550. def register_process(self, pid, component):
  551. """
  552. Put another process into b10-init to watch over it. When the process
  553. dies, the component.failed() is called with the exit code.
  554. It is expected the info is a isc.bind10.component.BaseComponent
  555. subclass (or anything having the same interface).
  556. """
  557. self.components[pid] = component
  558. def start_simple(self, name):
  559. """
  560. Most of the BIND-10 processes are started with the command:
  561. <process-name> [-v]
  562. ... where -v is appended if verbose is enabled. This method
  563. generates the arguments from the name and starts the process.
  564. The port and address arguments are for log messages only.
  565. """
  566. # Set up the command arguments.
  567. args = [name]
  568. if self.verbose:
  569. args += ['-v']
  570. # ... and start the process
  571. return self.start_process(name, args, self.c_channel_env)
  572. # The next few methods start up the rest of the BIND-10 processes.
  573. # Although many of these methods are little more than a call to
  574. # start_simple, they are retained (a) for testing reasons and (b) as a
  575. # place where modifications can be made if the process start-up sequence
  576. # changes for a given process.
  577. def start_auth(self):
  578. """
  579. Start the Authoritative server
  580. """
  581. authargs = ['b10-auth']
  582. if self.verbose:
  583. authargs += ['-v']
  584. # ... and start
  585. return self.start_process("b10-auth", authargs, self.c_channel_env)
  586. def start_resolver(self):
  587. """
  588. Start the Resolver. At present, all these arguments and switches
  589. are pure speculation. As with the auth daemon, they should be
  590. read from the configuration database.
  591. """
  592. self.curproc = "b10-resolver"
  593. # XXX: this must be read from the configuration manager in the future
  594. resargs = ['b10-resolver']
  595. if self.verbose:
  596. resargs += ['-v']
  597. # ... and start
  598. return self.start_process("b10-resolver", resargs, self.c_channel_env)
  599. def start_cmdctl(self):
  600. """
  601. Starts the command control process
  602. """
  603. args = ["b10-cmdctl"]
  604. if self.cmdctl_port is not None:
  605. args.append("--port=" + str(self.cmdctl_port))
  606. if self.verbose:
  607. args.append("-v")
  608. return self.start_process("b10-cmdctl", args, self.c_channel_env,
  609. self.cmdctl_port)
  def start_all_components(self):
      """
      Starts up all the components. Any exception generated during the
      starting of the components is handled by the caller.

      The order of the steps matters: the core components come first, then
      the CC session, then the wait for msgq, and only then is the Init
      configuration read and the remaining components started.
      """
      # Start the real core (sockcreator, msgq, cfgmgr)
      self._component_configurator.startup(self.__core_components)
      # Connect to the msgq. This is not a process, so it's not handled
      # inside the configurator.
      self.start_ccsession(self.c_channel_env)
      # Make sure msgq is fully started before proceeding to the rest
      # of the components.
      self.wait_msgq()
      # Extract the parameters associated with Init. This can only be
      # done after the CC Session is started. Note that the logging
      # configuration may override the "-v" switch set on the command line.
      self._read_bind10_config()
      # TODO: Return the dropping of privileges
  def startup(self):
      """
      Start the Init instance.

      First checks that no msgq is already listening on the socket file,
      then starts all components. If any of them fails to start, the ones
      started so far are killed again.

      Returns None if successful, otherwise a string describing the
      problem.
      """
      # Try to connect to the c-channel daemon, to see if it is already
      # running
      c_channel_env = {}
      if self.msgq_socket_file is not None:
          c_channel_env["BIND10_MSGQ_SOCKET_FILE"] = self.msgq_socket_file
      logger.debug(DBG_PROCESS, BIND10_CHECK_MSGQ_ALREADY_RUNNING)
      try:
          self.cc_session = isc.cc.Session(self.msgq_socket_file)
          logger.fatal(BIND10_MSGQ_ALREADY_RUNNING)
          return "b10-msgq already running, or socket file not cleaned , " +\
              "cannot start"
      except isc.cc.session.SessionError:
          # this is the case we want, where the msgq is not running
          pass
      # Start all components. If any one fails to start, kill all started
      # components and exit with an error indication.
      try:
          self.c_channel_env = c_channel_env
          self.start_all_components()
      except ChangeUserError as e:
          self.kill_started_components()
          return str(e) + '; ' + NOTE_ON_LOCK_FILE.replace('\n', ' ')
      except Exception as e:
          self.kill_started_components()
          # curproc is still None if the failure happened before any
          # process start was logged; concatenating None would raise a
          # TypeError here and hide the real error.
          failed = self.curproc
          if failed is None:
              failed = "(unknown process)"
          return "Unable to start " + str(failed) + ": " + str(e)
      # Started successfully
      self.runnable = True
      self.__started = True
      return None
  663. def stop_process(self, process, recipient, pid):
  664. """
  665. Stop the given process, friendly-like. The process is the name it has
  666. (in logs, etc), the recipient is the address on msgq. The pid is the
  667. pid of the process (if we have multiple processes of the same name,
  668. it might want to choose if it is for this one).
  669. """
  670. logger.info(BIND10_STOP_PROCESS, process)
  671. try:
  672. self.cc_session.group_sendmsg(isc.config.ccsession.
  673. create_command('shutdown',
  674. {'pid': pid}),
  675. recipient, recipient)
  676. except:
  677. logger.error(BIND10_COMPONENT_SHUTDOWN_ERROR, process)
  678. raise
  679. def component_shutdown(self, exitcode=0):
  680. """
  681. Stop the Init instance from a components' request. The exitcode
  682. indicates the desired exit code.
  683. If we did not start yet, it raises an exception, which is meant
  684. to propagate through the component and configurator to the startup
  685. routine and abort the startup immediately. If it is started up already,
  686. we just mark it so we terminate soon.
  687. It does set the exit code in both cases.
  688. """
  689. self.exitcode = exitcode
  690. if not self.__started:
  691. raise Exception("Component failed during startup");
  692. else:
  693. self.runnable = False
  694. def shutdown(self):
  695. """Stop the Init instance."""
  696. logger.info(BIND10_SHUTDOWN)
  697. # If ccsession is still there, inform rest of the system this module
  698. # is stopping. Since everything will be stopped shortly, this is not
  699. # really necessary, but this is done to reflect that b10-init is also
  700. # 'just' a module.
  701. self.ccs.send_stopping()
  702. # try using the BIND 10 request to stop
  703. try:
  704. self._component_configurator.shutdown()
  705. except:
  706. pass
  707. # XXX: some delay probably useful... how much is uncertain
  708. # I have changed the delay from 0.5 to 1, but sometime it's
  709. # still not enough.
  710. time.sleep(1)
  711. self.reap_children()
  712. # Send TERM and KILL signals to modules if we're not prevented
  713. # from doing so
  714. if not self.nokill:
  715. # next try sending a SIGTERM
  716. self.__kill_children(False)
  717. # finally, send SIGKILL (unmaskable termination) until everybody
  718. # dies
  719. while self.components:
  720. # XXX: some delay probably useful... how much is uncertain
  721. time.sleep(0.1)
  722. self.reap_children()
  723. self.__kill_children(True)
  724. logger.info(BIND10_SHUTDOWN_COMPLETE)
  725. def __kill_children(self, forceful):
  726. '''Terminate remaining subprocesses by sending a signal.
  727. The forceful paramter will be passed Component.kill().
  728. This is a dedicated subroutine of shutdown(), just to unify two
  729. similar cases.
  730. '''
  731. logmsg = BIND10_SEND_SIGKILL if forceful else BIND10_SEND_SIGTERM
  732. # We need to make a copy of values as the components may be modified
  733. # in the loop.
  734. for component in list(self.components.values()):
  735. logger.info(logmsg, component.name(), component.pid())
  736. try:
  737. component.kill(forceful)
  738. except OSError as ex:
  739. # If kill() failed due to EPERM, it doesn't make sense to
  740. # keep trying, so we just log the fact and forget that
  741. # component. Ignore other OSErrors (usually ESRCH because
  742. # the child finally exited)
  743. signame = "SIGKILL" if forceful else "SIGTERM"
  744. logger.info(BIND10_SEND_SIGNAL_FAIL, signame,
  745. component.name(), component.pid(), ex)
  746. if ex.errno == errno.EPERM:
  747. del self.components[component.pid()]
  748. def _get_process_exit_status(self):
  749. return os.waitpid(-1, os.WNOHANG)
  750. def reap_children(self):
  751. """Check to see if any of our child processes have exited,
  752. and note this for later handling.
  753. """
  754. while True:
  755. try:
  756. (pid, exit_status) = self._get_process_exit_status()
  757. except OSError as o:
  758. if o.errno == errno.ECHILD:
  759. break
  760. # XXX: should be impossible to get any other error here
  761. raise
  762. if pid == 0:
  763. break
  764. if pid in self.components:
  765. # One of the components we know about. Get information on it.
  766. component = self.components.pop(pid)
  767. logger.info(BIND10_PROCESS_ENDED, component.name(), pid,
  768. exit_status)
  769. if component.is_running() and self.runnable:
  770. # Tell it it failed. But only if it matters (we are
  771. # not shutting down and the component considers itself
  772. # to be running.
  773. component_restarted = component.failed(exit_status);
  774. # if the process wants to be restarted, but not just yet,
  775. # it returns False
  776. if not component_restarted:
  777. self.components_to_restart.append(component)
  778. else:
  779. logger.info(BIND10_UNKNOWN_CHILD_PROCESS_ENDED, pid)
  780. def restart_processes(self):
  781. """
  782. Restart any dead processes:
  783. * Returns the time when the next process is ready to be restarted.
  784. * If the server is shutting down, returns 0.
  785. * If there are no processes, returns None.
  786. The values returned can be safely passed into select() as the
  787. timeout value.
  788. """
  789. if not self.runnable:
  790. return 0
  791. still_dead = []
  792. # keep track of the first time we need to check this queue again,
  793. # if at all
  794. next_restart_time = None
  795. now = time.time()
  796. for component in self.components_to_restart:
  797. # If the component was removed from the configurator between since
  798. # scheduled to restart, just ignore it. The object will just be
  799. # dropped here.
  800. if not self._component_configurator.has_component(component):
  801. logger.info(BIND10_RESTART_COMPONENT_SKIPPED, component.name())
  802. elif not component.restart(now):
  803. still_dead.append(component)
  804. if next_restart_time is None or\
  805. next_restart_time > component.get_restart_time():
  806. next_restart_time = component.get_restart_time()
  807. self.components_to_restart = still_dead
  808. return next_restart_time
  809. def _get_socket(self, args):
  810. """
  811. Implementation of the get_socket CC command. It asks the cache
  812. to provide the token and sends the information back.
  813. """
  814. try:
  815. try:
  816. addr = isc.net.parse.addr_parse(args['address'])
  817. port = isc.net.parse.port_parse(args['port'])
  818. protocol = args['protocol']
  819. if protocol not in ['UDP', 'TCP']:
  820. raise ValueError("Protocol must be either UDP or TCP")
  821. share_mode = args['share_mode']
  822. if share_mode not in ['ANY', 'SAMEAPP', 'NO']:
  823. raise ValueError("Share mode must be one of ANY, SAMEAPP" +
  824. " or NO")
  825. share_name = args['share_name']
  826. except KeyError as ke:
  827. return \
  828. isc.config.ccsession.create_answer(1,
  829. "Missing parameter " +
  830. str(ke))
  831. # FIXME: This call contains blocking IPC. It is expected to be
  832. # short, but if it turns out to be problem, we'll need to do
  833. # something about it.
  834. token = self._socket_cache.get_token(protocol, addr, port,
  835. share_mode, share_name)
  836. return isc.config.ccsession.create_answer(0, {
  837. 'token': token,
  838. 'path': self._socket_path
  839. })
  840. except isc.bind10.socket_cache.SocketError as e:
  841. return isc.config.ccsession.create_answer(CREATOR_SOCKET_ERROR,
  842. str(e))
  843. except isc.bind10.socket_cache.ShareError as e:
  844. return isc.config.ccsession.create_answer(CREATOR_SHARE_ERROR,
  845. str(e))
  846. except Exception as e:
  847. return isc.config.ccsession.create_answer(1, str(e))
  848. def socket_request_handler(self, token, unix_socket):
  849. """
  850. This function handles a token that comes over a unix_domain socket.
  851. The function looks into the _socket_cache and sends the socket
  852. identified by the token back over the unix_socket.
  853. """
  854. try:
  855. token = str(token, 'ASCII') # Convert from bytes to str
  856. fd = self._socket_cache.get_socket(token, unix_socket.fileno())
  857. # FIXME: These two calls are blocking in their nature. An OS-level
  858. # buffer is likely to be large enough to hold all these data, but
  859. # if it wasn't and the remote application got stuck, we would have
  860. # a problem. If there appear such problems, we should do something
  861. # about it.
  862. unix_socket.sendall(CREATOR_SOCKET_OK)
  863. libutil_io_python.send_fd(unix_socket.fileno(), fd)
  864. except Exception as e:
  865. logger.info(BIND10_NO_SOCKET, token, e)
  866. unix_socket.sendall(CREATOR_SOCKET_UNAVAILABLE)
  867. def socket_consumer_dead(self, unix_socket):
  868. """
  869. This function handles when a unix_socket closes. This means all
  870. sockets sent to it are to be considered closed. This function signals
  871. so to the _socket_cache.
  872. """
  873. logger.info(BIND10_LOST_SOCKET_CONSUMER, unix_socket.fileno())
  874. try:
  875. self._socket_cache.drop_application(unix_socket.fileno())
  876. except ValueError:
  877. # This means the application holds no sockets. It's harmless, as it
  878. # can happen in real life - for example, it requests a socket, but
  879. # get_socket doesn't find it, so the application dies. It should be
  880. # rare, though.
  881. pass
  882. def set_creator(self, creator):
  883. """
  884. Registeres a socket creator into the b10-init. The socket creator is
  885. not used directly, but through a cache. The cache is created in this
  886. method.
  887. If called more than once, it raises a ValueError.
  888. """
  889. if self._socket_cache is not None:
  890. raise ValueError("A creator was inserted previously")
  891. self._socket_cache = isc.bind10.socket_cache.Cache(creator)
  892. def init_socket_srv(self):
  893. """
  894. Creates and listens on a unix-domain socket to be able to send out
  895. the sockets.
  896. This method should be called after switching user, or the switched
  897. applications won't be able to access the socket.
  898. """
  899. self._srv_socket = socket.socket(socket.AF_UNIX)
  900. # We create a temporary directory somewhere safe and unique, to avoid
  901. # the need to find the place ourself or bother users. Also, this
  902. # secures the socket on some platforms, as it creates a private
  903. # directory.
  904. self._tmpdir = tempfile.mkdtemp(prefix='sockcreator-')
  905. # Get the name
  906. self._socket_path = os.path.join(self._tmpdir, "sockcreator")
  907. # And bind the socket to the name
  908. self._srv_socket.bind(self._socket_path)
  909. self._srv_socket.listen(5)
  910. def remove_socket_srv(self):
  911. """
  912. Closes and removes the listening socket and the directory where it
  913. lives, as we created both.
  914. It does nothing if the _srv_socket is not set (eg. it was not yet
  915. initialized).
  916. """
  917. if self._srv_socket is not None:
  918. self._srv_socket.close()
  919. if os.path.exists(self._socket_path):
  920. os.remove(self._socket_path)
  921. if os.path.isdir(self._tmpdir):
  922. os.rmdir(self._tmpdir)
  923. def _srv_accept(self):
  924. """
  925. Accept a socket from the unix domain socket server and put it to the
  926. others we care about.
  927. """
  928. (socket, conn) = self._srv_socket.accept()
  929. self._unix_sockets[socket.fileno()] = (socket, b'')
  930. def _socket_data(self, socket_fileno):
  931. """
  932. This is called when a socket identified by the socket_fileno needs
  933. attention. We try to read data from there. If it is closed, we remove
  934. it.
  935. """
  936. (sock, previous) = self._unix_sockets[socket_fileno]
  937. while True:
  938. try:
  939. data = sock.recv(1, socket.MSG_DONTWAIT)
  940. except socket.error as se:
  941. # These two might be different on some systems
  942. if se.errno == errno.EAGAIN or se.errno == errno.EWOULDBLOCK:
  943. # No more data now. Oh, well, just store what we have.
  944. self._unix_sockets[socket_fileno] = (sock, previous)
  945. return
  946. else:
  947. data = b'' # Pretend it got closed
  948. if len(data) == 0: # The socket got to it's end
  949. del self._unix_sockets[socket_fileno]
  950. self.socket_consumer_dead(sock)
  951. sock.close()
  952. return
  953. else:
  954. if data == b"\n":
  955. # Handle this token and clear it
  956. self.socket_request_handler(previous, sock)
  957. previous = b''
  958. else:
  959. previous += data
  960. def run(self, wakeup_fd):
  961. """
  962. The main loop, waiting for sockets, commands and dead processes.
  963. Runs as long as the runnable is true.
  964. The wakeup_fd descriptor is the read end of pipe where CHLD signal
  965. handler writes.
  966. """
  967. ccs_fd = self.ccs.get_socket().fileno()
  968. while self.runnable:
  969. # clean up any processes that exited
  970. self.reap_children()
  971. next_restart = self.restart_processes()
  972. if next_restart is None:
  973. wait_time = None
  974. else:
  975. wait_time = max(next_restart - time.time(), 0)
  976. # select() can raise EINTR when a signal arrives,
  977. # even if they are resumable, so we have to catch
  978. # the exception
  979. try:
  980. (rlist, wlist, xlist) = \
  981. select.select([wakeup_fd, ccs_fd,
  982. self._srv_socket.fileno()] +
  983. list(self._unix_sockets.keys()), [], [],
  984. wait_time)
  985. except select.error as err:
  986. if err.args[0] == errno.EINTR:
  987. (rlist, wlist, xlist) = ([], [], [])
  988. else:
  989. logger.fatal(BIND10_SELECT_ERROR, err)
  990. break
  991. for fd in rlist + xlist:
  992. if fd == ccs_fd:
  993. try:
  994. self.ccs.check_command()
  995. except isc.cc.session.ProtocolError:
  996. logger.fatal(BIND10_MSGQ_DISAPPEARED)
  997. self.runnable = False
  998. break
  999. elif fd == wakeup_fd:
  1000. os.read(wakeup_fd, 32)
  1001. elif fd == self._srv_socket.fileno():
  1002. self._srv_accept()
  1003. elif fd in self._unix_sockets:
  1004. self._socket_data(fd)
  1005. # global variables, needed for signal handlers
  1006. options = None
  1007. b10_init = None
  1008. def reaper(signal_number, stack_frame):
  1009. """A child process has died (SIGCHLD received)."""
  1010. # don't do anything...
  1011. # the Python signal handler has been set up to write
  1012. # down a pipe, waking up our select() bit
  1013. pass
  1014. def get_signame(signal_number):
  1015. """Return the symbolic name for a signal."""
  1016. for sig in dir(signal):
  1017. if sig.startswith("SIG") and sig[3].isalnum():
  1018. if getattr(signal, sig) == signal_number:
  1019. return sig
  1020. return "Unknown signal %d" % signal_number
  1021. # XXX: perhaps register atexit() function and invoke that instead
  1022. def fatal_signal(signal_number, stack_frame):
  1023. """We need to exit (SIGINT or SIGTERM received)."""
  1024. global options
  1025. global b10_init
  1026. logger.info(BIND10_RECEIVED_SIGNAL, get_signame(signal_number))
  1027. signal.signal(signal.SIGCHLD, signal.SIG_DFL)
  1028. b10_init.runnable = False
  1029. def process_rename(option, opt_str, value, parser):
  1030. """Function that renames the process if it is requested by a option."""
  1031. isc.util.process.rename(value)
  1032. def parse_args(args=sys.argv[1:], Parser=OptionParser):
  1033. """
  1034. Function for parsing command line arguments. Returns the
  1035. options object from OptionParser.
  1036. """
  1037. parser = Parser(version=VERSION)
  1038. parser.add_option("-m", "--msgq-socket-file", dest="msgq_socket_file",
  1039. type="string", default=None,
  1040. help="UNIX domain socket file the b10-msgq daemon " +
  1041. "will use")
  1042. parser.add_option("-i", "--no-kill", action="store_true", dest="nokill",
  1043. default=False,
  1044. help="do not send SIGTERM and SIGKILL signals to " +
  1045. "modules during shutdown")
  1046. parser.add_option("-u", "--user", dest="user", type="string", default=None,
  1047. help="Change user after startup (must run as root)")
  1048. parser.add_option("-v", "--verbose", dest="verbose", action="store_true",
  1049. help="display more about what is going on")
  1050. parser.add_option("--pretty-name", type="string", action="callback",
  1051. callback=process_rename,
  1052. help="Set the process name (displayed in ps, top, ...)")
  1053. parser.add_option("-c", "--config-file", action="store",
  1054. dest="config_file", default=None,
  1055. help="Configuration database filename")
  1056. parser.add_option("--clear-config", action="store_true",
  1057. dest="clear_config", default=False,
  1058. help="Create backup of the configuration file and " +
  1059. "start with a clean configuration")
  1060. parser.add_option("-p", "--data-path", dest="data_path",
  1061. help="Directory to search for configuration files",
  1062. default=None)
  1063. parser.add_option("--cmdctl-port", dest="cmdctl_port", type="int",
  1064. default=None, help="Port of command control")
  1065. parser.add_option("--pid-file", dest="pid_file", type="string",
  1066. default=None,
  1067. help="file to dump the PID of the BIND 10 process")
  1068. parser.add_option("-w", "--wait", dest="wait_time", type="int",
  1069. default=10,
  1070. help="Time (in seconds) to wait for config manager to "
  1071. "start up")
  1072. (options, args) = parser.parse_args(args)
  1073. if options.cmdctl_port is not None:
  1074. try:
  1075. isc.net.parse.port_parse(options.cmdctl_port)
  1076. except ValueError as e:
  1077. parser.error(e)
  1078. if args:
  1079. parser.print_help()
  1080. sys.exit(1)
  1081. return options
  1082. def dump_pid(pid_file):
  1083. """
  1084. Dump the PID of the current process to the specified file. If the given
  1085. file is None this function does nothing. If the file already exists,
  1086. the existing content will be removed. If a system error happens in
  1087. creating or writing to the file, the corresponding exception will be
  1088. propagated to the caller.
  1089. """
  1090. if pid_file is None:
  1091. return
  1092. f = open(pid_file, "w")
  1093. f.write('%d\n' % os.getpid())
  1094. f.close()
  1095. def unlink_pid_file(pid_file):
  1096. """
  1097. Remove the given file, which is basically expected to be the PID file
  1098. created by dump_pid(). The specified may or may not exist; if it
  1099. doesn't this function does nothing. Other system level errors in removing
  1100. the file will be propagated as the corresponding exception.
  1101. """
  1102. if pid_file is None:
  1103. return
  1104. try:
  1105. os.unlink(pid_file)
  1106. except OSError as error:
  1107. if error.errno is not errno.ENOENT:
  1108. raise
  1109. def remove_lock_files():
  1110. """
  1111. Remove various lock files which were created by code such as in the
  1112. logger. This function should be called after BIND 10 shutdown.
  1113. """
  1114. lockfiles = ["logger_lockfile"]
  1115. lpath = bind10_config.DATA_PATH
  1116. if "B10_FROM_BUILD" in os.environ:
  1117. lpath = os.environ["B10_FROM_BUILD"]
  1118. if "B10_FROM_SOURCE_LOCALSTATEDIR" in os.environ:
  1119. lpath = os.environ["B10_FROM_SOURCE_LOCALSTATEDIR"]
  1120. if "B10_LOCKFILE_DIR_FROM_BUILD" in os.environ:
  1121. lpath = os.environ["B10_LOCKFILE_DIR_FROM_BUILD"]
  1122. for f in lockfiles:
  1123. fname = lpath + '/' + f
  1124. if os.path.isfile(fname):
  1125. try:
  1126. os.unlink(fname)
  1127. except OSError as e:
  1128. # We catch and ignore permission related error on unlink.
  1129. # This can happen if bind10 started with -u, created a lock
  1130. # file as a privileged user, but the directory is not writable
  1131. # for the changed user. This setup will cause immediate
  1132. # start failure, and we leave verbose error message including
  1133. # the leftover lock file, so it should be acceptable to ignore
  1134. # it (note that it doesn't make sense to log this event at
  1135. # this poitn)
  1136. if e.errno != errno.EPERM and e.errno != errno.EACCES:
  1137. raise
  1138. return
  1139. def main():
  1140. global options
  1141. global b10_init
  1142. # Enforce line buffering on stdout, even when not a TTY
  1143. sys.stdout = io.TextIOWrapper(sys.stdout.detach(), line_buffering=True)
  1144. options = parse_args()
  1145. # Announce startup. Making this is the first log message.
  1146. try:
  1147. logger.info(BIND10_STARTING, VERSION)
  1148. except RuntimeError as e:
  1149. sys.stderr.write('ERROR: failed to write the initial log: %s\n' %
  1150. str(e))
  1151. sys.stderr.write(NOTE_ON_LOCK_FILE)
  1152. sys.exit(1)
  1153. # Check user ID.
  1154. setuid = None
  1155. setgid = None
  1156. username = None
  1157. if options.user:
  1158. # Try getting information about the user, assuming UID passed.
  1159. try:
  1160. pw_ent = pwd.getpwuid(int(options.user))
  1161. setuid = pw_ent.pw_uid
  1162. setgid = pw_ent.pw_gid
  1163. username = pw_ent.pw_name
  1164. except ValueError:
  1165. pass
  1166. except KeyError:
  1167. pass
  1168. # Next try getting information about the user, assuming user name
  1169. # passed.
  1170. # If the information is both a valid user name and user number, we
  1171. # prefer the name because we try it second. A minor point, hopefully.
  1172. try:
  1173. pw_ent = pwd.getpwnam(options.user)
  1174. setuid = pw_ent.pw_uid
  1175. setgid = pw_ent.pw_gid
  1176. username = pw_ent.pw_name
  1177. except KeyError:
  1178. pass
  1179. if setuid is None:
  1180. logger.fatal(BIND10_INVALID_USER, options.user)
  1181. sys.exit(1)
  1182. # Create wakeup pipe for signal handlers
  1183. wakeup_pipe = os.pipe()
  1184. signal.set_wakeup_fd(wakeup_pipe[1])
  1185. # Set signal handlers for catching child termination, as well
  1186. # as our own demise.
  1187. signal.signal(signal.SIGCHLD, reaper)
  1188. signal.siginterrupt(signal.SIGCHLD, False)
  1189. signal.signal(signal.SIGINT, fatal_signal)
  1190. signal.signal(signal.SIGTERM, fatal_signal)
  1191. # Block SIGPIPE, as we don't want it to end this process
  1192. signal.signal(signal.SIGPIPE, signal.SIG_IGN)
  1193. try:
  1194. b10_init = Init(options.msgq_socket_file, options.data_path,
  1195. options.config_file, options.clear_config,
  1196. options.verbose, options.nokill,
  1197. setuid, setgid, username, options.cmdctl_port,
  1198. options.wait_time)
  1199. startup_result = b10_init.startup()
  1200. if startup_result:
  1201. logger.fatal(BIND10_STARTUP_ERROR, startup_result)
  1202. sys.exit(1)
  1203. b10_init.init_socket_srv()
  1204. logger.info(BIND10_STARTUP_COMPLETE)
  1205. dump_pid(options.pid_file)
  1206. # Let it run
  1207. b10_init.run(wakeup_pipe[0])
  1208. # shutdown
  1209. signal.signal(signal.SIGCHLD, signal.SIG_DFL)
  1210. b10_init.shutdown()
  1211. finally:
  1212. # Clean up the filesystem
  1213. unlink_pid_file(options.pid_file)
  1214. remove_lock_files()
  1215. if b10_init is not None:
  1216. b10_init.remove_socket_srv()
  1217. sys.exit(b10_init.exitcode)
  1218. if __name__ == "__main__":
  1219. isc.util.traceback_handler.traceback_handler(main)