bind10.py.in 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755
  1. #!@PYTHON@
  2. # Copyright (C) 2010 Internet Systems Consortium.
  3. #
  4. # Permission to use, copy, modify, and distribute this software for any
  5. # purpose with or without fee is hereby granted, provided that the above
  6. # copyright notice and this permission notice appear in all copies.
  7. #
  8. # THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SYSTEMS CONSORTIUM
  9. # DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL
  10. # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
  11. # INTERNET SYSTEMS CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT,
  12. # INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
  13. # FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
  14. # NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
  15. # WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  16. """\
  17. This file implements the Boss of Bind (BoB, or bob) program.
  18. Its purpose is to start up the BIND 10 system, and then manage the
  19. processes, by starting and stopping processes, plus restarting
  20. processes that exit.
  21. To start the system, it first runs the c-channel program (msgq), then
  22. connects to that. It then runs the configuration manager, and reads
  23. its own configuration. Then it proceeds to starting other modules.
  24. The Python subprocess module is used for starting processes, but
  25. because this is not efficient for managing groups of processes,
  26. SIGCHLD signals are caught and processed using the signal module.
  27. Most of the logic is contained in the BoB class. However, since Python
  28. requires that signal processing happen in the main thread, we do
  29. signal handling outside of that class, in the code running for
  30. __main__.
  31. """
  32. import sys; sys.path.append ('@@PYTHONPATH@@')
  33. import os
  34. # If B10_FROM_SOURCE is set in the environment, we use data files
  35. # from a directory relative to that, otherwise we use the ones
  36. # installed on the system
  37. if "B10_FROM_SOURCE" in os.environ:
  38. SPECFILE_LOCATION = os.environ["B10_FROM_SOURCE"] + "/src/bin/bind10/bob.spec"
  39. else:
  40. PREFIX = "@prefix@"
  41. DATAROOTDIR = "@datarootdir@"
  42. SPECFILE_LOCATION = "@datadir@/@PACKAGE@/bob.spec".replace("${datarootdir}", DATAROOTDIR).replace("${prefix}", PREFIX)
  43. import subprocess
  44. import signal
  45. import re
  46. import errno
  47. import time
  48. import select
  49. import random
  50. import socket
  51. from optparse import OptionParser, OptionValueError
  52. import io
  53. import pwd
  54. import posix
  55. import isc.cc
# This is the version that gets displayed to the user.
# The VERSION string consists of the module name, the module version
# number, and the overall BIND 10 version number (set in configure.ac).
# NOTE(review): "@PACKAGE_VERSION@" looks like a build-time placeholder
# that is substituted when this .in template is processed -- confirm.
VERSION = "bind10 20100916 (BIND 10 @PACKAGE_VERSION@)"
  60. class RestartSchedule:
  61. """
  62. Keeps state when restarting something (in this case, a process).
  63. When a process dies unexpectedly, we need to restart it. However, if
  64. it fails to restart for some reason, then we should not simply keep
  65. restarting it at high speed.
  66. A more sophisticated algorithm can be developed, but for now we choose
  67. a simple set of rules:
  68. * If a process was been running for >=10 seconds, we restart it
  69. right away.
  70. * If a process was running for <10 seconds, we wait until 10 seconds
  71. after it was started.
  72. To avoid programs getting into lockstep, we use a normal distribution
  73. to avoid being restarted at exactly 10 seconds."""
  74. def __init__(self, restart_frequency=10.0):
  75. self.restart_frequency = restart_frequency
  76. self.run_start_time = None
  77. self.run_stop_time = None
  78. self.restart_time = None
  79. def set_run_start_time(self, when=None):
  80. if when is None:
  81. when = time.time()
  82. self.run_start_time = when
  83. sigma = self.restart_frequency * 0.05
  84. self.restart_time = when + random.normalvariate(self.restart_frequency,
  85. sigma)
  86. def set_run_stop_time(self, when=None):
  87. """We don't actually do anything with stop time now, but it
  88. might be useful for future algorithms."""
  89. if when is None:
  90. when = time.time()
  91. self.run_stop_time = when
  92. def get_restart_time(self, when=None):
  93. if when is None:
  94. when = time.time()
  95. return max(when, self.restart_time)
  96. class ProcessInfoError(Exception): pass
  97. class ProcessInfo:
  98. """Information about a process"""
  99. dev_null = open(os.devnull, "w")
  100. def __init__(self, name, args, env={}, dev_null_stdout=False,
  101. dev_null_stderr=False, uid=None, username=None):
  102. self.name = name
  103. self.args = args
  104. self.env = env
  105. self.dev_null_stdout = dev_null_stdout
  106. self.dev_null_stderr = dev_null_stderr
  107. self.restart_schedule = RestartSchedule()
  108. self.uid = uid
  109. self.username = username
  110. self._spawn()
  111. def _setuid(self):
  112. """Function used before running a program that needs to run as a
  113. different user."""
  114. if self.uid is not None:
  115. try:
  116. posix.setuid(self.uid)
  117. except OSError as e:
  118. if e.errno == errno.EPERM:
  119. # if we failed to change user due to permission report that
  120. raise ProcessInfoError("Unable to change to user %s (uid %d)" % (self.username, self.uid))
  121. else:
  122. # otherwise simply re-raise whatever error we found
  123. raise
  124. def _spawn(self):
  125. if self.dev_null_stdout:
  126. spawn_stdout = self.dev_null
  127. else:
  128. spawn_stdout = None
  129. if self.dev_null_stderr:
  130. spawn_stderr = self.dev_null
  131. else:
  132. spawn_stderr = None
  133. # Environment variables for the child process will be a copy of those
  134. # of the boss process with any additional specific variables given
  135. # on construction (self.env).
  136. spawn_env = os.environ
  137. spawn_env.update(self.env)
  138. if 'B10_FROM_SOURCE' not in os.environ:
  139. spawn_env['PATH'] = "@@LIBEXECDIR@@:" + spawn_env['PATH']
  140. self.process = subprocess.Popen(self.args,
  141. stdin=subprocess.PIPE,
  142. stdout=spawn_stdout,
  143. stderr=spawn_stderr,
  144. close_fds=True,
  145. env=spawn_env,
  146. preexec_fn=self._setuid)
  147. self.pid = self.process.pid
  148. self.restart_schedule.set_run_start_time()
  149. def respawn(self):
  150. self._spawn()
  151. class IPAddr:
  152. """Stores an IPv4 or IPv6 address."""
  153. family = None
  154. addr = None
  155. def __init__(self, addr):
  156. try:
  157. a = socket.inet_pton(socket.AF_INET, addr)
  158. self.family = socket.AF_INET
  159. self.addr = a
  160. return
  161. except:
  162. pass
  163. try:
  164. a = socket.inet_pton(socket.AF_INET6, addr)
  165. self.family = socket.AF_INET6
  166. self.addr = a
  167. return
  168. except Exception as e:
  169. raise e
  170. def __str__(self):
  171. return socket.inet_ntop(self.family, self.addr)
  172. class BoB:
  173. """Boss of BIND class."""
  174. def __init__(self, msgq_socket_file=None, auth_port=5300, address='',
  175. nocache=False, verbose=False, setuid=None, username=None):
  176. """Initialize the Boss of BIND. This is a singleton (only one
  177. can run).
  178. The msgq_socket_file specifies the UNIX domain socket file
  179. that the msgq process listens on.
  180. If verbose is True, then the boss reports what it is doing.
  181. """
  182. self.verbose = verbose
  183. self.msgq_socket_file = msgq_socket_file
  184. self.auth_port = auth_port
  185. self.address = None
  186. if address:
  187. self.address = IPAddr(address)
  188. self.cc_session = None
  189. self.ccs = None
  190. self.processes = {}
  191. self.dead_processes = {}
  192. self.runnable = False
  193. self.uid = setuid
  194. self.username = username
  195. self.nocache = nocache
  196. def config_handler(self, new_config):
  197. if self.verbose:
  198. sys.stdout.write("[bind10] handling new config:\n")
  199. sys.stdout.write(new_config + "\n")
  200. answer = isc.config.ccsession.create_answer(0)
  201. return answer
  202. # TODO
  203. def command_handler(self, command, args):
  204. if self.verbose:
  205. sys.stdout.write("[bind10] Boss got command:\n")
  206. sys.stdout.write(command + "\n")
  207. answer = isc.config.ccsession.create_answer(1, "command not implemented")
  208. if type(command) != str:
  209. answer = isc.config.ccsession.create_answer(1, "bad command")
  210. else:
  211. cmd = command
  212. if cmd == "shutdown":
  213. sys.stdout.write("[bind10] got shutdown command\n")
  214. self.runnable = False
  215. answer = isc.config.ccsession.create_answer(0)
  216. else:
  217. answer = isc.config.ccsession.create_answer(1,
  218. "Unknown command")
  219. return answer
  220. def startup(self):
  221. """Start the BoB instance.
  222. Returns None if successful, otherwise an string describing the
  223. problem.
  224. """
  225. # try to connect to the c-channel daemon,
  226. # to see if it is already running
  227. c_channel_env = {}
  228. if self.msgq_socket_file is not None:
  229. c_channel_env["BIND10_MSGQ_SOCKET_FILE"] = self.msgq_socket_file
  230. if self.verbose:
  231. sys.stdout.write("[bind10] Checking for already running b10-msgq\n")
  232. # try to connect, and if we can't wait a short while
  233. try:
  234. self.cc_session = isc.cc.Session(self.msgq_socket_file)
  235. return "b10-msgq already running, or socket file not cleaned , cannot start"
  236. except isc.cc.session.SessionError:
  237. # this is the case we want, where the msgq is not running
  238. pass
  239. # start the c-channel daemon
  240. if self.verbose:
  241. if self.msgq_socket_file:
  242. sys.stdout.write("[bind10] Starting b10-msgq\n")
  243. try:
  244. c_channel = ProcessInfo("b10-msgq", ["b10-msgq"], c_channel_env,
  245. True, not self.verbose, uid=self.uid,
  246. username=self.username)
  247. except Exception as e:
  248. return "Unable to start b10-msgq; " + str(e)
  249. self.processes[c_channel.pid] = c_channel
  250. if self.verbose:
  251. sys.stdout.write("[bind10] Started b10-msgq (PID %d)\n" %
  252. c_channel.pid)
  253. # now connect to the c-channel
  254. cc_connect_start = time.time()
  255. while self.cc_session is None:
  256. # if we have been trying for "a while" give up
  257. if (time.time() - cc_connect_start) > 5:
  258. c_channel.process.kill()
  259. return "Unable to connect to c-channel after 5 seconds"
  260. # try to connect, and if we can't wait a short while
  261. try:
  262. self.cc_session = isc.cc.Session(self.msgq_socket_file)
  263. except isc.cc.session.SessionError:
  264. time.sleep(0.1)
  265. # start the configuration manager
  266. if self.verbose:
  267. sys.stdout.write("[bind10] Starting b10-cfgmgr\n")
  268. try:
  269. bind_cfgd = ProcessInfo("b10-cfgmgr", ["b10-cfgmgr"],
  270. c_channel_env, uid=self.uid,
  271. username=self.username)
  272. except Exception as e:
  273. c_channel.process.kill()
  274. return "Unable to start b10-cfgmgr; " + str(e)
  275. self.processes[bind_cfgd.pid] = bind_cfgd
  276. if self.verbose:
  277. sys.stdout.write("[bind10] Started b10-cfgmgr (PID %d)\n" %
  278. bind_cfgd.pid)
  279. # sleep until b10-cfgmgr is fully up and running, this is a good place
  280. # to have a (short) timeout on synchronized groupsend/receive
  281. # TODO: replace the sleep by a listen for ConfigManager started
  282. # message
  283. time.sleep(1)
  284. if self.verbose:
  285. sys.stdout.write("[bind10] starting ccsession\n")
  286. self.ccs = isc.config.ModuleCCSession(SPECFILE_LOCATION,
  287. self.config_handler, self.command_handler)
  288. self.ccs.start()
  289. if self.verbose:
  290. sys.stdout.write("[bind10] ccsession started\n")
  291. # start b10-auth
  292. # XXX: this must be read from the configuration manager in the future
  293. authargs = ['b10-auth', '-p', str(self.auth_port)]
  294. if self.address:
  295. authargs += ['-a', str(self.address)]
  296. if self.nocache:
  297. authargs += ['-n']
  298. if self.uid:
  299. authargs += ['-u', str(self.uid)]
  300. if self.verbose:
  301. authargs += ['-v']
  302. sys.stdout.write("Starting b10-auth using port %d" %
  303. self.auth_port)
  304. if self.address:
  305. sys.stdout.write(" on %s" % str(self.address))
  306. sys.stdout.write("\n")
  307. try:
  308. auth = ProcessInfo("b10-auth", authargs,
  309. c_channel_env)
  310. except Exception as e:
  311. c_channel.process.kill()
  312. bind_cfgd.process.kill()
  313. xfrout.process.kill()
  314. return "Unable to start b10-auth; " + str(e)
  315. self.processes[auth.pid] = auth
  316. if self.verbose:
  317. sys.stdout.write("[bind10] Started b10-auth (PID %d)\n" % auth.pid)
  318. # everything after the authoritative server can run as non-root
  319. if self.uid is not None:
  320. posix.setuid(self.uid)
  321. # start the xfrout before auth-server, to make sure every xfr-query can
  322. # be processed properly.
  323. xfrout_args = ['b10-xfrout']
  324. if self.verbose:
  325. sys.stdout.write("[bind10] Starting b10-xfrout\n")
  326. xfrout_args += ['-v']
  327. try:
  328. xfrout = ProcessInfo("b10-xfrout", xfrout_args,
  329. c_channel_env )
  330. except Exception as e:
  331. c_channel.process.kill()
  332. bind_cfgd.process.kill()
  333. return "Unable to start b10-xfrout; " + str(e)
  334. self.processes[xfrout.pid] = xfrout
  335. if self.verbose:
  336. sys.stdout.write("[bind10] Started b10-xfrout (PID %d)\n" %
  337. xfrout.pid)
  338. # start b10-xfrin
  339. xfrin_args = ['b10-xfrin']
  340. if self.verbose:
  341. sys.stdout.write("[bind10] Starting b10-xfrin\n")
  342. xfrin_args += ['-v']
  343. try:
  344. xfrind = ProcessInfo("b10-xfrin", xfrin_args,
  345. c_channel_env)
  346. except Exception as e:
  347. c_channel.process.kill()
  348. bind_cfgd.process.kill()
  349. xfrout.process.kill()
  350. auth.process.kill()
  351. return "Unable to start b10-xfrin; " + str(e)
  352. self.processes[xfrind.pid] = xfrind
  353. if self.verbose:
  354. sys.stdout.write("[bind10] Started b10-xfrin (PID %d)\n" %
  355. xfrind.pid)
  356. # start b10-zonemgr
  357. zonemgr_args = ['b10-zonemgr']
  358. if self.verbose:
  359. sys.stdout.write("[bind10] Starting b10-zonemgr\n")
  360. zonemgr_args += ['-v']
  361. try:
  362. zonemgr = ProcessInfo("b10-zonemgr", zonemgr_args,
  363. c_channel_env)
  364. except Exception as e:
  365. c_channel.process.kill()
  366. bind_cfgd.process.kill()
  367. xfrout.process.kill()
  368. auth.process.kill()
  369. xfrind.process.kill()
  370. return "Unable to start b10-zonemgr; " + str(e)
  371. self.processes[zonemgr.pid] = zonemgr
  372. if self.verbose:
  373. sys.stdout.write("[bind10] Started b10-zonemgr(PID %d)\n" %
  374. zonemgr.pid)
  375. # start the b10-cmdctl
  376. # XXX: we hardcode port 8080
  377. cmdctl_args = ['b10-cmdctl']
  378. if self.verbose:
  379. sys.stdout.write("[bind10] Starting b10-cmdctl on port 8080\n")
  380. cmdctl_args += ['-v']
  381. try:
  382. cmd_ctrld = ProcessInfo("b10-cmdctl", cmdctl_args,
  383. c_channel_env)
  384. except Exception as e:
  385. c_channel.process.kill()
  386. bind_cfgd.process.kill()
  387. xfrout.process.kill()
  388. auth.process.kill()
  389. xfrind.process.kill()
  390. zonemgr.process.kill()
  391. return "Unable to start b10-cmdctl; " + str(e)
  392. self.processes[cmd_ctrld.pid] = cmd_ctrld
  393. if self.verbose:
  394. sys.stdout.write("[bind10] Started b10-cmdctl (PID %d)\n" %
  395. cmd_ctrld.pid)
  396. self.runnable = True
  397. return None
  398. def stop_all_processes(self):
  399. """Stop all processes."""
  400. cmd = { "command": ['shutdown']}
  401. self.cc_session.group_sendmsg(cmd, 'Boss', 'Cmdctl')
  402. self.cc_session.group_sendmsg(cmd, "Boss", "ConfigManager")
  403. self.cc_session.group_sendmsg(cmd, "Boss", "Auth")
  404. self.cc_session.group_sendmsg(cmd, "Boss", "Xfrout")
  405. self.cc_session.group_sendmsg(cmd, "Boss", "Xfrin")
  406. self.cc_session.group_sendmsg(cmd, "Boss", "Zonemgr")
  407. def stop_process(self, process):
  408. """Stop the given process, friendly-like."""
  409. # XXX nothing yet
  410. pass
  411. def shutdown(self):
  412. """Stop the BoB instance."""
  413. if self.verbose:
  414. sys.stdout.write("[bind10] Stopping the server.\n")
  415. # first try using the BIND 10 request to stop
  416. try:
  417. self.stop_all_processes()
  418. except:
  419. pass
  420. # XXX: some delay probably useful... how much is uncertain
  421. time.sleep(0.5)
  422. self.reap_children()
  423. # next try sending a SIGTERM
  424. processes_to_stop = list(self.processes.values())
  425. for proc_info in processes_to_stop:
  426. if self.verbose:
  427. sys.stdout.write("[bind10] Sending SIGTERM to %s (PID %d).\n" %
  428. (proc_info.name, proc_info.pid))
  429. try:
  430. proc_info.process.terminate()
  431. except OSError:
  432. # ignore these (usually ESRCH because the child
  433. # finally exited)
  434. pass
  435. # finally, send SIGKILL (unmaskable termination) until everybody dies
  436. while self.processes:
  437. # XXX: some delay probably useful... how much is uncertain
  438. time.sleep(0.1)
  439. self.reap_children()
  440. processes_to_stop = list(self.processes.values())
  441. for proc_info in processes_to_stop:
  442. if self.verbose:
  443. sys.stdout.write("[bind10] Sending SIGKILL to %s (PID %d).\n" %
  444. (proc_info.name, proc_info.pid))
  445. try:
  446. proc_info.process.kill()
  447. except OSError:
  448. # ignore these (usually ESRCH because the child
  449. # finally exited)
  450. pass
  451. if self.verbose:
  452. sys.stdout.write("[bind10] All processes ended, server done.\n")
  453. def reap_children(self):
  454. """Check to see if any of our child processes have exited,
  455. and note this for later handling.
  456. """
  457. while True:
  458. try:
  459. (pid, exit_status) = os.waitpid(-1, os.WNOHANG)
  460. except OSError as o:
  461. if o.errno == errno.ECHILD: break
  462. # XXX: should be impossible to get any other error here
  463. raise
  464. if pid == 0: break
  465. if pid in self.processes:
  466. proc_info = self.processes.pop(pid)
  467. proc_info.restart_schedule.set_run_stop_time()
  468. self.dead_processes[proc_info.pid] = proc_info
  469. if self.verbose:
  470. sys.stdout.write("[bind10] Process %s (PID %d) died.\n" %
  471. (proc_info.name, proc_info.pid))
  472. if proc_info.name == "b10-msgq":
  473. if self.verbose and self.runnable:
  474. sys.stdout.write(
  475. "[bind10] The b10-msgq process died, shutting down.\n")
  476. self.runnable = False
  477. else:
  478. sys.stdout.write("[bind10] Unknown child pid %d exited.\n" % pid)
  479. def restart_processes(self):
  480. """Restart any dead processes.
  481. Returns the time when the next process is ready to be restarted.
  482. If the server is shutting down, returns 0.
  483. If there are no processes, returns None.
  484. The values returned can be safely passed into select() as the
  485. timeout value."""
  486. next_restart = None
  487. # if we're shutting down, then don't restart
  488. if not self.runnable:
  489. return 0
  490. # otherwise look through each dead process and try to restart
  491. still_dead = {}
  492. now = time.time()
  493. for proc_info in self.dead_processes.values():
  494. restart_time = proc_info.restart_schedule.get_restart_time(now)
  495. if restart_time > now:
  496. if (next_restart is None) or (next_restart > restart_time):
  497. next_restart = restart_time
  498. still_dead[proc_info.pid] = proc_info
  499. else:
  500. if self.verbose:
  501. sys.stdout.write("[bind10] Resurrecting dead %s process...\n" %
  502. proc_info.name)
  503. try:
  504. proc_info.respawn()
  505. self.processes[proc_info.pid] = proc_info
  506. if self.verbose:
  507. sys.stdout.write("[bind10] Resurrected %s (PID %d)\n" %
  508. (proc_info.name, proc_info.pid))
  509. except:
  510. still_dead[proc_info.pid] = proc_info
  511. # remember any processes that refuse to be resurrected
  512. self.dead_processes = still_dead
  513. # return the time when the next process is ready to be restarted
  514. return next_restart
# global variables, needed for signal handlers: main() assigns both and
# fatal_signal() uses them when SIGINT or SIGTERM is received
options = None          # parsed command-line options
boss_of_bind = None     # the BoB instance
  518. def reaper(signal_number, stack_frame):
  519. """A child process has died (SIGCHLD received)."""
  520. # don't do anything...
  521. # the Python signal handler has been set up to write
  522. # down a pipe, waking up our select() bit
  523. pass
  524. def get_signame(signal_number):
  525. """Return the symbolic name for a signal."""
  526. for sig in dir(signal):
  527. if sig.startswith("SIG") and sig[3].isalnum():
  528. if getattr(signal, sig) == signal_number:
  529. return sig
  530. return "Unknown signal %d" % signal_number
  531. # XXX: perhaps register atexit() function and invoke that instead
  532. def fatal_signal(signal_number, stack_frame):
  533. """We need to exit (SIGINT or SIGTERM received)."""
  534. global options
  535. global boss_of_bind
  536. if options.verbose:
  537. sys.stdout.write("[bind10] Received %s.\n" % get_signame(signal_number))
  538. signal.signal(signal.SIGCHLD, signal.SIG_DFL)
  539. boss_of_bind.runnable = False
  540. def check_port(option, opt_str, value, parser):
  541. """Function to insure that the port we are passed is actually
  542. a valid port number. Used by OptionParser() on startup."""
  543. if not re.match('^(6553[0-5]|655[0-2]\d|65[0-4]\d\d|6[0-4]\d{3}|[1-5]\d{4}|[1-9]\d{0,3}|0)$', value):
  544. raise OptionValueError("%s requires a port number (0-65535)" % opt_str)
  545. if (opt_str == '-m' or opt_str == '--msgq-port'):
  546. parser.values.msgq_port = value
  547. elif (opt_str == '-p' or opt_str == '--port'):
  548. parser.values.auth_port = value
  549. else:
  550. raise OptionValueError("Unknown option " + opt_str)
  551. def check_addr(option, opt_str, value, parser):
  552. """Function to insure that the address we are passed is actually
  553. a valid address. Used by OptionParser() on startup."""
  554. try:
  555. IPAddr(value)
  556. except:
  557. raise OptionValueError("%s requires a valid IPv4 or IPv6 address" % opt_str)
  558. if (opt_str == '-a' or opt_str == '--address'):
  559. parser.values.address = value
  560. else:
  561. raise OptionValueError("Unknown option " + opt_str)
  562. def main():
  563. global options
  564. global boss_of_bind
  565. # Enforce line buffering on stdout, even when not a TTY
  566. sys.stdout = io.TextIOWrapper(sys.stdout.detach(), line_buffering=True)
  567. # Parse any command-line options.
  568. parser = OptionParser(version=VERSION)
  569. parser.add_option("-a", "--address", dest="address", type="string",
  570. action="callback", callback=check_addr, default='',
  571. help="address the b10-auth daemon will use (default: listen on all addresses)")
  572. parser.add_option("-m", "--msgq-socket-file", dest="msgq_socket_file",
  573. type="string", default=None,
  574. help="UNIX domain socket file the b10-msgq daemon will use")
  575. parser.add_option("-n", "--no-cache", action="store_true", dest="nocache",
  576. default=False, help="disable hot-spot cache in b10-auth")
  577. parser.add_option("-p", "--port", dest="auth_port", type="string",
  578. action="callback", callback=check_port, default="5300",
  579. help="port the b10-auth daemon will use (default 5300)")
  580. parser.add_option("-u", "--user", dest="user",
  581. type="string", default=None,
  582. help="Change user after startup (must run as root)")
  583. parser.add_option("-v", "--verbose", dest="verbose", action="store_true",
  584. help="display more about what is going on")
  585. (options, args) = parser.parse_args()
  586. if args:
  587. parser.print_help()
  588. sys.exit(1)
  589. # Check user ID.
  590. setuid = None
  591. username = None
  592. if options.user:
  593. # Try getting information about the user, assuming UID passed.
  594. try:
  595. pw_ent = pwd.getpwuid(int(options.user))
  596. setuid = pw_ent.pw_uid
  597. username = pw_ent.pw_name
  598. except ValueError:
  599. pass
  600. except KeyError:
  601. pass
  602. # Next try getting information about the user, assuming user name
  603. # passed.
  604. # If the information is both a valid user name and user number, we
  605. # prefer the name because we try it second. A minor point, hopefully.
  606. try:
  607. pw_ent = pwd.getpwnam(options.user)
  608. setuid = pw_ent.pw_uid
  609. username = pw_ent.pw_name
  610. except KeyError:
  611. pass
  612. if setuid is None:
  613. sys.stderr.write("bind10: invalid user: '%s'\n" % options.user)
  614. sys.exit(1)
  615. # Announce startup.
  616. if options.verbose:
  617. sys.stdout.write("%s\n" % VERSION)
  618. # TODO: set process name, perhaps by:
  619. # http://code.google.com/p/procname/
  620. # http://github.com/lericson/procname/
  621. # Create wakeup pipe for signal handlers
  622. wakeup_pipe = os.pipe()
  623. signal.set_wakeup_fd(wakeup_pipe[1])
  624. # Set signal handlers for catching child termination, as well
  625. # as our own demise.
  626. signal.signal(signal.SIGCHLD, reaper)
  627. signal.siginterrupt(signal.SIGCHLD, False)
  628. signal.signal(signal.SIGINT, fatal_signal)
  629. signal.signal(signal.SIGTERM, fatal_signal)
  630. # Go bob!
  631. boss_of_bind = BoB(options.msgq_socket_file, int(options.auth_port),
  632. options.address, options.nocache, options.verbose,
  633. setuid, username)
  634. startup_result = boss_of_bind.startup()
  635. if startup_result:
  636. sys.stderr.write("[bind10] Error on startup: %s\n" % startup_result)
  637. sys.exit(1)
  638. sys.stdout.write("[bind10] BIND 10 started\n")
  639. # In our main loop, we check for dead processes or messages
  640. # on the c-channel.
  641. wakeup_fd = wakeup_pipe[0]
  642. ccs_fd = boss_of_bind.ccs.get_socket().fileno()
  643. while boss_of_bind.runnable:
  644. # clean up any processes that exited
  645. boss_of_bind.reap_children()
  646. next_restart = boss_of_bind.restart_processes()
  647. if next_restart is None:
  648. wait_time = None
  649. else:
  650. wait_time = max(next_restart - time.time(), 0)
  651. # select() can raise EINTR when a signal arrives,
  652. # even if they are resumable, so we have to catch
  653. # the exception
  654. try:
  655. (rlist, wlist, xlist) = select.select([wakeup_fd, ccs_fd], [], [],
  656. wait_time)
  657. except select.error as err:
  658. if err.args[0] == errno.EINTR:
  659. (rlist, wlist, xlist) = ([], [], [])
  660. else:
  661. sys.stderr.write("[bind10] Error with select(); %s\n" % err)
  662. break
  663. for fd in rlist + xlist:
  664. if fd == ccs_fd:
  665. try:
  666. boss_of_bind.ccs.check_command()
  667. except isc.cc.session.ProtocolError:
  668. if options.verbose:
  669. sys.stderr.write("[bind10] msgq channel disappeared.\n")
  670. break
  671. elif fd == wakeup_fd:
  672. os.read(wakeup_fd, 32)
  673. # shutdown
  674. signal.signal(signal.SIGCHLD, signal.SIG_DFL)
  675. boss_of_bind.shutdown()
  676. sys.exit(0)
# Run the boss only when this file is executed as a script.
if __name__ == "__main__":
    main()