bind10.py.in 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514
  1. #!@PYTHON@
  2. import sys; sys.path.append ('@@PYTHONPATH@@')
  3. import os
"""\
This file implements the Boss of Bind (BoB, or bob) program.

Its purpose is to start up the BIND 10 system, and then manage the
processes, by starting and stopping processes, plus restarting
processes that exit.

To start the system, it first runs the c-channel program (msgq), then
connects to that. It then runs the configuration manager, and reads
its own configuration. Then it proceeds to starting other modules.

The Python subprocess module is used for starting processes, but
because this is not efficient for managing groups of processes,
SIGCHLD signals are caught and processed using the signal module.

Most of the logic is contained in the BoB class. However, since Python
requires that signal processing happen in the main thread, we do
signal handling outside of that class, in the code running for
__main__.
"""
  20. # If B10_FROM_SOURCE is set in the environment, we use data files
  21. # from a directory relative to that, otherwise we use the ones
  22. # installed on the system
  23. if "B10_FROM_SOURCE" in os.environ:
  24. SPECFILE_LOCATION = os.environ["B10_FROM_SOURCE"] + "/src/bin/bind10/bob.spec"
  25. else:
  26. PREFIX = "@prefix@"
  27. DATAROOTDIR = "@datarootdir@"
  28. SPECFILE_LOCATION = "@datadir@/@PACKAGE@/bob.spec".replace("${datarootdir}", DATAROOTDIR).replace("${prefix}", PREFIX)
  29. # TODO: start up statistics thingy
  30. import subprocess
  31. import signal
  32. import os
  33. import re
  34. import errno
  35. import time
  36. import select
  37. import pprint
  38. from optparse import OptionParser, OptionValueError
  39. import isc.cc
  40. import isc
  41. # This is the version that gets displayed to the user.
  42. __version__ = "v20100225"
  43. # Nothing at all to do with the 1990-12-10 article here:
  44. # http://www.subgenius.com/subg-digest/v2/0056.html
  45. class ProcessInfo:
  46. """Information about a process"""
  47. dev_null = open("/dev/null", "w")
  48. def _spawn(self):
  49. if self.dev_null_stdout:
  50. spawn_stdout = self.dev_null
  51. else:
  52. spawn_stdout = None
  53. spawn_env = self.env
  54. spawn_env['PATH'] = os.environ['PATH']
  55. if 'B10_FROM_SOURCE' in os.environ:
  56. spawn_env['B10_FROM_SOURCE'] = os.environ['B10_FROM_SOURCE']
  57. else:
  58. spawn_env['PATH'] = "@@LIBEXECDIR@@:" + spawn_env['PATH']
  59. if 'PYTHON_EXEC' in os.environ:
  60. spawn_env['PYTHON_EXEC'] = os.environ['PYTHON_EXEC']
  61. if 'PYTHONPATH' in os.environ:
  62. spawn_env['PYTHONPATH'] = os.environ['PYTHONPATH']
  63. self.process = subprocess.Popen(self.args,
  64. stdin=subprocess.PIPE,
  65. stdout=spawn_stdout,
  66. stderr=spawn_stdout,
  67. close_fds=True,
  68. env=spawn_env,)
  69. self.pid = self.process.pid
  70. def __init__(self, name, args, env={}, dev_null_stdout=False):
  71. self.name = name
  72. self.args = args
  73. self.env = env
  74. self.dev_null_stdout = dev_null_stdout
  75. self._spawn()
  76. def respawn(self):
  77. self._spawn()
  78. class BoB:
  79. """Boss of BIND class."""
  80. def __init__(self, c_channel_port=9912, verbose=False):
  81. """Initialize the Boss of BIND. This is a singleton (only one
  82. can run).
  83. The c_channel_port specifies the TCP/IP port that the msgq
  84. process listens on. If verbose is True, then the boss reports
  85. what it is doing.
  86. """
  87. self.verbose = verbose
  88. self.c_channel_port = c_channel_port
  89. self.cc_session = None
  90. self.ccs = None
  91. self.processes = {}
  92. self.dead_processes = {}
  93. self.runnable = False
  94. def config_handler(self, new_config):
  95. if self.verbose:
  96. print("[XX] handling new config:")
  97. print(new_config)
  98. answer = isc.config.ccsession.create_answer(0)
  99. return answer
  100. # TODO
  101. def command_handler(self, command, args):
  102. if self.verbose:
  103. print("[XX] Boss got command:")
  104. print(command)
  105. answer = [ 1, "Command not implemented" ]
  106. if type(command) != str:
  107. answer = isc.config.ccsession.create_answer(1, "bad command")
  108. else:
  109. cmd = command
  110. if cmd == "shutdown":
  111. print("[XX] got shutdown command")
  112. self.runnable = False
  113. answer = isc.config.ccsession.create_answer(0)
  114. elif cmd == "print_message":
  115. if args:
  116. print(args)
  117. answer = isc.config.ccsession.create_answer(0, args)
  118. elif cmd == "print_settings":
  119. print("Full Config:")
  120. full_config = self.ccs.get_full_config()
  121. for item in full_config:
  122. print(item + ": " + str(full_config[item]))
  123. answer = isc.config.ccsession.create_answer(0)
  124. else:
  125. answer = isc.config.ccsession.create_answer(1, "Unknown command")
  126. return answer
  127. def startup(self):
  128. """Start the BoB instance.
  129. Returns None if successful, otherwise an string describing the
  130. problem.
  131. """
  132. # try to connect to the c-channel daemon,
  133. # to see if it is already running
  134. c_channel_env = { "ISC_MSGQ_PORT": str(self.c_channel_port), }
  135. if self.verbose:
  136. sys.stdout.write("Checking for already running msgq\n")
  137. # try to connect, and if we can't wait a short while
  138. try:
  139. self.cc_session = isc.cc.Session(self.c_channel_port)
  140. return "msgq already running, cannot start"
  141. except isc.cc.session.SessionError:
  142. pass
  143. # start the c-channel daemon
  144. if self.verbose:
  145. sys.stdout.write("Starting msgq using port %d\n" %
  146. self.c_channel_port)
  147. try:
  148. c_channel = ProcessInfo("msgq", "msgq", c_channel_env, True)
  149. except Exception as e:
  150. return "Unable to start msgq; " + str(e)
  151. self.processes[c_channel.pid] = c_channel
  152. if self.verbose:
  153. sys.stdout.write("Started msgq (PID %d)\n" % c_channel.pid)
  154. # now connect to the c-channel
  155. cc_connect_start = time.time()
  156. while self.cc_session is None:
  157. # if we have been trying for "a while" give up
  158. if (time.time() - cc_connect_start) > 5:
  159. c_channel.process.kill()
  160. return "Unable to connect to c-channel after 5 seconds"
  161. # try to connect, and if we can't wait a short while
  162. try:
  163. self.cc_session = isc.cc.Session(self.c_channel_port)
  164. except isc.cc.session.SessionError:
  165. time.sleep(0.1)
  166. #self.cc_session.group_subscribe("Boss", "boss")
  167. # start the configuration manager
  168. if self.verbose:
  169. sys.stdout.write("Starting b10-cfgmgr\n")
  170. try:
  171. bind_cfgd = ProcessInfo("b10-cfgmgr", "b10-cfgmgr")
  172. except Exception as e:
  173. c_channel.process.kill()
  174. return "Unable to start b10-cfgmgr; " + str(e)
  175. self.processes[bind_cfgd.pid] = bind_cfgd
  176. if self.verbose:
  177. sys.stdout.write("Started b10-cfgmgr (PID %d)\n" % bind_cfgd.pid)
  178. # TODO: once this interface is done, replace self.cc_session
  179. # by this one
  180. # sleep until b10-cfgmgr is fully up and running, this is a good place
  181. # to have a (short) timeout on synchronized groupsend/receive
  182. time.sleep(1)
  183. if self.verbose:
  184. print("[XX] starting ccsession")
  185. self.ccs = isc.config.ModuleCCSession(SPECFILE_LOCATION, self.config_handler, self.command_handler)
  186. self.ccs.start()
  187. if self.verbose:
  188. print("[XX] ccsession started")
  189. # start the parking lot
  190. # XXX: this must be read from the configuration manager in the future
  191. # XXX: we hardcode port 5300
  192. if self.verbose:
  193. sys.stdout.write("Starting b10-auth on port 5300\n")
  194. try:
  195. auth = ProcessInfo("b10-auth", ["b10-auth", "-p", "5300"])
  196. except Exception as e:
  197. c_channel.process.kill()
  198. bind_cfgd.process.kill()
  199. return "Unable to start b10-auth; " + str(e)
  200. self.processes[auth.pid] = auth
  201. if self.verbose:
  202. sys.stdout.write("Started b10-auth (PID %d)\n" % auth.pid)
  203. # start the b10-cmdctl
  204. # XXX: we hardcode port 8080
  205. if self.verbose:
  206. sys.stdout.write("Starting b10-cmdctl on port 8080\n")
  207. try:
  208. cmd_ctrld = ProcessInfo("b10-cmdctl", ['b10-cmdctl'])
  209. except Exception as e:
  210. c_channel.process.kill()
  211. bind_cfgd.process.kill()
  212. auth.process.kill()
  213. return "Unable to start b10-cmdctl; " + str(e)
  214. self.processes[cmd_ctrld.pid] = cmd_ctrld
  215. if self.verbose:
  216. sys.stdout.write("Started b10-cmdctl (PID %d)\n" % cmd_ctrld.pid)
  217. self.runnable = True
  218. return None
  219. def stop_all_processes(self):
  220. """Stop all processes."""
  221. cmd = { "command": ['shutdown']}
  222. self.cc_session.group_sendmsg(cmd, 'Boss', 'Cmd-Ctrld')
  223. self.cc_session.group_sendmsg(cmd, "Boss", "ConfigManager")
  224. self.cc_session.group_sendmsg(cmd, "Boss", "Auth")
  225. def stop_process(self, process):
  226. """Stop the given process, friendly-like."""
  227. # XXX nothing yet
  228. pass
  229. def shutdown(self):
  230. """Stop the BoB instance."""
  231. if self.verbose:
  232. sys.stdout.write("Stopping the server.\n")
  233. # first try using the BIND 10 request to stop
  234. try:
  235. self.stop_all_processes()
  236. except:
  237. pass
  238. # XXX: some delay probably useful... how much is uncertain
  239. time.sleep(0.1)
  240. self.reap_children()
  241. # next try sending a SIGTERM
  242. processes_to_stop = list(self.processes.values())
  243. unstopped_processes = []
  244. for proc_info in processes_to_stop:
  245. if self.verbose:
  246. sys.stdout.write("Sending SIGTERM to %s (PID %d).\n" %
  247. (proc_info.name, proc_info.pid))
  248. try:
  249. proc_info.process.terminate()
  250. except OSError as o:
  251. # ignore these (usually ESRCH because the child
  252. # finally exited)
  253. pass
  254. # XXX: some delay probably useful... how much is uncertain
  255. time.sleep(0.1)
  256. self.reap_children()
  257. # finally, send a SIGKILL (unmaskable termination)
  258. processes_to_stop = unstopped_processes
  259. for proc_info in processes_to_stop:
  260. if self.verbose:
  261. sys.stdout.write("Sending SIGKILL to %s (PID %d).\n" %
  262. (proc_info.name, proc_info.pid))
  263. try:
  264. proc_info.process.kill()
  265. except OSError as o:
  266. # ignore these (usually ESRCH because the child
  267. # finally exited)
  268. pass
  269. if self.verbose:
  270. sys.stdout.write("All processes ended, server done.\n")
  271. def reap_children(self):
  272. """Check to see if any of our child processes have exited,
  273. and note this for later handling.
  274. """
  275. while True:
  276. try:
  277. (pid, exit_status) = os.waitpid(-1, os.WNOHANG)
  278. except OSError as o:
  279. if o.errno == errno.ECHILD: break
  280. # XXX: should be impossible to get any other error here
  281. raise
  282. if pid == 0: break
  283. if pid in self.processes:
  284. proc_info = self.processes.pop(pid)
  285. self.dead_processes[proc_info.pid] = proc_info
  286. if self.verbose:
  287. sys.stdout.write("Process %s (PID %d) died.\n" %
  288. (proc_info.name, proc_info.pid))
  289. if proc_info.name == "msgq":
  290. if self.verbose and self.runnable:
  291. sys.stdout.write(
  292. "The msgq process died, shutting down.\n")
  293. self.runnable = False
  294. else:
  295. sys.stdout.write("Unknown child pid %d exited.\n" % pid)
  296. # 'old' command style, uncommented for now
  297. # move the handling below move to command_handler please
  298. #def recv_and_process_cc_msg(self):
  299. #"""Receive and process the next message on the c-channel,
  300. #if any."""
  301. #self.ccs.checkCommand()
  302. #msg, envelope = self.cc_session.group_recvmsg(False)
  303. #print(msg)
  304. #if msg is None:
  305. # return
  306. #if not ((type(msg) is dict) and (type(envelope) is dict)):
  307. # if self.verbose:
  308. # sys.stdout.write("Non-dictionary message\n")
  309. # return
  310. #if not "command" in msg:
  311. # if self.verbose:
  312. # if "msg" in envelope:
  313. # del envelope['msg']
  314. # sys.stdout.write("Unknown message received\n")
  315. # sys.stdout.write(pprint.pformat(envelope) + "\n")
  316. # sys.stdout.write(pprint.pformat(msg) + "\n")
  317. # return
  318. #cmd = msg['command']
  319. #if not (type(cmd) is list):
  320. # if self.verbose:
  321. # sys.stdout.write("Non-list command\n")
  322. # return
  323. #
  324. # done checking and extracting... time to execute the command
  325. #if cmd[0] == "shutdown":
  326. # if self.verbose:
  327. # sys.stdout.write("shutdown command received\n")
  328. # self.runnable = False
  329. # # XXX: reply here?
  330. #elif cmd[0] == "getProcessList":
  331. # if self.verbose:
  332. # sys.stdout.write("getProcessList command received\n")
  333. # live_processes = [ ]
  334. # for proc_info in processes:
  335. # live_processes.append({ "name": proc_info.name,
  336. # "args": proc_info.args,
  337. # "pid": proc_info.pid, })
  338. # dead_processes = [ ]
  339. # for proc_info in dead_processes:
  340. # dead_processes.append({ "name": proc_info.name,
  341. # "args": proc_info.args, })
  342. # cc.group_reply(envelope, { "response": cmd,
  343. # "sent": msg["sent"],
  344. # "live_processes": live_processes,
  345. # "dead_processes": dead_processes, })
  346. #else:
  347. # if self.verbose:
  348. # sys.stdout.write("Unknown command %s\n" % str(cmd))
  349. def restart_processes(self):
  350. """Restart any dead processes."""
  351. # XXX: this needs a back-off algorithm
  352. # if we're shutting down, then don't restart
  353. if not self.runnable:
  354. return
  355. # otherwise look through each dead process and try to restart
  356. still_dead = {}
  357. for proc_info in self.dead_processes.values():
  358. if self.verbose:
  359. sys.stdout.write("Resurrecting dead %s process...\n" %
  360. proc_info.name)
  361. try:
  362. proc_info.respawn()
  363. self.processes[proc_info.pid] = proc_info
  364. if self.verbose:
  365. sys.stdout.write("Resurrected %s (PID %d)\n" %
  366. (proc_info.name, proc_info.pid))
  367. except:
  368. still_dead[proc_info.pid] = proc_info
  369. # remember any processes that refuse to be resurrected
  370. self.dead_processes = still_dead
  371. def reaper(signal_number, stack_frame):
  372. """A child process has died (SIGCHLD received)."""
  373. # don't do anything...
  374. # the Python signal handler has been set up to write
  375. # down a pipe, waking up our select() bit
  376. pass
  377. def get_signame(signal_number):
  378. """Return the symbolic name for a signal."""
  379. for sig in dir(signal):
  380. if sig.startswith("SIG") and sig[3].isalnum():
  381. if getattr(signal, sig) == signal_number:
  382. return sig
  383. return "Unknown signal %d" % signal_number
  384. # XXX: perhaps register atexit() function and invoke that instead
  385. def fatal_signal(signal_number, stack_frame):
  386. """We need to exit (SIGINT or SIGTERM received)."""
  387. global options
  388. global boss_of_bind
  389. if options.verbose:
  390. sys.stdout.write("Received %s.\n" % get_signame(signal_number))
  391. signal.signal(signal.SIGCHLD, signal.SIG_DFL)
  392. boss_of_bind.runnable = False
  393. def check_port(option, opt_str, value, parser):
  394. """Function to insure that the port we are passed is actually
  395. a valid port number. Used by OptionParser() on startup."""
  396. if not re.match('^(6553[0-5]|655[0-2]\d|65[0-4]\d\d|6[0-4]\d{3}|[1-5]\d{4}|[1-9]\d{0,3}|0)$', value):
  397. raise OptionValueError("%s requires a port number (0-65535)" % opt_str)
  398. parser.values.msgq_port = value
  399. def main():
  400. global options
  401. global boss_of_bind
  402. # Parse any command-line options.
  403. parser = OptionParser(version=__version__)
  404. parser.add_option("-v", "--verbose", dest="verbose", action="store_true",
  405. help="display more about what is going on")
  406. parser.add_option("-m", "--msgq-port", dest="msgq_port", type="string",
  407. action="callback", callback=check_port, default="9912",
  408. help="port the msgq daemon will use")
  409. (options, args) = parser.parse_args()
  410. # Announce startup.
  411. if options.verbose:
  412. sys.stdout.write("BIND 10 %s\n" % __version__)
  413. # TODO: set process name, perhaps by:
  414. # http://code.google.com/p/procname/
  415. # http://github.com/lericson/procname/
  416. # Create wakeup pipe for signal handlers
  417. wakeup_pipe = os.pipe()
  418. signal.set_wakeup_fd(wakeup_pipe[1])
  419. # Set signal handlers for catching child termination, as well
  420. # as our own demise.
  421. signal.signal(signal.SIGCHLD, reaper)
  422. signal.siginterrupt(signal.SIGCHLD, False)
  423. signal.signal(signal.SIGINT, fatal_signal)
  424. signal.signal(signal.SIGTERM, fatal_signal)
  425. # Go bob!
  426. boss_of_bind = BoB(int(options.msgq_port), options.verbose)
  427. startup_result = boss_of_bind.startup()
  428. if startup_result:
  429. sys.stderr.write("Error on startup: %s\n" % startup_result)
  430. sys.exit(1)
  431. # In our main loop, we check for dead processes or messages
  432. # on the c-channel.
  433. wakeup_fd = wakeup_pipe[0]
  434. ccs_fd = boss_of_bind.ccs.get_socket().fileno()
  435. while boss_of_bind.runnable:
  436. # clean up any processes that exited
  437. boss_of_bind.reap_children()
  438. boss_of_bind.restart_processes()
  439. # XXX: get time for next restart for timeout
  440. # select() can raise EINTR when a signal arrives,
  441. # even if they are resumable, so we have to catch
  442. # the exception
  443. try:
  444. (rlist, wlist, xlist) = select.select([wakeup_fd, ccs_fd], [], [])
  445. except select.error as err:
  446. if err.args[0] == errno.EINTR:
  447. (rlist, wlist, xlist) = ([], [], [])
  448. else:
  449. sys.stderr.write("Error with select(); %s\n" % err)
  450. break
  451. for fd in rlist + xlist:
  452. if fd == ccs_fd:
  453. boss_of_bind.ccs.check_command()
  454. elif fd == wakeup_fd:
  455. os.read(wakeup_fd, 32)
  456. # shutdown
  457. signal.signal(signal.SIGCHLD, signal.SIG_DFL)
  458. boss_of_bind.shutdown()
  459. if __name__ == "__main__":
  460. main()