|
@@ -25,16 +25,40 @@ import sys
|
|
import re
|
|
import re
|
|
import errno
|
|
import errno
|
|
import time
|
|
import time
|
|
|
|
+import select
|
|
from optparse import OptionParser, OptionValueError
|
|
from optparse import OptionParser, OptionValueError
|
|
|
|
|
|
import ISC.CC
|
|
import ISC.CC
|
|
|
|
|
|
# This is the version that gets displayed to the user.
|
|
# This is the version that gets displayed to the user.
|
|
-__version__ = "v20091028 (Paving the DNS Parking Lot)"
|
|
|
|
|
|
+__version__ = "v20091030 (Paving the DNS Parking Lot)"
|
|
|
|
|
|
# Nothing at all to do with the 1990-12-10 article here:
|
|
# Nothing at all to do with the 1990-12-10 article here:
|
|
# http://www.subgenius.com/subg-digest/v2/0056.html
|
|
# http://www.subgenius.com/subg-digest/v2/0056.html
|
|
|
|
|
|
|
|
+class ProcessInfo:
|
|
|
|
+ """Information about a process"""
|
|
|
|
+
|
|
|
|
+ dev_null = open("/dev/null", "w")
|
|
|
|
+
|
|
|
|
+ def _spawn(self):
|
|
|
|
+ self.process = subprocess.Popen(self.args,
|
|
|
|
+ stdin=subprocess.PIPE,
|
|
|
|
+ stdout=self.dev_null,
|
|
|
|
+ stderr=self.dev_null,
|
|
|
|
+ close_fds=True,
|
|
|
|
+ env=self.env,)
|
|
|
|
+ self.pid = self.process.pid
|
|
|
|
+
|
|
|
|
+ def __init__(self, name, args, env={}):
|
|
|
|
+ self.name = name
|
|
|
|
+ self.args = args
|
|
|
|
+ self.env = env
|
|
|
|
+ self._spawn()
|
|
|
|
+
|
|
|
|
+ def respawn(self):
|
|
|
|
+ self._spawn()
|
|
|
|
+
|
|
class BoB:
|
|
class BoB:
|
|
"""Boss of BIND class."""
|
|
"""Boss of BIND class."""
|
|
def __init__(self, c_channel_port=9912, verbose=False):
|
|
def __init__(self, c_channel_port=9912, verbose=False):
|
|
@@ -47,11 +71,10 @@ class BoB:
|
|
"""
|
|
"""
|
|
self.verbose = True
|
|
self.verbose = True
|
|
self.c_channel_port = c_channel_port
|
|
self.c_channel_port = c_channel_port
|
|
- self.cc_process = None
|
|
|
|
self.cc_session = None
|
|
self.cc_session = None
|
|
self.processes = {}
|
|
self.processes = {}
|
|
self.dead_processes = {}
|
|
self.dead_processes = {}
|
|
- self.component_processes = {}
|
|
|
|
|
|
+ self.runnable = False
|
|
|
|
|
|
def startup(self):
|
|
def startup(self):
|
|
"""Start the BoB instance.
|
|
"""Start the BoB instance.
|
|
@@ -59,23 +82,17 @@ class BoB:
|
|
Returns None if successful, otherwise an string describing the
|
|
Returns None if successful, otherwise an string describing the
|
|
problem.
|
|
problem.
|
|
"""
|
|
"""
|
|
- dev_null = open("/dev/null", "w")
|
|
|
|
# start the c-channel daemon
|
|
# start the c-channel daemon
|
|
if self.verbose:
|
|
if self.verbose:
|
|
sys.stdout.write("Starting msgq using port %d\n" % self.c_channel_port)
|
|
sys.stdout.write("Starting msgq using port %d\n" % self.c_channel_port)
|
|
c_channel_env = { "ISC_MSGQ_PORT": str(self.c_channel_port), }
|
|
c_channel_env = { "ISC_MSGQ_PORT": str(self.c_channel_port), }
|
|
try:
|
|
try:
|
|
- c_channel = subprocess.Popen("msgq",
|
|
|
|
- stdin=subprocess.PIPE,
|
|
|
|
- stdout=dev_null,
|
|
|
|
- stderr=dev_null,
|
|
|
|
- close_fds=True,
|
|
|
|
- env=c_channel_env,)
|
|
|
|
|
|
+ c_channel = ProcessInfo("msgq", "msgq", c_channel_env)
|
|
except:
|
|
except:
|
|
return "Unable to start msgq"
|
|
return "Unable to start msgq"
|
|
self.processes[c_channel.pid] = c_channel
|
|
self.processes[c_channel.pid] = c_channel
|
|
if self.verbose:
|
|
if self.verbose:
|
|
- sys.stdout.write("Started msgq with PID %d\n" % c_channel.pid)
|
|
|
|
|
|
+ sys.stdout.write("Started msgq (PID %d)\n" % c_channel.pid)
|
|
|
|
|
|
# now connect to the c-channel
|
|
# now connect to the c-channel
|
|
cc_connect_start = time.time()
|
|
cc_connect_start = time.time()
|
|
@@ -95,42 +112,30 @@ class BoB:
|
|
if self.verbose:
|
|
if self.verbose:
|
|
sys.stdout.write("Starting bind-cfgd\n")
|
|
sys.stdout.write("Starting bind-cfgd\n")
|
|
try:
|
|
try:
|
|
- bind_cfgd = subprocess.Popen("bind-cfgd",
|
|
|
|
- stdin=dev_null,
|
|
|
|
- stdout=dev_null,
|
|
|
|
- stderr=dev_null,
|
|
|
|
- close_fds=True,
|
|
|
|
- env={},)
|
|
|
|
|
|
+ bind_cfgd = ProcessInfo("bind-cfgd", "bind-cfgd")
|
|
except:
|
|
except:
|
|
- c_channel.kill()
|
|
|
|
|
|
+ c_channel.process.kill()
|
|
return "Unable to start bind-cfgd"
|
|
return "Unable to start bind-cfgd"
|
|
self.processes[bind_cfgd.pid] = bind_cfgd
|
|
self.processes[bind_cfgd.pid] = bind_cfgd
|
|
if self.verbose:
|
|
if self.verbose:
|
|
- sys.stdout.write("Started bind-cfgd with PID %d\n" % bind_cfgd.pid)
|
|
|
|
|
|
+ sys.stdout.write("Started bind-cfgd (PID %d)\n" % bind_cfgd.pid)
|
|
|
|
|
|
# start the parking lot
|
|
# start the parking lot
|
|
# XXX: this must be read from the configuration manager in the future
|
|
# XXX: this must be read from the configuration manager in the future
|
|
# XXX: we hardcode port 5300
|
|
# XXX: we hardcode port 5300
|
|
if self.verbose:
|
|
if self.verbose:
|
|
- sys.stdout.write("Starting parkinglot\n")
|
|
|
|
|
|
+ sys.stdout.write("Starting parkinglot on port 5300\n")
|
|
try:
|
|
try:
|
|
- parkinglot = subprocess.Popen(["parkinglot", "-p", "5300",],
|
|
|
|
- stdin=dev_null,
|
|
|
|
- stdout=dev_null,
|
|
|
|
- stderr=dev_null,
|
|
|
|
- close_fds=True,
|
|
|
|
- env={},)
|
|
|
|
|
|
+ parkinglot = ProcessInfo("parkinglot", ["parkinglot", "-p", "5300"])
|
|
except:
|
|
except:
|
|
c_channel.kill()
|
|
c_channel.kill()
|
|
bind_cfgd.kill()
|
|
bind_cfgd.kill()
|
|
return "Unable to start parkinglot"
|
|
return "Unable to start parkinglot"
|
|
self.processes[parkinglot.pid] = parkinglot
|
|
self.processes[parkinglot.pid] = parkinglot
|
|
if self.verbose:
|
|
if self.verbose:
|
|
- sys.stdout.write("Started parkinglot with PID %d\n" % parkinglot.pid)
|
|
|
|
|
|
+ sys.stdout.write("Started parkinglot (PID %d)\n" % parkinglot.pid)
|
|
|
|
|
|
- # remember our super-important process
|
|
|
|
- self.cc_process = c_channel
|
|
|
|
-
|
|
|
|
|
|
+ self.runnable = True
|
|
return None
|
|
return None
|
|
|
|
|
|
def stop_all_processes(self):
|
|
def stop_all_processes(self):
|
|
@@ -147,36 +152,37 @@ class BoB:
|
|
if self.verbose:
|
|
if self.verbose:
|
|
sys.stdout.write("Stopping the server.\n")
|
|
sys.stdout.write("Stopping the server.\n")
|
|
# first try using the BIND 10 request to stop
|
|
# first try using the BIND 10 request to stop
|
|
- if self.cc_session:
|
|
|
|
- try:
|
|
|
|
- self.stop_all_processes()
|
|
|
|
- except:
|
|
|
|
- pass
|
|
|
|
|
|
+ try:
|
|
|
|
+ self.stop_all_processes()
|
|
|
|
+ except:
|
|
|
|
+ pass
|
|
time.sleep(0.1) # XXX: some delay probably useful... how much is uncertain
|
|
time.sleep(0.1) # XXX: some delay probably useful... how much is uncertain
|
|
# next try sending a SIGTERM
|
|
# next try sending a SIGTERM
|
|
processes_to_stop = list(self.processes.values())
|
|
processes_to_stop = list(self.processes.values())
|
|
unstopped_processes = []
|
|
unstopped_processes = []
|
|
- for process in processes_to_stop:
|
|
|
|
|
|
+ for proc_info in processes_to_stop:
|
|
if self.verbose:
|
|
if self.verbose:
|
|
- sys.stdout.write("Sending SIGTERM to process %d.\n" % process.pid)
|
|
|
|
|
|
+ sys.stdout.write("Sending SIGTERM to %s (PID %d).\n" %
|
|
|
|
+ (proc_info.name, proc_info.pid))
|
|
try:
|
|
try:
|
|
- process.terminate()
|
|
|
|
|
|
+ proc_info.process.terminate()
|
|
except OSError as o:
|
|
except OSError as o:
|
|
# ignore these (usually ESRCH because the child
|
|
# ignore these (usually ESRCH because the child
|
|
# finally exited)
|
|
# finally exited)
|
|
pass
|
|
pass
|
|
time.sleep(0.1) # XXX: some delay probably useful... how much is uncertain
|
|
time.sleep(0.1) # XXX: some delay probably useful... how much is uncertain
|
|
- for process in processes_to_stop:
|
|
|
|
- (pid, exit_status) = os.waitpid(process.pid, os.WNOHANG)
|
|
|
|
|
|
+ for proc_info in processes_to_stop:
|
|
|
|
+ (pid, exit_status) = os.waitpid(proc_info.pid, os.WNOHANG)
|
|
if pid == 0:
|
|
if pid == 0:
|
|
- unstopped_processes.append(process)
|
|
|
|
|
|
+ unstopped_processes.append(proc_info)
|
|
# finally, send a SIGKILL (unmaskable termination)
|
|
# finally, send a SIGKILL (unmaskable termination)
|
|
processes_to_stop = unstopped_processes
|
|
processes_to_stop = unstopped_processes
|
|
- for process in processes_to_stop:
|
|
|
|
|
|
+ for proc_info in processes_to_stop:
|
|
if self.verbose:
|
|
if self.verbose:
|
|
- sys.stdout.write("Sending SIGKILL to process %d.\n" % process.pid)
|
|
|
|
|
|
+ sys.stdout.write("Sending SIGKILL to %s (PID %d).\n" %
|
|
|
|
+ (proc_info.name, proc_info.pid))
|
|
try:
|
|
try:
|
|
- process.kill()
|
|
|
|
|
|
+ proc_info.process.kill()
|
|
except OSError as o:
|
|
except OSError as o:
|
|
# ignore these (usually ESRCH because the child
|
|
# ignore these (usually ESRCH because the child
|
|
# finally exited)
|
|
# finally exited)
|
|
@@ -192,16 +198,41 @@ class BoB:
|
|
Returns True if everything is okay, or False if a fatal error
|
|
Returns True if everything is okay, or False if a fatal error
|
|
has been detected and the program should exit.
|
|
has been detected and the program should exit.
|
|
"""
|
|
"""
|
|
- process = self.processes.pop(pid)
|
|
|
|
- self.dead_processes[process.pid] = process
|
|
|
|
|
|
+ proc_info = self.processes.pop(pid)
|
|
|
|
+ self.dead_processes[proc_info.pid] = proc_info
|
|
if self.verbose:
|
|
if self.verbose:
|
|
- sys.stdout.write("Process %d died.\n" % pid)
|
|
|
|
- if self.cc_process and (pid == self.cc_process.pid):
|
|
|
|
|
|
+ sys.stdout.write("Process %s (PID %d) died.\n" %
|
|
|
|
+ (proc_info.name, proc_info.pid))
|
|
|
|
+ if proc_info.name == "msgq":
|
|
if self.verbose:
|
|
if self.verbose:
|
|
sys.stdout.write("The msgq process died, shutting down.\n")
|
|
sys.stdout.write("The msgq process died, shutting down.\n")
|
|
- return False
|
|
|
|
- else:
|
|
|
|
- return True
|
|
|
|
|
|
+ self.runnable = False
|
|
|
|
+
|
|
|
|
+ def recv_and_process_cc_msg(self):
|
|
|
|
+ """Receive and process the next message on the c-channel,
|
|
|
|
+ if any."""
|
|
|
|
+ routing, data = self.cc_session.group_recvmsg(False)
|
|
|
|
+ print("routing", routing)
|
|
|
|
+ print("data", data)
|
|
|
|
+
|
|
|
|
+ def restart_processes(self):
|
|
|
|
+ """Restart any dead processes."""
|
|
|
|
+ # XXX: this needs a back-off algorithm
|
|
|
|
+ still_dead = {}
|
|
|
|
+ for proc_info in self.dead_processes.values():
|
|
|
|
+ if self.verbose:
|
|
|
|
+ sys.stdout.write("Resurrecting dead %s process...\n" %
|
|
|
|
+ proc_info.name)
|
|
|
|
+ try:
|
|
|
|
+ proc_info.respawn()
|
|
|
|
+ self.processes[proc_info.pid] = proc_info
|
|
|
|
+ if self.verbose:
|
|
|
|
+ sys.stdout.write("Resurrected %s (PID %d)\n" %
|
|
|
|
+ (proc_info.name, proc_info.pid))
|
|
|
|
+ except:
|
|
|
|
+ still_dead[proc_info.pid] = proc_info
|
|
|
|
+ # remember any processes that refuse to be resurrected
|
|
|
|
+ self.dead_processes = still_dead
|
|
|
|
|
|
if __name__ == "__main__":
|
|
if __name__ == "__main__":
|
|
def reaper(signal_number, stack_frame):
|
|
def reaper(signal_number, stack_frame):
|
|
@@ -214,13 +245,9 @@ if __name__ == "__main__":
|
|
if o.errno == errno.ECHILD: break
|
|
if o.errno == errno.ECHILD: break
|
|
raise
|
|
raise
|
|
if pid == 0: break
|
|
if pid == 0: break
|
|
- if not boss_of_bind.reap(pid, exit_status):
|
|
|
|
- signal.signal(signal.SIGCHLD, signal.SIG_DFL)
|
|
|
|
- boss_of_bind.shutdown()
|
|
|
|
- sys.exit(0)
|
|
|
|
|
|
+ if boss_of_bind:
|
|
|
|
+ boss_of_bind.reap(pid, exit_status)
|
|
|
|
|
|
-
|
|
|
|
-
|
|
|
|
def get_signame(signal_number):
|
|
def get_signame(signal_number):
|
|
"""Return the symbolic name for a signal."""
|
|
"""Return the symbolic name for a signal."""
|
|
for sig in dir(signal):
|
|
for sig in dir(signal):
|
|
@@ -232,14 +259,11 @@ if __name__ == "__main__":
|
|
# XXX: perhaps register atexit() function and invoke that instead
|
|
# XXX: perhaps register atexit() function and invoke that instead
|
|
def fatal_signal(signal_number, stack_frame):
|
|
def fatal_signal(signal_number, stack_frame):
|
|
"""We need to exit (SIGINT or SIGTERM received)."""
|
|
"""We need to exit (SIGINT or SIGTERM received)."""
|
|
- global boss_of_bind
|
|
|
|
global options
|
|
global options
|
|
if options.verbose:
|
|
if options.verbose:
|
|
sys.stdout.write("Received %s.\n" % get_signame(signal_number))
|
|
sys.stdout.write("Received %s.\n" % get_signame(signal_number))
|
|
signal.signal(signal.SIGCHLD, signal.SIG_DFL)
|
|
signal.signal(signal.SIGCHLD, signal.SIG_DFL)
|
|
- if boss_of_bind:
|
|
|
|
- boss_of_bind.shutdown()
|
|
|
|
- sys.exit(0)
|
|
|
|
|
|
+ boss_of_bind.runnable = False
|
|
|
|
|
|
def check_port(option, opt_str, value, parser):
|
|
def check_port(option, opt_str, value, parser):
|
|
"""Function to insure that the port we are passed is actually
|
|
"""Function to insure that the port we are passed is actually
|
|
@@ -265,6 +289,10 @@ if __name__ == "__main__":
|
|
# http://code.google.com/p/procname/
|
|
# http://code.google.com/p/procname/
|
|
# http://github.com/lericson/procname/
|
|
# http://github.com/lericson/procname/
|
|
|
|
|
|
|
|
+ # Create wakeup pipe for signal handlers
|
|
|
|
+ wakeup_pipe = os.pipe()
|
|
|
|
+ signal.set_wakeup_fd(wakeup_pipe[1])
|
|
|
|
+
|
|
# Set signal handlers for catching child termination, as well
|
|
# Set signal handlers for catching child termination, as well
|
|
# as our own demise.
|
|
# as our own demise.
|
|
signal.signal(signal.SIGCHLD, reaper)
|
|
signal.signal(signal.SIGCHLD, reaper)
|
|
@@ -279,6 +307,36 @@ if __name__ == "__main__":
|
|
sys.stderr.write("Error on startup: %s\n" % startup_result)
|
|
sys.stderr.write("Error on startup: %s\n" % startup_result)
|
|
sys.exit(1)
|
|
sys.exit(1)
|
|
|
|
|
|
- while True:
|
|
|
|
- time.sleep(1)
|
|
|
|
|
|
+ # In our main loop, we check for dead processes or messages
|
|
|
|
+ # on the c-channel.
|
|
|
|
+ event_poller = select.poll()
|
|
|
|
+ wakeup_fd = wakeup_pipe[0]
|
|
|
|
+ event_poller.register(wakeup_fd, select.POLLIN)
|
|
|
|
+ cc_fd = boss_of_bind.cc_session._socket.fileno()
|
|
|
|
+ event_poller.register(cc_fd, select.POLLIN)
|
|
|
|
+ while boss_of_bind.runnable:
|
|
|
|
+ # XXX: get time for next restart for poll
|
|
|
|
+
|
|
|
|
+ # poll() can raise EINTR when a signal arrives,
|
|
|
|
+ # even if they are resumable, so we have to catch
|
|
|
|
+ # the exception
|
|
|
|
+ try:
|
|
|
|
+ events = event_poller.poll()
|
|
|
|
+ except select.error as err:
|
|
|
|
+ if err.args[0] == errno.EINTR:
|
|
|
|
+ events = []
|
|
|
|
+ else:
|
|
|
|
+ sys.stderr.write("Error with poll(); %s\n" % err)
|
|
|
|
+ break
|
|
|
|
+
|
|
|
|
+ for (fd, event) in events:
|
|
|
|
+ if fd == cc_fd:
|
|
|
|
+ boss_of_bind.recv_and_process_cc_msg()
|
|
|
|
+ elif fd == wakeup_fd:
|
|
|
|
+ os.read(wakeup_fd, 32)
|
|
|
|
+
|
|
|
|
+ boss_of_bind.restart_processes()
|
|
|
|
|
|
|
|
+ # shutdown
|
|
|
|
+ signal.signal(signal.SIGCHLD, signal.SIG_DFL)
|
|
|
|
+ boss_of_bind.shutdown()
|