|
@@ -92,51 +92,6 @@ VERSION = "bind10 20110223 (BIND 10 @PACKAGE_VERSION@)"
|
|
|
# This is for boot_time of Boss
|
|
|
_BASETIME = time.gmtime()
|
|
|
|
|
|
-class RestartSchedule:
|
|
|
- """
|
|
|
-Keeps state when restarting something (in this case, a process).
|
|
|
-
|
|
|
-When a process dies unexpectedly, we need to restart it. However, if
|
|
|
-it fails to restart for some reason, then we should not simply keep
|
|
|
-restarting it at high speed.
|
|
|
-
|
|
|
-A more sophisticated algorithm can be developed, but for now we choose
|
|
|
-a simple set of rules:
|
|
|
-
|
|
|
- * If a process was been running for >=10 seconds, we restart it
|
|
|
- right away.
|
|
|
- * If a process was running for <10 seconds, we wait until 10 seconds
|
|
|
- after it was started.
|
|
|
-
|
|
|
-To avoid programs getting into lockstep, we use a normal distribution
|
|
|
-to avoid being restarted at exactly 10 seconds."""
|
|
|
-
|
|
|
- def __init__(self, restart_frequency=10.0):
|
|
|
- self.restart_frequency = restart_frequency
|
|
|
- self.run_start_time = None
|
|
|
- self.run_stop_time = None
|
|
|
- self.restart_time = None
|
|
|
-
|
|
|
- def set_run_start_time(self, when=None):
|
|
|
- if when is None:
|
|
|
- when = time.time()
|
|
|
- self.run_start_time = when
|
|
|
- sigma = self.restart_frequency * 0.05
|
|
|
- self.restart_time = when + random.normalvariate(self.restart_frequency,
|
|
|
- sigma)
|
|
|
-
|
|
|
- def set_run_stop_time(self, when=None):
|
|
|
- """We don't actually do anything with stop time now, but it
|
|
|
- might be useful for future algorithms."""
|
|
|
- if when is None:
|
|
|
- when = time.time()
|
|
|
- self.run_stop_time = when
|
|
|
-
|
|
|
- def get_restart_time(self, when=None):
|
|
|
- if when is None:
|
|
|
- when = time.time()
|
|
|
- return max(when, self.restart_time)
|
|
|
-
|
|
|
class ProcessInfoError(Exception): pass
|
|
|
|
|
|
class ProcessInfo:
|
|
@@ -151,7 +106,6 @@ class ProcessInfo:
|
|
|
self.env = env
|
|
|
self.dev_null_stdout = dev_null_stdout
|
|
|
self.dev_null_stderr = dev_null_stderr
|
|
|
- self.restart_schedule = RestartSchedule()
|
|
|
self.uid = uid
|
|
|
self.username = username
|
|
|
self.process = None
|
|
@@ -200,7 +154,6 @@ class ProcessInfo:
|
|
|
env=spawn_env,
|
|
|
preexec_fn=self._preexec_work)
|
|
|
self.pid = self.process.pid
|
|
|
- self.restart_schedule.set_run_start_time()
|
|
|
|
|
|
# spawn() and respawn() are the same for now, but in the future they
|
|
|
# may have different functionality
|
|
@@ -247,8 +200,6 @@ class BoB:
|
|
|
self.cfg_start_dhcp6 = False
|
|
|
self.cfg_start_dhcp4 = False
|
|
|
self.curproc = None
|
|
|
- # XXX: Not used now, waits for reintroduction of restarts.
|
|
|
- self.dead_processes = {}
|
|
|
self.msgq_socket_file = msgq_socket_file
|
|
|
self.nocache = nocache
|
|
|
self.component_config = {}
|
|
@@ -257,6 +208,8 @@ class BoB:
|
|
|
# inapropriate. But as the code isn't probably completely ready
|
|
|
# for it, we leave it at components for now.
|
|
|
self.components = {}
|
|
|
+ # Simple list of components that died and need to wait for a
|
|
|
+ # restart. Components manage their own restart schedule now.
|
|
|
self.components_to_restart = []
|
|
|
self.runnable = False
|
|
|
self.uid = setuid
|
|
@@ -838,10 +791,15 @@ class BoB:
|
|
|
timeout value.
|
|
|
|
|
|
"""
|
|
|
+ if not self.runnable:
|
|
|
+ return 0
|
|
|
still_dead = []
|
|
|
+ # keep track of the first time we need to check this queue again,
|
|
|
+ # if at all
|
|
|
next_restart_time = None
|
|
|
+ now = time.time()
|
|
|
for component in self.components_to_restart:
|
|
|
- if not component.restart():
|
|
|
+ if not component.restart(now):
|
|
|
still_dead.append(component)
|
|
|
if next_restart_time is None or\
|
|
|
next_restart_time > component.get_restart_time():
|
|
@@ -1033,10 +991,6 @@ def main():
|
|
|
while boss_of_bind.runnable:
|
|
|
# clean up any processes that exited
|
|
|
boss_of_bind.reap_children()
|
|
|
- # XXX: As we don't put anything into the processes to be restarted,
|
|
|
- # this is really a complicated NOP. But we will try to reintroduce
|
|
|
- # delayed restarts, so it stays here for now, until we find out if
|
|
|
- # it's useful.
|
|
|
next_restart = boss_of_bind.restart_processes()
|
|
|
if next_restart is None:
|
|
|
wait_time = None
|