component.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447
  1. # Copyright (C) 2011 Internet Systems Consortium, Inc. ("ISC")
  2. #
  3. # Permission to use, copy, modify, and distribute this software for any
  4. # purpose with or without fee is hereby granted, provided that the above
  5. # copyright notice and this permission notice appear in all copies.
  6. #
  7. # THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SYSTEMS CONSORTIUM
  8. # DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL
  9. # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
  10. # INTERNET SYSTEMS CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT,
  11. # INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
  12. # FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
  13. # NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
  14. # WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  15. """
  16. Module for managing components (abstraction of process). It allows starting
  17. them in given order, handling when they crash (what happens depends on kind
  18. of component) and shutting down. It also handles the configuration of this.
  19. Dependencies between them are not yet handled. It might turn out they are
  20. needed, in that case they will be added sometime in future.
  21. """
  22. import isc.bind10.sockcreator
  23. import isc.log
  24. from isc.log_messages.bind10_messages import *
  25. import time
  26. from bind10_config import LIBEXECDIR
  27. import os
# Logger shared by everything in this module; it is created under the
# name "boss".
logger = isc.log.Logger("boss")
# Debug levels passed as the first argument to logger.debug() further down:
# DBG_TRACE_DATA for the plan build/run summaries, DBG_TRACE_DETAILED for
# the per-task message inside Configurator._run_plan.
DBG_TRACE_DATA = 20
DBG_TRACE_DETAILED = 80
  31. class Component:
  32. """
  33. This represents a single component. It has some defaults of behaviour,
  34. which should be reasonable for majority of ordinary components, but
  35. it might be inherited and modified for special-purpose components,
  36. like the core modules with different ways of starting up.
  37. """
  38. def __init__(self, process, boss, kind, address=None, params=None):
  39. """
  40. Creates the component in not running mode.
  41. The parameters are:
  42. - `process` is the name of the process to start.
  43. - `boss` the boss object to plug into. The component needs to plug
  44. into it to know when it failed, etc.
  45. - `kind` is the kind of component. It may be one of:
  46. * 'core' means the system can't run without it and it can't be
  47. safely restarted. If it does not start, the system is brought
  48. down. If it crashes, the system is turned off as well (with
  49. non-zero exit status).
  50. * 'needed' means the system is able to restart the component,
  51. but it is vital part of the service (like auth server). If
  52. it fails to start or crashes in less than 10s after the first
  53. startup, the system is brought down. If it crashes later on,
  54. it is restarted.
  55. * 'dispensable' means the component should be running, but if it
  56. doesn't start or crashes for some reason, the system simply tries
  57. to restart it and keeps running.
  58. - `address` is the address on message bus. It is used to ask it to
  59. shut down at the end. If you specialize the class for a component
  60. that is shut down differently, it might be None.
  61. - `params` is a list of parameters to pass to the process when it
  62. starts. It is currently unused and this support is left out for
  63. now.
  64. """
  65. if kind not in ['core', 'needed', 'dispensable']:
  66. raise ValueError('Component kind can not be ' + kind)
  67. self.__running = False
  68. # Dead like really dead. No resurrection possible.
  69. self.__dead = False
  70. self._kind = kind
  71. self._boss = boss
  72. self._process = process
  73. self._start_func = None
  74. self._address = address
  75. self._params = params
  76. def start(self):
  77. """
  78. Start the component for the first time or restart it. If you need to
  79. modify the way a component is started, do not replace this method,
  80. but start_internal. This one does some more bookkeeping around.
  81. If you try to start an already running component, it raises ValueError.
  82. """
  83. if self.__dead:
  84. raise ValueError("Can't resurrect already dead component")
  85. if self.running():
  86. raise ValueError("Can't start already running component")
  87. logger.info(BIND10_COMPONENT_START, self.name())
  88. self.__running = True
  89. self.__start_time = time.time()
  90. try:
  91. self.start_internal()
  92. except Exception as e:
  93. logger.error(BIND10_COMPONENT_START_EXCEPTION, self.name(), e)
  94. self.failed()
  95. raise
  96. def start_internal(self):
  97. """
  98. This method does the actual starting of a process. If you need to
  99. change the way the component is started, replace this method.
  100. """
  101. # This one is not tested. For one, it starts a real process
  102. # which is out of scope of unit tests, for another, it just
  103. # delegates the starting to other function in boss (if a derived
  104. # class does not provide an override function), which is tested
  105. # by use.
  106. if self._start_func is not None:
  107. procinfo = self._start_func()
  108. else:
  109. # TODO Handle params, etc
  110. procinfo = self._boss.start_simple(self._process)
  111. self._procinfo = procinfo
  112. self._boss.register_process(self.pid(), self)
  113. def stop(self):
  114. """
  115. Stop the component. If you need to modify the way a component is
  116. stopped, do not replace this method, but stop_internal. This one
  117. does some more bookkeeping.
  118. If you try to stop a component that is not running, it raises
  119. ValueError.
  120. """
  121. # This is not tested. It talks with the outher world, which is out
  122. # of scope of unittests.
  123. if not self.running():
  124. raise ValueError("Can't stop a component which is not running")
  125. logger.info(BIND10_COMPONENT_STOP, self.name())
  126. self.__running = False
  127. self.stop_internal()
  128. def stop_internal(self):
  129. """
  130. This is the method that does the actual stopping of a component.
  131. You can replace this method if you want a different way to do it.
  132. """
  133. self._boss.stop_process(self._process, self._address)
  134. def failed(self):
  135. """
  136. Notify the component it crashed. This will be called from boss object.
  137. If you try to call failed on a component that is not running,
  138. a ValueError is raised.
  139. """
  140. if not self.running():
  141. raise ValueError("Can't fail component that isn't running")
  142. self.__running = False
  143. self.failed_internal()
  144. # If it is a core component or the needed component failed to start
  145. # (including it stopped really soon)
  146. if self._kind == 'core' or \
  147. (self._kind == 'needed' and time.time() - 10 < self.__start_time):
  148. self.__dead = True
  149. logger.fatal(BIND10_COMPONENT_UNSATISFIED, self.name())
  150. self._boss.component_shutdown(1)
  151. # This means we want to restart
  152. else:
  153. logger.warn(BIND10_COMPONENT_RESTART, self.name())
  154. self.start()
  155. def failed_internal(self):
  156. """
  157. This method is called from failed. You can replace it if you need
  158. some specific behaviour when the component crashes. The default
  159. implementation is empty.
  160. Do not raise exceptions from here, please. The propper shutdown
  161. would have not happened.
  162. """
  163. pass
  164. def running(self):
  165. """
  166. Informs if the component is currently running. It assumes the failed
  167. is called whenever the component really fails and there might be some
  168. time in between actual failure and the call.
  169. """
  170. return self.__running
  171. def name(self):
  172. """
  173. Returns human-readable name of the component. This is usually the
  174. name of the executable, but it might be something different in a
  175. derived class.
  176. """
  177. return self._process
  178. def pid(self):
  179. """
  180. Provides a PID of a process, if the component is real running process.
  181. This implementation expects it to be a real process, but derived class
  182. may return None in case the component is something else.
  183. """
  184. return self._procinfo.pid
# These are specialized components. Some of them are components which need
# special care (like the message queue or socket creator) or they need
# some parameters constructed from Boss's command line. They are not tested
# currently, because it is not clear what to test on them anyway and they just
# delegate the work to the boss.
  190. class SockCreator(Component):
  191. """
  192. The socket creator component. Will start and stop the socket creator
  193. accordingly.
  194. """
  195. def start_internal(self):
  196. self._boss.curproc = 'b10-sockcreator'
  197. self.__creator = isc.bind10.sockcreator.Creator(LIBEXECDIR + ':' +
  198. os.environ['PATH'])
  199. self._boss.register_process(self.pid(), self)
  200. def stop_internal(self):
  201. if self.__creator is None:
  202. return
  203. self.__creator.terminate()
  204. self.__creator = None
  205. def pid(self):
  206. """
  207. Pid of the socket creator. It is provided differently from a usual
  208. component.
  209. """
  210. return self.__creator.pid()
  211. class Msgq(Component):
  212. """
  213. The message queue. Starting is passed to boss, stopping is not supported
  214. and we leave the boss kill it by signal.
  215. """
  216. def __init__(self, process, boss, kind, address, params):
  217. Component.__init__(self, process, boss, kind)
  218. self._start_func = boss.start_msgq
  219. def stop_internal(self):
  220. pass # Wait for the boss to actually kill it. There's no stop command.
  221. class CfgMgr(Component):
  222. def __init__(self, process, boss, kind, address, params):
  223. Component.__init__(self, process, boss, kind)
  224. self._start_func = boss.start_cfgmgr
  225. self._address = 'ConfigManager'
  226. class Auth(Component):
  227. def __init__(self, process, boss, kind, address, params):
  228. Component.__init__(self, process, boss, kind)
  229. self._start_func = boss.start_auth
  230. self._address = 'Auth'
  231. class Resolver(Component):
  232. def __init__(self, process, boss, kind, address, params):
  233. Component.__init__(self, process, boss, kind)
  234. self._start_func = boss.start_resolver
  235. self._address = 'Resolver'
  236. class CmdCtl(Component):
  237. def __init__(self, process, boss, kind, address, params):
  238. Component.__init__(self, process, boss, kind)
  239. self._start_func = boss.start_cmdctl
  240. self._address = 'Cmdctl'
# Maps the value of a component's 'special' configuration item to the class
# that Configurator._build_plan instantiates for it in place of the plain
# Component class.
specials = {
    'sockcreator': SockCreator,
    'msgq': Msgq,
    'cfgmgr': CfgMgr,
    # TODO: Should these be replaced by configuration in config manager only?
    # They should not have any parameters anyway
    'auth': Auth,
    'resolver': Resolver,
    'cmdctl': CmdCtl
}
"""
List of specially started components. Each one should be the class than can
be created for that component.
"""
  255. class Configurator:
  256. """
  257. This thing keeps track of configuration changes and starts and stops
  258. components as it goes. It also handles the inital startup and final
  259. shutdown.
  260. Note that this will allow you to stop (by invoking reconfigure) a core
  261. component. There should be some kind of layer protecting users from ever
  262. doing so (users must not stop the config manager, message queue and stuff
  263. like that or the system won't start again).
  264. """
  265. def __init__(self, boss):
  266. """
  267. Initializes the configurator, but nothing is started yet.
  268. The boss parameter is the boss object used to start and stop processes.
  269. """
  270. self.__boss = boss
  271. # These could be __private, but as we access them from within unittest,
  272. # it's more comfortable to have them just _protected.
  273. self._components = {}
  274. self._old_config = {}
  275. self._running = False
  276. def __reconfigure_internal(self, old, new):
  277. """
  278. Does a switch from one configuration to another.
  279. """
  280. self._run_plan(self._build_plan(old, new))
  281. self._old_config = new
  282. def startup(self, configuration):
  283. """
  284. Starts the first set of processes. This configuration is expected
  285. to be hardcoded from the boss itself to start the configuration
  286. manager and other similar things.
  287. """
  288. if self._running:
  289. raise ValueError("Trying to start the component configurator " +
  290. "twice")
  291. logger.info(BIND10_CONFIGURATOR_START)
  292. self.__reconfigure_internal({}, configuration)
  293. self._running = True
  294. def shutdown(self):
  295. """
  296. Shuts everything down.
  297. """
  298. if not self._running:
  299. raise ValueError("Trying to shutdown the component " +
  300. "configurator while it's not yet running")
  301. logger.info(BIND10_CONFIGURATOR_STOP)
  302. self._running = False
  303. self.__reconfigure_internal(self._old_config, {})
  304. def reconfigure(self, configuration):
  305. """
  306. Changes configuration from the current one to the provided. It
  307. starts and stops all the components as needed.
  308. """
  309. if not self._running:
  310. raise ValueError("Trying to reconfigure the component " +
  311. "configurator while it's not yet running")
  312. logger.info(BIND10_CONFIGURATOR_RECONFIGURE)
  313. self.__reconfigure_internal(self._old_config, configuration)
  314. def _build_plan(self, old, new):
  315. """
  316. Builds a plan how to transfer from the old configuration to the new
  317. one. It'll be sorted by priority and it will contain the components
  318. (already created, but not started). Each command in the plan is a dict,
  319. so it can be extended any time in future to include whatever
  320. parameters each operation might need.
  321. Any configuration problems are expected to be handled here, so the
  322. plan is not yet run.
  323. """
  324. logger.debug(DBG_TRACE_DATA, BIND10_CONFIGURATOR_BUILD, old, new)
  325. plan = []
  326. # Handle removals of old components
  327. for cname in old.keys():
  328. if cname not in new:
  329. component = self._components[cname]
  330. if component.running():
  331. plan.append({
  332. 'command': 'stop',
  333. 'component': component,
  334. 'name': cname
  335. })
  336. # Handle transitions of configuration of what is here
  337. for cname in new.keys():
  338. if cname in old:
  339. for option in ['special', 'process', 'kind']:
  340. if new[cname].get(option) != old[cname].get(option):
  341. raise NotImplementedError('Changing configuration of' +
  342. ' a running component is ' +
  343. 'not yet supported. Remove' +
  344. ' and re-add ' + cname +
  345. 'to get the same effect')
  346. # Handle introduction of new components
  347. plan_add = []
  348. for cname in new.keys():
  349. if cname not in old:
  350. params = new[cname]
  351. creator = Component
  352. if 'special' in params:
  353. # TODO: Better error handling
  354. creator = specials[params['special']]
  355. component = creator(params.get('process', cname), self.__boss,
  356. params.get('kind', 'dispensable'),
  357. params.get('address'),
  358. params.get('params'))
  359. priority = params.get('priority', 0)
  360. # We store tuples, priority first, so we can easily sort
  361. plan_add.append((priority, {
  362. 'component': component,
  363. 'command': 'start',
  364. 'name': cname,
  365. }))
  366. # Push the starts there sorted by priority
  367. plan.extend([command for (_, command) in sorted(plan_add,
  368. reverse=True,
  369. key=lambda command:
  370. command[0])])
  371. return plan
  372. def running(self):
  373. return self._running
  374. def _run_plan(self, plan):
  375. """
  376. Run a plan, created beforehead by _build_plan.
  377. With the start and stop commands, it also adds and removes components
  378. in _components.
  379. Currently implemented commands are:
  380. * start
  381. * stop
  382. """
  383. done = 0
  384. try:
  385. logger.debug(DBG_TRACE_DATA, BIND10_CONFIGURATOR_RUN, len(plan))
  386. for task in plan:
  387. component = task['component']
  388. command = task['command']
  389. logger.debug(DBG_TRACE_DETAILED, BIND10_CONFIGURATOR_TASK, command,
  390. component.name())
  391. if command == 'start':
  392. component.start()
  393. self._components[task['name']] = component
  394. elif command == 'stop':
  395. if component.running():
  396. component.stop()
  397. del self._components[task['name']]
  398. else:
  399. # Can Not Happen (as the plans are generated by ourself).
  400. # Therefore not tested.
  401. raise NotImplementedError("Command unknown: " + command)
  402. done += 1
  403. except:
  404. logger.error(BIND10_CONFIGURATOR_PLAN_INTERRUPTED, done, len(plan))
  405. raise