Browse Source

[master] Merge branch 'trac711'

JINMEI Tatuya 12 years ago
parent
commit
3e191cf288

+ 0 - 7
src/bin/bind10/bind10_messages.mes

@@ -308,13 +308,6 @@ During the startup process, a number of messages are exchanged between the
 Boss process and the processes it starts.  This error is output when a
 message received by the Boss process is not recognised.
 
-% BIND10_START_AS_NON_ROOT_RESOLVER starting b10-resolver as a user, not root. This might fail.
-The resolver is being started or restarted without root privileges.
-If the module needs these privileges, it may have problems starting.
-Note that this issue should be resolved by the pending 'socket-creator'
-process; once that has been implemented, modules should not need root
-privileges anymore. See tickets #800 and #801 for more information.
-
 % BIND10_STOP_PROCESS asking %1 to shut down
 The boss module is sending a shutdown command to the given module over
 the message channel.

+ 67 - 12
src/bin/bind10/bind10_src.py.in

@@ -103,8 +103,31 @@ VERSION = "bind10 20110223 (BIND 10 @PACKAGE_VERSION@)"
 # This is for boot_time of Boss
 _BASETIME = time.gmtime()
 
+# Detailed error message commonly used on startup failure, possibly due to
+# permission issue regarding log lock file.  We dump verbose message because
+# it may not be clear exactly what to do if it simply says
+# "failed to open <filename>: permission denied"
+NOTE_ON_LOCK_FILE = """\
+TIP: if this is about permission error for a lock file, check if the directory
+of the file is writable for the user of the bind10 process; often you need
+to start bind10 as a super user.  Also, if you specify the -u option to
+change the user and group, the directory must be writable for the group,
+and the created lock file must be writable for that user. Finally, make sure
+the lock file is not left in the directly before restarting.
+"""
+
 class ProcessInfoError(Exception): pass
 
+class ChangeUserError(Exception):
+    '''Exception raised when setuid/setgid fails.
+
+    When raised, it's expected to be propagated via underlying component
+    management modules to the top level so that it will help provide useful
+    fatal error message.
+
+    '''
+    pass
+
 class ProcessInfo:
     """Information about a process"""
 
@@ -206,8 +229,8 @@ class BoB:
         # restart. Components manage their own restart schedule now
         self.components_to_restart = []
         self.runnable = False
-        self.uid = setuid
-        self.gid = setgid
+        self.__uid = setuid
+        self.__gid = setgid
         self.username = username
         self.verbose = verbose
         self.nokill = nokill
@@ -269,6 +292,31 @@ class BoB:
         # Update the configuration
         self._component_configurator.reconfigure(comps)
 
+    def change_user(self):
+        '''Change the user and group to those specified on construction.
+
+        This method is expected to be called by a component on initial
+        startup when the system is ready to switch the user and group
+        (i.e., once all components that need the privilege of the original
+        user have started).
+        '''
+        try:
+            if self.__gid is not None:
+                logger.info(BIND10_SETGID, self.__gid)
+                posix.setgid(self.__gid)
+        except Exception as ex:
+            raise ChangeUserError('failed to change group: ' + str(ex))
+
+        try:
+            if self.__uid is not None:
+                posix.setuid(self.__uid)
+                # We use one-shot logger after setuid here.  This will
+                # detect any permission issue regarding logging due to the
+                # result of setuid at the earliest opportunity.
+                isc.log.Logger("boss").info(BIND10_SETUID, self.__uid)
+        except Exception as ex:
+            raise ChangeUserError('failed to change user: ' + str(ex))
+
     def config_handler(self, new_config):
         # If this is initial update, don't do anything now, leave it to startup
         if not self.runnable:
@@ -578,8 +626,6 @@ class BoB:
             are pure speculation.  As with the auth daemon, they should be
             read from the configuration database.
         """
-        if self.uid is not None and self.__started:
-            logger.warn(BIND10_START_AS_NON_ROOT_RESOLVER)
         self.curproc = "b10-resolver"
         # XXX: this must be read from the configuration manager in the future
         resargs = ['b10-resolver']
@@ -646,6 +692,9 @@ class BoB:
         try:
             self.c_channel_env = c_channel_env
             self.start_all_components()
+        except ChangeUserError as e:
+            self.kill_started_components()
+            return str(e) + '; ' + NOTE_ON_LOCK_FILE.replace('\n', ' ')
         except Exception as e:
             self.kill_started_components()
             return "Unable to start " + self.curproc + ": " + str(e)
@@ -1155,7 +1204,19 @@ def remove_lock_files():
     for f in lockfiles:
         fname = lpath + '/' + f
         if os.path.isfile(fname):
-            os.unlink(fname)
+            try:
+                os.unlink(fname)
+            except OSError as e:
+                # We catch and ignore permission related error on unlink.
+                # This can happen if bind10 started with -u, created a lock
+                # file as a privileged user, but the directory is not writable
+                # for the changed user.  This setup will cause immediate
+                # start failure, and we leave verbose error message including
+                # the leftover lock file, so it should be acceptable to ignore
+                # it (note that it doesn't make sense to log this event at
+                # this poitn)
+                if e.errno != errno.EPERM and e.errno != errno.EACCES:
+                    raise
 
     return
 
@@ -1173,13 +1234,7 @@ def main():
     except RuntimeError as e:
         sys.stderr.write('ERROR: failed to write the initial log: %s\n' %
                          str(e))
-        sys.stderr.write("""\
-TIP: if this is about permission error for a lock file, check if the directory
-of the file is writable for the user of the bind10 process; often you need
-to start bind10 as a super user.  Also, if you specify the -u option to
-change the user and group, the directory must be writable for the group,
-and the created lock file must be writable for that user.
-""")
+        sys.stderr.write(NOTE_ON_LOCK_FILE)
         sys.exit(1)
 
     # Check user ID.

+ 94 - 3
src/bin/bind10/tests/bind10_test.py.in

@@ -341,6 +341,18 @@ class TestCacheCommands(unittest.TestCase):
 
 
 class TestBoB(unittest.TestCase):
+    def setUp(self):
+        # Save original values that may be tweaked in some tests
+        self.__orig_setgid = bind10_src.posix.setgid
+        self.__orig_setuid = bind10_src.posix.setuid
+        self.__orig_logger_class = isc.log.Logger
+
+    def tearDown(self):
+        # Restore original values saved in setUp()
+        bind10_src.posix.setgid = self.__orig_setgid
+        bind10_src.posix.setuid = self.__orig_setuid
+        isc.log.Logger = self.__orig_logger_class
+
     def test_init(self):
         bob = BoB()
         self.assertEqual(bob.verbose, False)
@@ -349,10 +361,56 @@ class TestBoB(unittest.TestCase):
         self.assertEqual(bob.ccs, None)
         self.assertEqual(bob.components, {})
         self.assertEqual(bob.runnable, False)
-        self.assertEqual(bob.uid, None)
         self.assertEqual(bob.username, None)
         self.assertIsNone(bob._socket_cache)
 
+    def __setgid(self, gid):
+        self.__gid_set = gid
+
+    def __setuid(self, uid):
+        self.__uid_set = uid
+
+    def test_change_user(self):
+        bind10_src.posix.setgid = self.__setgid
+        bind10_src.posix.setuid = self.__setuid
+
+        self.__gid_set = None
+        self.__uid_set = None
+        bob = BoB()
+        bob.change_user()
+        # No gid/uid set in boss, nothing called.
+        self.assertIsNone(self.__gid_set)
+        self.assertIsNone(self.__uid_set)
+
+        BoB(setuid=42, setgid=4200).change_user()
+        # This time, it get's called
+        self.assertEqual(4200, self.__gid_set)
+        self.assertEqual(42, self.__uid_set)
+
+        def raising_set_xid(gid_or_uid):
+            ex = OSError()
+            ex.errno, ex.strerror = errno.EPERM, 'Operation not permitted'
+            raise ex
+
+        # Let setgid raise an exception
+        bind10_src.posix.setgid = raising_set_xid
+        bind10_src.posix.setuid = self.__setuid
+        self.assertRaises(bind10_src.ChangeUserError,
+                          BoB(setuid=42, setgid=4200).change_user)
+
+        # Let setuid raise an exception
+        bind10_src.posix.setgid = self.__setgid
+        bind10_src.posix.setuid = raising_set_xid
+        self.assertRaises(bind10_src.ChangeUserError,
+                          BoB(setuid=42, setgid=4200).change_user)
+
+        # Let initial log output after setuid raise an exception
+        bind10_src.posix.setgid = self.__setgid
+        bind10_src.posix.setuid = self.__setuid
+        isc.log.Logger = raising_set_xid
+        self.assertRaises(bind10_src.ChangeUserError,
+                          BoB(setuid=42, setgid=4200).change_user)
+
     def test_set_creator(self):
         """
         Test the call to set_creator. First time, the cache is created
@@ -423,7 +481,6 @@ class TestBoB(unittest.TestCase):
         self.assertEqual(bob.ccs, None)
         self.assertEqual(bob.components, {})
         self.assertEqual(bob.runnable, False)
-        self.assertEqual(bob.uid, None)
         self.assertEqual(bob.username, None)
 
     def test_command_handler(self):
@@ -2021,8 +2078,10 @@ class TestBossComponents(unittest.TestCase):
 
             def start_all_components(self):
                 self.started = True
-                if self.throw:
+                if self.throw is True:
                     raise Exception('Assume starting components has failed.')
+                elif self.throw:
+                    raise self.throw
 
             def kill_started_components(self):
                 self.killed = True
@@ -2067,6 +2126,12 @@ class TestBossComponents(unittest.TestCase):
         r = bob.startup()
         self.assertEqual({'BIND10_MSGQ_SOCKET_FILE': 'foo'}, bob.c_channel_env)
 
+        # Check failure of changing user results in a different message
+        bob = MockBobStartup(bind10_src.ChangeUserError('failed to chusr'))
+        r = bob.startup()
+        self.assertIn('failed to chusr', r)
+        self.assertTrue(bob.killed)
+
         # Check the case when socket file already exists
         isc.cc.Session = DummySessionSocketExists
         bob = MockBobStartup(False)
@@ -2277,11 +2342,15 @@ class TestFunctions(unittest.TestCase):
         self.assertFalse(os.path.exists(self.lockfile_testpath))
         os.mkdir(self.lockfile_testpath)
         self.assertTrue(os.path.isdir(self.lockfile_testpath))
+        self.__isfile_orig = bind10_src.os.path.isfile
+        self.__unlink_orig = bind10_src.os.unlink
 
     def tearDown(self):
         os.rmdir(self.lockfile_testpath)
         self.assertFalse(os.path.isdir(self.lockfile_testpath))
         os.environ["B10_LOCKFILE_DIR_FROM_BUILD"] = "@abs_top_builddir@"
+        bind10_src.os.path.isfile = self.__isfile_orig
+        bind10_src.os.unlink = self.__unlink_orig
 
     def test_remove_lock_files(self):
         os.environ["B10_LOCKFILE_DIR_FROM_BUILD"] = self.lockfile_testpath
@@ -2305,6 +2374,28 @@ class TestFunctions(unittest.TestCase):
         # second call should not assert anyway
         bind10_src.remove_lock_files()
 
+    def test_remove_lock_files_fail(self):
+        # Permission error on unlink is ignored; other exceptions are really
+        # unexpected and propagated.
+        def __raising_unlink(unused, ex):
+            raise ex
+
+        bind10_src.os.path.isfile = lambda _: True
+        os_error = OSError()
+        bind10_src.os.unlink = lambda f: __raising_unlink(f, os_error)
+
+        os_error.errno = errno.EPERM
+        bind10_src.remove_lock_files() # no disruption
+
+        os_error.errno = errno.EACCES
+        bind10_src.remove_lock_files() # no disruption
+
+        os_error.errno = errno.ENOENT
+        self.assertRaises(OSError, bind10_src.remove_lock_files)
+
+        bind10_src.os.unlink = lambda f: __raising_unlink(f, Exception('bad'))
+        self.assertRaises(Exception, bind10_src.remove_lock_files)
+
     def test_get_signame(self):
         # just test with some samples
         signame = bind10_src.get_signame(signal.SIGTERM)

+ 8 - 2
src/lib/python/isc/bind10/component.py

@@ -177,8 +177,14 @@ class BaseComponent:
             self._start_internal()
         except Exception as e:
             logger.error(BIND10_COMPONENT_START_EXCEPTION, self.name(), e)
-            self.failed(None)
-            raise
+            try:
+                self.failed(None)
+            finally:
+                # Even failed() can fail if this happens during initial startup
+                # time.  In that case we'd rather propagate the original reason
+                # for the failure than the fact that failed() failed.  So we
+                # always re-raise the original exception.
+                raise e
 
     def stop(self):
         """

+ 3 - 12
src/lib/python/isc/bind10/special_component.py

@@ -17,11 +17,7 @@ from isc.bind10.component import Component, BaseComponent
 import isc.bind10.sockcreator
 from bind10_config import LIBEXECPATH
 import os
-import posix
 import isc.log
-from isc.log_messages.bind10_messages import *
-
-logger = isc.log.Logger("boss")
 
 class SockCreator(BaseComponent):
     """
@@ -36,8 +32,6 @@ class SockCreator(BaseComponent):
     def __init__(self, process, boss, kind, address=None, params=None):
         BaseComponent.__init__(self, boss, kind)
         self.__creator = None
-        self.__uid = boss.uid
-        self.__gid = boss.gid
 
     def _start_internal(self):
         self._boss.curproc = 'b10-sockcreator'
@@ -46,12 +40,9 @@ class SockCreator(BaseComponent):
         self._boss.register_process(self.pid(), self)
         self._boss.set_creator(self.__creator)
         self._boss.log_started(self.pid())
-        if self.__gid is not None:
-            logger.info(BIND10_SETGID, self.__gid)
-            posix.setgid(self.__gid)
-        if self.__uid is not None:
-            logger.info(BIND10_SETUID, self.__uid)
-            posix.setuid(self.__uid)
+
+        # We are now ready for switching user.
+        self._boss.change_user()
 
     def _stop_internal(self):
         self.__creator.terminate()

+ 5 - 27
src/lib/python/isc/bind10/tests/component_test.py

@@ -104,10 +104,10 @@ class ComponentTests(BossUtils, unittest.TestCase):
         self.__stop_process_params = None
         self.__start_simple_params = None
         # Pretending to be boss
-        self.gid = None
-        self.__gid_set = None
-        self.uid = None
-        self.__uid_set = None
+        self.__change_user_called = False
+
+    def change_user(self):
+        self.__change_user_called = True # just record the fact it's called
 
     def __start(self):
         """
@@ -624,12 +624,6 @@ class ComponentTests(BossUtils, unittest.TestCase):
         self.assertTrue(process.killed)
         self.assertFalse(process.terminated)
 
-    def setgid(self, gid):
-        self.__gid_set = gid
-
-    def setuid(self, uid):
-        self.__uid_set = uid
-
     class FakeCreator:
         def pid(self):
             return 42
@@ -655,35 +649,19 @@ class ComponentTests(BossUtils, unittest.TestCase):
         """
         component = isc.bind10.special_component.SockCreator(None, self,
                                                              'needed', None)
-        orig_setgid = isc.bind10.special_component.posix.setgid
-        orig_setuid = isc.bind10.special_component.posix.setuid
-        isc.bind10.special_component.posix.setgid = self.setgid
-        isc.bind10.special_component.posix.setuid = self.setuid
         orig_creator = \
             isc.bind10.special_component.isc.bind10.sockcreator.Creator
         # Just ignore the creator call
         isc.bind10.special_component.isc.bind10.sockcreator.Creator = \
             lambda path: self.FakeCreator()
         component.start()
-        # No gid/uid set in boss, nothing called.
-        self.assertIsNone(self.__gid_set)
-        self.assertIsNone(self.__uid_set)
+        self.assertTrue(self.__change_user_called)
         # Doesn't do anything, but doesn't crash
         component.stop()
         component.kill()
         component.kill(True)
-        self.gid = 4200
-        self.uid = 42
         component = isc.bind10.special_component.SockCreator(None, self,
                                                              'needed', None)
-        component.start()
-        # This time, it get's called
-        self.assertEqual(4200, self.__gid_set)
-        self.assertEqual(42, self.__uid_set)
-        isc.bind10.special_component.posix.setgid = orig_setgid
-        isc.bind10.special_component.posix.setuid = orig_setuid
-        isc.bind10.special_component.isc.bind10.sockcreator.Creator = \
-            orig_creator
 
 class TestComponent(BaseComponent):
     """