Browse Source

Merge branch 'trac3074'

Mukund Sivaraman 11 years ago
parent
commit
ed672a898d

+ 16 - 3
src/bin/bind10/init.py.in

@@ -337,6 +337,7 @@ class Init:
                 self.__propagate_component_config(new_config['components'])
             return isc.config.ccsession.create_answer(0)
         except Exception as e:
+            logger.error(BIND10_RECONFIGURE_ERROR, e)
             return isc.config.ccsession.create_answer(1, str(e))
 
     def get_processes(self):
@@ -597,6 +598,13 @@ class Init:
             process, the log_starting/log_started methods are not used.
         """
         logger.info(BIND10_STARTING_CC)
+
+        # Unsubscribe from the other CC session first, because we only
+        # monitor one and msgq expects all data sent to us to be read,
+        # or it will close its side of the socket.
+        if self.cc_session is not None:
+            self.cc_session.group_unsubscribe("Init")
+
         self.ccs = isc.config.ModuleCCSession(SPECFILE_LOCATION,
                                       self.config_handler,
                                       self.command_handler,
@@ -764,9 +772,14 @@ class Init:
         it might want to choose if it is for this one).
         """
         logger.info(BIND10_STOP_PROCESS, process)
-        self.cc_session.group_sendmsg(isc.config.ccsession.
-                                      create_command('shutdown', {'pid': pid}),
-                                      recipient, recipient)
+        try:
+            self.cc_session.group_sendmsg(isc.config.ccsession.
+                                          create_command('shutdown',
+                                                         {'pid': pid}),
+                                          recipient, recipient)
+        except:
+            logger.error(BIND10_COMPONENT_SHUTDOWN_ERROR, process)
+            raise
 
     def component_shutdown(self, exitcode=0):
         """

+ 7 - 0
src/bin/bind10/init_messages.mes

@@ -325,3 +325,10 @@ the configuration manager to start up.  The total length of time Init
 will wait for the configuration manager before reporting an error is
 set with the command line --wait switch, which has a default value of
 ten seconds.
+
+% BIND10_RECONFIGURE_ERROR Error applying new config: %1
+A new configuration was received, but there was an error doing the
+re-configuration.
+
+% BIND10_COMPONENT_SHUTDOWN_ERROR An error occurred stopping component %1
+An attempt to gracefully shut down a component failed.

+ 3 - 1
src/bin/msgq/msgq.py.in

@@ -525,7 +525,9 @@ class MsgQ:
             # Append it to buffer (but check the data go away)
             if fileno in self.sendbuffs:
                 (last_sent, buff) = self.sendbuffs[fileno]
-                if now - last_sent > 0.1:
+                tdelta = now - last_sent
+                if tdelta > 0.1:
+                    logger.error(MSGQ_SOCKET_TIMEOUT_ERROR, fileno, tdelta)
                     self.kill_socket(fileno, sock)
                     return False
                 buff += msg

+ 6 - 0
src/bin/msgq/msgq_messages.mes

@@ -142,3 +142,9 @@ data structure.
 % MSGQ_SUBS_NEW_TARGET Creating new target for subscription to group '%1' for instance '%2'
 Debug message. Creating a new subscription. Also creating a new data structure
 to hold it.
+
+% MSGQ_SOCKET_TIMEOUT_ERROR Killing socket %1 because timeout exceeded (%2)
+Outgoing data was queued up on a socket connected to msgq, but the other
+side is not reading it. It could be deadlocked, or may not be monitoring
+it. Both cases are programming errors and should be corrected. The socket
+is closed on the msgq side.