Browse Source

Merge branch 'trac3074'

Mukund Sivaraman 11 years ago
parent
commit
ed672a898d

+ 16 - 3
src/bin/bind10/init.py.in

@@ -337,6 +337,7 @@ class Init:
                 self.__propagate_component_config(new_config['components'])
                 self.__propagate_component_config(new_config['components'])
             return isc.config.ccsession.create_answer(0)
             return isc.config.ccsession.create_answer(0)
         except Exception as e:
         except Exception as e:
+            logger.error(BIND10_RECONFIGURE_ERROR, e)
             return isc.config.ccsession.create_answer(1, str(e))
             return isc.config.ccsession.create_answer(1, str(e))
 
 
     def get_processes(self):
     def get_processes(self):
@@ -597,6 +598,13 @@ class Init:
             process, the log_starting/log_started methods are not used.
             process, the log_starting/log_started methods are not used.
         """
         """
         logger.info(BIND10_STARTING_CC)
         logger.info(BIND10_STARTING_CC)
+
+        # Unsubscribe from the other CC session first, because we only
+        # monitor one and msgq expects all data sent to us to be read,
+        # or it will close its side of the socket.
+        if self.cc_session is not None:
+            self.cc_session.group_unsubscribe("Init")
+
         self.ccs = isc.config.ModuleCCSession(SPECFILE_LOCATION,
         self.ccs = isc.config.ModuleCCSession(SPECFILE_LOCATION,
                                       self.config_handler,
                                       self.config_handler,
                                       self.command_handler,
                                       self.command_handler,
@@ -764,9 +772,14 @@ class Init:
         it might want to choose if it is for this one).
         it might want to choose if it is for this one).
         """
         """
         logger.info(BIND10_STOP_PROCESS, process)
         logger.info(BIND10_STOP_PROCESS, process)
-        self.cc_session.group_sendmsg(isc.config.ccsession.
+        try:
-                                      create_command('shutdown', {'pid': pid}),
+            self.cc_session.group_sendmsg(isc.config.ccsession.
-                                      recipient, recipient)
+                                          create_command('shutdown',
+                                                         {'pid': pid}),
+                                          recipient, recipient)
+        except:
+            logger.error(BIND10_COMPONENT_SHUTDOWN_ERROR, process)
+            raise
 
 
     def component_shutdown(self, exitcode=0):
     def component_shutdown(self, exitcode=0):
         """
         """

+ 7 - 0
src/bin/bind10/init_messages.mes

@@ -325,3 +325,10 @@ the configuration manager to start up.  The total length of time Init
 will wait for the configuration manager before reporting an error is
 will wait for the configuration manager before reporting an error is
 set with the command line --wait switch, which has a default value of
 set with the command line --wait switch, which has a default value of
 ten seconds.
 ten seconds.
+
+% BIND10_RECONFIGURE_ERROR Error applying new config: %1
+A new configuration was received, but an error occurred while applying
+it. The error is included in the message.
+
+% BIND10_COMPONENT_SHUTDOWN_ERROR An error occurred stopping component %1
+An attempt to gracefully shut down a component failed.

+ 3 - 1
src/bin/msgq/msgq.py.in

@@ -525,7 +525,9 @@ class MsgQ:
             # Append it to buffer (but check the data go away)
             # Append it to buffer (but check the data go away)
             if fileno in self.sendbuffs:
             if fileno in self.sendbuffs:
                 (last_sent, buff) = self.sendbuffs[fileno]
                 (last_sent, buff) = self.sendbuffs[fileno]
-                if now - last_sent > 0.1:
+                tdelta = now - last_sent
+                if tdelta > 0.1:
+                    logger.error(MSGQ_SOCKET_TIMEOUT_ERROR, fileno, tdelta)
                     self.kill_socket(fileno, sock)
                     self.kill_socket(fileno, sock)
                     return False
                     return False
                 buff += msg
                 buff += msg

+ 6 - 0
src/bin/msgq/msgq_messages.mes

@@ -142,3 +142,9 @@ data structure.
 % MSGQ_SUBS_NEW_TARGET Creating new target for subscription to group '%1' for instance '%2'
 % MSGQ_SUBS_NEW_TARGET Creating new target for subscription to group '%1' for instance '%2'
 Debug message. Creating a new subscription. Also creating a new data structure
 Debug message. Creating a new subscription. Also creating a new data structure
 to hold it.
 to hold it.
+
+% MSGQ_SOCKET_TIMEOUT_ERROR Killing socket %1 because timeout exceeded (%2)
+Outgoing data was queued up on a socket connected to msgq, but the other
+side is not reading it. The peer could be deadlocked, or may not be
+monitoring the socket. Both cases are programming errors and should be
+corrected. The socket is closed on the msgq side.