Browse Source

[3780] MySQL and Postgres lease managers now exit on fatal error detection

src/lib/dhcpsrv/dhcpsrv_messages.mes
    added messages DHCPSRV_MYSQL_FATAL_ERROR, DHCPSRV_PGSQL_FATAL_ERROR

src/lib/dhcpsrv/mysql_lease_mgr.cc
    added MySQL client error code include

    MySqlLeaseMgr::checkError() - method is no longer inlined in
    the header.  Expanded to detect unrecoverable errors, log
    them and call exit().

src/lib/dhcpsrv/mysql_lease_mgr.h
    Removed inline implemenation of MySqlLeaseMgr::checkError(),
    and expanded commentary

src/lib/dhcpsrv/pgsql_lease_mgr.cc
    PgSqlLeaseMgr::addLeaseCommon() - now uses checkStatementError()

    PgSqlLeaseMgr::checkStatementError() - Expanded to detect
    unrecoverable errors, log them and call exit().

src/lib/dhcpsrv/pgsql_lease_mgr.h
    Expanded commentary for PgSqlLeaseMgr::checkStatementError()
Thomas Markwalder 9 years ago
parent
commit
be964a2b42

+ 10 - 0
src/lib/dhcpsrv/dhcpsrv_messages.mes

@@ -424,6 +424,11 @@ A debug message issued when the server has removed a number of reclaimed
 leases from the database. The number of removed leases is included in the
 message.
 
+% DHCPSRV_MYSQL_FATAL_ERROR Unrecoverable MySQL error occurred: %1 for <%2>, reason: %3 (error code: %4). Server exiting now!
+An error message indicating that communication with the MySQL database server
+has been lost.  When this occurs the server exits immediately with a non-zero
+exit code.  This is most likely due to a network issue.
+
 % DHCPSRV_MYSQL_GET_ADDR4 obtaining IPv4 lease for address %1
 A debug message issued when the server is attempting to obtain an IPv4
 lease from the MySQL database for the specified address.
@@ -533,6 +538,11 @@ unlikely to occur or negatively impact server operation.
 A debug message issued when the server is attempting to delete a lease for
 the specified address from the PostgreSQL database for the specified address.
 
+% DHCPSRV_PGSQL_FATAL_ERROR Unrecoverable Postgres error occurred: Statement: <%1>, reason: %2 (error code: %3). Server exiting now!
+An error message indicating that communication with the MySQL database server
+has been lost.  When this occurs the server exits immediately with a non-zero
+exit code.  This is most likely due to a network issue.
+
 % DHCPSRV_PGSQL_GET_ADDR4 obtaining IPv4 lease for address %1
 A debug message issued when the server is attempting to obtain an IPv4
 lease from the PostgreSQL database for the specified address.

+ 34 - 0
src/lib/dhcpsrv/mysql_lease_mgr.cc

@@ -23,6 +23,7 @@
 
 #include <boost/static_assert.hpp>
 #include <mysqld_error.h>
+#include <errmsg.h>
 
 #include <iostream>
 #include <iomanip>
@@ -2122,5 +2123,38 @@ MySqlLeaseMgr::rollback() {
     }
 }
 
+void
+MySqlLeaseMgr::checkError(int status, StatementIndex index,
+                           const char* what) const {
+    if (status != 0) {
+        switch(mysql_errno(conn_.mysql_)) {
+            // These are the ones we consider fatal. Remember this method is
+            // used to check errors of API calls made subsequent to successfully
+            // connecting.  Errors occuring while attempting to connect are
+            // checked in the connection code. An alternative would be to call
+            // mysql_ping() - assuming autoreconnect is off. If that fails
+            // then we know connection is toast.
+            case CR_SERVER_GONE_ERROR:
+            case CR_SERVER_LOST:
+            case CR_OUT_OF_MEMORY:
+            case CR_CONNECTION_ERROR:
+                // We're exiting on fatal
+                LOG_ERROR(dhcpsrv_logger, DHCPSRV_MYSQL_FATAL_ERROR)
+                         .arg(what)
+                         .arg(conn_.text_statements_[index])
+                         .arg(mysql_error(conn_.mysql_))
+                         .arg(mysql_errno(conn_.mysql_));
+                exit (-1);
+
+            default:
+                // Connection is ok, so it must be an SQL error
+                isc_throw(DbOperationError, what << " for <"
+                          << conn_.text_statements_[index] << ">, reason: "
+                          << mysql_error(conn_.mysql_) << " (error code "
+                          << mysql_errno(conn_.mysql_) << ")");
+        }
+    }
+}
+
 }; // end of isc::dhcp namespace
 }; // end of isc namespace

+ 18 - 11
src/lib/dhcpsrv/mysql_lease_mgr.h

@@ -669,8 +669,22 @@ private:
     /// @brief Check Error and Throw Exception
     ///
     /// Virtually all MySQL functions return a status which, if non-zero,
-    /// indicates an error.  This inline function conceals a lot of error
-    /// checking/exception-throwing code.
+    /// indicates an error.  This function centralizes the error checking
+    /// code.
+    ///
+    /// It is used to determine whether or not the function succeeded, and
+    /// in the event of failures, decide whether or not those failures are
+    /// recoverable.
+    ///
+    /// If the error is recoverable, the method will throw a DbOperationError.
+    /// In the error is deemed unrecoverable, such as a loss of connectivity
+    /// with the server, this method will log the error and call exit(-1);
+    ///
+    /// @todo Calling exit() is viewed as a short term solution for Kea 1.0.
+    /// Two tickets are likely to alter this behavior, first is #3639, which
+    /// calls for the ability to attempt to reconnect to the database. The
+    /// second ticket, #4087 which calls for the implementation of a generic,
+    /// FatalException class which will propagate outward.
     ///
     /// @param status Status code: non-zero implies an error
     /// @param index Index of statement that caused the error
@@ -678,15 +692,8 @@ private:
     ///
     /// @throw isc::dhcp::DbOperationError An operation on the open database has
     ///        failed.
-    inline void checkError(int status, StatementIndex index,
-                           const char* what) const {
-        if (status != 0) {
-            isc_throw(DbOperationError, what << " for <" <<
-                      conn_.text_statements_[index] << ">, reason: " <<
-                      mysql_error(conn_.mysql_) << " (error code " <<
-                      mysql_errno(conn_.mysql_) << ")");
-        }
-    }
+    void checkError(int status, StatementIndex index,
+                    const char* what) const;
 
     // Members
 

+ 19 - 5
src/lib/dhcpsrv/pgsql_lease_mgr.cc

@@ -1131,11 +1131,7 @@ PgSqlLeaseMgr::addLeaseCommon(StatementIndex stindex,
             return (false);
         }
 
-        const char* errorMsg = PQerrorMessage(conn_);
-        PQclear(r);
-        isc_throw(DbOperationError, "unable to INSERT for " <<
-                  tagged_statements[stindex].name << ", reason: " <<
-                  errorMsg);
+        checkStatementError(r, stindex);
     }
 
     PQclear(r);
@@ -1650,6 +1646,24 @@ void
 PgSqlLeaseMgr::checkStatementError(PGresult*& r, StatementIndex index) const {
     int s = PQresultStatus(r);
     if (s != PGRES_COMMAND_OK && s != PGRES_TUPLES_OK) {
+        // We're testing the first two chars of SQLSTATE, as this is the
+        // error class. Note, there is a severity field, but it can be
+        // misleadingly returned as fatal.
+        const char* sqlstate = PQresultErrorField(r, PG_DIAG_SQLSTATE);
+        if ((sqlstate != NULL) &&
+            ((memcmp(sqlstate, "08", 2) == 0) ||  // Connection Exception
+             (memcmp(sqlstate, "53", 2) == 0) ||  // Insufficient resources
+             (memcmp(sqlstate, "54", 2) == 0) ||  // Program Limit exceeded
+             (memcmp(sqlstate, "57", 2) == 0) ||  // Operator intervention
+             (memcmp(sqlstate, "58", 2) == 0))) { // System error
+            LOG_ERROR(dhcpsrv_logger, DHCPSRV_PGSQL_FATAL_ERROR)
+                         .arg(tagged_statements[index].name)
+                         .arg(PQerrorMessage(conn_))
+                         .arg(sqlstate);
+            PQclear(r);
+            exit (-1);
+        }
+
         const char* error_message = PQerrorMessage(conn_);
         PQclear(r);
         isc_throw(DbOperationError, "Statement exec faild:" << " for: "

+ 13 - 2
src/lib/dhcpsrv/pgsql_lease_mgr.h

@@ -591,8 +591,19 @@ private:
 
     /// @brief Checks result of the r object
     ///
-    /// Checks status of the operation passed as first argument and throws
-    /// DbOperationError with details if it is non-success.
+    /// This function is used to determine whether or not the SQL statement
+    /// execution succeeded, and in the event of failures, decide whether or
+    /// not the failures are recoverable.
+    ///
+    /// If the error is recoverable, the method will throw a DbOperationError.
+    /// In the error is deemed unrecoverable, such as a loss of connectivity
+    /// with the server, this method will log the error and call exit(-1);
+    ///
+    /// @todo Calling exit() is viewed as a short term solution for Kea 1.0.
+    /// Two tickets are likely to alter this behavior, first is #3639, which
+    /// calls for the ability to attempt to reconnect to the database. The
+    /// second ticket, #4087 which calls for the implementation of a generic,
+    /// FatalException class which will propagate outward.
     ///
     /// @param r result of the last PostgreSQL operation
     /// @param index will be used to print out compiled statement name