Browse Source

Merge branch 'work/ixfrfallback'

Michal 'vorner' Vaner 13 years ago
parent
commit
cd3588c902

+ 127 - 0
src/bin/xfrin/tests/xfrin_test.py

@@ -20,6 +20,7 @@ import sys
 import io
 import io
 from isc.testutils.tsigctx_mock import MockTSIGContext
 from isc.testutils.tsigctx_mock import MockTSIGContext
 from xfrin import *
 from xfrin import *
+import xfrin
 from isc.xfrin.diff import Diff
 from isc.xfrin.diff import Diff
 import isc.log
 import isc.log
 
 
@@ -2287,6 +2288,132 @@ class TestMain(unittest.TestCase):
         MockXfrin.check_command_hook = raise_exception
         MockXfrin.check_command_hook = raise_exception
         main(MockXfrin, False)
         main(MockXfrin, False)
 
 
+class TestXfrinProcess(unittest.TestCase):
+    """
+    Some tests for the xfrin_process function. This replaces the
+    XfrinConnection class with itself, so we can emulate whatever behavior we
+    might want.
+
+    Currently only tests for retry if IXFR fails.
+    """
+    def setUp(self):
+        """
+        Sets up several internal variables to watch what happens: the
+        attempted transfers, the published results and the number of
+        connections created.
+        """
+        # This will hold a "log" of what transfers were attempted.
+        self.__transfers = []
+        # This will "log" if failures or successes happened.
+        self.__published = []
+        # How many connections were created.
+        self.__created_connections = 0
+
+    def __get_connection(self, *args):
+        """
+        Provides a "connection". To mock the connection and see what it is
+        asked to do, we pretend to be the connection.
+        """
+        self.__created_connections += 1
+        return self
+
+    def connect_to_master(self):
+        """
+        Part of pretending to be the connection. It pretends it connected
+        correctly every time.
+        """
+        return True
+
+    def do_xfrin(self, check_soa, request_type):
+        """
+        Part of pretending to be the connection. It logs which request was
+        made and returns the next prepared answer.
+        """
+        self.__transfers.append(request_type)
+        ret = self.__rets[0]
+        self.__rets = self.__rets[1:]
+        return ret
+
+    def zone_str(self):
+        """
+        Part of pretending to be the connection. It provides the logging name
+        of zone.
+        """
+        return "example.org/IN"
+
+    def publish_xfrin_news(self, zone_name, rrclass, ret):
+        """
+        Part of pretending to be the server as well. This just logs the
+        success/failure of the previous operation.
+        """
+        self.__published.append(ret)
+
+    def close(self):
+        """
+        Part of pretending to be the connection.
+        """
+        pass
+
+    def init_socket(self):
+        """
+        Part of pretending to be the connection.
+        """
+        pass
+
+    def __do_test(self, rets, transfers, request_type):
+        """
+        Do the actual test. The request type, prepared successes/failures
+        and expected sequence of transfers are passed to specify what test
+        should happen.
+        """
+        self.__rets = rets
+        published = rets[-1]
+        xfrin.process_xfrin(self, XfrinRecorder(), Name("example.org."),
+                            RRClass.IN(), None, None, None, True, None,
+                            request_type, self.__get_connection)
+        self.assertEqual([], self.__rets)
+        self.assertEqual(transfers, self.__transfers)
+        # Create a connection for each attempt
+        self.assertEqual(len(transfers), self.__created_connections)
+        self.assertEqual([published], self.__published)
+
+    def test_ixfr_ok(self):
+        """
+        Everything OK the first time, over IXFR.
+        """
+        self.__do_test([XFRIN_OK], [RRType.IXFR()], RRType.IXFR())
+
+    def test_axfr_ok(self):
+        """
+        Everything OK the first time, over AXFR.
+        """
+        self.__do_test([XFRIN_OK], [RRType.AXFR()], RRType.AXFR())
+
+    def test_axfr_fail(self):
+        """
+        The transfer failed over AXFR. Should not be retried (we don't expect
+        to fail on AXFR, but succeed on IXFR and we didn't use IXFR in the first
+        place for some reason).
+        """
+        self.__do_test([XFRIN_FAIL], [RRType.AXFR()], RRType.AXFR())
+
+    def test_ixfr_fallback(self):
+        """
+        The transfer fails over IXFR, but succeeds over AXFR. It should fall back
+        to it and say everything is OK.
+        """
+        self.__do_test([XFRIN_FAIL, XFRIN_OK], [RRType.IXFR(), RRType.AXFR()],
+                       RRType.IXFR())
+
+    def test_ixfr_fail(self):
+        """
+        The transfer fails both over IXFR and AXFR. It should report failure
+        (only once) and should try both before giving up.
+        """
+        self.__do_test([XFRIN_FAIL, XFRIN_FAIL],
+                       [RRType.IXFR(), RRType.AXFR()], RRType.IXFR())
+
 if __name__== "__main__":
 if __name__== "__main__":
     try:
     try:
         isc.log.resetUnitTestRootLogger()
         isc.log.resetUnitTestRootLogger()

+ 33 - 13
src/bin/xfrin/xfrin.py.in

@@ -775,15 +775,15 @@ class XfrinConnection(asyncore.dispatcher):
         return False
         return False
 
 
 def __process_xfrin(server, zone_name, rrclass, db_file,
 def __process_xfrin(server, zone_name, rrclass, db_file,
-                    shutdown_event, master_addrinfo, check_soa, tsig_key,
-                    request_type, conn_class=XfrinConnection):
+                  shutdown_event, master_addrinfo, check_soa, tsig_key,
+                  request_type, conn_class):
     conn = None
     conn = None
     exception = None
     exception = None
     ret = XFRIN_FAIL
     ret = XFRIN_FAIL
     try:
     try:
         # Create a data source client used in this XFR session.  Right now we
         # Create a data source client used in this XFR session.  Right now we
-        # still assume an sqlite3-based data source, and use both the old and
-        # new data source APIs.  We also need to use a mock client for tests.
+        # still assume an sqlite3-based data source, and use both the old and new
+        # data source APIs.  We also need to use a mock client for tests.
         # For a temporary workaround to deal with these situations, we skip the
         # For a temporary workaround to deal with these situations, we skip the
         # creation when the given file is none (the test case).  Eventually
         # creation when the given file is none (the test case).  Eventually
         # this code will be much cleaner.
         # this code will be much cleaner.
@@ -796,16 +796,36 @@ def __process_xfrin(server, zone_name, rrclass, db_file,
             datasrc_config = "{ \"database_file\": \"" + db_file + "\"}"
             datasrc_config = "{ \"database_file\": \"" + db_file + "\"}"
             datasrc_client = DataSourceClient(datasrc_type, datasrc_config)
             datasrc_client = DataSourceClient(datasrc_type, datasrc_config)
 
 
-        # Create a TCP connection for the XFR session and perform the operation
+        # Create a TCP connection for the XFR session and perform the operation.
         sock_map = {}
         sock_map = {}
-        conn = conn_class(sock_map, zone_name, rrclass, datasrc_client,
-                          shutdown_event, master_addrinfo, tsig_key)
-        conn.init_socket()
-        # XXX: We still need _db_file for temporary workaround in _create_query().
-        # This should be removed when we eliminate the need for the workaround.
-        conn._db_file = db_file
-        if conn.connect_to_master():
-            ret = conn.do_xfrin(check_soa, request_type)
+        # In case we were asked to do IXFR and that one fails, we try again with
+        # AXFR. But only if we could actually connect to the server.
+        #
+        # So we start with retry as True, which is set to false on each attempt.
+        # In the case of connected but failed IXFR, we set it to true once again.
+        retry = True
+        while retry:
+            retry = False
+            conn = conn_class(sock_map, zone_name, rrclass, datasrc_client,
+                              shutdown_event, master_addrinfo, tsig_key)
+            conn.init_socket()
+            # XXX: We still need _db_file for temporary workaround in _create_query().
+            # This should be removed when we eliminate the need for the workaround.
+            conn._db_file = db_file
+            ret = XFRIN_FAIL
+            if conn.connect_to_master():
+                ret = conn.do_xfrin(check_soa, request_type)
+                if ret == XFRIN_FAIL and request_type == RRType.IXFR():
+                    # IXFR failed for some reason. It might mean the server can't
+                    # handle it, or we don't have the zone or we are out of sync or
+                    # whatever else. So we retry with AXFR, as it may succeed
+                    # in many such cases.
+                    retry = True
+                    request_type = RRType.AXFR()
+                    logger.warn(XFRIN_XFR_TRANSFER_FALLBACK, conn.zone_str())
+                    conn.close()
+                    conn = None
+
     except Exception as ex:
     except Exception as ex:
         # If exception happens, just remember it here so that we can re-raise
         # If exception happens, just remember it here so that we can re-raise
         # after cleaning up things.  We don't log it here because we want
         # after cleaning up things.  We don't log it here because we want

+ 6 - 0
src/bin/xfrin/xfrin_messages.mes

@@ -29,6 +29,12 @@ this can only happen for AXFR.
 The XFR transfer for the given zone has failed due to a protocol error.
 The XFR transfer for the given zone has failed due to a protocol error.
 The error is shown in the log message.
 The error is shown in the log message.
 
 
+% XFRIN_XFR_TRANSFER_FALLBACK falling back from IXFR to AXFR for %1
+The IXFR transfer of the given zone failed. This might happen in many cases,
+such as when the remote server doesn't support IXFR, we don't have the SOA
+record (or the zone at all), we are out of sync, etc. In many of these
+situations, AXFR could still work. Therefore we try that one in case it helps.
+
 % XFRIN_XFR_PROCESS_FAILURE %1 transfer of zone %2/%3 failed: %4
 % XFRIN_XFR_PROCESS_FAILURE %1 transfer of zone %2/%3 failed: %4
 An XFR session failed outside the main protocol handling.  This
 An XFR session failed outside the main protocol handling.  This
 includes an error at the data source level at the initialization
 includes an error at the data source level at the initialization

+ 16 - 5
tests/system/ixfr/in-3/tests.sh

@@ -22,6 +22,8 @@
 # server; the server should not respond to the request, so the client should
 # server; the server should not respond to the request, so the client should
 # then send an AXFR request and receive the latest copy of the zone.
 # then send an AXFR request and receive the latest copy of the zone.
 
 
+# TODO: It seems BIND 9 still allows IXFR even with "provide-ixfr no;".
+
 . ../ixfr_init.sh
 . ../ixfr_init.sh
 status=$?
 status=$?
 
 
@@ -29,9 +31,6 @@ status=$?
 old_client_serial=`$DIG_SOA @$CLIENT_IP | $AWK '{print $3}'`
 old_client_serial=`$DIG_SOA @$CLIENT_IP | $AWK '{print $3}'`
 echo "I:SOA serial of IXFR client $CLIENT_NAME is $old_client_serial"
 echo "I:SOA serial of IXFR client $CLIENT_NAME is $old_client_serial"
 
 
-# TODO: Need to alter configuration of BIND 10 server such that it accepts
-# NOTIFYs from and sends IXFR requests to the BIND 9 master.
-
 # If required, get the IXFR server to notify the IXFR client of the new zone.
 # If required, get the IXFR server to notify the IXFR client of the new zone.
 # Do this by allowing notifies and then triggering a re-notification of the
 # Do this by allowing notifies and then triggering a re-notification of the
 # server.
 # server.
@@ -48,8 +47,20 @@ status=`expr $status + $?`
 compare_soa $SERVER_NAME $SERVER_IP $CLIENT_NAME $CLIENT_IP
 compare_soa $SERVER_NAME $SERVER_IP $CLIENT_NAME $CLIENT_IP
 status=`expr $status + $?`
 status=`expr $status + $?`
 
 
-# TODO: Check the BIND 10 log, looking for the IXFR messages that indicate that
-# it has initiated an IXFR and then an AXFR.
+# Check that the log contains both the IXFR start and the fallback messages
+grep XFRIN_XFR_TRANSFER_STARTED nsx2/bind10.run | grep IXFR
+if [ $? -ne 0 ];
+then
+    echo "R:$CLIENT_NAME FAIL no 'IXFR started' message in the BIND 10 log"
+    exit 1
+fi
+
+grep XFRIN_XFR_TRANSFER_FALLBACK nsx2/bind10.run
+if [ $? -ne 0 ];
+then
+    echo "R:$CLIENT_NAME FAIL no fallback message in BIND10 log"
+    exit 1
+fi
 
 
 echo "I:exit status: $status"
 echo "I:exit status: $status"
 exit $status
 exit $status

+ 1 - 0
tests/system/ixfr/named_noixfr.conf

@@ -33,6 +33,7 @@ options {
 	ixfr-from-differences no;
 	ixfr-from-differences no;
 	notify explicit;
 	notify explicit;
 	also-notify { 10.53.0.2; };
 	also-notify { 10.53.0.2; };
+    provide-ixfr no;
 };
 };
 
 
 zone "example" {
 zone "example" {