Browse Source

* More tidying ready for release

git-svn-id: http://proj.badc.rl.ac.uk/svn/ndg-security/trunk/ndg_httpsclient@7988 051b1e3e-aa0c-0410-b6c2-bfbade6052be
pjkersha 13 years ago
parent
commit
8018eafb07

+ 2 - 2
.pydevproject

@@ -2,8 +2,8 @@
 <?eclipse-pydev version="1.0"?>
 
 <pydev_project>
-<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
-<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.6</pydev_property>
+<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">ndg-httpsclient-py2.7</pydev_property>
+<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property>
 <pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
 <path>/ndg_httpsclient</path>
 </pydev_pathproperty>

+ 11 - 0
MANIFEST.in

@@ -0,0 +1,11 @@
+#
+# MANIFEST.in file to enable inclusion of unit test data files and config
+# 
+# NDG HTTPS Client Package
+# 
+# P J Kershaw 17/01/12
+# 
+# Copyright (C) 2012 STFC
+# 
+# Licence: BSD - See LICENCE file for details
+recursive-include ndg/ *.crt *.key *.pem README

+ 22 - 3
ndg/httpsclient/https.py

@@ -1,4 +1,4 @@
-"""urllib2pyopenssl HTTPS module containing PyOpenSSL implementation of
+"""ndg_httpsclient HTTPS module containing PyOpenSSL implementation of
 httplib.HTTPSConnection
 
 PyOpenSSL utility to make a httplib-like interface suitable for use with 
@@ -12,8 +12,15 @@ __contact__ = "Philip.Kershaw@stfc.ac.uk"
 __revision__ = '$Id$'
 import logging
 import socket
-from httplib import HTTPConnection, HTTPS_PORT
-from urllib2 import AbstractHTTPHandler
+import sys
+from httplib import HTTPS_PORT
+if sys.version_info < (2, 6, 2):
+    from ndg.httpsclient.httplib_proxy import HTTPConnection
+    from ndg.httpsclient.urllib2_proxy import AbstractHTTPHandler
+else:
+    from httplib import HTTPConnection
+    from urllib2 import AbstractHTTPHandler
+
 
 from OpenSSL import SSL
 
@@ -49,15 +56,23 @@ class HTTPSConnection(HTTPConnection):
         """Create SSL socket and connect to peer
         """
         if getattr(self, 'ssl_context', None):
+            if not isinstance(self.ssl_context, SSL.Context):
+                raise TypeError('Expecting OpenSSL.SSL.Context type for "'
+                                'ssl_context" keyword; got %r instead' %
+                                self.ssl_context)
             ssl_context = self.ssl_context
         else:
             ssl_context = SSL.Context(self.__class__.default_ssl_method)
 
         sock = socket.create_connection((self.host, self.port), self.timeout)
+        
+        # Tunnel if using a proxy - ONLY available for Python 2.6.2 and above      
         if getattr(self, '_tunnel_host', None):
             self.sock = sock
             self._tunnel()
+            
         self.sock = SSLSocket(ssl_context, sock)
+        
         # Go to client mode.
         self.sock.set_connect_state()
 
@@ -82,6 +97,10 @@ class HTTPSContextHandler(AbstractHTTPHandler):
         AbstractHTTPHandler.__init__(self, debuglevel)
 
         if ssl_context is not None:
+            if not isinstance(ssl_context, SSL.Context):
+                raise TypeError('Expecting OpenSSL.SSL.Context type for "'
+                                'ssl_context" keyword; got %r instead' %
+                                ssl_context)
             self.ssl_context = ssl_context
         else:
             self.ssl_context = SSL.Context(SSL.SSLv23_METHOD)

+ 13 - 2
ndg/httpsclient/test/__init__.py

@@ -1,4 +1,4 @@
-"""unit tests package for urllib2pyopenssl
+"""unit tests package for ndg_httpsclient
 
 PyOpenSSL utility to make a httplib-like interface suitable for use with 
 urllib2
@@ -9,9 +9,20 @@ __copyright__ = "(C) 2012 Science and Technology Facilities Council"
 __license__ = "BSD - see LICENSE file in top-level directory"
 __contact__ = "Philip.Kershaw@stfc.ac.uk"
 __revision__ = '$Id$'
+import os
+import unittest
+    
 class Constants(object):
+    '''Convenience base class from which other unit tests can extend.  It
+    sets the generic data directory path and the locations of the test SSL
+    certificate and private key files'''
     PORT = 4443
     PORT2 = 4444
     HOSTNAME = 'localhost'
     TEST_URI = 'https://%s:%d' % (HOSTNAME, PORT)
-    TEST_URI2 = 'https://%s:%d' % (HOSTNAME, PORT2)
+    TEST_URI2 = 'https://%s:%d' % (HOSTNAME, PORT2)
+
+    # Directory containing this package's __init__ module.  __file__ is used
+    # because a package's __path__ is a list, which os.path.abspath cannot
+    # accept.
+    UNITTEST_DIR = os.path.dirname(os.path.abspath(__file__))
+    SSL_CERT_FILENAME = 'localhost.crt'
+    SSL_CERT_FILEPATH = os.path.join(UNITTEST_DIR, 'pki', SSL_CERT_FILENAME)
+    SSL_PRIKEY_FILENAME = 'localhost.key'
+    SSL_PRIKEY_FILEPATH = os.path.join(UNITTEST_DIR, 'pki', SSL_PRIKEY_FILENAME)

ndg/httpsclient/test/test_get.py → ndg/httpsclient/test/test_utils.py


+ 8 - 3
ndg/httpsclient/urllib2_build_opener.py

@@ -7,12 +7,17 @@ __copyright__ = "(C) 2011 Science and Technology Facilities Council"
 __license__ = "BSD - see LICENSE file in top-level directory"
 __contact__ = "Philip.Kershaw@stfc.ac.uk"
 __revision__ = '$Id: pyopenssl.py 7929 2011-08-16 16:39:13Z pjkersha $'
-
 import logging
-from urllib2 import (OpenerDirector, ProxyHandler, UnknownHandler, HTTPHandler,
-                     HTTPDefaultErrorHandler, HTTPRedirectHandler,
+from urllib2 import (ProxyHandler, UnknownHandler, HTTPDefaultErrorHandler, 
                      FTPHandler, FileHandler, HTTPErrorProcessor)
 
+import sys
+if sys.version_info < (2, 6, 2):
+    from ndg.httpsclient.urllib2_proxy import (HTTPHandler, OpenerDirector, 
+                                               HTTPRedirectHandler)
+else:
+    from urllib2 import HTTPHandler, OpenerDirector, HTTPRedirectHandler
+
 from ndg.httpsclient.https import HTTPSContextHandler
 
 log = logging.getLogger(__name__)

+ 262 - 0
ndg/httpsclient/urllib2_proxy.py

@@ -0,0 +1,262 @@
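+'''Variants of urllib2 classes that record a ``_tunnel_host`` on requests so
+that HTTPS requests can be tunnelled through a proxy.  Other modules in this
+package import these in place of the urllib2 originals when running on a
+Python version older than 2.6.2.
+
+Created on 12 Jan 2012
+
+@author: rwilkinson
+'''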
+import base64
+import socket
+import urlparse
+from urllib import unquote, addinfourl
+from urllib2 import _parse_proxy, URLError, HTTPError
+from urllib2 import (AbstractHTTPHandler as _AbstractHTTPHandler,
+                     BaseHandler as _BaseHandler,
+                     HTTPRedirectHandler as _HTTPRedirectHandler,
+                     Request as _Request,
+                     OpenerDirector as _OpenerDirector)
+
+from ndg.httpsclient.httplib_proxy import HTTPConnection
+
+
+class Request(_Request):
+    """urllib2 Request that additionally records a proxy tunnel target in
+    ``_tunnel_host``."""
+
+    def __init__(self, *args, **kw):
+        _Request.__init__(self, *args, **kw)
+        # No tunnel target until set_proxy() is called on an https request
+        self._tunnel_host = None
+
+    def set_proxy(self, host, type):
+        """Point this request at the proxy ``host``.
+
+        For an https request with no tunnel target recorded yet, the current
+        host is saved in ``_tunnel_host`` and the request type is left
+        unchanged.  Otherwise the request type is set to ``type``.  In both
+        cases ``self.host`` becomes ``host``.
+        """
+        if self.type == 'https' and not self._tunnel_host:
+            self._tunnel_host = self.host
+        else:
+            self.type = type
+            # NOTE(review): these double-underscore names are mangled with
+            # this class's name (_Request__r_host / _Request__original) -
+            # presumably intended to reach the base class's private
+            # attributes of the same mangled name; confirm against the
+            # urllib2 version in use
+            self.__r_host = self.__original
+        self.host = host
+
+
+class BaseHandler(_BaseHandler):
+    """urllib2 BaseHandler with proxy_open() specialised for https requests."""
+    def proxy_open(self, req, proxy, type):
+        """Route ``req`` via ``proxy``.
+
+        For https requests: parse ``proxy``, add a Basic
+        ``Proxy-authorization`` header when both a user and a password are
+        present, call ``req.set_proxy()`` and return None.  Any other request
+        type is delegated to the base class.
+        """
+        if req.get_type() == 'https':
+            orig_type = req.get_type()
+            proxy_type, user, password, hostport = _parse_proxy(proxy)
+            # Proxy URL without a scheme: fall back to the request's own type
+            if proxy_type is None:
+                proxy_type = orig_type
+            if user and password:
+                user_pass = '%s:%s' % (unquote(user), unquote(password))
+                creds = base64.b64encode(user_pass).strip()
+                req.add_header('Proxy-authorization', 'Basic ' + creds)
+            hostport = unquote(hostport)
+            req.set_proxy(hostport, proxy_type)
+            # let other handlers take care of it
+            return None
+        else:
+            # NOTE(review): relies on the urllib2 base class providing
+            # proxy_open() - confirm it is defined there
+            return _BaseHandler.proxy_open(self, req, proxy, type)
+
+class AbstractHTTPHandler(_AbstractHTTPHandler):
+    """urllib2 AbstractHTTPHandler whose do_open() sets up a proxy tunnel on
+    the connection when the request carries a ``_tunnel_host``."""
+    def do_open(self, http_class, req):
+        """Return an addinfourl object for the request, using http_class.
+
+        http_class must implement the HTTPConnection API from httplib.
+        The addinfourl return value is a file-like object.  It also
+        has methods and attributes including:
+            - info(): return a mimetools.Message object for the headers
+            - geturl(): return the original request URL
+            - code: HTTP status code
+
+        Raises URLError if the request has no host or if a socket error
+        occurs while sending the request or reading the response.
+        """
+        host = req.get_host()
+        if not host:
+            raise URLError('no host given')
+
+        h = http_class(host, timeout=req.timeout) # will parse host:port
+        h.set_debuglevel(self._debuglevel)
+
+        headers = dict(req.headers)
+        headers.update(req.unredirected_hdrs)
+        # We want to make an HTTP/1.1 request, but the addinfourl
+        # class isn't prepared to deal with a persistent connection.
+        # It will try to read all remaining data from the socket,
+        # which will block while the server waits for the next request.
+        # So make sure the connection gets closed after the (only)
+        # request.
+        headers["Connection"] = "close"
+        headers = dict(
+            (name.title(), val) for name, val in headers.items())
+
+        # Request objects that do not define _tunnel_host (anything other
+        # than this module's Request class) are treated as not tunnelled
+        # instead of failing with AttributeError.
+        tunnel_host = getattr(req, '_tunnel_host', None)
+        if tunnel_host:
+            h.set_tunnel(tunnel_host)
+        try:
+            h.request(req.get_method(), req.get_selector(), req.data, headers)
+            r = h.getresponse()
+        except socket.error, err: # XXX what error?
+            raise URLError(err)
+
+        # Pick apart the HTTPResponse object to get the addinfourl
+        # object initialized properly.
+
+        # Wrap the HTTPResponse object in socket's file object adapter
+        # for Windows.  That adapter calls recv(), so delegate recv()
+        # to read().  This weird wrapping allows the returned object to
+        # have readline() and readlines() methods.
+
+        # XXX It might be better to extract the read buffering code
+        # out of socket._fileobject() and into a base class.
+
+        r.recv = r.read
+        fp = socket._fileobject(r, close=True)
+
+        resp = addinfourl(fp, r.msg, req.get_full_url())
+        resp.code = r.status
+        resp.msg = r.reason
+        return resp
+
+
+class HTTPHandler(AbstractHTTPHandler):
+    """Handler that opens http requests with the HTTPConnection class
+    imported from ndg.httpsclient.httplib_proxy."""
+
+    def http_open(self, req):
+        return self.do_open(HTTPConnection, req)
+
+    # Request pre-processing is the generic do_request_ of the parent class
+    http_request = AbstractHTTPHandler.do_request_
+
+#if hasattr(httplib, 'HTTPS'):
+#    class HTTPSHandler(AbstractHTTPHandler):
+#
+#        def https_open(self, req):
+#            return self.do_open(httplib.HTTPSConnection, req)
+#
+#        https_request = AbstractHTTPHandler.do_request_
+
+
+class HTTPRedirectHandler(BaseHandler):
+    """Handler for 301, 302, 303 and 307 responses.  Follow-up requests are
+    built with this module's Request class."""
+    # maximum number of redirections to any single URL
+    # this is needed because of the state that cookies introduce
+    max_repeats = 4
+    # maximum total number of redirections (regardless of URL) before
+    # assuming we're in a loop
+    max_redirections = 10
+
+    def redirect_request(self, req, fp, code, msg, headers, newurl):
+        """Return a Request or None in response to a redirect.
+
+        This is called by the http_error_30x methods when a
+        redirection response is received.  If a redirection should
+        take place, return a new Request to allow http_error_30x to
+        perform the redirect.  Otherwise, raise HTTPError if no-one
+        else should try to handle this url.  Return None if you can't
+        but another Handler might.
+        """
+        m = req.get_method()
+        if (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
+            or code in (301, 302, 303) and m == "POST"):
+            # Strictly (according to RFC 2616), 301 or 302 in response
+            # to a POST MUST NOT cause a redirection without confirmation
+            # from the user (of urllib2, in this case).  In practice,
+            # essentially all clients do redirect in this case, so we
+            # do the same.
+            # be conciliant with URIs containing a space
+            newurl = newurl.replace(' ', '%20')
+            # Drop the body-describing headers: the new request is created
+            # without any data
+            newheaders = dict((k,v) for k,v in req.headers.items()
+                              if k.lower() not in ("content-length", "content-type")
+                             )
+            return Request(newurl,
+                           headers=newheaders,
+                           origin_req_host=req.get_origin_req_host(),
+                           unverifiable=True)
+        else:
+            raise HTTPError(req.get_full_url(), code, msg, headers, fp)
+
+    # Implementation note: To avoid the server sending us into an
+    # infinite loop, the request object needs to track what URLs we
+    # have already seen.  Do this by adding a handler-specific
+    # attribute to the Request object.
+    def http_error_302(self, req, fp, code, msg, headers):
+        """Follow a redirect response and return the result of opening the
+        new request.
+
+        The target is taken from the first 'location' header, or failing
+        that the first 'uri' header; None is returned if neither is present
+        or if redirect_request() returns None.  HTTPError is raised if the
+        target is not an http, https or ftp URL, or if the max_repeats /
+        max_redirections limits have been reached.
+        """
+        # Some servers (incorrectly) return multiple Location headers
+        # (so probably same goes for URI).  Use first header.
+        if 'location' in headers:
+            newurl = headers.getheaders('location')[0]
+        elif 'uri' in headers:
+            newurl = headers.getheaders('uri')[0]
+        else:
+            return
+
+        # fix a possible malformed URL
+        urlparts = urlparse.urlparse(newurl)
+        if not urlparts.path:
+            urlparts = list(urlparts)
+            urlparts[2] = "/"
+        newurl = urlparse.urlunparse(urlparts)
+
+        newurl = urlparse.urljoin(req.get_full_url(), newurl)
+
+        # For security reasons we do not allow redirects to protocols
+        # other than HTTP, HTTPS or FTP.
+        newurl_lower = newurl.lower()
+        if not (newurl_lower.startswith('http://') or
+                newurl_lower.startswith('https://') or
+                newurl_lower.startswith('ftp://')):
+            raise HTTPError(newurl, code,
+                            msg + " - Redirection to url '%s' is not allowed" %
+                            newurl,
+                            headers, fp)
+
+        # XXX Probably want to forget about the state of the current
+        # request, although that might interact poorly with other
+        # handlers that also use handler-specific request attributes
+        new = self.redirect_request(req, fp, code, msg, headers, newurl)
+        if new is None:
+            return
+
+        # loop detection
+        # .redirect_dict has a key url if url was previously visited.
+        if hasattr(req, 'redirect_dict'):
+            visited = new.redirect_dict = req.redirect_dict
+            if (visited.get(newurl, 0) >= self.max_repeats or
+                len(visited) >= self.max_redirections):
+                raise HTTPError(req.get_full_url(), code,
+                                self.inf_msg + msg, headers, fp)
+        else:
+            visited = new.redirect_dict = req.redirect_dict = {}
+        visited[newurl] = visited.get(newurl, 0) + 1
+
+        # Don't close the fp until we are sure that we won't use it
+        # with HTTPError.
+        fp.read()
+        fp.close()
+
+        return self.parent.open(new, timeout=req.timeout)
+
+    # 301, 303 and 307 responses share the 302 implementation
+    http_error_301 = http_error_303 = http_error_307 = http_error_302
+
+    # Prefix for the HTTPError message raised by the loop detection above
+    inf_msg = "The HTTP server returned a redirect error that would " \
+              "lead to an infinite loop.\n" \
+              "The last 30x error message was:\n"
+              
+
+class OpenerDirector(_OpenerDirector):
+    """urllib2 OpenerDirector whose open() wraps URL strings in this module's
+    Request class."""
+    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
+        """Open ``fullurl`` and return the processed response.
+
+        ``fullurl`` may be a URL string or a request object.  A string is
+        wrapped in a Request together with ``data``; for a request object,
+        ``data`` is added to it when not None.  ``timeout`` is stored on the
+        request as ``req.timeout``.
+        """
+        # accept a URL or a Request object
+        if isinstance(fullurl, basestring):
+            req = Request(fullurl, data)
+        else:
+            req = fullurl
+            if data is not None:
+                req.add_data(data)
+
+        req.timeout = timeout
+        protocol = req.get_type()
+
+        # pre-process request
+        meth_name = protocol+"_request"
+        for processor in self.process_request.get(protocol, []):
+            meth = getattr(processor, meth_name)
+            req = meth(req)
+
+        response = self._open(req, data)
+        
+        # post-process response
+        meth_name = protocol+"_response"
+        for processor in self.process_response.get(protocol, []):
+            meth = getattr(processor, meth_name)
+            response = meth(req, response)
+
+        return response

+ 7 - 1
ndg/httpsclient/get.py

@@ -3,7 +3,13 @@ import httplib
 import logging
 from optparse import OptionParser
 import os
+import sys
 import urllib2
+if sys.version_info < (2, 6, 2):
+    from ndg.httpsclient.urllib2_proxy import HTTPHandler
+else:
+    from urllib2 import HTTPHandler
+    
 import urlparse
 
 from ndg.httpsclient.urllib2_build_opener import build_opener
@@ -69,7 +75,7 @@ def open_url(url, config):
     handlers = [cookie_handler]
 
     if config.debug:
-        http_handler = urllib2.HTTPHandler(debuglevel=debuglevel)
+        http_handler = HTTPHandler(debuglevel=debuglevel)
         https_handler = HTTPSContextHandler(config.ssl_context, 
                                             debuglevel=debuglevel)
         handlers.extend([http_handler, https_handler])

+ 6 - 4
setup.py

@@ -8,14 +8,16 @@ except ImportError:
 setup(
     name='ndg_httpsclient',
     version="0.1.0",
-    description='Provides HTTPS for httplib and urllib2 using PyOpenSSL',
-    author='Richard Wilkinson',
+    description='Provides enhanced HTTPS support for httplib and urllib2 using '
+                'PyOpenSSL',
+    author='Richard Wilkinson and Philip Kershaw',
     long_description=open('README').read(),
     license='BSD - See LICENCE file for details',
     namespace_packages=['ndg'],
     packages=find_packages(),
+    install_requires = ['PyOpenSSL'],
     classifiers = [
-        'Development Status :: 5 - Production/Stable',
+        'Development Status :: 3 - Alpha',
         'Environment :: Console',
         'Environment :: Web Environment',
         'Intended Audience :: End Users/Desktop',
@@ -36,7 +38,7 @@ setup(
     ],
     zip_safe = False,
     entry_points = {
-        'console_scripts': ['ndg_httpclient = myproxy.script:main',
+        'console_scripts': ['ndg_httpclient = ndg.httpsclient.utils:main',
                             ],
         }
 )