123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329 |
- """Utilities using NDG HTTPS Client, including a main module that can be used to
- fetch from a URL.
- """
- __author__ = "R B Wilkinson"
- __date__ = "09/12/11"
- __copyright__ = "(C) 2011 Science and Technology Facilities Council"
- __license__ = "BSD - see LICENSE file in top-level directory"
- __contact__ = "Philip.Kershaw@stfc.ac.uk"
- __revision__ = '$Id$'
- import cookielib
- import httplib
- import logging
- from optparse import OptionParser
- import os
- import urllib2
- from urllib2 import HTTPHandler, HTTPCookieProcessor
- import urlparse
- from ndg.httpsclient.urllib2_build_opener import build_opener
- from ndg.httpsclient.https import HTTPSContextHandler
- from ndg.httpsclient import ssl_context_util
- log = logging.getLogger(__name__)
class AccumulatingHTTPCookieProcessor(HTTPCookieProcessor):
    """Cookie processor that accumulates cookies: cookies held in the cookie
    jar are added to any already set on the request (instead of replacing the
    existing ones as HTTPCookieProcessor does).
    """

    def http_request(self, request):
        """Adds the cookie jar's cookies to a HTTP request.
        @param request: request to process
        @type request: urllib2.Request
        @return: the same request object, with its Cookie header updated when
        the cookie jar has cookies applicable to it
        @rtype: urllib2.Request
        """
        header = "Cookie"

        # Ask the cookie jar for its Cookie header using a scratch request
        # built from the same URL and data but with no headers of its own.
        scratch = urllib2.Request(request.get_full_url(), request.data, {},
                                  request.origin_req_host,
                                  request.unverifiable)
        self.cookiejar.add_cookie_header(scratch)
        jar_cookies = scratch.get_header(header)

        # Nothing to contribute: leave the request exactly as it was.
        if not jar_cookies:
            return request

        if request.has_header(header):
            # Keep the cookies already on the request and append the jar's.
            cookie_value = '; '.join([request.get_header(header),
                                      jar_cookies])
        else:
            cookie_value = jar_cookies
        request.add_unredirected_header(header, cookie_value)
        return request

    # HTTPS requests get the same cookie treatment as HTTP ones.
    https_request = http_request
class URLFetchError(Exception):
    """Raised when content could not be fetched from a URL."""
    pass
-
def fetch_from_url(url, config, data=None):
    """Returns data retrieved from a URL.
    @param url: URL to attempt to open
    @type url: basestring
    @param config: SSL context configuration
    @type config: Configuration
    @param data: HTTP POST data
    @type data: str
    @return: data retrieved from URL
    @raise URLFetchError: if the status returned by open_url is not 200 (OK);
    the exception message is the message returned by open_url
    """
    return_code, return_message, response = open_url(url, config, data)
    if return_code != httplib.OK:
        # open_url hands back whatever response the opener returned without
        # checking its status, so release it before reporting the failure.
        if response is not None:
            response.close()
        raise URLFetchError(return_message)
    try:
        return response.read()
    finally:
        # Close the response even if reading it fails part way through.
        response.close()
def fetch_from_url_to_file(url, config, output_file, data=None):
    """Writes data retrieved from a URL to a file.
    @param url: URL to attempt to open
    @type url: basestring
    @param config: SSL context configuration
    @type config: Configuration
    @param output_file: path of the file to write the response body to; it
    is only created or overwritten when the status is 200 (OK)
    @type output_file: basestring
    @param data: HTTP POST data
    @type data: str
    @return: tuple (
    returned HTTP status code or 0 if an error occurred
    returned message
    boolean indicating whether access was successful)
    """
    return_code, return_message, response = open_url(url, config, data)
    succeeded = return_code == httplib.OK
    try:
        if succeeded:
            return_data = response.read()
            # Binary mode: the body is written exactly as received, with no
            # newline translation applied by the platform.
            with open(output_file, "wb") as outfile:
                outfile.write(return_data)
    finally:
        # Release the connection whatever the status and even if reading or
        # writing fails.
        if response is not None:
            response.close()
    return return_code, return_message, succeeded
def fetch_stream_from_url(url, config, data=None):
    """Returns the open response for a URL so that the caller can read from
    it.
    @param url: URL to attempt to open
    @type url: basestring
    @param config: SSL context configuration
    @type config: Configuration
    @param data: HTTP POST data
    @type data: str
    @return: response object for the URL; it is returned open, so the caller
    is responsible for closing it
    @rtype: file derived type
    @raise URLFetchError: if the status returned by open_url is not 200 (OK);
    the exception message is the message returned by open_url
    """
    return_code, return_message, response = open_url(url, config, data)
    if return_code == httplib.OK:
        return response
    # A response may have been returned with a non-OK status; it is not
    # passed on to the caller, so close it here.
    if response is not None:
        response.close()
    raise URLFetchError(return_message)
def open_url(url, config, data=None):
    """Attempts to open a connection to a specified URL.

    Errors raised while opening the URL are not propagated: they are reported
    through the returned status code and message instead.
    @param url: URL to attempt to open
    @type url: basestring or urllib2.Request
    @param config: SSL context configuration
    @type config: Configuration
    @param data: HTTP POST data
    @type data: str
    @return: tuple (
    returned HTTP status code or 0 if an error occurred
    returned message or error description
    response object, or None if the URL could not be opened)
    """
    debuglevel = 1 if config.debug else 0

    # Set up handlers for URL opener.
    # Compare against None rather than testing truthiness: a cookie jar that
    # holds no cookies yet has length zero and would otherwise be discarded,
    # so the caller would never see the cookies set by the response.
    if config.cookie is not None:
        cj = config.cookie
    else:
        cj = cookielib.CookieJar()

    # Use a cookie processor that accumulates cookies when redirects occur so
    # that an application can redirect for authentication and retain both any
    # cookies for the application and the security system (c.f.,
    # urllib2.HTTPCookieProcessor which replaces cookies).
    cookie_handler = AccumulatingHTTPCookieProcessor(cj)
    handlers = [cookie_handler]

    # Explicit HTTP/HTTPS handlers are only supplied when debugging, to carry
    # the debug level.
    if config.debug:
        http_handler = HTTPHandler(debuglevel=debuglevel)
        https_handler = HTTPSContextHandler(config.ssl_context,
                                            debuglevel=debuglevel)
        handlers.extend([http_handler, https_handler])

    # Explicitly remove proxy handling if the host is one listed in the value
    # of the no_proxy environment variable because urllib2 does use proxy
    # settings set via http_proxy and https_proxy, but does not take the
    # no_proxy value into account.
    if not _should_use_proxy(url, config.no_proxy):
        handlers.append(urllib2.ProxyHandler({}))
        log.debug("Not using proxy")
    elif config.proxies:
        handlers.append(urllib2.ProxyHandler(config.proxies))
        log.debug("Configuring proxies: %s", config.proxies)

    opener = build_opener(*handlers, ssl_context=config.ssl_context)

    # Open the URL and check the response.
    return_code = 0
    return_message = ''
    response = None
    try:
        response = opener.open(url, data)
        return_message = response.msg
        return_code = response.code
        if log.isEnabledFor(logging.DEBUG):
            for index, cookie in enumerate(cj):
                log.debug("%s : %s", index, cookie)
    except urllib2.HTTPError as exc:
        # HTTP-level failure: report the server's status code and message.
        return_code = exc.code
        return_message = "Error: %s" % exc.msg
        log.debug("%s %s", exc.code, exc.msg)
    except Exception as exc:
        # Any other failure is reported with status code 0 and the error
        # text; the traceback is only formatted when it will be logged.
        return_message = "Error: %s" % (exc,)
        if log.isEnabledFor(logging.DEBUG):
            import traceback
            log.debug(traceback.format_exc())
    return (return_code, return_message, response)
def _should_use_proxy(url, no_proxy):
    """Determines whether a proxy should be used to open a connection to the
    specified URL, based on the value of the no_proxy environment variable.

    A proxy is not used when the URL's host name equals one of the
    comma-separated entries in the no-proxy list. Host names are compared
    case-insensitively and entries are matched whole (no wildcard or domain
    suffix matching is performed).
    @param url: URL
    @type url: basestring or urllib2.Request
    @param no_proxy: comma-separated list of hosts for which a proxy should
    not be used, or None to read the list from the no_proxy environment
    variable
    @type no_proxy: basestring
    @return: False if the URL's host is in the no-proxy list, otherwise True
    @rtype: bool
    """
    if no_proxy is None:
        no_proxy_effective = os.environ.get('no_proxy', '')
    else:
        no_proxy_effective = no_proxy

    hostname = urlparse.urlparse(_url_as_string(url)).hostname
    if not hostname:
        # No host to compare against the list.
        return True

    # Lower-case both sides so that an entry such as "Example.COM" still
    # matches; blank entries (e.g. from an empty list) can never equal the
    # non-empty host name.
    hostname = hostname.lower()
    return not any(entry.strip().lower() == hostname
                   for entry in no_proxy_effective.split(','))
def _url_as_string(url):
    """Gives the URL as a string, whether it was supplied as a string or
    wrapped in a urllib2.Request.
    @param url: URL
    @type url: basestring or urllib2.Request
    @return: URL string
    @rtype: basestring
    @raise TypeError: if url is neither a string nor a urllib2.Request
    """
    # Strings are already in the required form.
    if isinstance(url, basestring):
        return url
    # Requests carry the URL they were built with.
    if isinstance(url, urllib2.Request):
        return url.get_full_url()
    expected_types = (basestring, urllib2.Request)
    raise TypeError("Expected type %r or %r" % expected_types)
class Configuration(object):
    """Holds the settings used when opening a connection: SSL context, debug
    flag, proxy settings and cookie jar.
    """

    def __init__(self, ssl_context, debug=False, proxies=None, no_proxy=None,
                 cookie=None):
        """Stores the supplied settings unchanged as attributes of the same
        names.
        @param ssl_context: SSL context to use with this configuration
        @type ssl_context: OpenSSL.SSL.Context
        @param debug: if True, output debugging information
        @type debug: bool
        @param proxies: proxies to use for connections
        @type proxies: dict with basestring keys and values
        @param no_proxy: hosts for which a proxy should not be used
        @type no_proxy: basestring
        @param cookie: cookies to set for request
        @type cookie: cookielib.CookieJar
        """
        # Connection security and diagnostics.
        self.debug = debug
        self.ssl_context = ssl_context

        # Proxy selection.
        self.no_proxy = no_proxy
        self.proxies = proxies

        # Cookie storage.
        self.cookie = cookie
def main():
    """Utility to fetch data using HTTP or HTTPS from a specified URL.

    The URL is taken from the single positional command-line argument. The
    response body is printed, or written to a file when --fetch is given.
    If --post-data-file names an existing file, its content is sent as POST
    data. Certificate, key, CA directory and POST data paths that do not
    exist are ignored.
    @raise SystemExit: with the status code and message as the exit message
    if fetching to a file was not successful
    @raise URLFetchError: if fetching for printing was not successful
    """
    parser = OptionParser(usage="%prog [options] url")
    parser.add_option("-c", "--certificate", dest="cert_file", metavar="FILE",
                      default=os.path.expanduser("~/credentials.pem"),
                      help="Certificate file - defaults to $HOME/credentials.pem")
    parser.add_option("-k", "--private-key", dest="key_file", metavar="FILE",
                      default=None,
                      help="Private key file - defaults to the certificate file")
    parser.add_option("-t", "--ca-certificate-dir", dest="ca_dir",
                      metavar="PATH",
                      default=None,
                      help="Trusted CA certificate file directory")
    parser.add_option("-d", "--debug", action="store_true", dest="debug",
                      default=False,
                      help="Print debug information.")
    parser.add_option("-p", "--post-data-file", dest="data_file",
                      metavar="FILE", default=None,
                      help="POST data file")
    parser.add_option("-f", "--fetch", dest="output_file", metavar="FILE",
                      default=None, help="Output file")
    parser.add_option("-n", "--no-verify-peer", action="store_true",
                      dest="no_verify_peer", default=False,
                      help="Skip verification of peer certificate.")
    (options, args) = parser.parse_args()
    if len(args) != 1:
        parser.error("Incorrect number of arguments")

    url = args[0]

    if options.debug:
        logging.getLogger().setLevel(logging.DEBUG)

    def existing_path(path):
        # A path option only takes effect when it names something that exists.
        if path and os.path.exists(path):
            return path
        return None

    key_file = existing_path(options.key_file)
    cert_file = existing_path(options.cert_file)
    ca_dir = existing_path(options.ca_dir)

    verify_peer = not options.no_verify_peer

    data = None
    data_path = existing_path(options.data_file)
    if data_path:
        with open(data_path) as data_file:
            data = data_file.read()

    # If a private key file is not specified, the key is assumed to be stored
    # in the certificate file.
    ssl_context = ssl_context_util.make_ssl_context(key_file,
                                                    cert_file,
                                                    None,
                                                    ca_dir,
                                                    verify_peer,
                                                    url)

    config = Configuration(ssl_context, options.debug)
    if options.output_file:
        return_code, return_message, succeeded = fetch_from_url_to_file(
            url, config, options.output_file, data)
        # Only signal an error exit when the fetch failed: passing the code
        # and message to SystemExit as two arguments made every run, including
        # successful ones, finish with a non-zero exit status.
        if not succeeded:
            raise SystemExit("%s %s" % (return_code, return_message))
    else:
        # Pass the POST data on here too, so --post-data-file is honoured
        # whether or not an output file is requested.
        print(fetch_from_url(url, config, data))
# Script entry point: set up default logging output, then run the
# command-line utility.
if __name__ == '__main__':
    logging.basicConfig()
    main()
|