utils.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298
  1. """Utilities using NDG HTTPS Client, including a main module that can be used to
  2. fetch from a URL.
  3. """
  4. __author__ = "R B Wilkinson"
  5. __date__ = "09/12/11"
  6. __copyright__ = "(C) 2011 Science and Technology Facilities Council"
  7. __license__ = "BSD - see LICENSE file in top-level directory"
  8. __contact__ = "Philip.Kershaw@stfc.ac.uk"
  9. __revision__ = '$Id$'
  10. import cookielib
  11. import httplib
  12. import logging
  13. from optparse import OptionParser
  14. import os
  15. import urllib2
  16. from urllib2 import HTTPHandler, HTTPCookieProcessor
  17. import urlparse
  18. from ndg.httpsclient.urllib2_build_opener import build_opener
  19. from ndg.httpsclient.https import HTTPSContextHandler
  20. from ndg.httpsclient import ssl_context_util
  21. log = logging.getLogger(__name__)
  22. class AccumulatingHTTPCookieProcessor(HTTPCookieProcessor):
  23. """Cookie processor that adds new cookies (instead of replacing the existing
  24. ones as HTTPCookieProcessor does)
  25. """
  26. def http_request(self, request):
  27. """Processes cookies for a HTTP request.
  28. @param request: request to process
  29. @type request: urllib2.Request
  30. @return: request
  31. @rtype: urllib2.Request
  32. """
  33. COOKIE_HEADER_NAME = "Cookie"
  34. tmp_request = urllib2.Request(request.get_full_url(), request.data, {},
  35. request.origin_req_host,
  36. request.unverifiable)
  37. self.cookiejar.add_cookie_header(tmp_request)
  38. # Combine existing and new cookies.
  39. new_cookies = tmp_request.get_header(COOKIE_HEADER_NAME)
  40. if new_cookies:
  41. if request.has_header(COOKIE_HEADER_NAME):
  42. # Merge new cookies with existing ones.
  43. old_cookies = request.get_header(COOKIE_HEADER_NAME)
  44. merged_cookies = '; '.join([old_cookies, new_cookies])
  45. request.add_unredirected_header(COOKIE_HEADER_NAME,
  46. merged_cookies)
  47. else:
  48. # No existing cookies so just set new ones.
  49. request.add_unredirected_header(COOKIE_HEADER_NAME, new_cookies)
  50. return request
  51. # Process cookies for HTTPS in the same way.
  52. https_request = http_request
  53. class URLFetchError(Exception):
  54. """Error fetching content from URL"""
  55. def fetch_from_url(url, config):
  56. """Returns data retrieved from a URL.
  57. @param url: URL to attempt to open
  58. @param config: SSL context configuration
  59. @type config: Configuration
  60. @return data retrieved from URL or None
  61. """
  62. return_code, return_message, response = open_url(url, config)
  63. if return_code and return_code == httplib.OK:
  64. return_data = response.read()
  65. response.close()
  66. return return_data
  67. else:
  68. raise URLFetchError(return_message)
  69. def fetch_from_url_to_file(url, config, output_file):
  70. """Writes data retrieved from a URL to a file.
  71. @param url: URL to attempt to open
  72. @param config: SSL context configuration
  73. @type config: Configuration
  74. @param output_file: output file
  75. @return: tuple (
  76. returned HTTP status code or 0 if an error occurred
  77. returned message
  78. boolean indicating whether access was successful)
  79. """
  80. return_code, return_message, response = open_url(url, config)
  81. if return_code == httplib.OK:
  82. return_data = response.read()
  83. response.close()
  84. outfile = open(output_file, "w")
  85. outfile.write(return_data)
  86. outfile.close()
  87. return return_code, return_message, return_code == httplib.OK
  88. def open_url(url, config):
  89. """Attempts to open a connection to a specified URL.
  90. @param url: URL to attempt to open
  91. @param config: SSL context configuration
  92. @type config: Configuration
  93. @return: tuple (
  94. returned HTTP status code or 0 if an error occurred
  95. returned message or error description
  96. response object)
  97. """
  98. debuglevel = 1 if config.debug else 0
  99. # Set up handlers for URL opener.
  100. if config.cookie:
  101. cj = config.cookie
  102. else:
  103. cj = cookielib.CookieJar()
  104. # Use a cookie processor that accumulates cookies when redirects occur so
  105. # that an application can redirect for authentication and retain both any
  106. # cookies for the application and the security system (c.f.,
  107. # urllib2.HTTPCookieProcessor which replaces cookies).
  108. cookie_handler = AccumulatingHTTPCookieProcessor(cj)
  109. handlers = [cookie_handler]
  110. if config.debug:
  111. http_handler = HTTPHandler(debuglevel=debuglevel)
  112. https_handler = HTTPSContextHandler(config.ssl_context,
  113. debuglevel=debuglevel)
  114. handlers.extend([http_handler, https_handler])
  115. # Explicitly remove proxy handling if the host is one listed in the value of
  116. # the no_proxy environment variable because urllib2 does use proxy settings
  117. # set via http_proxy and https_proxy, but does not take the no_proxy value
  118. # into account.
  119. if not _should_use_proxy(url, config.no_proxy):
  120. handlers.append(urllib2.ProxyHandler({}))
  121. log.debug("Not using proxy")
  122. elif config.proxies:
  123. handlers.append(urllib2.ProxyHandler(config.proxies))
  124. log.debug("Configuring proxies: %s" % config.proxies)
  125. opener = build_opener(config.ssl_context, *handlers)
  126. # Open the URL and check the response.
  127. return_code = 0
  128. return_message = ''
  129. response = None
  130. try:
  131. response = opener.open(url)
  132. return_message = response.msg
  133. return_code = response.code
  134. if log.isEnabledFor(logging.DEBUG):
  135. for index, cookie in enumerate(cj):
  136. log.debug("%s : %s", index, cookie)
  137. except urllib2.HTTPError, exc:
  138. return_code = exc.code
  139. return_message = "Error: %s" % exc.msg
  140. if log.isEnabledFor(logging.DEBUG):
  141. log.debug("%s %s", exc.code, exc.msg)
  142. except Exception, exc:
  143. return_message = "Error: %s" % exc.__str__()
  144. if log.isEnabledFor(logging.DEBUG):
  145. import traceback
  146. log.debug(traceback.format_exc())
  147. return (return_code, return_message, response)
  148. def _should_use_proxy(url, no_proxy):
  149. """Determines whether a proxy should be used to open a connection to the
  150. specified URL, based on the value of the no_proxy environment variable.
  151. @param url: URL
  152. @type url: basestring or urllib2.Request
  153. """
  154. if no_proxy is None:
  155. no_proxy_effective = os.environ.get('no_proxy', '')
  156. else:
  157. no_proxy_effective = no_proxy
  158. urlObj = urlparse.urlparse(_url_as_string(url))
  159. for np in [h.strip() for h in no_proxy_effective.split(',')]:
  160. if urlObj.hostname == np:
  161. return False
  162. return True
  163. def _url_as_string(url):
  164. """Returns the URL string from a URL value that is either a string or
  165. urllib2.Request..
  166. @param url: URL
  167. @type url: basestring or urllib2.Request
  168. @return: URL string
  169. @rtype: basestring
  170. """
  171. if isinstance(url, urllib2.Request):
  172. return url.get_full_url()
  173. elif isinstance(url, basestring):
  174. return url
  175. else:
  176. raise TypeError("Expected type %r or %r" %
  177. (basestring, urllib2.Request))
  178. class Configuration(object):
  179. """Checker configuration.
  180. """
  181. def __init__(self, ssl_context, debug, proxies=None, no_proxy=None,
  182. cookie=None):
  183. """
  184. @param ssl_context: SSL context to use with this configuration
  185. @type ssl_context: OpenSSL.SSL.Context
  186. @param debug: if True, output debugging information
  187. @type debug: bool
  188. @param proxies: proxies to use for
  189. @type proxies: dict with basestring keys and values
  190. @param no_proxy: hosts for which a proxy should not be used
  191. @type no_proxy: basestring
  192. @param cookie: cookies to set for request
  193. @type cookie: cookielib.CookieJar
  194. """
  195. self.ssl_context = ssl_context
  196. self.debug = debug
  197. self.proxies = proxies
  198. self.no_proxy = no_proxy
  199. self.cookie = cookie
  200. def main():
  201. '''Utility to fetch data using HTTP or HTTPS GET from a specified URL.
  202. '''
  203. parser = OptionParser(usage="%prog [options] url")
  204. parser.add_option("-k", "--private-key", dest="key_file", metavar="FILE",
  205. default=None,
  206. help="Private key file.")
  207. parser.add_option("-c", "--certificate", dest="cert_file", metavar="FILE",
  208. default=os.path.expanduser("~/credentials.pem"),
  209. help="Certificate file.")
  210. parser.add_option("-t", "--ca-certificate-dir", dest="ca_dir",
  211. metavar="PATH",
  212. default=None,
  213. help="Trusted CA certificate file directory.")
  214. parser.add_option("-d", "--debug", action="store_true", dest="debug",
  215. default=False,
  216. help="Print debug information.")
  217. parser.add_option("-f", "--fetch", dest="output_file", metavar="FILE",
  218. default=None, help="Output file.")
  219. parser.add_option("-n", "--no-verify-peer", action="store_true",
  220. dest="no_verify_peer", default=False,
  221. help="Skip verification of peer certificate.")
  222. (options, args) = parser.parse_args()
  223. if len(args) != 1:
  224. parser.error("Incorrect number of arguments")
  225. url = args[0]
  226. if options.debug:
  227. logging.getLogger().setLevel(logging.DEBUG)
  228. if options.key_file and os.path.exists(options.key_file):
  229. key_file = options.key_file
  230. else:
  231. key_file = None
  232. if options.cert_file and os.path.exists(options.cert_file):
  233. cert_file = options.cert_file
  234. else:
  235. cert_file = None
  236. if options.ca_dir and os.path.exists(options.ca_dir):
  237. ca_dir = options.ca_dir
  238. else:
  239. ca_dir = None
  240. verify_peer = not options.no_verify_peer
  241. # If a private key file is not specified, the key is assumed to be stored in
  242. # the certificate file.
  243. ssl_context = ssl_context_util.make_ssl_context(key_file,
  244. cert_file,
  245. None,
  246. ca_dir,
  247. verify_peer,
  248. url)
  249. config = Configuration(ssl_context, options.debug)
  250. if options.output_file:
  251. return_code, return_message = fetch_from_url_to_file(url,
  252. config,
  253. options.output_file)[:2]
  254. raise SystemExit(return_code, return_message)
  255. else:
  256. data = fetch_from_url(url, config)
  257. print(data)
# Script entry point: install the default logging configuration, then run the
# command line utility. Nothing runs when the module is merely imported.
if __name__=='__main__':
    logging.basicConfig()
    main()