# utils.py
"""Utilities using NDG HTTPS Client, including a main module that can be used to
fetch from a URL.
"""
  4. __author__ = "R B Wilkinson"
  5. __date__ = "09/12/11"
  6. __copyright__ = "(C) 2011 Science and Technology Facilities Council"
  7. __license__ = "BSD - see LICENSE file in top-level directory"
  8. __contact__ = "Philip.Kershaw@stfc.ac.uk"
  9. __revision__ = '$Id$'
import cookielib
import httplib
import logging
import os
import traceback
import urllib2
import urlparse
from optparse import OptionParser
from urllib2 import HTTPHandler, HTTPCookieProcessor

from ndg.httpsclient import ssl_context_util
from ndg.httpsclient.https import HTTPSContextHandler
from ndg.httpsclient.urllib2_build_opener import build_opener

log = logging.getLogger(__name__)
  22. class AccumulatingHTTPCookieProcessor(HTTPCookieProcessor):
  23. """Cookie processor that adds new cookies (instead of replacing the existing
  24. ones as HTTPCookieProcessor does)
  25. """
  26. def http_request(self, request):
  27. """Processes cookies for a HTTP request.
  28. @param request: request to process
  29. @type request: urllib2.Request
  30. @return: request
  31. @rtype: urllib2.Request
  32. """
  33. COOKIE_HEADER_NAME = "Cookie"
  34. tmp_request = urllib2.Request(request.get_full_url(), request.data, {},
  35. request.origin_req_host,
  36. request.unverifiable)
  37. self.cookiejar.add_cookie_header(tmp_request)
  38. # Combine existing and new cookies.
  39. new_cookies = tmp_request.get_header(COOKIE_HEADER_NAME)
  40. if new_cookies:
  41. if request.has_header(COOKIE_HEADER_NAME):
  42. # Merge new cookies with existing ones.
  43. old_cookies = request.get_header(COOKIE_HEADER_NAME)
  44. merged_cookies = '; '.join([old_cookies, new_cookies])
  45. request.add_unredirected_header(COOKIE_HEADER_NAME,
  46. merged_cookies)
  47. else:
  48. # No existing cookies so just set new ones.
  49. request.add_unredirected_header(COOKIE_HEADER_NAME, new_cookies)
  50. return request
  51. # Process cookies for HTTPS in the same way.
  52. https_request = http_request
  53. class URLFetchError(Exception):
  54. """Error fetching content from URL"""
  55. def fetch_from_url(url, config, data=None, handlers=None):
  56. """Returns data retrieved from a URL.
  57. @param url: URL to attempt to open
  58. @type url: basestring
  59. @param config: SSL context configuration
  60. @type config: Configuration
  61. @return data retrieved from URL or None
  62. """
  63. return_code, return_message, response = open_url(url, config, data=data,
  64. handlers=handlers)
  65. if return_code and return_code == httplib.OK:
  66. return_data = response.read()
  67. response.close()
  68. return return_data
  69. else:
  70. raise URLFetchError(return_message)
  71. def fetch_from_url_to_file(url, config, output_file, data=None, handlers=None):
  72. """Writes data retrieved from a URL to a file.
  73. @param url: URL to attempt to open
  74. @type url: basestring
  75. @param config: SSL context configuration
  76. @type config: Configuration
  77. @param output_file: output file
  78. @type output_file: basestring
  79. @return: tuple (
  80. returned HTTP status code or 0 if an error occurred
  81. returned message
  82. boolean indicating whether access was successful)
  83. """
  84. return_code, return_message, response = open_url(url, config, data=data,
  85. handlers=handlers)
  86. if return_code == httplib.OK:
  87. return_data = response.read()
  88. response.close()
  89. outfile = open(output_file, "w")
  90. outfile.write(return_data)
  91. outfile.close()
  92. return return_code, return_message, return_code == httplib.OK
  93. def fetch_stream_from_url(url, config, data=None, handlers=None):
  94. """Returns data retrieved from a URL.
  95. @param url: URL to attempt to open
  96. @type url: basestring
  97. @param config: SSL context configuration
  98. @type config: Configuration
  99. @return: data retrieved from URL or None
  100. @rtype: file derived type
  101. """
  102. return_code, return_message, response = open_url(url, config, data=data,
  103. handlers=handlers)
  104. if return_code and return_code == httplib.OK:
  105. return response
  106. else:
  107. raise URLFetchError(return_message)
  108. def open_url(url, config, data=None, handlers=None):
  109. """Attempts to open a connection to a specified URL.
  110. @param url: URL to attempt to open
  111. @param config: SSL context configuration
  112. @type config: Configuration
  113. @param data: HTTP POST data
  114. @type data: str
  115. @return: tuple (
  116. returned HTTP status code or 0 if an error occurred
  117. returned message or error description
  118. response object)
  119. """
  120. debuglevel = 1 if config.debug else 0
  121. # Set up handlers for URL opener.
  122. if config.cookie:
  123. cj = config.cookie
  124. else:
  125. cj = cookielib.CookieJar()
  126. # Use a cookie processor that accumulates cookies when redirects occur so
  127. # that an application can redirect for authentication and retain both any
  128. # cookies for the application and the security system (c.f.,
  129. # urllib2.HTTPCookieProcessor which replaces cookies).
  130. cookie_handler = AccumulatingHTTPCookieProcessor(cj)
  131. if not handlers:
  132. handlers = []
  133. handlers.append(cookie_handler)
  134. if config.debug:
  135. http_handler = HTTPHandler(debuglevel=debuglevel)
  136. https_handler = HTTPSContextHandler(config.ssl_context,
  137. debuglevel=debuglevel)
  138. handlers.extend([http_handler, https_handler])
  139. # Explicitly remove proxy handling if the host is one listed in the value of
  140. # the no_proxy environment variable because urllib2 does use proxy settings
  141. # set via http_proxy and https_proxy, but does not take the no_proxy value
  142. # into account.
  143. if not _should_use_proxy(url, config.no_proxy):
  144. handlers.append(urllib2.ProxyHandler({}))
  145. log.debug("Not using proxy")
  146. elif config.proxies:
  147. handlers.append(urllib2.ProxyHandler(config.proxies))
  148. log.debug("Configuring proxies: %s" % config.proxies)
  149. opener = build_opener(*handlers, ssl_context=config.ssl_context)
  150. # Open the URL and check the response.
  151. return_code = 0
  152. return_message = ''
  153. response = None
  154. try:
  155. response = opener.open(url, data)
  156. return_message = response.msg
  157. return_code = response.code
  158. if log.isEnabledFor(logging.DEBUG):
  159. for index, cookie in enumerate(cj):
  160. log.debug("%s : %s", index, cookie)
  161. except urllib2.HTTPError, exc:
  162. return_code = exc.code
  163. return_message = "Error: %s" % exc.msg
  164. if log.isEnabledFor(logging.DEBUG):
  165. log.debug("%s %s", exc.code, exc.msg)
  166. except Exception, exc:
  167. return_message = "Error: %s" % exc.__str__()
  168. if log.isEnabledFor(logging.DEBUG):
  169. import traceback
  170. log.debug(traceback.format_exc())
  171. return (return_code, return_message, response)
  172. def _should_use_proxy(url, no_proxy=None):
  173. """Determines whether a proxy should be used to open a connection to the
  174. specified URL, based on the value of the no_proxy environment variable.
  175. @param url: URL
  176. @type url: basestring or urllib2.Request
  177. """
  178. if no_proxy is None:
  179. no_proxy_effective = os.environ.get('no_proxy', '')
  180. else:
  181. no_proxy_effective = no_proxy
  182. urlObj = urlparse.urlparse(_url_as_string(url))
  183. for np in [h.strip() for h in no_proxy_effective.split(',')]:
  184. if urlObj.hostname == np:
  185. return False
  186. return True
  187. def _url_as_string(url):
  188. """Returns the URL string from a URL value that is either a string or
  189. urllib2.Request..
  190. @param url: URL
  191. @type url: basestring or urllib2.Request
  192. @return: URL string
  193. @rtype: basestring
  194. """
  195. if isinstance(url, urllib2.Request):
  196. return url.get_full_url()
  197. elif isinstance(url, basestring):
  198. return url
  199. else:
  200. raise TypeError("Expected type %r or %r" %
  201. (basestring, urllib2.Request))
  202. class Configuration(object):
  203. """Connection configuration.
  204. """
  205. def __init__(self, ssl_context, debug=False, proxies=None, no_proxy=None,
  206. cookie=None):
  207. """
  208. @param ssl_context: SSL context to use with this configuration
  209. @type ssl_context: OpenSSL.SSL.Context
  210. @param debug: if True, output debugging information
  211. @type debug: bool
  212. @param proxies: proxies to use for
  213. @type proxies: dict with basestring keys and values
  214. @param no_proxy: hosts for which a proxy should not be used
  215. @type no_proxy: basestring
  216. @param cookie: cookies to set for request
  217. @type cookie: cookielib.CookieJar
  218. """
  219. self.ssl_context = ssl_context
  220. self.debug = debug
  221. self.proxies = proxies
  222. self.no_proxy = no_proxy
  223. self.cookie = cookie
  224. def main():
  225. '''Utility to fetch data using HTTP or HTTPS GET from a specified URL.
  226. '''
  227. parser = OptionParser(usage="%prog [options] url")
  228. parser.add_option("-c", "--certificate", dest="cert_file", metavar="FILE",
  229. default=os.path.expanduser("~/credentials.pem"),
  230. help="Certificate file - defaults to $HOME/credentials.pem")
  231. parser.add_option("-k", "--private-key", dest="key_file", metavar="FILE",
  232. default=None,
  233. help="Private key file - defaults to the certificate file")
  234. parser.add_option("-t", "--ca-certificate-dir", dest="ca_dir",
  235. metavar="PATH",
  236. default=None,
  237. help="Trusted CA certificate file directory")
  238. parser.add_option("-d", "--debug", action="store_true", dest="debug",
  239. default=False,
  240. help="Print debug information.")
  241. parser.add_option("-p", "--post-data-file", dest="data_file",
  242. metavar="FILE", default=None,
  243. help="POST data file")
  244. parser.add_option("-f", "--fetch", dest="output_file", metavar="FILE",
  245. default=None, help="Output file")
  246. parser.add_option("-n", "--no-verify-peer", action="store_true",
  247. dest="no_verify_peer", default=False,
  248. help="Skip verification of peer certificate.")
  249. (options, args) = parser.parse_args()
  250. if len(args) != 1:
  251. parser.error("Incorrect number of arguments")
  252. url = args[0]
  253. if options.debug:
  254. logging.getLogger().setLevel(logging.DEBUG)
  255. if options.key_file and os.path.exists(options.key_file):
  256. key_file = options.key_file
  257. else:
  258. key_file = None
  259. if options.cert_file and os.path.exists(options.cert_file):
  260. cert_file = options.cert_file
  261. else:
  262. cert_file = None
  263. if options.ca_dir and os.path.exists(options.ca_dir):
  264. ca_dir = options.ca_dir
  265. else:
  266. ca_dir = None
  267. verify_peer = not options.no_verify_peer
  268. if options.data_file and os.path.exists(options.data_file):
  269. data_file = open(options.data_file)
  270. data = data_file.read()
  271. data_file.close()
  272. else:
  273. data = None
  274. # If a private key file is not specified, the key is assumed to be stored in
  275. # the certificate file.
  276. ssl_context = ssl_context_util.make_ssl_context(key_file,
  277. cert_file,
  278. None,
  279. ca_dir,
  280. verify_peer,
  281. url)
  282. config = Configuration(ssl_context, options.debug)
  283. if options.output_file:
  284. return_code, return_message = fetch_from_url_to_file(url,
  285. config,
  286. options.output_file,
  287. data)[:2]
  288. raise SystemExit(return_code, return_message)
  289. else:
  290. data = fetch_from_url(url, config)
  291. print(data)
  292. if __name__=='__main__':
  293. logging.basicConfig()
  294. main()