# utils.py
"""Utilities using NDG HTTPS Client, including a main module that can be used to
fetch from a URL.

The helpers in this module open a URL using a configurable SSL context, cookie
jar, proxy settings, HTTP basic authentication credentials and request headers,
and hand back the response as a string, as a file on disk or as an open stream.
"""
# Module metadata.
__author__ = "R B Wilkinson"
__date__ = "09/12/11"
__copyright__ = "(C) 2011 Science and Technology Facilities Council"
__license__ = "BSD - see LICENSE file in top-level directory"
__contact__ = "Philip.Kershaw@stfc.ac.uk"
__revision__ = '$Id$'
  10. import cookielib
  11. import httplib
  12. import logging
  13. from optparse import OptionParser
  14. import os
  15. import urllib2
  16. from urllib2 import (HTTPHandler, HTTPCookieProcessor,
  17. HTTPBasicAuthHandler, HTTPPasswordMgrWithDefaultRealm)
  18. import urlparse
  19. from ndg.httpsclient.urllib2_build_opener import build_opener
  20. from ndg.httpsclient.https import HTTPSContextHandler
  21. from ndg.httpsclient import ssl_context_util
# Module-level logger, named after this module; used for the debug output
# emitted while opening URLs.
log = logging.getLogger(__name__)
  23. class AccumulatingHTTPCookieProcessor(HTTPCookieProcessor):
  24. """Cookie processor that adds new cookies (instead of replacing the existing
  25. ones as HTTPCookieProcessor does)
  26. """
  27. def http_request(self, request):
  28. """Processes cookies for a HTTP request.
  29. @param request: request to process
  30. @type request: urllib2.Request
  31. @return: request
  32. @rtype: urllib2.Request
  33. """
  34. COOKIE_HEADER_NAME = "Cookie"
  35. tmp_request = urllib2.Request(request.get_full_url(), request.data, {},
  36. request.origin_req_host,
  37. request.unverifiable)
  38. self.cookiejar.add_cookie_header(tmp_request)
  39. # Combine existing and new cookies.
  40. new_cookies = tmp_request.get_header(COOKIE_HEADER_NAME)
  41. if new_cookies:
  42. if request.has_header(COOKIE_HEADER_NAME):
  43. # Merge new cookies with existing ones.
  44. old_cookies = request.get_header(COOKIE_HEADER_NAME)
  45. merged_cookies = '; '.join([old_cookies, new_cookies])
  46. request.add_unredirected_header(COOKIE_HEADER_NAME,
  47. merged_cookies)
  48. else:
  49. # No existing cookies so just set new ones.
  50. request.add_unredirected_header(COOKIE_HEADER_NAME, new_cookies)
  51. return request
  52. # Process cookies for HTTPS in the same way.
  53. https_request = http_request
  54. class URLFetchError(Exception):
  55. """Error fetching content from URL"""
  56. def fetch_from_url(url, config, data=None, handlers=None):
  57. """Returns data retrieved from a URL.
  58. @param url: URL to attempt to open
  59. @type url: basestring
  60. @param config: SSL context configuration
  61. @type config: Configuration
  62. @return data retrieved from URL or None
  63. """
  64. return_code, return_message, response = open_url(url, config, data=data,
  65. handlers=handlers)
  66. if return_code and return_code == httplib.OK:
  67. return_data = response.read()
  68. response.close()
  69. return return_data
  70. else:
  71. raise URLFetchError(return_message)
  72. def fetch_from_url_to_file(url, config, output_file, data=None, handlers=None):
  73. """Writes data retrieved from a URL to a file.
  74. @param url: URL to attempt to open
  75. @type url: basestring
  76. @param config: SSL context configuration
  77. @type config: Configuration
  78. @param output_file: output file
  79. @type output_file: basestring
  80. @return: tuple (
  81. returned HTTP status code or 0 if an error occurred
  82. returned message
  83. boolean indicating whether access was successful)
  84. """
  85. return_code, return_message, response = open_url(url, config, data=data,
  86. handlers=handlers)
  87. if return_code == httplib.OK:
  88. return_data = response.read()
  89. response.close()
  90. outfile = open(output_file, "w")
  91. outfile.write(return_data)
  92. outfile.close()
  93. return return_code, return_message, return_code == httplib.OK
  94. def fetch_stream_from_url(url, config, data=None, handlers=None):
  95. """Returns data retrieved from a URL.
  96. @param url: URL to attempt to open
  97. @type url: basestring
  98. @param config: SSL context configuration
  99. @type config: Configuration
  100. @param data: HTTP POST data
  101. @type data: str
  102. @param handlers: list of custom urllib2 handlers to add to the request
  103. @type handlers: iterable
  104. @return: data retrieved from URL or None
  105. @rtype: file derived type
  106. """
  107. return_code, return_message, response = open_url(url, config, data=data,
  108. handlers=handlers)
  109. if return_code and return_code == httplib.OK:
  110. return response
  111. else:
  112. raise URLFetchError(return_message)
  113. def open_url(url, config, data=None, handlers=None):
  114. """Attempts to open a connection to a specified URL.
  115. @param url: URL to attempt to open
  116. @param config: SSL context configuration
  117. @type config: Configuration
  118. @param data: HTTP POST data
  119. @type data: str
  120. @param handlers: list of custom urllib2 handlers to add to the request
  121. @type handlers: iterable
  122. @return: tuple (
  123. returned HTTP status code or 0 if an error occurred
  124. returned message or error description
  125. response object)
  126. """
  127. debuglevel = 1 if config.debug else 0
  128. # Set up handlers for URL opener.
  129. if config.cookie:
  130. cj = config.cookie
  131. else:
  132. cj = cookielib.CookieJar()
  133. # Use a cookie processor that accumulates cookies when redirects occur so
  134. # that an application can redirect for authentication and retain both any
  135. # cookies for the application and the security system (c.f.,
  136. # urllib2.HTTPCookieProcessor which replaces cookies).
  137. cookie_handler = AccumulatingHTTPCookieProcessor(cj)
  138. if not handlers:
  139. handlers = []
  140. handlers.append(cookie_handler)
  141. if config.debug:
  142. http_handler = HTTPHandler(debuglevel=debuglevel)
  143. https_handler = HTTPSContextHandler(config.ssl_context,
  144. debuglevel=debuglevel)
  145. handlers.extend([http_handler, https_handler])
  146. if config.http_basicauth:
  147. # currently only supports http basic auth
  148. auth_handler = HTTPBasicAuthHandler(HTTPPasswordMgrWithDefaultRealm())
  149. auth_handler.add_password(realm=None, uri=url,
  150. user=config.httpauth[0],
  151. passwd=config.httpauth[1])
  152. handlers.append(auth_handler)
  153. # Explicitly remove proxy handling if the host is one listed in the value of
  154. # the no_proxy environment variable because urllib2 does use proxy settings
  155. # set via http_proxy and https_proxy, but does not take the no_proxy value
  156. # into account.
  157. if not _should_use_proxy(url, config.no_proxy):
  158. handlers.append(urllib2.ProxyHandler({}))
  159. log.debug("Not using proxy")
  160. elif config.proxies:
  161. handlers.append(urllib2.ProxyHandler(config.proxies))
  162. log.debug("Configuring proxies: %s" % config.proxies)
  163. opener = build_opener(*handlers, ssl_context=config.ssl_context)
  164. headers = config.headers
  165. if headers is None:
  166. headers = {}
  167. request = urllib2.Request(url, data, headers)
  168. # Open the URL and check the response.
  169. return_code = 0
  170. return_message = ''
  171. response = None
  172. try:
  173. response = opener.open(request)
  174. return_message = response.msg
  175. return_code = response.code
  176. if log.isEnabledFor(logging.DEBUG):
  177. for index, cookie in enumerate(cj):
  178. log.debug("%s : %s", index, cookie)
  179. except urllib2.HTTPError, exc:
  180. return_code = exc.code
  181. return_message = "Error: %s" % exc.msg
  182. if log.isEnabledFor(logging.DEBUG):
  183. log.debug("%s %s", exc.code, exc.msg)
  184. except Exception, exc:
  185. return_message = "Error: %s" % exc.__str__()
  186. if log.isEnabledFor(logging.DEBUG):
  187. import traceback
  188. log.debug(traceback.format_exc())
  189. return (return_code, return_message, response)
  190. def _should_use_proxy(url, no_proxy=None):
  191. """Determines whether a proxy should be used to open a connection to the
  192. specified URL, based on the value of the no_proxy environment variable.
  193. @param url: URL
  194. @type url: basestring or urllib2.Request
  195. """
  196. if no_proxy is None:
  197. no_proxy_effective = os.environ.get('no_proxy', '')
  198. else:
  199. no_proxy_effective = no_proxy
  200. urlObj = urlparse.urlparse(_url_as_string(url))
  201. for np in [h.strip() for h in no_proxy_effective.split(',')]:
  202. if urlObj.hostname == np:
  203. return False
  204. return True
  205. def _url_as_string(url):
  206. """Returns the URL string from a URL value that is either a string or
  207. urllib2.Request..
  208. @param url: URL
  209. @type url: basestring or urllib2.Request
  210. @return: URL string
  211. @rtype: basestring
  212. """
  213. if isinstance(url, urllib2.Request):
  214. return url.get_full_url()
  215. elif isinstance(url, basestring):
  216. return url
  217. else:
  218. raise TypeError("Expected type %r or %r" %
  219. (basestring, urllib2.Request))
  220. class Configuration(object):
  221. """Connection configuration.
  222. """
  223. def __init__(self, ssl_context, debug=False, proxies=None, no_proxy=None,
  224. cookie=None, http_basicauth=None, headers=None):
  225. """
  226. @param ssl_context: SSL context to use with this configuration
  227. @type ssl_context: OpenSSL.SSL.Context
  228. @param debug: if True, output debugging information
  229. @type debug: bool
  230. @param proxies: proxies to use for
  231. @type proxies: dict with basestring keys and values
  232. @param no_proxy: hosts for which a proxy should not be used
  233. @type no_proxy: basestring
  234. @param cookie: cookies to set for request
  235. @type cookie: cookielib.CookieJar
  236. @param http_basicauth: http authentication, or None
  237. @type http_basicauth: tuple of (username,password)
  238. @param headers: http headers
  239. @type headers: dict
  240. """
  241. self.ssl_context = ssl_context
  242. self.debug = debug
  243. self.proxies = proxies
  244. self.no_proxy = no_proxy
  245. self.cookie = cookie
  246. self.http_basicauth = http_basicauth
  247. self.headers = headers
  248. def main():
  249. '''Utility to fetch data using HTTP or HTTPS GET from a specified URL.
  250. '''
  251. parser = OptionParser(usage="%prog [options] url")
  252. parser.add_option("-c", "--certificate", dest="cert_file", metavar="FILE",
  253. default=os.path.expanduser("~/credentials.pem"),
  254. help="Certificate file - defaults to $HOME/credentials.pem")
  255. parser.add_option("-k", "--private-key", dest="key_file", metavar="FILE",
  256. default=None,
  257. help="Private key file - defaults to the certificate file")
  258. parser.add_option("-t", "--ca-certificate-dir", dest="ca_dir",
  259. metavar="PATH",
  260. default=None,
  261. help="Trusted CA certificate file directory")
  262. parser.add_option("-d", "--debug", action="store_true", dest="debug",
  263. default=False,
  264. help="Print debug information.")
  265. parser.add_option("-p", "--post-data-file", dest="data_file",
  266. metavar="FILE", default=None,
  267. help="POST data file")
  268. parser.add_option("-f", "--fetch", dest="output_file", metavar="FILE",
  269. default=None, help="Output file")
  270. parser.add_option("-n", "--no-verify-peer", action="store_true",
  271. dest="no_verify_peer", default=False,
  272. help="Skip verification of peer certificate.")
  273. parser.add_option("-a", "--basicauth", dest="auth", metavar="USER:PASSWD",
  274. default=None,
  275. help="HTTP authentication credentials")
  276. parser.add_option("--header", action="append", dest="headers",
  277. metavar="HEADER: VALUE",
  278. help="Add HTTP header to request")
  279. (options, args) = parser.parse_args()
  280. if len(args) != 1:
  281. parser.error("Incorrect number of arguments")
  282. url = args[0]
  283. if options.debug:
  284. logging.getLogger().setLevel(logging.DEBUG)
  285. if options.key_file and os.path.exists(options.key_file):
  286. key_file = options.key_file
  287. else:
  288. key_file = None
  289. if options.cert_file and os.path.exists(options.cert_file):
  290. cert_file = options.cert_file
  291. else:
  292. cert_file = None
  293. if options.ca_dir and os.path.exists(options.ca_dir):
  294. ca_dir = options.ca_dir
  295. else:
  296. ca_dir = None
  297. verify_peer = not options.no_verify_peer
  298. if options.data_file and os.path.exists(options.data_file):
  299. data_file = open(options.data_file)
  300. data = data_file.read()
  301. data_file.close()
  302. else:
  303. data = None
  304. if options.basicauth:
  305. http_basicauth = options.auth.split(':', 1)
  306. else:
  307. http_basicauth = None
  308. headers = {}
  309. if options.headers:
  310. for h in options.headers:
  311. key, val = h.split(':', 1)
  312. headers[key.strip()] = val.lstrip()
  313. # If a private key file is not specified, the key is assumed to be stored in
  314. # the certificate file.
  315. ssl_context = ssl_context_util.make_ssl_context(key_file,
  316. cert_file,
  317. None,
  318. ca_dir,
  319. verify_peer,
  320. url)
  321. config = Configuration(ssl_context,
  322. options.debug,
  323. http_basicauth=http_basicauth,
  324. headers=headers)
  325. if options.output_file:
  326. return_code, return_message = fetch_from_url_to_file(
  327. url,
  328. config,
  329. options.output_file,
  330. data)[:2]
  331. raise SystemExit(return_code, return_message)
  332. else:
  333. data = fetch_from_url(url, config)
  334. print(data)
# Script entry point: configure default logging output, then run the
# command-line fetch utility.
if __name__=='__main__':
    logging.basicConfig()
    main()