utils.py

  1. """Utilities using NDG HTTPS Client, including a main module that can be used to
  2. fetch from a URL.
  3. """
  4. __author__ = "R B Wilkinson"
  5. __date__ = "09/12/11"
  6. __copyright__ = "(C) 2011 Science and Technology Facilities Council"
  7. __license__ = "BSD - see LICENSE file in top-level directory"
  8. __contact__ = "Philip.Kershaw@stfc.ac.uk"
  9. __revision__ = '$Id$'
  10. import cookielib
  11. import httplib
  12. import logging
  13. from optparse import OptionParser
  14. import os
  15. import urllib2
  16. from urllib2 import HTTPHandler, HTTPCookieProcessor
  17. import urlparse
  18. from ndg.httpsclient.urllib2_build_opener import build_opener
  19. from ndg.httpsclient.https import HTTPSContextHandler
  20. from ndg.httpsclient import ssl_context_util
  21. log = logging.getLogger(__name__)

class AccumulatingHTTPCookieProcessor(HTTPCookieProcessor):
    """Cookie processor that adds new cookies (instead of replacing the
    existing ones as HTTPCookieProcessor does).
    """
    def http_request(self, request):
        """Processes cookies for a HTTP request.
        @param request: request to process
        @type request: urllib2.Request
        @return: request
        @rtype: urllib2.Request
        """
        COOKIE_HEADER_NAME = "Cookie"
        tmp_request = urllib2.Request(request.get_full_url(), request.data, {},
                                      request.origin_req_host,
                                      request.unverifiable)
        self.cookiejar.add_cookie_header(tmp_request)
        # Combine existing and new cookies.
        new_cookies = tmp_request.get_header(COOKIE_HEADER_NAME)
        if new_cookies:
            if request.has_header(COOKIE_HEADER_NAME):
                # Merge new cookies with existing ones.
                old_cookies = request.get_header(COOKIE_HEADER_NAME)
                merged_cookies = '; '.join([old_cookies, new_cookies])
                request.add_unredirected_header(COOKIE_HEADER_NAME,
                                                merged_cookies)
            else:
                # No existing cookies so just set the new ones.
                request.add_unredirected_header(COOKIE_HEADER_NAME,
                                                new_cookies)
        return request

    # Process cookies for HTTPS requests in the same way.
    https_request = http_request
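
# Illustrative usage sketch (not part of the original module): the processor is
# installed in a urllib2 opener like any other handler.  The URL below is an
# assumption for the example; open_url() further down wires this up together
# with the HTTPS context handler, so most callers never need to do it by hand.
#
#     import cookielib
#     import urllib2
#
#     jar = cookielib.CookieJar()
#     opener = urllib2.build_opener(AccumulatingHTTPCookieProcessor(jar))
#     # Cookies set by each hop of a redirect chain are merged into the Cookie
#     # header rather than replacing one another.
#     response = opener.open("https://host.example.org/login")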

class URLFetchError(Exception):
    """Error fetching content from URL"""


def fetch_from_url(url, config, data=None):
    """Returns data retrieved from a URL.
    @param url: URL to attempt to open
    @type url: basestring
    @param config: SSL context configuration
    @type config: Configuration
    @return: data retrieved from URL
    @raise URLFetchError: if the URL could not be fetched successfully
    """
    return_code, return_message, response = open_url(url, config, data)
    if return_code and return_code == httplib.OK:
        return_data = response.read()
        response.close()
        return return_data
    else:
        raise URLFetchError(return_message)
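
# Illustrative usage sketch (not part of the original module): a typical call
# mirrors what main() does below - build an SSL context, wrap it in a
# Configuration and fetch.  The key, certificate and CA paths and the URL are
# assumptions for the example; make_ssl_context is called with the same
# positional arguments as in main().
#
#     from ndg.httpsclient import ssl_context_util
#
#     ssl_ctx = ssl_context_util.make_ssl_context("/path/to/key.pem",
#                                                 "/path/to/cert.pem",
#                                                 None,
#                                                 "/etc/grid-security/certificates",
#                                                 True,
#                                                 "https://host.example.org/data")
#     config = Configuration(ssl_ctx)
#     try:
#         content = fetch_from_url("https://host.example.org/data", config)
#     except URLFetchError, exc:
#         print(exc)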

def fetch_from_url_to_file(url, config, output_file, data=None):
    """Writes data retrieved from a URL to a file.
    @param url: URL to attempt to open
    @type url: basestring
    @param config: SSL context configuration
    @type config: Configuration
    @param output_file: output file
    @type output_file: basestring
    @return: tuple (
        returned HTTP status code or 0 if an error occurred
        returned message
        boolean indicating whether access was successful)
    """
    return_code, return_message, response = open_url(url, config, data)
    if return_code == httplib.OK:
        return_data = response.read()
        response.close()
        outfile = open(output_file, "w")
        outfile.write(return_data)
        outfile.close()
    return return_code, return_message, return_code == httplib.OK
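
# Illustrative usage sketch (not part of the original module): save a resource
# straight to disk and check the outcome from the returned tuple.  The URL and
# file paths are assumptions for the example.
#
#     code, message, ok = fetch_from_url_to_file(
#         "https://host.example.org/data.nc", config, "/tmp/data.nc")
#     if not ok:
#         log.error("Download failed (%s): %s", code, message)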

def fetch_stream_from_url(url, config, data=None):
    """Returns a stream of data retrieved from a URL.
    @param url: URL to attempt to open
    @type url: basestring
    @param config: SSL context configuration
    @type config: Configuration
    @return: response object from which data can be read
    @rtype: file derived type
    @raise URLFetchError: if the URL could not be fetched successfully
    """
    return_code, return_message, response = open_url(url, config, data)
    if return_code and return_code == httplib.OK:
        return response
    else:
        raise URLFetchError(return_message)
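
# Illustrative usage sketch (not part of the original module): the returned
# response is a file-like object, so a large download can be consumed in
# chunks instead of being read into memory in one go as fetch_from_url() does.
#
#     response = fetch_stream_from_url(url, config)
#     out = open("output.dat", "wb")
#     while True:
#         chunk = response.read(16384)
#         if not chunk:
#             break
#         out.write(chunk)
#     out.close()
#     response.close()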

def open_url(url, config, data=None):
    """Attempts to open a connection to a specified URL.
    @param url: URL to attempt to open
    @param config: SSL context configuration
    @type config: Configuration
    @param data: HTTP POST data
    @type data: str
    @return: tuple (
        returned HTTP status code or 0 if an error occurred
        returned message or error description
        response object)
    """
    debuglevel = 1 if config.debug else 0

    # Set up handlers for the URL opener.
    if config.cookie:
        cj = config.cookie
    else:
        cj = cookielib.CookieJar()

    # Use a cookie processor that accumulates cookies when redirects occur so
    # that an application can redirect for authentication and retain both any
    # cookies for the application and the security system (c.f.
    # urllib2.HTTPCookieProcessor, which replaces cookies).
    cookie_handler = AccumulatingHTTPCookieProcessor(cj)
    handlers = [cookie_handler]

    if config.debug:
        http_handler = HTTPHandler(debuglevel=debuglevel)
        https_handler = HTTPSContextHandler(config.ssl_context,
                                            debuglevel=debuglevel)
        handlers.extend([http_handler, https_handler])

    # Explicitly remove proxy handling if the host is listed in the no_proxy
    # environment variable: urllib2 honours proxy settings made via http_proxy
    # and https_proxy but does not take the no_proxy value into account.
    if not _should_use_proxy(url, config.no_proxy):
        handlers.append(urllib2.ProxyHandler({}))
        log.debug("Not using proxy")
    elif config.proxies:
        handlers.append(urllib2.ProxyHandler(config.proxies))
        log.debug("Configuring proxies: %s", config.proxies)

    opener = build_opener(*handlers, ssl_context=config.ssl_context)

    # Open the URL and check the response.
    return_code = 0
    return_message = ''
    response = None
    try:
        response = opener.open(url, data)
        return_message = response.msg
        return_code = response.code
        if log.isEnabledFor(logging.DEBUG):
            for index, cookie in enumerate(cj):
                log.debug("%s : %s", index, cookie)
    except urllib2.HTTPError, exc:
        return_code = exc.code
        return_message = "Error: %s" % exc.msg
        if log.isEnabledFor(logging.DEBUG):
            log.debug("%s %s", exc.code, exc.msg)
    except Exception, exc:
        return_message = "Error: %s" % exc.__str__()
        if log.isEnabledFor(logging.DEBUG):
            import traceback
            log.debug(traceback.format_exc())
    return (return_code, return_message, response)
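
# Illustrative usage sketch (not part of the original module): open_url() does
# not raise for HTTP-level failures - HTTPError and other exceptions are caught
# and reported through the returned tuple, which callers inspect directly.
#
#     return_code, return_message, response = open_url(url, config)
#     if return_code == httplib.OK:
#         body = response.read()
#         response.close()
#     else:
#         log.error("Fetch failed (%s): %s", return_code, return_message)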

def _should_use_proxy(url, no_proxy):
    """Determines whether a proxy should be used to open a connection to the
    specified URL, based on the value of the no_proxy environment variable.
    @param url: URL
    @type url: basestring or urllib2.Request
    @return: True if a proxy should be used, False otherwise
    @rtype: bool
    """
    if no_proxy is None:
        no_proxy_effective = os.environ.get('no_proxy', '')
    else:
        no_proxy_effective = no_proxy

    urlObj = urlparse.urlparse(_url_as_string(url))
    for np in [h.strip() for h in no_proxy_effective.split(',')]:
        if urlObj.hostname == np:
            return False
    return True
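
# Illustrative sketch (not part of the original module): matching is by exact
# hostname, not by domain suffix; the hostnames below are assumptions for the
# example.
#
#     _should_use_proxy("http://internal.example.org/x",
#                       "localhost,internal.example.org")  # False - bypass proxy
#     _should_use_proxy("http://www.example.org/x",
#                       "localhost,internal.example.org")  # True - a domain
#                                                          # suffix alone does
#                                                          # not match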

def _url_as_string(url):
    """Returns the URL string from a URL value that is either a string or a
    urllib2.Request.
    @param url: URL
    @type url: basestring or urllib2.Request
    @return: URL string
    @rtype: basestring
    """
    if isinstance(url, urllib2.Request):
        return url.get_full_url()
    elif isinstance(url, basestring):
        return url
    else:
        raise TypeError("Expected type %r or %r" %
                        (basestring, urllib2.Request))

class Configuration(object):
    """Connection configuration.
    """
    def __init__(self, ssl_context, debug=False, proxies=None, no_proxy=None,
                 cookie=None):
        """
        @param ssl_context: SSL context to use with this configuration
        @type ssl_context: OpenSSL.SSL.Context
        @param debug: if True, output debugging information
        @type debug: bool
        @param proxies: proxies to use for the connection
        @type proxies: dict with basestring keys and values
        @param no_proxy: hosts for which a proxy should not be used
        @type no_proxy: basestring
        @param cookie: cookies to set for request
        @type cookie: cookielib.CookieJar
        """
        self.ssl_context = ssl_context
        self.debug = debug
        self.proxies = proxies
        self.no_proxy = no_proxy
        self.cookie = cookie
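
# Illustrative usage sketch (not part of the original module): a Configuration
# can carry proxy settings and a shared cookie jar across several fetches; the
# proxy URL, target URLs and ssl_ctx below are assumptions for the example.
#
#     jar = cookielib.CookieJar()
#     config = Configuration(ssl_ctx,
#                            debug=True,
#                            proxies={'https': 'http://proxy.example.org:3128'},
#                            no_proxy='localhost',
#                            cookie=jar)
#     login_page = fetch_from_url("https://host.example.org/login", config)
#     data_page = fetch_from_url("https://host.example.org/data", config)
#     # The second call reuses any cookies accumulated in 'jar' by the first.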

def main():
    '''Utility to fetch data using HTTP or HTTPS GET from a specified URL.
    '''
    parser = OptionParser(usage="%prog [options] url")
    parser.add_option("-c", "--certificate", dest="cert_file", metavar="FILE",
                      default=os.path.expanduser("~/credentials.pem"),
                      help="Certificate file - defaults to $HOME/credentials.pem")
    parser.add_option("-k", "--private-key", dest="key_file", metavar="FILE",
                      default=None,
                      help="Private key file - defaults to the certificate file")
    parser.add_option("-t", "--ca-certificate-dir", dest="ca_dir",
                      metavar="PATH", default=None,
                      help="Trusted CA certificate file directory")
    parser.add_option("-d", "--debug", action="store_true", dest="debug",
                      default=False, help="Print debug information.")
    parser.add_option("-p", "--post-data-file", dest="data_file",
                      metavar="FILE", default=None, help="POST data file")
    parser.add_option("-f", "--fetch", dest="output_file", metavar="FILE",
                      default=None, help="Output file")
    parser.add_option("-n", "--no-verify-peer", action="store_true",
                      dest="no_verify_peer", default=False,
                      help="Skip verification of peer certificate.")
    (options, args) = parser.parse_args()
    if len(args) != 1:
        parser.error("Incorrect number of arguments")

    url = args[0]

    if options.debug:
        logging.getLogger().setLevel(logging.DEBUG)

    if options.key_file and os.path.exists(options.key_file):
        key_file = options.key_file
    else:
        key_file = None

    if options.cert_file and os.path.exists(options.cert_file):
        cert_file = options.cert_file
    else:
        cert_file = None

    if options.ca_dir and os.path.exists(options.ca_dir):
        ca_dir = options.ca_dir
    else:
        ca_dir = None

    verify_peer = not options.no_verify_peer

    if options.data_file and os.path.exists(options.data_file):
        data_file = open(options.data_file)
        data = data_file.read()
        data_file.close()
    else:
        data = None

    # If a private key file is not specified, the key is assumed to be stored
    # in the certificate file.
    ssl_context = ssl_context_util.make_ssl_context(key_file,
                                                    cert_file,
                                                    None,
                                                    ca_dir,
                                                    verify_peer,
                                                    url)

    config = Configuration(ssl_context, options.debug)
    if options.output_file:
        return_code, return_message = fetch_from_url_to_file(
                                                        url,
                                                        config,
                                                        options.output_file,
                                                        data)[:2]
        raise SystemExit(return_code, return_message)
    else:
        # Pass on any POST data read above so that -p also works without -f.
        data = fetch_from_url(url, config, data)
        print(data)


if __name__ == '__main__':
    logging.basicConfig()
    main()
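
# Illustrative usage sketch (not part of the original module): run as a script
# the module acts as a minimal HTTPS-aware fetch utility; the paths and URL
# below are assumptions for the example.
#
#     python -m ndg.httpsclient.utils \
#         --certificate ~/credentials.pem \
#         --ca-certificate-dir /etc/grid-security/certificates \
#         --fetch output.html \
#         https://host.example.org/secure/page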