@@ -1,262 +0,0 @@
-'''
-Created on 12 Jan 2012
-
-@author: rwilkinson
-'''
-import base64
-import socket
-import urlparse
-from urllib import unquote, addinfourl
-from urllib2 import _parse_proxy, URLError, HTTPError
-from urllib2 import (AbstractHTTPHandler as _AbstractHTTPHandler,
-                     BaseHandler as _BaseHandler,
-                     HTTPRedirectHandler as _HTTPRedirectHandler,
-                     Request as _Request,
-                     OpenerDirector as _OpenerDirector)
-
-from ndg.httpsclient.httplib_proxy import HTTPConnection
-
-
-class Request(_Request):
-
-    def __init__(self, *args, **kw):
-        _Request.__init__(self, *args, **kw)
-        self._tunnel_host = None
-
-    def set_proxy(self, host, type):
-        if self.type == 'https' and not self._tunnel_host:
-            self._tunnel_host = self.host
-        else:
-            self.type = type
-            self.__r_host = self.__original
-        self.host = host
-
-
-class BaseHandler(_BaseHandler):
-    def proxy_open(self, req, proxy, type):
-        if req.get_type() == 'https':
-            orig_type = req.get_type()
-            proxy_type, user, password, hostport = _parse_proxy(proxy)
-            if proxy_type is None:
-                proxy_type = orig_type
-            if user and password:
-                user_pass = '%s:%s' % (unquote(user), unquote(password))
-                creds = base64.b64encode(user_pass).strip()
-                req.add_header('Proxy-authorization', 'Basic ' + creds)
-            hostport = unquote(hostport)
-            req.set_proxy(hostport, proxy_type)
-            # let other handlers take care of it
-            return None
-        else:
-            return _BaseHandler.proxy_open(self, req, proxy, type)
-
-class AbstractHTTPHandler(_AbstractHTTPHandler):
-    def do_open(self, http_class, req):
-        """Return an addinfourl object for the request, using http_class.
-
-        http_class must implement the HTTPConnection API from httplib.
-        The addinfourl return value is a file-like object. It also
-        has methods and attributes including:
-        - info(): return a mimetools.Message object for the headers
-        - geturl(): return the original request URL
-        - code: HTTP status code
-        """
-        host = req.get_host()
-        if not host:
-            raise URLError('no host given')
-
-        h = http_class(host, timeout=req.timeout) # will parse host:port
-        h.set_debuglevel(self._debuglevel)
-
-        headers = dict(req.headers)
-        headers.update(req.unredirected_hdrs)
-        # We want to make an HTTP/1.1 request, but the addinfourl
-        # class isn't prepared to deal with a persistent connection.
-        # It will try to read all remaining data from the socket,
-        # which will block while the server waits for the next request.
-        # So make sure the connection gets closed after the (only)
-        # request.
-        headers["Connection"] = "close"
-        headers = dict(
-            (name.title(), val) for name, val in headers.items())
-
-        if not hasattr(req, '_tunnel_host'):
-            req._tunnel_host = None  # plain urllib2.Request objects lack this attribute
-
-        if req._tunnel_host:
-            h.set_tunnel(req._tunnel_host)
-        try:
-            h.request(req.get_method(), req.get_selector(), req.data, headers)
-            r = h.getresponse()
-        except socket.error, err: # XXX what error?
-            raise URLError(err)
-
-        # Pick apart the HTTPResponse object to get the addinfourl
-        # object initialized properly.
-
-        # Wrap the HTTPResponse object in socket's file object adapter
-        # for Windows. That adapter calls recv(), so delegate recv()
-        # to read(). This weird wrapping allows the returned object to
-        # have readline() and readlines() methods.
-
-        # XXX It might be better to extract the read buffering code
-        # out of socket._fileobject() and into a base class.
-
-        r.recv = r.read
-        fp = socket._fileobject(r, close=True)
-
-        resp = addinfourl(fp, r.msg, req.get_full_url())
-        resp.code = r.status
-        resp.msg = r.reason
-        return resp
-
-
-class HTTPHandler(AbstractHTTPHandler):
-
-    def http_open(self, req):
-        return self.do_open(HTTPConnection, req)
-
-    http_request = AbstractHTTPHandler.do_request_
-
-#if hasattr(httplib, 'HTTPS'):
-#    class HTTPSHandler(AbstractHTTPHandler):
-#
-#        def https_open(self, req):
-#            return self.do_open(httplib.HTTPSConnection, req)
-#
-#        https_request = AbstractHTTPHandler.do_request_
-
-
-class HTTPRedirectHandler(BaseHandler):
-    # maximum number of redirections to any single URL
-    # this is needed because of the state that cookies introduce
-    max_repeats = 4
-    # maximum total number of redirections (regardless of URL) before
-    # assuming we're in a loop
-    max_redirections = 10
-
-    def redirect_request(self, req, fp, code, msg, headers, newurl):
-        """Return a Request or None in response to a redirect.
-
-        This is called by the http_error_30x methods when a
-        redirection response is received. If a redirection should
-        take place, return a new Request to allow http_error_30x to
-        perform the redirect. Otherwise, raise HTTPError if no-one
-        else should try to handle this url. Return None if you can't
-        but another Handler might.
-        """
-        m = req.get_method()
-        if (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
-            or code in (301, 302, 303) and m == "POST"):
-            # Strictly (according to RFC 2616), 301 or 302 in response
-            # to a POST MUST NOT cause a redirection without confirmation
-            # from the user (of urllib2, in this case). In practice,
-            # essentially all clients do redirect in this case, so we
-            # do the same.
-            # be conciliant with URIs containing a space
-            newurl = newurl.replace(' ', '%20')
-            newheaders = dict((k,v) for k,v in req.headers.items()
-                              if k.lower() not in ("content-length", "content-type")
-                             )
-            return Request(newurl,
-                           headers=newheaders,
-                           origin_req_host=req.get_origin_req_host(),
-                           unverifiable=True)
-        else:
-            raise HTTPError(req.get_full_url(), code, msg, headers, fp)
-
-    # Implementation note: To avoid the server sending us into an
-    # infinite loop, the request object needs to track what URLs we
-    # have already seen. Do this by adding a handler-specific
-    # attribute to the Request object.
-    def http_error_302(self, req, fp, code, msg, headers):
-        # Some servers (incorrectly) return multiple Location headers
-        # (so probably same goes for URI). Use first header.
-        if 'location' in headers:
-            newurl = headers.getheaders('location')[0]
-        elif 'uri' in headers:
-            newurl = headers.getheaders('uri')[0]
-        else:
-            return
-
-        # fix a possible malformed URL
-        urlparts = urlparse.urlparse(newurl)
-        if not urlparts.path:
-            urlparts = list(urlparts)
-            urlparts[2] = "/"
-            newurl = urlparse.urlunparse(urlparts)
-
-        newurl = urlparse.urljoin(req.get_full_url(), newurl)
-
-        # For security reasons we do not allow redirects to protocols
-        # other than HTTP, HTTPS or FTP.
-        newurl_lower = newurl.lower()
-        if not (newurl_lower.startswith('http://') or
-                newurl_lower.startswith('https://') or
-                newurl_lower.startswith('ftp://')):
-            raise HTTPError(newurl, code,
-                            msg + " - Redirection to url '%s' is not allowed" %
-                            newurl,
-                            headers, fp)
-
-        # XXX Probably want to forget about the state of the current
-        # request, although that might interact poorly with other
-        # handlers that also use handler-specific request attributes
-        new = self.redirect_request(req, fp, code, msg, headers, newurl)
-        if new is None:
-            return
-
-        # loop detection
-        # .redirect_dict has a key url if url was previously visited.
-        if hasattr(req, 'redirect_dict'):
-            visited = new.redirect_dict = req.redirect_dict
-            if (visited.get(newurl, 0) >= self.max_repeats or
-                len(visited) >= self.max_redirections):
-                raise HTTPError(req.get_full_url(), code,
-                                self.inf_msg + msg, headers, fp)
-        else:
-            visited = new.redirect_dict = req.redirect_dict = {}
-        visited[newurl] = visited.get(newurl, 0) + 1
-
-        # Don't close the fp until we are sure that we won't use it
-        # with HTTPError.
-        fp.read()
-        fp.close()
-
-        return self.parent.open(new, timeout=req.timeout)
-
-    http_error_301 = http_error_303 = http_error_307 = http_error_302
-
-    inf_msg = "The HTTP server returned a redirect error that would " \
-              "lead to an infinite loop.\n" \
-              "The last 30x error message was:\n"
-
-
-class OpenerDirector(_OpenerDirector):
-    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
-        # accept a URL or a Request object
-        if isinstance(fullurl, basestring):
-            req = Request(fullurl, data)
-        else:
-            req = fullurl
-            if data is not None:
-                req.add_data(data)
-
-        req.timeout = timeout
-        protocol = req.get_type()
-
-        # pre-process request
-        meth_name = protocol+"_request"
-        for processor in self.process_request.get(protocol, []):
-            meth = getattr(processor, meth_name)
-            req = meth(req)
-
-        response = self._open(req, data)
-
-        # post-process response
-        meth_name = protocol+"_response"
-        for processor in self.process_response.get(protocol, []):
-            meth = getattr(processor, meth_name)
-            response = meth(req, response)
-
-        return response
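
A minimal sketch (not part of the removed module) of the CONNECT-tunnelling idea that the deleted do_open()/set_tunnel logic relies on, assuming Python 2.7's stdlib httplib; the removed code uses a patched HTTPConnection from ndg.httpsclient.httplib_proxy instead, and the proxy and target hosts below are hypothetical:

    import httplib

    # Open a TCP connection to the proxy, ask it (via CONNECT) to tunnel to the
    # target host, then speak TLS/HTTP through the tunnel.
    conn = httplib.HTTPSConnection('proxy.example.org', 3128)  # hypothetical proxy
    conn.set_tunnel('www.example.org', 443)                    # CONNECT target
    conn.request('GET', '/')
    response = conn.getresponse()
    print response.status, response.reason
    conn.close()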