from __future__ import absolute_import
from contextlib import contextmanager
import zlib
import io
import logging
from socket import timeout as SocketTimeout
from socket import error as SocketError
from ._collections import HTTPHeaderDict
from .exceptions import (
BodyNotHttplibCompatible, ProtocolError, DecodeError, ReadTimeoutError,
ResponseNotChunked, IncompleteRead, InvalidHeader
)
from .packages.six import string_types as basestring, binary_type, PY3
from .packages.six.moves import http_client as httplib
from .connection import HTTPException, BaseSSLError
from .util.response import is_fp_closed, is_response_to_head
log = logging.getLogger(__name__)
class DeflateDecoder(object):
    """
    Incremental decoder for ``deflate`` encoded bodies.

    Decoding starts with a default :func:`zlib.decompressobj`. If that
    raises :class:`zlib.error` before any output has been produced, the
    decoder switches to one created with ``-zlib.MAX_WBITS`` (raw deflate,
    no zlib header) and replays every byte received so far.
    """

    def __init__(self):
        self._obj = zlib.decompressobj()
        # Input seen while the stream format is still undecided; replayed
        # through the fallback decompressor if the first attempt fails.
        self._data = binary_type()
        self._first_try = True

    def __getattr__(self, name):
        # Anything not defined here (``flush``, ``unused_data``, ...) is
        # looked up on the wrapped zlib decompression object.
        return getattr(self._obj, name)

    def decompress(self, data):
        """
        Decompress ``data`` and return the decoded bytes.

        Empty input is returned unchanged. A :class:`zlib.error` raised by
        the fallback decompressor propagates to the caller.
        """
        if not data:
            return data

        # The format has already been settled: decode directly.
        if not self._first_try:
            return self._obj.decompress(data)

        self._data += data
        try:
            decoded = self._obj.decompress(data)
        except zlib.error:
            # Switch to the fallback decompressor and replay the buffered
            # input; the buffer is dropped whether or not the retry works.
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(self._data)
            finally:
                self._data = None

        if decoded:
            # Output was produced, so the first decompressor is the right
            # one and the replay buffer is no longer needed.
            self._first_try = False
            self._data = None
        return decoded
class GzipDecoderState(object):
    """
    States used by :class:`GzipDecoder` while decoding a gzip body.
    """

    # FIRST_MEMBER:  state a GzipDecoder starts in.
    # OTHER_MEMBERS: set once input remains after a member has been decoded.
    # SWALLOW_DATA:  set after a zlib error; later input is ignored.
    FIRST_MEMBER, OTHER_MEMBERS, SWALLOW_DATA = range(3)
class GzipDecoder(object):
    """
    Incremental decoder for ``gzip`` encoded bodies that may consist of
    several concatenated gzip members.
    """

    def __init__(self):
        self._state = GzipDecoderState.FIRST_MEMBER
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

    def __getattr__(self, name):
        # Anything not defined here is looked up on the wrapped zlib
        # decompression object.
        return getattr(self._obj, name)

    def decompress(self, data):
        """
        Decompress ``data`` and return the decoded bytes.

        Returns an empty byte string for empty input or once the decoder
        is in the ``SWALLOW_DATA`` state. A :class:`zlib.error` is
        re-raised unless it happens after the first member, in which case
        whatever was decoded so far is returned.
        """
        decoded = binary_type()
        if not data or self._state == GzipDecoderState.SWALLOW_DATA:
            return decoded

        while data:
            try:
                decoded += self._obj.decompress(data)
            except zlib.error:
                failed_after_first = (
                    self._state == GzipDecoderState.OTHER_MEMBERS)
                # Ignore data after the first error
                self._state = GzipDecoderState.SWALLOW_DATA
                if not failed_after_first:
                    raise
                # Allow trailing garbage acceptable in other gzip clients
                return decoded

            # Bytes left over belong to a following member: decode them
            # with a fresh decompression object on the next iteration.
            data = self._obj.unused_data
            if data:
                self._state = GzipDecoderState.OTHER_MEMBERS
                self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

        return decoded
def _get_decoder(mode):
    """
    Return a new decoder instance for the content-encoding ``mode``:
    a :class:`GzipDecoder` for ``'gzip'``, a :class:`DeflateDecoder` for
    any other value.
    """
    decoder_cls = GzipDecoder if mode == 'gzip' else DeflateDecoder
    return decoder_cls()
class HTTPResponse(io.IOBase):
"""
HTTP Response container.
Backwards-compatible with httplib's HTTPResponse, but the response ``body``
is loaded and decoded on-demand when the ``data`` property is accessed. This
class is also compatible with the Python standard library's :mod:`io`
module, and can hence be treated as a readable object in the context of that
framework.
Extra parameters for behaviour not present in httplib.HTTPResponse:
:param preload_content:
If True, the response's body will be preloaded during construction.
:param decode_content:
If True, will attempt to decode the body based on the
'content-encoding' header.
:param original_response:
When this HTTPResponse wrapper is generated from an httplib.HTTPResponse
object, it's convenient to include the original for debug purposes. It's
otherwise unused.
:param retries:
The retries contains the last :class:`~urllib3.util.retry.Retry` that
was used during the request.
:param enforce_content_length:
Enforce content length checking. Body returned by server must match
value of Content-Length header, if present. Otherwise, raise error.
"""
CONTENT_DECODERS = ['gzip', 'deflate']
REDIRECT_STATUSES = [301, 302, 303, 307, 308]
def __init__(self, body='', headers=None, status=0, version=0, reason=None,
             strict=0, preload_content=True, decode_content=True,
             original_response=None, pool=None, connection=None, msg=None,
             retries=None, enforce_content_length=False,
             request_method=None, request_url=None):
    """
    Store the response metadata, attach ``body`` (a string or a file-like
    object), detect chunked transfer-encoding, compute the expected body
    length and, if ``preload_content`` is true and no body is stored yet,
    read the body immediately.
    """
    # Reuse an HTTPHeaderDict as-is; wrap anything else in one.
    self.headers = (headers if isinstance(headers, HTTPHeaderDict)
                    else HTTPHeaderDict(headers))

    # Response metadata and caller-supplied options.
    self.status = status
    self.version = version
    self.reason = reason
    self.strict = strict
    self.decode_content = decode_content
    self.retries = retries
    self.enforce_content_length = enforce_content_length
    self.msg = msg

    # Internal state.
    self._decoder = None
    self._body = None
    self._fp = None
    self._original_response = original_response
    self._fp_bytes_read = 0
    self._request_url = request_url
    self._pool = pool
    self._connection = connection

    # A non-empty string body is stored directly; an object exposing
    # ``read`` becomes the underlying file-like object.
    if body and isinstance(body, (basestring, binary_type)):
        self._body = body
    if hasattr(body, 'read'):
        self._fp = body

    # Are we using the chunked-style of transfer encoding?
    self.chunk_left = None
    transfer_encoding = self.headers.get('transfer-encoding', '').lower()
    # Scan lazily so no throwaway list of encodings is built.
    self.chunked = any(
        token.strip() == "chunked"
        for token in transfer_encoding.split(","))

    # Determine length of response
    self.length_remaining = self._init_length(request_method)

    # If requested, preload the body.
    if preload_content and not self._body:
        self._body = self.read(decode_content=decode_content)
def get_redirect_location(self):
"""
Should we redirect and where to?
:returns: Truthy redirect location string if we got a redirect status
code and valid location. ``None`` if redirect status and no
location. ``False`` if not a redirect status code.
"""
if self.status in self.REDIRECT_STATUSES:
return self.headers.get('location')
return False
def release_conn(self):
if not self._pool or not self._connection:
return
self._pool._put_conn(self._connection)
self._connection = None
@property
def data(self):
# For backwords-compat with earlier urllib3 0.4 and earlier.
if self._body:
return self._body
if self._fp:
return self.read(cache_content=True)
@property
def connection(self):
    """
    Read-only access to the connection stored on this response
    (``self._connection``); ``None`` after :meth:`release_conn` has
    returned it to the pool.
    """
    return self._connection
def isclosed(self):
    """
    Return the result of ``is_fp_closed`` for the underlying file-like
    object (``self._fp``).
    """
    return is_fp_closed(self._fp)
def tell(self):
    """
    Obtain the number of bytes pulled over the wire so far. May differ from
    the amount of content returned by :meth:`HTTPResponse.read` if bytes
    are encoded on the wire (e.g., compressed).

    :returns: The current value of the ``_fp_bytes_read`` counter.
    """
    return self._fp_bytes_read
def _init_length(self, request_method):
"""
Set initial length value for Response content if available.
"""
length = self.headers.get('content-length')
if length is not None:
if self.chunked:
# This Response will fail with an IncompleteRead if it can't be
# received as chunked. This method falls back to attempt reading
# the response before raising an exception.
log.warning("Received response with both Content-Length and "
"Transfer-Encoding set. This is expressly forbidden "
"by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
"attempting to process response as Transfer-Encoding: "
"chunked.")
return None
try:
# RFC 7230 section 3.3.2 specifies multiple content lengths can
# be sent in a single Content-Length header
# (e.g. Content-Length: 42, 42). This line ensures the values
# are all valid ints and that as long as the `set` length is 1,
# all values are the same. Otherwise, the header is invalid.
lengths = set([int(val) for val in length.split(',')])
if len(lengths) > 1:
raise InvalidHeader("Content-Length contained multiple "
"unmatching values (%s)" % length)
length = lengths.pop()
except ValueError:
length = None
else:
if length < 0:
length = None
# Convert status to int for comparison
# In some cases, httplib returns a status of "_UNKNOWN"
try:
status = int(self.status)
except ValueError:
status = 0
# Check for responses that shouldn't include a body
if status in (204, 304) or 100 <= status < 200 or request_method == 'HEAD':
length = 0
return length
def _init_decoder(self):
"""
Set-up the _decoder attribute if necessary.
"""
# Note: content-encoding value should be case-insensitive, per RFC 7230
# Section 3.2
content_encoding = self.headers.get('content-encoding', '').lower()
if self._decoder is None and content_encoding in self.CONTENT_DECODERS:
self._decoder = _get_decoder(content_encoding)
def _decode(self, data, decode_content, flush_decoder):
"""
Decode the data passed in and potentially flush the decoder.
"""
try:
if decode_content and self._decoder:
data = self._decoder.decompress(data)
except (IOError, zlib.error) as e:
content_encoding = self.headers.get('content-encoding', '').lower()
raise DecodeError(
"Received response with content-encoding: %s, but "
"failed to decode it." % content_encoding, e)
if flush_decoder and decode_content:
data += self._flush_decoder()
return data
def _flush_decoder(self):
"""
Flushes the decoder. Should only be called if the decoder is actually
being used.
"""
if self._decoder:
buf = self._decoder.decompress(b'')
return buf + self._decoder.flush()
return b''
@contextmanager
def _error_catcher(self):
"""
Catch low-level python exceptions, instead re-raising urllib3
variants, so that low-level exceptions are not leaked in the
high-level api.
On exit, release the connection back to the pool.
"""
clean_exit = False
try:
try:
yield
except SocketTimeout:
# FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
# there is yet no clean way to get at it from this context.
raise ReadTimeoutError(self._pool, None, 'Read timed out.')
except BaseSSLError as e:
# FIXME: Is there a better way to differentiate between SSLErrors?
if 'read operation timed out' not in str(e): # Defensive:
# This shouldn't happen but just in case we're missing an edge
# case, let's avoid swallowing SSL errors.
raise
raise ReadTimeoutError(self._pool, None, 'Read timed out.')
Loading ...