# -*- test-case-name: twisted.web.test.test_webclient,twisted.web.test.test_agent -*-
# Copyright (c) Twisted Matrix Laboratories.
# See LICENSE for details.
"""
HTTP client.
"""
from __future__ import division, absolute_import
import os
try:
    # Module names used by Python 2.
    from urlparse import urlunparse
    from urllib import splithost, splittype
except ImportError:
    # Fall back to the urllib.parse module when the names above are missing.
    from urllib.parse import splithost, splittype
    from urllib.parse import urlunparse as _urlunparse

    def urlunparse(parts):
        """
        Bytes-in, bytes-out wrapper around the imported C{_urlunparse}.

        Every element of C{parts} is decoded with the C{"charmap"} codec, the
        decoded tuple is handed to C{_urlunparse}, and the resulting string is
        encoded back with C{"charmap"} before being returned.

        @param parts: An iterable of objects with a C{decode} method (the
            callers in this module pass C{bytes}).

        @return: The reassembled URL, encoded with C{"charmap"}.
        """
        result = _urlunparse(tuple([p.decode("charmap") for p in parts]))
        return result.encode("charmap")
import zlib
from zope.interface import implementer
from twisted.python import log
from twisted.python.failure import Failure
from twisted.web import http
from twisted.internet import defer, protocol, task, reactor
from twisted.internet.interfaces import IProtocol
from twisted.python import failure
from twisted.python.components import proxyForInterface
from twisted.web import error
from twisted.web.http_headers import Headers
from .endpoints import TCP4ClientEndpoint, SSL4ClientEndpoint
from .iweb import IResponse, UNKNOWN_LENGTH, IBodyProducer
class PartialDownloadError(error.Error):
    """
    The page was only partially downloaded: the connection was lost in the
    middle of the transfer.

    @ivar response: All of the response body which was downloaded.
    """
class _URL(tuple):
"""
A parsed URL.
At some point this should be replaced with a better URL implementation.
"""
def __new__(self, scheme, host, port, path):
return tuple.__new__(_URL, (scheme, host, port, path))
def __init__(self, scheme, host, port, path):
self.scheme = scheme
self.host = host
self.port = port
self.path = path
def _parse(url, defaultPort=None):
    """
    Split the given URL into the scheme, host, port, and path.

    @type url: C{bytes}
    @param url: An URL to parse.

    @type defaultPort: C{int} or C{None}
    @param defaultPort: An alternate value to use as the port if the URL does
        not include one.  If C{None}, 443 is used for the C{https} scheme and
        80 for everything else.

    @return: A four-tuple of the scheme, host, port, and path of the URL.  All
        of these are C{bytes} instances except for port, which is an C{int}.
    """
    url = url.strip()
    parsed = http.urlparse(url)
    scheme = parsed[0]
    # Rebuild everything after the network location (path, params, query and
    # fragment) into a single string, leaving scheme and netloc empty.
    path = urlunparse((b'', b'') + parsed[2:])

    if defaultPort is None:
        if scheme == b'https':
            defaultPort = 443
        else:
            defaultPort = 80

    host, port = parsed[1], defaultPort
    # A network location ending in "]" is a bracketed IPv6 literal without a
    # port: every colon in it belongs to the address, so leave it alone.
    if b':' in host and not host.endswith(b']'):
        # Split on the last colon only.  A plain split() would produce more
        # than two pieces (and fail to unpack) for netlocs such as
        # "[::1]:8080" or "user:secret@example.com:8080".
        host, port = host.rsplit(b':', 1)
        try:
            port = int(port)
        except ValueError:
            # Not a number after the colon; fall back to the default port.
            port = defaultPort

    if path == b'':
        path = b'/'
    return _URL(scheme, host, port, path)
def _makeGetterFactory(url, factoryFactory, contextFactory=None,
                       *args, **kwargs):
    """
    Build an HTTP page getting factory and start connecting it.

    Extra positional and keyword arguments are forwarded untouched to
    C{factoryFactory}.

    @param url: The URL to fetch; it is parsed to find the scheme, host and
        port to connect to.

    @param factoryFactory: Factory factory that is called with C{url}, C{args}
        and C{kwargs} to produce the getter

    @param contextFactory: Context factory to use when creating a secure
        connection, defaulting to C{None}

    @return: The factory created by C{factoryFactory}
    """
    scheme, host, port, _unusedPath = _parse(url)
    getter = factoryFactory(url, *args, **kwargs)

    # Anything other than https goes over a plain TCP connection.
    if scheme != b'https':
        reactor.connectTCP(host, port, getter)
        return getter

    # Imported lazily so that SSL support is only required for https URLs.
    from twisted.internet import ssl
    if contextFactory is None:
        contextFactory = ssl.ClientContextFactory()
    reactor.connectSSL(host, port, getter, contextFactory)
    return getter
# The code which follows is based on the new HTTP client implementation. It
# should be significantly better than anything above, though it is not yet
# feature equivalent.
from twisted.web.error import SchemeNotSupported
from ._newclient import Request, Response, HTTP11ClientProtocol
from ._newclient import ResponseDone, ResponseFailed
from ._newclient import RequestNotSent, RequestTransmissionFailed
from ._newclient import (
ResponseNeverReceived, PotentialDataLoss, _WrapperException)
# WebClientContextFactory is defined in one of two ways, depending on whether
# twisted.internet.ssl can be imported.
try:
    from twisted.internet.ssl import ClientContextFactory
except ImportError:
    class WebClientContextFactory(object):
        """
        A web context factory which doesn't work because the necessary SSL
        support is missing.
        """
        def getContext(self, hostname, port):
            """
            Always fail, since no context can be created.

            @param hostname: Ignored.
            @param port: Ignored.

            @raise NotImplementedError: Always.
            """
            raise NotImplementedError("SSL support unavailable")
else:
    class WebClientContextFactory(ClientContextFactory):
        """
        A web context factory which ignores the hostname and port and does no
        certificate verification.
        """
        def getContext(self, hostname, port):
            """
            Return the result of L{ClientContextFactory.getContext}.

            @param hostname: Ignored.
            @param port: Ignored.
            """
            return ClientContextFactory.getContext(self)
class _WebToNormalContextFactory(object):
"""
Adapt a web context factory to a normal context factory.
@ivar _webContext: A web context factory which accepts a hostname and port
number to its C{getContext} method.
@ivar _hostname: The hostname which will be passed to
C{_webContext.getContext}.
@ivar _port: The port number which will be passed to
C{_webContext.getContext}.
"""
def __init__(self, webContext, hostname, port):
self._webContext = webContext
self._hostname = hostname
self._port = port
def getContext(self):
"""
Called the wrapped web context factory's C{getContext} method with a
hostname and port number and return the resulting context object.
"""
return self._webContext.getContext(self._hostname, self._port)
@implementer(IBodyProducer)
class FileBodyProducer(object):
    """
    L{FileBodyProducer} produces bytes from an input file object incrementally
    and writes them to a consumer.

    Since file-like objects cannot be read from in an event-driven manner,
    L{FileBodyProducer} uses a L{Cooperator} instance to schedule reads from
    the file.  This process is also paused and resumed based on notifications
    from the L{IConsumer} provider being written to.

    The file is closed after it has been read, or if the producer is stopped
    early.

    @ivar _inputFile: Any file-like object, bytes read from which will be
        written to a consumer.

    @ivar _cooperate: A method like L{Cooperator.cooperate} which is used to
        schedule all reads.

    @ivar _readSize: The number of bytes to read from C{_inputFile} at a time.

    @ivar length: The number of bytes which can be read from C{_inputFile},
        or C{UNKNOWN_LENGTH} if that cannot be determined.
    """

    # Python 2.4 doesn't have these symbolic constants
    _SEEK_SET = getattr(os, 'SEEK_SET', 0)
    _SEEK_END = getattr(os, 'SEEK_END', 2)

    def __init__(self, inputFile, cooperator=task, readSize=2 ** 16):
        """
        @param inputFile: The file-like object to read the body from.

        @param cooperator: An object whose C{cooperate} method is used to
            schedule the reads.

        @param readSize: The number of bytes to read from C{inputFile} at a
            time.
        """
        self._inputFile = inputFile
        self._cooperate = cooperator.cooperate
        self._readSize = readSize
        self.length = self._determineLength(inputFile)

    def _determineLength(self, fObj):
        """
        Determine how many bytes can be read out of C{fObj} (assuming it is not
        modified from this point on).  If the determination cannot be made,
        return C{UNKNOWN_LENGTH}.
        """
        try:
            seek = fObj.seek
            tell = fObj.tell
        except AttributeError:
            return UNKNOWN_LENGTH
        # Measure the distance from the current position to the end of the
        # file, then put the position back where it was.
        originalPosition = tell()
        seek(0, self._SEEK_END)
        end = tell()
        seek(originalPosition, self._SEEK_SET)
        return end - originalPosition

    def stopProducing(self):
        """
        Permanently stop writing bytes from the file to the consumer by
        stopping the underlying L{CooperativeTask}.

        It is not an error to call this after the task has already finished.
        """
        self._inputFile.close()
        try:
            self._task.stop()
        except task.TaskFinished:
            # The task already ran to completion, failed, or was stopped
            # earlier; there is nothing left to stop.
            pass

    def startProducing(self, consumer):
        """
        Start a cooperative task which will read bytes from the input file and
        write them to C{consumer}.  Return a L{Deferred} which fires after all
        bytes have been written.

        @param consumer: Any L{IConsumer} provider
        """
        self._task = self._cooperate(self._writeloop(consumer))
        d = self._task.whenDone()

        def maybeStopped(reason):
            # IBodyProducer.startProducing's Deferred isn't supposed to fire
            # if stopProducing is called, so swap in a Deferred which never
            # fires.  Any other failure is re-raised by trap().
            reason.trap(task.TaskStopped)
            return defer.Deferred()
        d.addCallbacks(lambda ignored: None, maybeStopped)
        return d

    def _writeloop(self, consumer):
        """
        Return an iterator which reads one chunk of bytes from the input file
        and writes them to the consumer for each time it is iterated.
        """
        while True:
            chunk = self._inputFile.read(self._readSize)
            if not chunk:
                # End of file: release the file and finish the task.
                self._inputFile.close()
                break
            consumer.write(chunk)
            yield None

    def pauseProducing(self):
        """
        Temporarily suspend copying bytes from the input file to the consumer
        by pausing the L{CooperativeTask} which drives that activity.
        """
        self._task.pause()

    def resumeProducing(self):
        """
        Undo the effects of a previous C{pauseProducing} and resume copying
        bytes to the consumer by resuming the L{CooperativeTask} which drives
        the write activity.
        """
        self._task.resume()
class _HTTP11ClientFactory(protocol.Factory):
    """
    The factory L{HTTPConnectionPool} uses to create
    L{HTTP11ClientProtocol} instances.

    @ivar _quiescentCallback: The quiescent callback to be passed to protocol
        instances, used to return them to the connection pool.

    @since: 11.1
    """
    def __init__(self, quiescentCallback):
        """
        @param quiescentCallback: The callback handed to every protocol this
            factory builds.
        """
        self._quiescentCallback = quiescentCallback

    def buildProtocol(self, addr):
        """
        Create a new L{HTTP11ClientProtocol} carrying the stored quiescent
        callback.

        @param addr: Unused.
        """
        onQuiescent = self._quiescentCallback
        return HTTP11ClientProtocol(onQuiescent)
class _RetryingHTTP11ClientProtocol(object):
"""
A wrapper for L{HTTP11ClientProtocol} that automatically retries requests.
@ivar _clientProtocol: The underlying L{HTTP11ClientProtocol}.
@ivar _newConnection: A callable that creates a new connection for a
retry.
"""
def __init__(self, clientProtocol, newConnection):
self._clientProtocol = clientProtocol
self._newConnection = newConnection
def _shouldRetry(self, method, exception, bodyProducer):
"""
Indicate whether request should be retried.
Only returns C{True} if method is idempotent, no response was
received, the reason for the failed request was not due to
user-requested cancellation, and no body was sent. The latter
requirement may be relaxed in the future, and PUT added to approved
Loading ...