# -*- test-case-name: twisted.web.test.test_newclient -*-
# Copyright (c) Twisted Matrix Laboratories.
# See LICENSE for details.
"""
An U{HTTP 1.1<http://www.w3.org/Protocols/rfc2616/rfc2616.html>} client.
The way to use the functionality provided by this module is to:
- Connect a L{HTTP11ClientProtocol} to an HTTP server
- Create a L{Request} with the appropriate data
- Pass the request to L{HTTP11ClientProtocol.request}
- The returned Deferred will fire with a L{Response} object
- Create a L{IProtocol} provider which can handle the response body
- Connect it to the response with L{Response.deliverBody}
- When the protocol's C{connectionLost} method is called, the response is
complete. See L{Response.deliverBody} for details.
Various other classes in this module support this usage:
- HTTPParser is the basic HTTP parser. It can handle the parts of HTTP which
are symmetric between requests and responses.
- HTTPClientParser extends HTTPParser to handle response-specific parts of
HTTP. One instance is created for each request to parse the corresponding
response.
"""
__metaclass__ = type
from zope.interface import implements
from twisted.python import log
from twisted.python.reflect import fullyQualifiedName
from twisted.python.failure import Failure
from twisted.internet.interfaces import IConsumer, IPushProducer
from twisted.internet.error import ConnectionDone
from twisted.internet.defer import Deferred, succeed, fail, maybeDeferred
from twisted.internet.defer import CancelledError
from twisted.internet.protocol import Protocol
from twisted.protocols.basic import LineReceiver
from twisted.web.http_headers import Headers
from twisted.web.http import NO_CONTENT, NOT_MODIFIED
from twisted.web.http import _DataLoss, PotentialDataLoss
from twisted.web.http import _IdentityTransferDecoder, _ChunkedTransferDecoder
from .iweb import IResponse, UNKNOWN_LENGTH
# States HTTPParser can be in
STATUS = 'STATUS'
HEADER = 'HEADER'
BODY = 'BODY'
DONE = 'DONE'
class BadHeaders(Exception):
"""
Headers passed to L{Request} were in some way invalid.
"""
class ExcessWrite(Exception):
"""
The body L{IBodyProducer} for a request tried to write data after
indicating it had finished writing data.
"""
class ParseError(Exception):
"""
Some received data could not be parsed.
@ivar data: The string which could not be parsed.
"""
def __init__(self, reason, data):
Exception.__init__(self, reason, data)
self.data = data
class BadResponseVersion(ParseError):
"""
The version string in a status line was unparsable.
"""
class _WrapperException(Exception):
"""
L{_WrapperException} is the base exception type for exceptions which
include one or more other exceptions as the low-level causes.
@ivar reasons: A list of exceptions. See subclass documentation for more
details.
"""
def __init__(self, reasons):
Exception.__init__(self, reasons)
self.reasons = reasons
class RequestGenerationFailed(_WrapperException):
"""
There was an error while creating the bytes which make up a request.
@ivar reasons: A C{list} of one or more L{Failure} instances giving the
reasons the request generation was considered to have failed.
"""
class RequestTransmissionFailed(_WrapperException):
"""
There was an error while sending the bytes which make up a request.
@ivar reasons: A C{list} of one or more L{Failure} instances giving the
reasons the request transmission was considered to have failed.
"""
class ConnectionAborted(Exception):
"""
The connection was explicitly aborted by application code.
"""
class WrongBodyLength(Exception):
"""
An L{IBodyProducer} declared the number of bytes it was going to
produce (via its C{length} attribute) and then produced a different number
of bytes.
"""
class ResponseDone(Exception):
"""
L{ResponseDone} may be passed to L{IProtocol.connectionLost} on the
protocol passed to L{Response.deliverBody} and indicates that the entire
response has been delivered.
"""
class ResponseFailed(_WrapperException):
"""
L{ResponseFailed} indicates that all of the response to a request was not
received for some reason.
@ivar reasons: A C{list} of one or more L{Failure} instances giving the
reasons the response was considered to have failed.
@ivar response: If specified, the L{Response} received from the server (and
in particular the status code and the headers).
"""
def __init__(self, reasons, response=None):
_WrapperException.__init__(self, reasons)
self.response = response
class ResponseNeverReceived(ResponseFailed):
"""
A L{ResponseFailed} that knows no response bytes at all have been received.
"""
class RequestNotSent(Exception):
"""
L{RequestNotSent} indicates that an attempt was made to issue a request but
for reasons unrelated to the details of the request itself, the request
could not be sent. For example, this may indicate that an attempt was made
to send a request using a protocol which is no longer connected to a
server.
"""
def _callAppFunction(function):
"""
Call C{function}. If it raises an exception, log it with a minimal
description of the source.
@return: C{None}
"""
try:
function()
except:
log.err(None, "Unexpected exception from %s" % (
fullyQualifiedName(function),))
class HTTPParser(LineReceiver):
"""
L{HTTPParser} handles the parsing side of HTTP processing. With a suitable
subclass, it can parse either the client side or the server side of the
connection.
@ivar headers: All of the non-connection control message headers yet
received.
@ivar state: State indicator for the response parsing state machine. One
of C{STATUS}, C{HEADER}, C{BODY}, C{DONE}.
@ivar _partialHeader: C{None} or a C{list} of the lines of a multiline
header while that header is being received.
"""
# NOTE: According to HTTP spec, we're supposed to eat the
# 'Proxy-Authenticate' and 'Proxy-Authorization' headers also, but that
# doesn't sound like a good idea to me, because it makes it impossible to
# have a non-authenticating transparent proxy in front of an authenticating
# proxy. An authenticating proxy can eat them itself. -jknight
#
# Further, quoting
# http://homepages.tesco.net/J.deBoynePollard/FGA/web-proxy-connection-header.html
# regarding the 'Proxy-Connection' header:
#
# The Proxy-Connection: header is a mistake in how some web browsers
# use HTTP. Its name is the result of a false analogy. It is not a
# standard part of the protocol. There is a different standard
# protocol mechanism for doing what it does. And its existence
# imposes a requirement upon HTTP servers such that no proxy HTTP
# server can be standards-conforming in practice.
#
# -exarkun
# Some servers (like http://news.ycombinator.com/) return status lines and
# HTTP headers delimited by \n instead of \r\n.
delimiter = '\n'
CONNECTION_CONTROL_HEADERS = set([
'content-length', 'connection', 'keep-alive', 'te', 'trailers',
'transfer-encoding', 'upgrade', 'proxy-connection'])
def connectionMade(self):
self.headers = Headers()
self.connHeaders = Headers()
self.state = STATUS
self._partialHeader = None
def switchToBodyMode(self, decoder):
"""
Switch to body parsing mode - interpret any more bytes delivered as
part of the message body and deliver them to the given decoder.
"""
if self.state == BODY:
raise RuntimeError("already in body mode")
self.bodyDecoder = decoder
self.state = BODY
self.setRawMode()
def lineReceived(self, line):
"""
Handle one line from a response.
"""
# Handle the normal CR LF case.
if line[-1:] == '\r':
line = line[:-1]
if self.state == STATUS:
self.statusReceived(line)
self.state = HEADER
elif self.state == HEADER:
if not line or line[0] not in ' \t':
if self._partialHeader is not None:
header = ''.join(self._partialHeader)
name, value = header.split(':', 1)
value = value.strip()
self.headerReceived(name, value)
if not line:
# Empty line means the header section is over.
self.allHeadersReceived()
else:
# Line not beginning with LWS is another header.
self._partialHeader = [line]
else:
# A line beginning with LWS is a continuation of a header
# begun on a previous line.
self._partialHeader.append(line)
def rawDataReceived(self, data):
"""
Pass data from the message body to the body decoder object.
"""
self.bodyDecoder.dataReceived(data)
def isConnectionControlHeader(self, name):
"""
Return C{True} if the given lower-cased name is the name of a
connection control header (rather than an entity header).
According to RFC 2616, section 14.10, the tokens in the Connection
header are probably relevant here. However, I am not sure what the
practical consequences of either implementing or ignoring that are.
So I leave it unimplemented for the time being.
"""
return name in self.CONNECTION_CONTROL_HEADERS
def statusReceived(self, status):
"""
Callback invoked whenever the first line of a new message is received.
Override this.
@param status: The first line of an HTTP request or response message
without trailing I{CR LF}.
@type status: C{str}
"""
def headerReceived(self, name, value):
"""
Store the given header in C{self.headers}.
"""
name = name.lower()
if self.isConnectionControlHeader(name):
headers = self.connHeaders
else:
headers = self.headers
headers.addRawHeader(name, value)
def allHeadersReceived(self):
"""
Callback invoked after the last header is passed to C{headerReceived}.
Override this to change to the C{BODY} or C{DONE} state.
"""
self.switchToBodyMode(None)
class HTTPClientParser(HTTPParser):
"""
Loading ...