"""
Multi-part parsing for file uploads.
Exposes one class, ``MultiPartParser``, which feeds chunks of uploaded data to
file upload handlers for processing.
"""
from __future__ import unicode_literals
import base64
import binascii
import cgi
import sys
from django.conf import settings
from django.core.exceptions import SuspiciousMultipartForm
from django.core.files.uploadhandler import (
SkipFile, StopFutureHandlers, StopUpload,
)
from django.utils import six
from django.utils.datastructures import MultiValueDict
from django.utils.encoding import force_text
from django.utils.six.moves.urllib.parse import unquote
from django.utils.text import unescape_entities
__all__ = ('MultiPartParser', 'MultiPartParserError', 'InputStreamExhausted')
class MultiPartParserError(Exception):
pass
class InputStreamExhausted(Exception):
"""
No more reads are allowed from this device.
"""
pass
RAW = "raw"
FILE = "file"
FIELD = "field"
_BASE64_DECODE_ERROR = TypeError if six.PY2 else binascii.Error
class MultiPartParser(object):
"""
A rfc2388 multipart/form-data parser.
``MultiValueDict.parse()`` reads the input stream in ``chunk_size`` chunks
and returns a tuple of ``(MultiValueDict(POST), MultiValueDict(FILES))``.
"""
def __init__(self, META, input_data, upload_handlers, encoding=None):
"""
Initialize the MultiPartParser object.
:META:
The standard ``META`` dictionary in Django request objects.
:input_data:
The raw post data, as a file-like object.
:upload_handlers:
A list of UploadHandler instances that perform operations on the uploaded
data.
:encoding:
The encoding with which to treat the incoming data.
"""
#
# Content-Type should contain multipart and the boundary information.
#
content_type = META.get('HTTP_CONTENT_TYPE', META.get('CONTENT_TYPE', ''))
if not content_type.startswith('multipart/'):
raise MultiPartParserError('Invalid Content-Type: %s' % content_type)
# Parse the header to get the boundary to split the parts.
ctypes, opts = parse_header(content_type.encode('ascii'))
boundary = opts.get('boundary')
if not boundary or not cgi.valid_boundary(boundary):
raise MultiPartParserError('Invalid boundary in multipart: %s' % boundary)
# Content-Length should contain the length of the body we are about
# to receive.
try:
content_length = int(META.get('HTTP_CONTENT_LENGTH', META.get('CONTENT_LENGTH', 0)))
except (ValueError, TypeError):
content_length = 0
if content_length < 0:
# This means we shouldn't continue...raise an error.
raise MultiPartParserError("Invalid content length: %r" % content_length)
if isinstance(boundary, six.text_type):
boundary = boundary.encode('ascii')
self._boundary = boundary
self._input_data = input_data
# For compatibility with low-level network APIs (with 32-bit integers),
# the chunk size should be < 2^31, but still divisible by 4.
possible_sizes = [x.chunk_size for x in upload_handlers if x.chunk_size]
self._chunk_size = min([2 ** 31 - 4] + possible_sizes)
self._meta = META
self._encoding = encoding or settings.DEFAULT_CHARSET
self._content_length = content_length
self._upload_handlers = upload_handlers
def parse(self):
"""
Parse the POST data and break it into a FILES MultiValueDict and a POST
MultiValueDict.
Returns a tuple containing the POST and FILES dictionary, respectively.
"""
# We have to import QueryDict down here to avoid a circular import.
from django.http import QueryDict
encoding = self._encoding
handlers = self._upload_handlers
# HTTP spec says that Content-Length >= 0 is valid
# handling content-length == 0 before continuing
if self._content_length == 0:
return QueryDict('', encoding=self._encoding), MultiValueDict()
# See if any of the handlers take care of the parsing.
# This allows overriding everything if need be.
for handler in handlers:
result = handler.handle_raw_input(self._input_data,
self._meta,
self._content_length,
self._boundary,
encoding)
# Check to see if it was handled
if result is not None:
return result[0], result[1]
# Create the data structures to be used later.
self._post = QueryDict('', mutable=True)
self._files = MultiValueDict()
# Instantiate the parser and stream:
stream = LazyStream(ChunkIter(self._input_data, self._chunk_size))
# Whether or not to signal a file-completion at the beginning of the loop.
old_field_name = None
counters = [0] * len(handlers)
try:
for item_type, meta_data, field_stream in Parser(stream, self._boundary):
if old_field_name:
# We run this at the beginning of the next loop
# since we cannot be sure a file is complete until
# we hit the next boundary/part of the multipart content.
self.handle_file_complete(old_field_name, counters)
old_field_name = None
try:
disposition = meta_data['content-disposition'][1]
field_name = disposition['name'].strip()
except (KeyError, IndexError, AttributeError):
continue
transfer_encoding = meta_data.get('content-transfer-encoding')
if transfer_encoding is not None:
transfer_encoding = transfer_encoding[0].strip()
field_name = force_text(field_name, encoding, errors='replace')
if item_type == FIELD:
# This is a post field, we can just set it in the post
if transfer_encoding == 'base64':
raw_data = field_stream.read()
try:
data = base64.b64decode(raw_data)
except _BASE64_DECODE_ERROR:
data = raw_data
else:
data = field_stream.read()
self._post.appendlist(field_name,
force_text(data, encoding, errors='replace'))
elif item_type == FILE:
# This is a file, use the handler...
file_name = disposition.get('filename')
if file_name:
file_name = force_text(file_name, encoding, errors='replace')
file_name = self.IE_sanitize(unescape_entities(file_name))
if not file_name:
continue
content_type, content_type_extra = meta_data.get('content-type', ('', {}))
content_type = content_type.strip()
charset = content_type_extra.get('charset')
try:
content_length = int(meta_data.get('content-length')[0])
except (IndexError, TypeError, ValueError):
content_length = None
counters = [0] * len(handlers)
try:
for handler in handlers:
try:
handler.new_file(field_name, file_name,
content_type, content_length,
charset, content_type_extra)
except StopFutureHandlers:
break
for chunk in field_stream:
if transfer_encoding == 'base64':
# We only special-case base64 transfer encoding
# We should always decode base64 chunks by multiple of 4,
# ignoring whitespace.
stripped_chunk = b"".join(chunk.split())
remaining = len(stripped_chunk) % 4
while remaining != 0:
over_chunk = field_stream.read(4 - remaining)
stripped_chunk += b"".join(over_chunk.split())
remaining = len(stripped_chunk) % 4
try:
chunk = base64.b64decode(stripped_chunk)
except Exception as e:
# Since this is only a chunk, any error is an unfixable error.
msg = "Could not decode base64 data: %r" % e
six.reraise(MultiPartParserError, MultiPartParserError(msg), sys.exc_info()[2])
for i, handler in enumerate(handlers):
chunk_length = len(chunk)
chunk = handler.receive_data_chunk(chunk,
counters[i])
counters[i] += chunk_length
if chunk is None:
# If the chunk received by the handler is None, then don't continue.
break
except SkipFile:
self._close_files()
# Just use up the rest of this file...
exhaust(field_stream)
else:
# Handle file upload completions on next iteration.
old_field_name = field_name
else:
# If this is neither a FIELD or a FILE, just exhaust the stream.
exhaust(stream)
except StopUpload as e:
self._close_files()
if not e.connection_reset:
exhaust(self._input_data)
else:
# Make sure that the request data is all fed
exhaust(self._input_data)
# Signal that the upload has completed.
for handler in handlers:
retval = handler.upload_complete()
if retval:
break
return self._post, self._files
def handle_file_complete(self, old_field_name, counters):
"""
Handle all the signaling that takes place when a file is complete.
"""
for i, handler in enumerate(self._upload_handlers):
file_obj = handler.file_complete(counters[i])
if file_obj:
# If it returns a file object, then set the files dict.
self._files.appendlist(
force_text(old_field_name, self._encoding, errors='replace'),
file_obj)
break
def IE_sanitize(self, filename):
"""Cleanup filename from Internet Explorer full paths."""
return filename and filename[filename.rfind("\\") + 1:].strip()
def _close_files(self):
# Free up all file handles.
# FIXME: this currently assumes that upload handlers store the file as 'file'
# We should document that... (Maybe add handler.free_file to complement new_file)
for handler in self._upload_handlers:
if hasattr(handler, 'file'):
handler.file.close()
class LazyStream(six.Iterator):
"""
The LazyStream wrapper allows one to get and "unget" bytes from a stream.
Given a producer object (an iterator that yields bytestrings), the
LazyStream object will support iteration, reading, and keeping a "look-back"
variable in case you need to "unget" some bytes.
"""
def __init__(self, producer, length=None):
"""
Every LazyStream must have a producer when instantiated.
A producer is an iterable that returns a string each time it
is called.
"""
self._producer = producer
self._empty = False
self._leftover = b''
self.length = length
self.position = 0
self._remaining = length
self._unget_history = []
def tell(self):
return self.position
def read(self, size=None):
def parts():
remaining = self._remaining if size is None else size
# do the whole thing in one shot if no limit was provided.
if remaining is None:
yield b''.join(self)
return
# otherwise do some bookkeeping to return exactly enough
# of the stream and stashing any extra content we get from
# the producer
while remaining != 0:
assert remaining > 0, 'remaining bytes to read should never go negative'
try:
chunk = next(self)
except StopIteration:
return
else:
emitting = chunk[:remaining]
self.unget(chunk[remaining:])
remaining -= len(emitting)
yield emitting
out = b''.join(parts())
return out
def __next__(self):
"""
Loading ...