Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

hemamaps / Django   python

Repository URL to install this package:

Version: 1.9.8 

/ core / serializers / xml_serializer.py

"""
XML serializer.
"""

from __future__ import unicode_literals

from collections import OrderedDict
from xml.dom import pulldom
from xml.sax import handler
from xml.sax.expatreader import ExpatParser as _ExpatParser

from django.apps import apps
from django.conf import settings
from django.core.serializers import base
from django.db import DEFAULT_DB_ALIAS, models
from django.utils.encoding import smart_text
from django.utils.xmlutils import (
    SimplerXMLGenerator, UnserializableContentError,
)


class Serializer(base.Serializer):
    """
    Serializes a QuerySet to XML.
    """

    def indent(self, level):
        if self.options.get('indent') is not None:
            self.xml.ignorableWhitespace('\n' + ' ' * self.options.get('indent') * level)

    def start_serialization(self):
        """
        Start serialization -- open the XML document and the root element.
        """
        self.xml = SimplerXMLGenerator(self.stream, self.options.get("encoding", settings.DEFAULT_CHARSET))
        self.xml.startDocument()
        self.xml.startElement("django-objects", {"version": "1.0"})

    def end_serialization(self):
        """
        End serialization -- end the document.
        """
        self.indent(0)
        self.xml.endElement("django-objects")
        self.xml.endDocument()

    def start_object(self, obj):
        """
        Called as each object is handled.
        """
        if not hasattr(obj, "_meta"):
            raise base.SerializationError("Non-model object (%s) encountered during serialization" % type(obj))

        self.indent(1)
        model = obj._meta.proxy_for_model if obj._deferred else obj.__class__
        attrs = OrderedDict([("model", smart_text(model._meta))])
        if not self.use_natural_primary_keys or not hasattr(obj, 'natural_key'):
            obj_pk = obj._get_pk_val()
            if obj_pk is not None:
                attrs['pk'] = smart_text(obj_pk)

        self.xml.startElement("object", attrs)

    def end_object(self, obj):
        """
        Called after handling all fields for an object.
        """
        self.indent(1)
        self.xml.endElement("object")

    def handle_field(self, obj, field):
        """
        Called to handle each field on an object (except for ForeignKeys and
        ManyToManyFields)
        """
        self.indent(2)
        self.xml.startElement("field", OrderedDict([
            ("name", field.name),
            ("type", field.get_internal_type()),
        ]))

        # Get a "string version" of the object's data.
        if getattr(obj, field.name) is not None:
            try:
                self.xml.characters(field.value_to_string(obj))
            except UnserializableContentError:
                raise ValueError("%s.%s (pk:%s) contains unserializable characters" % (
                    obj.__class__.__name__, field.name, obj._get_pk_val()))
        else:
            self.xml.addQuickElement("None")

        self.xml.endElement("field")

    def handle_fk_field(self, obj, field):
        """
        Called to handle a ForeignKey (we need to treat them slightly
        differently from regular fields).
        """
        self._start_relational_field(field)
        related_att = getattr(obj, field.get_attname())
        if related_att is not None:
            if self.use_natural_foreign_keys and hasattr(field.remote_field.model, 'natural_key'):
                related = getattr(obj, field.name)
                # If related object has a natural key, use it
                related = related.natural_key()
                # Iterable natural keys are rolled out as subelements
                for key_value in related:
                    self.xml.startElement("natural", {})
                    self.xml.characters(smart_text(key_value))
                    self.xml.endElement("natural")
            else:
                self.xml.characters(smart_text(related_att))
        else:
            self.xml.addQuickElement("None")
        self.xml.endElement("field")

    def handle_m2m_field(self, obj, field):
        """
        Called to handle a ManyToManyField. Related objects are only
        serialized as references to the object's PK (i.e. the related *data*
        is not dumped, just the relation).
        """
        if field.remote_field.through._meta.auto_created:
            self._start_relational_field(field)
            if self.use_natural_foreign_keys and hasattr(field.remote_field.model, 'natural_key'):
                # If the objects in the m2m have a natural key, use it
                def handle_m2m(value):
                    natural = value.natural_key()
                    # Iterable natural keys are rolled out as subelements
                    self.xml.startElement("object", {})
                    for key_value in natural:
                        self.xml.startElement("natural", {})
                        self.xml.characters(smart_text(key_value))
                        self.xml.endElement("natural")
                    self.xml.endElement("object")
            else:
                def handle_m2m(value):
                    self.xml.addQuickElement("object", attrs={
                        'pk': smart_text(value._get_pk_val())
                    })
            for relobj in getattr(obj, field.name).iterator():
                handle_m2m(relobj)

            self.xml.endElement("field")

    def _start_relational_field(self, field):
        """
        Helper to output the <field> element for relational fields
        """
        self.indent(2)
        self.xml.startElement("field", OrderedDict([
            ("name", field.name),
            ("rel", field.remote_field.__class__.__name__),
            ("to", smart_text(field.remote_field.model._meta)),
        ]))


class Deserializer(base.Deserializer):
    """
    Deserialize XML.
    """

    def __init__(self, stream_or_string, **options):
        super(Deserializer, self).__init__(stream_or_string, **options)
        self.event_stream = pulldom.parse(self.stream, self._make_parser())
        self.db = options.pop('using', DEFAULT_DB_ALIAS)
        self.ignore = options.pop('ignorenonexistent', False)

    def _make_parser(self):
        """Create a hardened XML parser (no custom/external entities)."""
        return DefusedExpatParser()

    def __next__(self):
        for event, node in self.event_stream:
            if event == "START_ELEMENT" and node.nodeName == "object":
                self.event_stream.expandNode(node)
                return self._handle_object(node)
        raise StopIteration

    def _handle_object(self, node):
        """
        Convert an <object> node to a DeserializedObject.
        """
        # Look up the model using the model loading mechanism. If this fails,
        # bail.
        Model = self._get_model_from_node(node, "model")

        # Start building a data dictionary from the object.
        data = {}
        if node.hasAttribute('pk'):
            data[Model._meta.pk.attname] = Model._meta.pk.to_python(
                node.getAttribute('pk'))

        # Also start building a dict of m2m data (this is saved as
        # {m2m_accessor_attribute : [list_of_related_objects]})
        m2m_data = {}

        field_names = {f.name for f in Model._meta.get_fields()}
        # Deserialize each field.
        for field_node in node.getElementsByTagName("field"):
            # If the field is missing the name attribute, bail (are you
            # sensing a pattern here?)
            field_name = field_node.getAttribute("name")
            if not field_name:
                raise base.DeserializationError("<field> node is missing the 'name' attribute")

            # Get the field from the Model. This will raise a
            # FieldDoesNotExist if, well, the field doesn't exist, which will
            # be propagated correctly unless ignorenonexistent=True is used.
            if self.ignore and field_name not in field_names:
                continue
            field = Model._meta.get_field(field_name)

            # As is usually the case, relation fields get the special treatment.
            if field.remote_field and isinstance(field.remote_field, models.ManyToManyRel):
                m2m_data[field.name] = self._handle_m2m_field_node(field_node, field)
            elif field.remote_field and isinstance(field.remote_field, models.ManyToOneRel):
                data[field.attname] = self._handle_fk_field_node(field_node, field)
            else:
                if field_node.getElementsByTagName('None'):
                    value = None
                else:
                    value = field.to_python(getInnerText(field_node).strip())
                data[field.name] = value

        obj = base.build_instance(Model, data, self.db)

        # Return a DeserializedObject so that the m2m data has a place to live.
        return base.DeserializedObject(obj, m2m_data)

    def _handle_fk_field_node(self, node, field):
        """
        Handle a <field> node for a ForeignKey
        """
        # Check if there is a child node named 'None', returning None if so.
        if node.getElementsByTagName('None'):
            return None
        else:
            model = field.remote_field.model
            if hasattr(model._default_manager, 'get_by_natural_key'):
                keys = node.getElementsByTagName('natural')
                if keys:
                    # If there are 'natural' subelements, it must be a natural key
                    field_value = [getInnerText(k).strip() for k in keys]
                    obj = model._default_manager.db_manager(self.db).get_by_natural_key(*field_value)
                    obj_pk = getattr(obj, field.remote_field.field_name)
                    # If this is a natural foreign key to an object that
                    # has a FK/O2O as the foreign key, use the FK value
                    if field.remote_field.model._meta.pk.remote_field:
                        obj_pk = obj_pk.pk
                else:
                    # Otherwise, treat like a normal PK
                    field_value = getInnerText(node).strip()
                    obj_pk = model._meta.get_field(field.remote_field.field_name).to_python(field_value)
                return obj_pk
            else:
                field_value = getInnerText(node).strip()
                return model._meta.get_field(field.remote_field.field_name).to_python(field_value)

    def _handle_m2m_field_node(self, node, field):
        """
        Handle a <field> node for a ManyToManyField.
        """
        model = field.remote_field.model
        default_manager = model._default_manager
        if hasattr(default_manager, 'get_by_natural_key'):
            def m2m_convert(n):
                keys = n.getElementsByTagName('natural')
                if keys:
                    # If there are 'natural' subelements, it must be a natural key
                    field_value = [getInnerText(k).strip() for k in keys]
                    obj_pk = default_manager.db_manager(self.db).get_by_natural_key(*field_value).pk
                else:
                    # Otherwise, treat like a normal PK value.
                    obj_pk = model._meta.pk.to_python(n.getAttribute('pk'))
                return obj_pk
        else:
            m2m_convert = lambda n: model._meta.pk.to_python(n.getAttribute('pk'))
        return [m2m_convert(c) for c in node.getElementsByTagName("object")]

    def _get_model_from_node(self, node, attr):
        """
        Helper to look up a model from a <object model=...> or a <field
        rel=... to=...> node.
        """
        model_identifier = node.getAttribute(attr)
        if not model_identifier:
            raise base.DeserializationError(
                "<%s> node is missing the required '%s' attribute"
                % (node.nodeName, attr))
        try:
            return apps.get_model(model_identifier)
        except (LookupError, TypeError):
            raise base.DeserializationError(
                "<%s> node has invalid model identifier: '%s'"
                % (node.nodeName, model_identifier))


def getInnerText(node):
    """
    Get all the inner text of a DOM node (recursively).
    """
    # inspired by http://mail.python.org/pipermail/xml-sig/2005-March/011022.html
    inner_text = []
    for child in node.childNodes:
        if child.nodeType == child.TEXT_NODE or child.nodeType == child.CDATA_SECTION_NODE:
            inner_text.append(child.data)
        elif child.nodeType == child.ELEMENT_NODE:
            inner_text.extend(getInnerText(child))
        else:
            pass
    return "".join(inner_text)


# Below code based on Christian Heimes' defusedxml


class DefusedExpatParser(_ExpatParser):
    """
    An expat parser hardened against XML bomb attacks.

    Forbids DTDs, external entity references
    """
    def __init__(self, *args, **kwargs):
        _ExpatParser.__init__(self, *args, **kwargs)
        self.setFeature(handler.feature_external_ges, False)
        self.setFeature(handler.feature_external_pes, False)

    def start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
        raise DTDForbidden(name, sysid, pubid)

    def entity_decl(self, name, is_parameter_entity, value, base,
                    sysid, pubid, notation_name):
        raise EntitiesForbidden(name, value, base, sysid, pubid, notation_name)

    def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
        # expat 1.2
        raise EntitiesForbidden(name, None, base, sysid, pubid, notation_name)

    def external_entity_ref_handler(self, context, base, sysid, pubid):
        raise ExternalReferenceForbidden(context, base, sysid, pubid)

    def reset(self):
        _ExpatParser.reset(self)
        parser = self._parser
Loading ...