Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

aroundthecode / ruamel.yaml   python

Repository URL to install this package:

/ yaml / parser.py

# coding: utf-8

from __future__ import absolute_import

# The following YAML grammar is LL(1) and is parsed by a recursive descent
# parser.
#
# stream            ::= STREAM-START implicit_document? explicit_document*
#                                                                   STREAM-END
# implicit_document ::= block_node DOCUMENT-END*
# explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
# block_node_or_indentless_sequence ::=
#                       ALIAS
#                       | properties (block_content |
#                                                   indentless_block_sequence)?
#                       | block_content
#                       | indentless_block_sequence
# block_node        ::= ALIAS
#                       | properties block_content?
#                       | block_content
# flow_node         ::= ALIAS
#                       | properties flow_content?
#                       | flow_content
# properties        ::= TAG ANCHOR? | ANCHOR TAG?
# block_content     ::= block_collection | flow_collection | SCALAR
# flow_content      ::= flow_collection | SCALAR
# block_collection  ::= block_sequence | block_mapping
# flow_collection   ::= flow_sequence | flow_mapping
# block_sequence    ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)*
#                                                                   BLOCK-END
# indentless_sequence   ::= (BLOCK-ENTRY block_node?)+
# block_mapping     ::= BLOCK-MAPPING_START
#                       ((KEY block_node_or_indentless_sequence?)?
#                       (VALUE block_node_or_indentless_sequence?)?)*
#                       BLOCK-END
# flow_sequence     ::= FLOW-SEQUENCE-START
#                       (flow_sequence_entry FLOW-ENTRY)*
#                       flow_sequence_entry?
#                       FLOW-SEQUENCE-END
# flow_sequence_entry   ::= flow_node | KEY flow_node? (VALUE flow_node?)?
# flow_mapping      ::= FLOW-MAPPING-START
#                       (flow_mapping_entry FLOW-ENTRY)*
#                       flow_mapping_entry?
#                       FLOW-MAPPING-END
# flow_mapping_entry    ::= flow_node | KEY flow_node? (VALUE flow_node?)?
#
# FIRST sets:
#
# stream: { STREAM-START }
# explicit_document: { DIRECTIVE DOCUMENT-START }
# implicit_document: FIRST(block_node)
# block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START
#                  BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START }
# flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START }
# block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START
#                               FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
# flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
# block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START }
# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
# block_sequence: { BLOCK-SEQUENCE-START }
# block_mapping: { BLOCK-MAPPING-START }
# block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR
#               BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START
#               FLOW-MAPPING-START BLOCK-ENTRY }
# indentless_sequence: { ENTRY }
# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
# flow_sequence: { FLOW-SEQUENCE-START }
# flow_mapping: { FLOW-MAPPING-START }
# flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START
#                                                    FLOW-MAPPING-START KEY }
# flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START
#                                                    FLOW-MAPPING-START KEY }

# need to have full path, as pkg_resources tries to load parser.py in __init__.py
# only to not do anything with the package afterwards
# and for Jython too


from ruamel.yaml.error import MarkedYAMLError
from ruamel.yaml.tokens import *  # NOQA
from ruamel.yaml.events import *  # NOQA
from ruamel.yaml.scanner import Scanner, RoundTripScanner, ScannerError  # NOQA
from ruamel.yaml.compat import utf8, nprint, nprintf  # NOQA

if False:  # MYPY
    from typing import Any, Dict, Optional, List  # NOQA

__all__ = ['Parser', 'RoundTripParser', 'ParserError']


class ParserError(MarkedYAMLError):
    pass


class Parser(object):
    # Since writing a recursive-descendant parser is a straightforward task, we
    # do not give many comments here.

    DEFAULT_TAGS = {u'!': u'!', u'!!': u'tag:yaml.org,2002:'}

    def __init__(self, loader):
        # type: (Any) -> None
        self.loader = loader
        if self.loader is not None and getattr(self.loader, '_parser', None) is None:
            self.loader._parser = self
        self.reset_parser()

    def reset_parser(self):
        # type: () -> None
        # Reset the state attributes (to clear self-references)
        self.current_event = None
        self.yaml_version = None
        self.tag_handles = {}  # type: Dict[Any, Any]
        self.states = []  # type: List[Any]
        self.marks = []  # type: List[Any]
        self.state = self.parse_stream_start  # type: Any

    def dispose(self):
        # type: () -> None
        self.reset_parser()

    @property
    def scanner(self):
        # type: () -> Any
        if hasattr(self.loader, 'typ'):
            return self.loader.scanner
        return self.loader._scanner

    @property
    def resolver(self):
        # type: () -> Any
        if hasattr(self.loader, 'typ'):
            return self.loader.resolver
        return self.loader._resolver

    def check_event(self, *choices):
        # type: (Any) -> bool
        # Check the type of the next event.
        if self.current_event is None:
            if self.state:
                self.current_event = self.state()
        if self.current_event is not None:
            if not choices:
                return True
            for choice in choices:
                if isinstance(self.current_event, choice):
                    return True
        return False

    def peek_event(self):
        # type: () -> Any
        # Get the next event.
        if self.current_event is None:
            if self.state:
                self.current_event = self.state()
        return self.current_event

    def get_event(self):
        # type: () -> Any
        # Get the next event and proceed further.
        if self.current_event is None:
            if self.state:
                self.current_event = self.state()
        value = self.current_event
        self.current_event = None
        return value

    # stream    ::= STREAM-START implicit_document? explicit_document*
    #                                                               STREAM-END
    # implicit_document ::= block_node DOCUMENT-END*
    # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*

    def parse_stream_start(self):
        # type: () -> Any
        # Parse the stream start.
        token = self.scanner.get_token()
        token.move_comment(self.scanner.peek_token())
        event = StreamStartEvent(token.start_mark, token.end_mark, encoding=token.encoding)

        # Prepare the next state.
        self.state = self.parse_implicit_document_start

        return event

    def parse_implicit_document_start(self):
        # type: () -> Any
        # Parse an implicit document.
        if not self.scanner.check_token(DirectiveToken, DocumentStartToken, StreamEndToken):
            self.tag_handles = self.DEFAULT_TAGS
            token = self.scanner.peek_token()
            start_mark = end_mark = token.start_mark
            event = DocumentStartEvent(start_mark, end_mark, explicit=False)

            # Prepare the next state.
            self.states.append(self.parse_document_end)
            self.state = self.parse_block_node

            return event

        else:
            return self.parse_document_start()

    def parse_document_start(self):
        # type: () -> Any
        # Parse any extra document end indicators.
        while self.scanner.check_token(DocumentEndToken):
            self.scanner.get_token()
        # Parse an explicit document.
        if not self.scanner.check_token(StreamEndToken):
            token = self.scanner.peek_token()
            start_mark = token.start_mark
            version, tags = self.process_directives()
            if not self.scanner.check_token(DocumentStartToken):
                raise ParserError(
                    None,
                    None,
                    "expected '<document start>', but found %r" % self.scanner.peek_token().id,
                    self.scanner.peek_token().start_mark,
                )
            token = self.scanner.get_token()
            end_mark = token.end_mark
            event = DocumentStartEvent(
                start_mark, end_mark, explicit=True, version=version, tags=tags
            )  # type: Any
            self.states.append(self.parse_document_end)
            self.state = self.parse_document_content
        else:
            # Parse the end of the stream.
            token = self.scanner.get_token()
            event = StreamEndEvent(token.start_mark, token.end_mark, comment=token.comment)
            assert not self.states
            assert not self.marks
            self.state = None
        return event

    def parse_document_end(self):
        # type: () -> Any
        # Parse the document end.
        token = self.scanner.peek_token()
        start_mark = end_mark = token.start_mark
        explicit = False
        if self.scanner.check_token(DocumentEndToken):
            token = self.scanner.get_token()
            end_mark = token.end_mark
            explicit = True
        event = DocumentEndEvent(start_mark, end_mark, explicit=explicit)

        # Prepare the next state.
        if self.resolver.processing_version == (1, 1):
            self.state = self.parse_document_start
        else:
            self.state = self.parse_implicit_document_start

        return event

    def parse_document_content(self):
        # type: () -> Any
        if self.scanner.check_token(
            DirectiveToken, DocumentStartToken, DocumentEndToken, StreamEndToken
        ):
            event = self.process_empty_scalar(self.scanner.peek_token().start_mark)
            self.state = self.states.pop()
            return event
        else:
            return self.parse_block_node()

    def process_directives(self):
        # type: () -> Any
        self.yaml_version = None
        self.tag_handles = {}
        while self.scanner.check_token(DirectiveToken):
            token = self.scanner.get_token()
            if token.name == u'YAML':
                if self.yaml_version is not None:
                    raise ParserError(
                        None, None, 'found duplicate YAML directive', token.start_mark
                    )
                major, minor = token.value
                if major != 1:
                    raise ParserError(
                        None,
                        None,
                        'found incompatible YAML document (version 1.* is ' 'required)',
                        token.start_mark,
                    )
                self.yaml_version = token.value
            elif token.name == u'TAG':
                handle, prefix = token.value
                if handle in self.tag_handles:
                    raise ParserError(
                        None, None, 'duplicate tag handle %r' % utf8(handle), token.start_mark
                    )
                self.tag_handles[handle] = prefix
        if bool(self.tag_handles):
            value = self.yaml_version, self.tag_handles.copy()  # type: Any
        else:
            value = self.yaml_version, None
        for key in self.DEFAULT_TAGS:
            if key not in self.tag_handles:
                self.tag_handles[key] = self.DEFAULT_TAGS[key]
        return value

    # block_node_or_indentless_sequence ::= ALIAS
    #               | properties (block_content | indentless_block_sequence)?
    #               | block_content
    #               | indentless_block_sequence
    # block_node    ::= ALIAS
    #                   | properties block_content?
    #                   | block_content
    # flow_node     ::= ALIAS
    #                   | properties flow_content?
    #                   | flow_content
    # properties    ::= TAG ANCHOR? | ANCHOR TAG?
    # block_content     ::= block_collection | flow_collection | SCALAR
    # flow_content      ::= flow_collection | SCALAR
    # block_collection  ::= block_sequence | block_mapping
    # flow_collection   ::= flow_sequence | flow_mapping

    def parse_block_node(self):
        # type: () -> Any
        return self.parse_node(block=True)

    def parse_flow_node(self):
        # type: () -> Any
        return self.parse_node()

    def parse_block_node_or_indentless_sequence(self):
        # type: () -> Any
        return self.parse_node(block=True, indentless_sequence=True)

    def transform_tag(self, handle, suffix):
        # type: (Any, Any) -> Any
        return self.tag_handles[handle] + suffix

    def parse_node(self, block=False, indentless_sequence=False):
        # type: (bool, bool) -> Any
        if self.scanner.check_token(AliasToken):
            token = self.scanner.get_token()
            event = AliasEvent(token.value, token.start_mark, token.end_mark)  # type: Any
            self.state = self.states.pop()
            return event

        anchor = None
        tag = None
        start_mark = end_mark = tag_mark = None
Loading ...