# module pyparsing.py
#
# Copyright (c) 2003-2018 Paul T. McGuire
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
__doc__ = \
"""
pyparsing module - Classes and methods to define and execute parsing grammars
=============================================================================
The pyparsing module is an alternative approach to creating and executing simple grammars,
vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
provides a library of classes that you use to construct the grammar directly in Python.
Here is a program to parse "Hello, World!" (or any greeting of the form
C{"<salutation>, <addressee>!"}), built up using L{Word}, L{Literal}, and L{And} elements
(L{'+'<ParserElement.__add__>} operator gives L{And} expressions, strings are auto-converted to
L{Literal} expressions)::
from pyparsing import Word, alphas
# define grammar of a greeting
greet = Word(alphas) + "," + Word(alphas) + "!"
hello = "Hello, World!"
print (hello, "->", greet.parseString(hello))
The program outputs the following::
Hello, World! -> ['Hello', ',', 'World', '!']
The Python representation of the grammar is quite readable, owing to the self-explanatory
class names, and the use of '+', '|' and '^' operators.
The L{ParseResults} object returned from L{ParserElement.parseString<ParserElement.parseString>} can be accessed as a nested list, a dictionary, or an
object with named attributes.
The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
- extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
- quoted strings
- embedded comments
Getting Started -
-----------------
Visit the classes L{ParserElement} and L{ParseResults} to see the base classes that most other pyparsing
classes inherit from. Use the docstrings for examples of how to:
- construct literal match expressions from L{Literal} and L{CaselessLiteral} classes
- construct character word-group expressions using the L{Word} class
- see how to create repetitive expressions using L{ZeroOrMore} and L{OneOrMore} classes
- use L{'+'<And>}, L{'|'<MatchFirst>}, L{'^'<Or>}, and L{'&'<Each>} operators to combine simple expressions into more complex ones
- associate names with your parsed results using L{ParserElement.setResultsName}
- find some helpful expression short-cuts like L{delimitedList} and L{oneOf}
- find more useful common expressions in the L{pyparsing_common} namespace class
"""
__version__ = "2.2.1"
__versionTime__ = "18 Sep 2018 00:49 UTC"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
import string
from weakref import ref as wkref
import copy
import sys
import warnings
import re
import sre_constants
import collections
import pprint
import traceback
import types
from datetime import datetime
try:
from _thread import RLock
except ImportError:
from threading import RLock
try:
# Python 3
from collections.abc import Iterable
from collections.abc import MutableMapping
except ImportError:
# Python 2.7
from collections import Iterable
from collections import MutableMapping
try:
from collections import OrderedDict as _OrderedDict
except ImportError:
try:
from ordereddict import OrderedDict as _OrderedDict
except ImportError:
_OrderedDict = None
#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
__all__ = [
'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',
'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass',
'CloseMatch', 'tokenMap', 'pyparsing_common',
]
system_version = tuple(sys.version_info)[:3]
PY_3 = system_version[0] == 3
if PY_3:
_MAX_INT = sys.maxsize
basestring = str
unichr = chr
_ustr = str
# build list of single arg builtins, that can be used as parse actions
singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]
else:
_MAX_INT = sys.maxint
range = xrange
def _ustr(obj):
"""Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
then < returns the unicode object | encodes it with the default encoding | ... >.
"""
if isinstance(obj,unicode):
return obj
try:
# If this works, then _ustr(obj) has the same behaviour as str(obj), so
# it won't break any existing code.
return str(obj)
except UnicodeEncodeError:
# Else encode it
ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
xmlcharref = Regex(r'&#\d+;')
xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
return xmlcharref.transformString(ret)
# build list of single arg builtins, tolerant of Python version, that can be used as parse actions
singleArgBuiltins = []
import __builtin__
for fname in "sum len sorted reversed list tuple set any all min max".split():
try:
singleArgBuiltins.append(getattr(__builtin__,fname))
except AttributeError:
continue
_generatorType = type((y for y in range(1)))
def _xml_escape(data):
"""Escape &, <, >, ", ', etc. in a string of data."""
# ampersand must be replaced first
from_symbols = '&><"\''
to_symbols = ('&'+s+';' for s in "amp gt lt quot apos".split())
for from_,to_ in zip(from_symbols, to_symbols):
data = data.replace(from_, to_)
return data
class _Constants(object):
pass
alphas = string.ascii_uppercase + string.ascii_lowercase
nums = "0123456789"
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
_bslash = chr(92)
printables = "".join(c for c in string.printable if c not in string.whitespace)
class ParseBaseException(Exception):
"""base exception class for all parsing runtime exceptions"""
# Performance tuning: we construct a *lot* of these, so keep this
# constructor as small and fast as possible
def __init__( self, pstr, loc=0, msg=None, elem=None ):
self.loc = loc
if msg is None:
self.msg = pstr
self.pstr = ""
else:
self.msg = msg
self.pstr = pstr
self.parserElement = elem
self.args = (pstr, loc, msg)
@classmethod
def _from_exception(cls, pe):
"""
internal factory method to simplify creating one type of ParseException
from another - avoids having __init__ signature conflicts among subclasses
"""
return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)
def __getattr__( self, aname ):
"""supported attributes by name are:
- lineno - returns the line number of the exception text
- col - returns the column number of the exception text
- line - returns the line containing the exception text
"""
if( aname == "lineno" ):
return lineno( self.loc, self.pstr )
elif( aname in ("col", "column") ):
return col( self.loc, self.pstr )
elif( aname == "line" ):
return line( self.loc, self.pstr )
else:
raise AttributeError(aname)
def __str__( self ):
return "%s (at char %d), (line:%d, col:%d)" % \
( self.msg, self.loc, self.lineno, self.column )
def __repr__( self ):
return _ustr(self)
def markInputline( self, markerString = ">!<" ):
"""Extracts the exception line from the input string, and marks
the location of the exception with a special symbol.
"""
line_str = self.line
line_column = self.column - 1
if markerString:
line_str = "".join((line_str[:line_column],
markerString, line_str[line_column:]))
return line_str.strip()
def __dir__(self):
return "lineno col line".split() + dir(type(self))
class ParseException(ParseBaseException):
"""
Exception thrown when parse expressions don't match class;
supported attributes by name are:
- lineno - returns the line number of the exception text
- col - returns the column number of the exception text
- line - returns the line containing the exception text
Example::
try:
Word(nums).setName("integer").parseString("ABC")
except ParseException as pe:
print(pe)
print("column: {}".format(pe.col))
prints::
Expected integer (at char 0), (line:1, col:1)
column: 1
"""
pass
class ParseFatalException(ParseBaseException):
"""user-throwable exception thrown when inconsistent parse content
is found; stops all parsing immediately"""
pass
class ParseSyntaxException(ParseFatalException):
"""just like L{ParseFatalException}, but thrown internally when an
L{ErrorStop<And._ErrorStop>} ('-' operator) indicates that parsing is to stop
immediately because an unbacktrackable syntax error has been found"""
pass
#~ class ReparseException(ParseBaseException):
#~ """Experimental class - parse actions can raise this exception to cause
#~ pyparsing to reparse the input string:
#~ - with a modified input string, and/or
#~ - with a modified start location
#~ Set the values of the ReparseException in the constructor, and raise the
#~ exception in a parse action to cause pyparsing to use the new string/location.
#~ Setting the values as None causes no change to be made.
#~ """
#~ def __init_( self, newstring, restartLoc ):
#~ self.newParseText = newstring
#~ self.reparseLoc = restartLoc
class RecursiveGrammarException(Exception):
"""exception thrown by L{ParserElement.validate} if the grammar could be improperly recursive"""
def __init__( self, parseElementList ):
self.parseElementTrace = parseElementList
def __str__( self ):
return "RecursiveGrammarException: %s" % self.parseElementTrace
class _ParseResultsWithOffset(object):
def __init__(self,p1,p2):
self.tup = (p1,p2)
def __getitem__(self,i):
return self.tup[i]
def __repr__(self):
return repr(self.tup[0])
def setOffset(self,i):
self.tup = (self.tup[0],i)
class ParseResults(object):
"""
Structured parse results, to provide multiple means of access to the parsed data:
- as a list (C{len(results)})
- by list index (C{results[0], results[1]}, etc.)
- by attribute (C{results.<resultsName>} - see L{ParserElement.setResultsName})
Example::
integer = Word(nums)
date_str = (integer.setResultsName("year") + '/'
+ integer.setResultsName("month") + '/'
+ integer.setResultsName("day"))
# equivalent form:
# date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
# parseString returns a ParseResults object
result = date_str.parseString("1999/12/31")
def test(s, fn=repr):
print("%s -> %s" % (s, fn(eval(s))))
test("list(result)")
test("result[0]")
Loading ...