""":func:`~pandas.eval` parsers
"""
import ast
from functools import partial
import tokenize
import numpy as np
from pandas.compat import StringIO, lmap, reduce, string_types, zip
import pandas as pd
from pandas import compat
from pandas.core import common as com
from pandas.core.base import StringMixin
from pandas.core.computation.ops import (
_LOCAL_TAG, BinOp, Constant, Div, FuncNode, Op, Term, UnaryOp,
UndefinedVariableError, _arith_ops_syms, _bool_ops_syms, _cmp_ops_syms,
_mathops, _reductions, _unary_ops_syms, is_term)
from pandas.core.computation.scope import Scope
import pandas.io.formats.printing as printing
def tokenize_string(source):
"""Tokenize a Python source code string.
Parameters
----------
source : str
A Python source code string
"""
line_reader = StringIO(source).readline
for toknum, tokval, _, _, _ in tokenize.generate_tokens(line_reader):
yield toknum, tokval
def _rewrite_assign(tok):
"""Rewrite the assignment operator for PyTables expressions that use ``=``
as a substitute for ``==``.
Parameters
----------
tok : tuple of int, str
ints correspond to the all caps constants in the tokenize module
Returns
-------
t : tuple of int, str
Either the input or token or the replacement values
"""
toknum, tokval = tok
return toknum, '==' if tokval == '=' else tokval
def _replace_booleans(tok):
"""Replace ``&`` with ``and`` and ``|`` with ``or`` so that bitwise
precedence is changed to boolean precedence.
Parameters
----------
tok : tuple of int, str
ints correspond to the all caps constants in the tokenize module
Returns
-------
t : tuple of int, str
Either the input or token or the replacement values
"""
toknum, tokval = tok
if toknum == tokenize.OP:
if tokval == '&':
return tokenize.NAME, 'and'
elif tokval == '|':
return tokenize.NAME, 'or'
return toknum, tokval
return toknum, tokval
def _replace_locals(tok):
"""Replace local variables with a syntactically valid name.
Parameters
----------
tok : tuple of int, str
ints correspond to the all caps constants in the tokenize module
Returns
-------
t : tuple of int, str
Either the input or token or the replacement values
Notes
-----
This is somewhat of a hack in that we rewrite a string such as ``'@a'`` as
``'__pd_eval_local_a'`` by telling the tokenizer that ``__pd_eval_local_``
is a ``tokenize.OP`` and to replace the ``'@'`` symbol with it.
"""
toknum, tokval = tok
if toknum == tokenize.OP and tokval == '@':
return tokenize.OP, _LOCAL_TAG
return toknum, tokval
def _compose2(f, g):
"""Compose 2 callables"""
return lambda *args, **kwargs: f(g(*args, **kwargs))
def _compose(*funcs):
"""Compose 2 or more callables"""
assert len(funcs) > 1, 'At least 2 callables must be passed to compose'
return reduce(_compose2, funcs)
def _preparse(source, f=_compose(_replace_locals, _replace_booleans,
_rewrite_assign)):
"""Compose a collection of tokenization functions
Parameters
----------
source : str
A Python source code string
f : callable
This takes a tuple of (toknum, tokval) as its argument and returns a
tuple with the same structure but possibly different elements. Defaults
to the composition of ``_rewrite_assign``, ``_replace_booleans``, and
``_replace_locals``.
Returns
-------
s : str
Valid Python source code
Notes
-----
The `f` parameter can be any callable that takes *and* returns input of the
form ``(toknum, tokval)``, where ``toknum`` is one of the constants from
the ``tokenize`` module and ``tokval`` is a string.
"""
assert callable(f), 'f must be callable'
return tokenize.untokenize(lmap(f, tokenize_string(source)))
def _is_type(t):
"""Factory for a type checking function of type ``t`` or tuple of types."""
return lambda x: isinstance(x.value, t)
_is_list = _is_type(list)
_is_str = _is_type(string_types)
# partition all AST nodes
_all_nodes = frozenset(filter(lambda x: isinstance(x, type) and
issubclass(x, ast.AST),
(getattr(ast, node) for node in dir(ast))))
def _filter_nodes(superclass, all_nodes=_all_nodes):
"""Filter out AST nodes that are subclasses of ``superclass``."""
node_names = (node.__name__ for node in all_nodes
if issubclass(node, superclass))
return frozenset(node_names)
_all_node_names = frozenset(map(lambda x: x.__name__, _all_nodes))
_mod_nodes = _filter_nodes(ast.mod)
_stmt_nodes = _filter_nodes(ast.stmt)
_expr_nodes = _filter_nodes(ast.expr)
_expr_context_nodes = _filter_nodes(ast.expr_context)
_slice_nodes = _filter_nodes(ast.slice)
_boolop_nodes = _filter_nodes(ast.boolop)
_operator_nodes = _filter_nodes(ast.operator)
_unary_op_nodes = _filter_nodes(ast.unaryop)
_cmp_op_nodes = _filter_nodes(ast.cmpop)
_comprehension_nodes = _filter_nodes(ast.comprehension)
_handler_nodes = _filter_nodes(ast.excepthandler)
_arguments_nodes = _filter_nodes(ast.arguments)
_keyword_nodes = _filter_nodes(ast.keyword)
_alias_nodes = _filter_nodes(ast.alias)
# nodes that we don't support directly but are needed for parsing
_hacked_nodes = frozenset(['Assign', 'Module', 'Expr'])
_unsupported_expr_nodes = frozenset(['Yield', 'GeneratorExp', 'IfExp',
'DictComp', 'SetComp', 'Repr', 'Lambda',
'Set', 'AST', 'Is', 'IsNot'])
# these nodes are low priority or won't ever be supported (e.g., AST)
_unsupported_nodes = ((_stmt_nodes | _mod_nodes | _handler_nodes |
_arguments_nodes | _keyword_nodes | _alias_nodes |
_expr_context_nodes | _unsupported_expr_nodes) -
_hacked_nodes)
# we're adding a different assignment in some cases to be equality comparison
# and we don't want `stmt` and friends in their so get only the class whose
# names are capitalized
_base_supported_nodes = (_all_node_names - _unsupported_nodes) | _hacked_nodes
_msg = 'cannot both support and not support {intersection}'.format(
intersection=_unsupported_nodes & _base_supported_nodes)
assert not _unsupported_nodes & _base_supported_nodes, _msg
def _node_not_implemented(node_name, cls):
"""Return a function that raises a NotImplementedError with a passed node
name.
"""
def f(self, *args, **kwargs):
raise NotImplementedError("{name!r} nodes are not "
"implemented".format(name=node_name))
return f
def disallow(nodes):
"""Decorator to disallow certain nodes from parsing. Raises a
NotImplementedError instead.
Returns
-------
disallowed : callable
"""
def disallowed(cls):
cls.unsupported_nodes = ()
for node in nodes:
new_method = _node_not_implemented(node, cls)
name = 'visit_{node}'.format(node=node)
cls.unsupported_nodes += (name,)
setattr(cls, name, new_method)
return cls
return disallowed
def _op_maker(op_class, op_symbol):
"""Return a function to create an op class with its symbol already passed.
Returns
-------
f : callable
"""
def f(self, node, *args, **kwargs):
"""Return a partial function with an Op subclass with an operator
already passed.
Returns
-------
f : callable
"""
return partial(op_class, op_symbol, *args, **kwargs)
return f
_op_classes = {'binary': BinOp, 'unary': UnaryOp}
def add_ops(op_classes):
"""Decorator to add default implementation of ops."""
def f(cls):
for op_attr_name, op_class in compat.iteritems(op_classes):
ops = getattr(cls, '{name}_ops'.format(name=op_attr_name))
ops_map = getattr(cls, '{name}_op_nodes_map'.format(
name=op_attr_name))
for op in ops:
op_node = ops_map[op]
if op_node is not None:
made_op = _op_maker(op_class, op)
setattr(cls, 'visit_{node}'.format(node=op_node), made_op)
return cls
return f
@disallow(_unsupported_nodes)
@add_ops(_op_classes)
class BaseExprVisitor(ast.NodeVisitor):
"""Custom ast walker. Parsers of other engines should subclass this class
if necessary.
Parameters
----------
env : Scope
engine : str
parser : str
preparser : callable
"""
const_type = Constant
term_type = Term
binary_ops = _cmp_ops_syms + _bool_ops_syms + _arith_ops_syms
binary_op_nodes = ('Gt', 'Lt', 'GtE', 'LtE', 'Eq', 'NotEq', 'In', 'NotIn',
'BitAnd', 'BitOr', 'And', 'Or', 'Add', 'Sub', 'Mult',
None, 'Pow', 'FloorDiv', 'Mod')
binary_op_nodes_map = dict(zip(binary_ops, binary_op_nodes))
unary_ops = _unary_ops_syms
unary_op_nodes = 'UAdd', 'USub', 'Invert', 'Not'
unary_op_nodes_map = dict(zip(unary_ops, unary_op_nodes))
rewrite_map = {
ast.Eq: ast.In,
ast.NotEq: ast.NotIn,
ast.In: ast.In,
ast.NotIn: ast.NotIn
}
def __init__(self, env, engine, parser, preparser=_preparse):
self.env = env
self.engine = engine
self.parser = parser
self.preparser = preparser
self.assigner = None
def visit(self, node, **kwargs):
if isinstance(node, string_types):
clean = self.preparser(node)
try:
node = ast.fix_missing_locations(ast.parse(clean))
except SyntaxError as e:
from keyword import iskeyword
if any(iskeyword(x) for x in clean.split()):
e.msg = ("Python keyword not valid identifier"
" in numexpr query")
raise e
method = 'visit_' + node.__class__.__name__
visitor = getattr(self, method)
return visitor(node, **kwargs)
def visit_Module(self, node, **kwargs):
if len(node.body) != 1:
raise SyntaxError('only a single expression is allowed')
expr = node.body[0]
return self.visit(expr, **kwargs)
def visit_Expr(self, node, **kwargs):
return self.visit(node.value, **kwargs)
def _rewrite_membership_op(self, node, left, right):
# the kind of the operator (is actually an instance)
op_instance = node.op
op_type = type(op_instance)
Loading ...