"""
Expressions
-----------
Offer fast expression evaluation through numexpr
"""
import warnings
import numpy as np
from pandas._config import get_option
from pandas._libs.lib import values_from_object
from pandas.core.dtypes.generic import ABCDataFrame
from pandas.core.computation.check import _NUMEXPR_INSTALLED
if _NUMEXPR_INSTALLED:
import numexpr as ne
_TEST_MODE = None
_TEST_RESULT = None
_USE_NUMEXPR = _NUMEXPR_INSTALLED
_evaluate = None
_where = None
# the set of dtypes that we will allow pass to numexpr
_ALLOWED_DTYPES = {
"evaluate": {"int64", "int32", "float64", "float32", "bool"},
"where": {"int64", "float64", "bool"},
}
# the minimum prod shape that we will use numexpr
_MIN_ELEMENTS = 10000
def set_use_numexpr(v=True):
# set/unset to use numexpr
global _USE_NUMEXPR
if _NUMEXPR_INSTALLED:
_USE_NUMEXPR = v
# choose what we are going to do
global _evaluate, _where
if not _USE_NUMEXPR:
_evaluate = _evaluate_standard
_where = _where_standard
else:
_evaluate = _evaluate_numexpr
_where = _where_numexpr
def set_numexpr_threads(n=None):
# if we are using numexpr, set the threads to n
# otherwise reset
if _NUMEXPR_INSTALLED and _USE_NUMEXPR:
if n is None:
n = ne.detect_number_of_cores()
ne.set_num_threads(n)
def _evaluate_standard(op, op_str, a, b, **eval_kwargs):
""" standard evaluation """
if _TEST_MODE:
_store_test_result(False)
with np.errstate(all="ignore"):
return op(a, b)
def _can_use_numexpr(op, op_str, a, b, dtype_check):
""" return a boolean if we WILL be using numexpr """
if op_str is not None:
# required min elements (otherwise we are adding overhead)
if np.prod(a.shape) > _MIN_ELEMENTS:
# check for dtype compatibility
dtypes = set()
for o in [a, b]:
# Series implements dtypes, check for dimension count as well
if hasattr(o, "dtypes") and o.ndim > 1:
s = o.dtypes.value_counts()
if len(s) > 1:
return False
dtypes |= set(s.index.astype(str))
# ndarray and Series Case
elif hasattr(o, "dtype"):
dtypes |= {o.dtype.name}
# allowed are a superset
if not len(dtypes) or _ALLOWED_DTYPES[dtype_check] >= dtypes:
return True
return False
def _evaluate_numexpr(op, op_str, a, b, truediv=True, reversed=False, **eval_kwargs):
result = None
if _can_use_numexpr(op, op_str, a, b, "evaluate"):
try:
# we were originally called by a reversed op
# method
if reversed:
a, b = b, a
a_value = getattr(a, "values", a)
b_value = getattr(b, "values", b)
result = ne.evaluate(
"a_value {op} b_value".format(op=op_str),
local_dict={"a_value": a_value, "b_value": b_value},
casting="safe",
truediv=truediv,
**eval_kwargs
)
except ValueError as detail:
if "unknown type object" in str(detail):
pass
if _TEST_MODE:
_store_test_result(result is not None)
if result is None:
result = _evaluate_standard(op, op_str, a, b)
return result
def _where_standard(cond, a, b):
return np.where(
values_from_object(cond), values_from_object(a), values_from_object(b)
)
def _where_numexpr(cond, a, b):
result = None
if _can_use_numexpr(None, "where", a, b, "where"):
try:
cond_value = getattr(cond, "values", cond)
a_value = getattr(a, "values", a)
b_value = getattr(b, "values", b)
result = ne.evaluate(
"where(cond_value, a_value, b_value)",
local_dict={
"cond_value": cond_value,
"a_value": a_value,
"b_value": b_value,
},
casting="safe",
)
except ValueError as detail:
if "unknown type object" in str(detail):
pass
except Exception as detail:
raise TypeError(str(detail))
if result is None:
result = _where_standard(cond, a, b)
return result
# turn myself on
set_use_numexpr(get_option("compute.use_numexpr"))
def _has_bool_dtype(x):
try:
if isinstance(x, ABCDataFrame):
return "bool" in x.dtypes
else:
return x.dtype == bool
except AttributeError:
return isinstance(x, (bool, np.bool_))
def _bool_arith_check(
op_str, a, b, not_allowed=frozenset(("/", "//", "**")), unsupported=None
):
if unsupported is None:
unsupported = {"+": "|", "*": "&", "-": "^"}
if _has_bool_dtype(a) and _has_bool_dtype(b):
if op_str in unsupported:
warnings.warn(
"evaluating in Python space because the {op!r} "
"operator is not supported by numexpr for "
"the bool dtype, use {alt_op!r} instead".format(
op=op_str, alt_op=unsupported[op_str]
)
)
return False
if op_str in not_allowed:
raise NotImplementedError(
"operator {op!r} not implemented for " "bool dtypes".format(op=op_str)
)
return True
def evaluate(op, op_str, a, b, use_numexpr=True, **eval_kwargs):
""" evaluate and return the expression of the op on a and b
Parameters
----------
op : the actual operand
op_str: the string version of the op
a : left operand
b : right operand
use_numexpr : whether to try to use numexpr (default True)
"""
use_numexpr = use_numexpr and _bool_arith_check(op_str, a, b)
if use_numexpr:
return _evaluate(op, op_str, a, b, **eval_kwargs)
return _evaluate_standard(op, op_str, a, b)
def where(cond, a, b, use_numexpr=True):
""" evaluate the where condition cond on a and b
Parameters
----------
cond : a boolean array
a : return if cond is True
b : return if cond is False
use_numexpr : whether to try to use numexpr (default True)
"""
if use_numexpr:
return _where(cond, a, b)
return _where_standard(cond, a, b)
def set_test_mode(v=True):
"""
Keeps track of whether numexpr was used. Stores an additional ``True``
for every successful use of evaluate with numexpr since the last
``get_test_result``
"""
global _TEST_MODE, _TEST_RESULT
_TEST_MODE = v
_TEST_RESULT = []
def _store_test_result(used_numexpr):
global _TEST_RESULT
if used_numexpr:
_TEST_RESULT.append(used_numexpr)
def get_test_result():
"""get test result and reset test_results"""
global _TEST_RESULT
res = _TEST_RESULT
_TEST_RESULT = []
return res