core/computation/expressions.py · aaronreidsmith/pandas

aaronreidsmith / pandas python

Repository URL to install this package:
Version: 0.25.3

/ core / computation / expressions.py

"""
Expressions
-----------

Offer fast expression evaluation through numexpr

"""

import warnings

import numpy as np

from pandas._config import get_option

from pandas._libs.lib import values_from_object

from pandas.core.dtypes.generic import ABCDataFrame

from pandas.core.computation.check import _NUMEXPR_INSTALLED

if _NUMEXPR_INSTALLED:
    import numexpr as ne

# Test-mode bookkeeping: ``_TEST_MODE`` is the flag set by ``set_test_mode``;
# ``_TEST_RESULT`` becomes a list (reset by ``set_test_mode`` and
# ``get_test_result``) that ``_store_test_result`` appends to.
_TEST_MODE = None
_TEST_RESULT = None
# Whether numexpr should be used; initially "is numexpr installed", and
# updated by ``set_use_numexpr``.
_USE_NUMEXPR = _NUMEXPR_INSTALLED
# The active implementations; bound to the numexpr or the standard variants
# by ``set_use_numexpr``.
_evaluate = None
_where = None

# the dtype names that we allow to be passed to numexpr, keyed by the kind
# of check ("evaluate" or "where") requested from ``_can_use_numexpr``
_ALLOWED_DTYPES = {
    "evaluate": {"int64", "int32", "float64", "float32", "bool"},
    "where": {"int64", "float64", "bool"},
}

# numexpr is only considered when the product of the first operand's shape
# is strictly greater than this (below that it only adds overhead)
_MIN_ELEMENTS = 10000


def set_use_numexpr(v=True):
    """
    Enable or disable numexpr and rebind the active implementations.

    Parameters
    ----------
    v : bool, default True
        Requested value for the module-level ``_USE_NUMEXPR`` flag. It is
        only honoured when numexpr is installed; otherwise the flag keeps
        its current value.
    """
    global _USE_NUMEXPR, _evaluate, _where

    # the flag may only change when numexpr is actually available
    if _NUMEXPR_INSTALLED:
        _USE_NUMEXPR = v

    # bind the implementations that match the (possibly unchanged) flag
    if _USE_NUMEXPR:
        _evaluate = _evaluate_numexpr
        _where = _where_numexpr
    else:
        _evaluate = _evaluate_standard
        _where = _where_standard


def set_numexpr_threads(n=None):
    """
    Set the number of threads numexpr uses.

    Does nothing unless numexpr is both installed and currently enabled.

    Parameters
    ----------
    n : int, optional
        Thread count to use. When None, the value reported by
        ``ne.detect_number_of_cores()`` is used instead.
    """
    if not (_NUMEXPR_INSTALLED and _USE_NUMEXPR):
        return

    thread_count = ne.detect_number_of_cores() if n is None else n
    ne.set_num_threads(thread_count)


def _evaluate_standard(op, op_str, a, b, **eval_kwargs):
    """
    Evaluate ``op(a, b)`` directly, without numexpr.

    ``op_str`` and ``eval_kwargs`` are accepted so the signature is
    interchangeable with the numexpr variant; they are not used here.
    NumPy floating-point error reporting is silenced for the call.
    """
    if _TEST_MODE:
        # record that numexpr was *not* used for this evaluation
        _store_test_result(False)

    with np.errstate(all="ignore"):
        outcome = op(a, b)
    return outcome


def _can_use_numexpr(op, op_str, a, b, dtype_check):
    """ return a boolean if we WILL be using numexpr """
    if op_str is not None:

        # required min elements (otherwise we are adding overhead)
        if np.prod(a.shape) > _MIN_ELEMENTS:
            # check for dtype compatibility
            dtypes = set()
            for o in [a, b]:
                # Series implements dtypes, check for dimension count as well
                if hasattr(o, "dtypes") and o.ndim > 1:
                    s = o.dtypes.value_counts()
                    if len(s) > 1:
                        return False
                    dtypes |= set(s.index.astype(str))
                # ndarray and Series Case
                elif hasattr(o, "dtype"):
                    dtypes |= {o.dtype.name}

            # allowed are a superset
            if not len(dtypes) or _ALLOWED_DTYPES[dtype_check] >= dtypes:
                return True

    return False


def _evaluate_numexpr(op, op_str, a, b, truediv=True, reversed=False, **eval_kwargs):
    """
    Evaluate the operation with numexpr when possible, else fall back.

    Parameters
    ----------
    op : callable taking ``(a, b)``; used for the non-numexpr fallback
    op_str : the string version of the op, spliced into the numexpr expression
    a, b : the operands, in the order ``op`` expects them
    truediv : forwarded to ``ne.evaluate``
    reversed : True when called on behalf of a reversed op method, in which
        case the operands are swapped for the numexpr expression only
    **eval_kwargs : forwarded to ``ne.evaluate``

    Returns
    -------
    The numexpr result, or ``_evaluate_standard(op, op_str, a, b)`` when
    numexpr is not applicable or raised ValueError.
    """
    result = None

    if _can_use_numexpr(op, op_str, a, b, "evaluate"):
        # We were originally called by a reversed op method: swap the
        # operands for the string expression, but do NOT rebind ``a``/``b``.
        # The fallback below must call ``op`` with the original order, just
        # as the non-numexpr path does.
        if reversed:
            left, right = b, a
        else:
            left, right = a, b

        try:
            a_value = getattr(left, "values", left)
            b_value = getattr(right, "values", right)
            result = ne.evaluate(
                "a_value {op} b_value".format(op=op_str),
                local_dict={"a_value": a_value, "b_value": b_value},
                casting="safe",
                truediv=truediv,
                **eval_kwargs
            )
        except ValueError:
            # Best effort: any ValueError from numexpr (e.g. "unknown type
            # object") leaves ``result`` as None, so we fall back below.
            pass

    if _TEST_MODE:
        _store_test_result(result is not None)

    if result is None:
        result = _evaluate_standard(op, op_str, a, b)

    return result


def _where_standard(cond, a, b):
    """
    Evaluate ``np.where`` on the unwrapped values of ``cond``, ``a`` and ``b``.
    """
    mask = values_from_object(cond)
    if_true = values_from_object(a)
    if_false = values_from_object(b)
    return np.where(mask, if_true, if_false)


def _where_numexpr(cond, a, b):
    """
    Evaluate ``where(cond, a, b)`` with numexpr when possible, else fall back.

    Raises
    ------
    TypeError
        When the numexpr attempt raises anything other than ValueError; the
        original message is carried over.
    """
    outcome = None

    if _can_use_numexpr(None, "where", a, b, "where"):
        try:
            # unwrap pandas objects to their underlying values
            operands = {
                "cond_value": getattr(cond, "values", cond),
                "a_value": getattr(a, "values", a),
                "b_value": getattr(b, "values", b),
            }
            outcome = ne.evaluate(
                "where(cond_value, a_value, b_value)",
                local_dict=operands,
                casting="safe",
            )
        except ValueError as err:
            # NOTE: there is no re-raise, so every ValueError is dropped here
            # and the standard implementation is used instead
            if "unknown type object" in str(err):
                pass
        except Exception as err:
            raise TypeError(str(err))

    if outcome is None:
        return _where_standard(cond, a, b)
    return outcome


# turn myself on: at import time, bind ``_evaluate`` / ``_where`` according
# to the "compute.use_numexpr" option
set_use_numexpr(get_option("compute.use_numexpr"))


def _has_bool_dtype(x):
    try:
        if isinstance(x, ABCDataFrame):
            return "bool" in x.dtypes
        else:
            return x.dtype == bool
    except AttributeError:
        return isinstance(x, (bool, np.bool_))


def _bool_arith_check(
    op_str, a, b, not_allowed=frozenset(("/", "//", "**")), unsupported=None
):
    """
    Decide whether numexpr may handle ``op_str`` given bool operands.

    Parameters
    ----------
    op_str : the string version of the op
    a, b : the operands
    not_allowed : ops that raise when both operands are bool
    unsupported : mapping of op to the suggested alternative op; defaults
        to ``{"+": "|", "*": "&", "-": "^"}``

    Returns
    -------
    bool
        False when both operands are bool and ``op_str`` is in
        ``unsupported`` (a warning is emitted); True otherwise.

    Raises
    ------
    NotImplementedError
        When both operands are bool and ``op_str`` is in ``not_allowed``.
    """
    if unsupported is None:
        unsupported = {"+": "|", "*": "&", "-": "^"}

    # the restrictions only apply when *both* operands are boolean
    if not (_has_bool_dtype(a) and _has_bool_dtype(b)):
        return True

    if op_str in unsupported:
        message = (
            "evaluating in Python space because the {op!r} "
            "operator is not supported by numexpr for "
            "the bool dtype, use {alt_op!r} instead"
        ).format(op=op_str, alt_op=unsupported[op_str])
        warnings.warn(message)
        return False

    if op_str in not_allowed:
        raise NotImplementedError(
            "operator {op!r} not implemented for bool dtypes".format(op=op_str)
        )

    return True


def evaluate(op, op_str, a, b, use_numexpr=True, **eval_kwargs):
    """ evaluate and return the result of applying op to a and b

        Parameters
        ----------

        op :    the callable implementing the operation
        op_str: the string version of the op
        a :     left operand
        b :     right operand
        use_numexpr : whether to try to use numexpr (default True)
        **eval_kwargs : passed on to the active implementation only
        """

    # the bool check is only consulted when numexpr was requested at all
    if use_numexpr and _bool_arith_check(op_str, a, b):
        return _evaluate(op, op_str, a, b, **eval_kwargs)

    return _evaluate_standard(op, op_str, a, b)


def where(cond, a, b, use_numexpr=True):
    """ select from a or b depending on the condition cond

        Parameters
        ----------

        cond : a boolean array
        a :    value used where cond is True
        b :    value used where cond is False
        use_numexpr : whether to try to use numexpr (default True)
        """

    # pick the active implementation, or the plain one when opted out
    implementation = _where if use_numexpr else _where_standard
    return implementation(cond, a, b)


def set_test_mode(v=True):
    """
    Switch test mode on or off and clear the recorded results.

    While test mode is on, the evaluation functions record a ``True`` for
    every evaluation that actually used numexpr; retrieve the records with
    ``get_test_result``.
    """
    global _TEST_MODE, _TEST_RESULT
    _TEST_MODE, _TEST_RESULT = v, []


def _store_test_result(used_numexpr):
    global _TEST_RESULT
    if used_numexpr:
        _TEST_RESULT.append(used_numexpr)


def get_test_result():
    """Return the recorded test results and start a fresh, empty record."""
    global _TEST_RESULT
    collected, _TEST_RESULT = _TEST_RESULT, []
    return collected
aaronreidsmith / pandas python

Version: 0.25.3

/ core / computation / expressions.py

Products

About

Resources

Contact Gemfury