core/nanops.py · agriconnect/pandas

Learn more » Push, build, and install RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages
agriconnect / pandas python

Repository URL to install this package:
Version: 0.24.2

/ core / nanops.py

from distutils.version import LooseVersion
import functools
import itertools
import operator
import warnings

import numpy as np

from pandas._libs import iNaT, lib, tslibs
import pandas.compat as compat

from pandas.core.dtypes.cast import _int64_max, maybe_upcast_putmask
from pandas.core.dtypes.common import (
    _get_dtype, is_any_int_dtype, is_bool_dtype, is_complex, is_complex_dtype,
    is_datetime64_dtype, is_datetime64tz_dtype, is_datetime_or_timedelta_dtype,
    is_float, is_float_dtype, is_integer, is_integer_dtype, is_numeric_dtype,
    is_object_dtype, is_scalar, is_timedelta64_dtype, pandas_dtype)
from pandas.core.dtypes.dtypes import DatetimeTZDtype
from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna

import pandas.core.common as com
from pandas.core.config import get_option

_BOTTLENECK_INSTALLED = False
_MIN_BOTTLENECK_VERSION = '1.0.0'

try:
    import bottleneck as bn
    ver = bn.__version__
    _BOTTLENECK_INSTALLED = (LooseVersion(ver) >=
                             LooseVersion(_MIN_BOTTLENECK_VERSION))

    if not _BOTTLENECK_INSTALLED:
        warnings.warn(
            "The installed version of bottleneck {ver} is not supported "
            "in pandas and will be not be used\nThe minimum supported "
            "version is {min_ver}\n".format(
                ver=ver, min_ver=_MIN_BOTTLENECK_VERSION), UserWarning)

except ImportError:  # pragma: no cover
    pass


_USE_BOTTLENECK = False


def set_use_bottleneck(v=True):
    # set/unset to use bottleneck
    global _USE_BOTTLENECK
    if _BOTTLENECK_INSTALLED:
        _USE_BOTTLENECK = v


set_use_bottleneck(get_option('compute.use_bottleneck'))


class disallow(object):

    def __init__(self, *dtypes):
        super(disallow, self).__init__()
        self.dtypes = tuple(pandas_dtype(dtype).type for dtype in dtypes)

    def check(self, obj):
        return hasattr(obj, 'dtype') and issubclass(obj.dtype.type,
                                                    self.dtypes)

    def __call__(self, f):
        @functools.wraps(f)
        def _f(*args, **kwargs):
            obj_iter = itertools.chain(args, compat.itervalues(kwargs))
            if any(self.check(obj) for obj in obj_iter):
                msg = 'reduction operation {name!r} not allowed for this dtype'
                raise TypeError(msg.format(name=f.__name__.replace('nan', '')))
            try:
                with np.errstate(invalid='ignore'):
                    return f(*args, **kwargs)
            except ValueError as e:
                # we want to transform an object array
                # ValueError message to the more typical TypeError
                # e.g. this is normally a disallowed function on
                # object arrays that contain strings
                if is_object_dtype(args[0]):
                    raise TypeError(e)
                raise

        return _f


class bottleneck_switch(object):

    def __init__(self, **kwargs):
        self.kwargs = kwargs

    def __call__(self, alt):
        bn_name = alt.__name__

        try:
            bn_func = getattr(bn, bn_name)
        except (AttributeError, NameError):  # pragma: no cover
            bn_func = None

        @functools.wraps(alt)
        def f(values, axis=None, skipna=True, **kwds):
            if len(self.kwargs) > 0:
                for k, v in compat.iteritems(self.kwargs):
                    if k not in kwds:
                        kwds[k] = v
            try:
                if values.size == 0 and kwds.get('min_count') is None:
                    # We are empty, returning NA for our type
                    # Only applies for the default `min_count` of None
                    # since that affects how empty arrays are handled.
                    # TODO(GH-18976) update all the nanops methods to
                    # correctly handle empty inputs and remove this check.
                    # It *may* just be `var`
                    return _na_for_min_count(values, axis)

                if (_USE_BOTTLENECK and skipna and
                        _bn_ok_dtype(values.dtype, bn_name)):
                    result = bn_func(values, axis=axis, **kwds)

                    # prefer to treat inf/-inf as NA, but must compute the func
                    # twice :(
                    if _has_infs(result):
                        result = alt(values, axis=axis, skipna=skipna, **kwds)
                else:
                    result = alt(values, axis=axis, skipna=skipna, **kwds)
            except Exception:
                try:
                    result = alt(values, axis=axis, skipna=skipna, **kwds)
                except ValueError as e:
                    # we want to transform an object array
                    # ValueError message to the more typical TypeError
                    # e.g. this is normally a disallowed function on
                    # object arrays that contain strings

                    if is_object_dtype(values):
                        raise TypeError(e)
                    raise

            return result

        return f


def _bn_ok_dtype(dt, name):
    # Bottleneck chokes on datetime64
    if (not is_object_dtype(dt) and
            not (is_datetime_or_timedelta_dtype(dt) or
                 is_datetime64tz_dtype(dt))):

        # GH 15507
        # bottleneck does not properly upcast during the sum
        # so can overflow

        # GH 9422
        # further we also want to preserve NaN when all elements
        # are NaN, unlinke bottleneck/numpy which consider this
        # to be 0
        if name in ['nansum', 'nanprod']:
            return False

        return True
    return False


def _has_infs(result):
    if isinstance(result, np.ndarray):
        if result.dtype == 'f8':
            return lib.has_infs_f8(result.ravel())
        elif result.dtype == 'f4':
            return lib.has_infs_f4(result.ravel())
    try:
        return np.isinf(result).any()
    except (TypeError, NotImplementedError):
        # if it doesn't support infs, then it can't have infs
        return False


def _get_fill_value(dtype, fill_value=None, fill_value_typ=None):
    """ return the correct fill value for the dtype of the values """
    if fill_value is not None:
        return fill_value
    if _na_ok_dtype(dtype):
        if fill_value_typ is None:
            return np.nan
        else:
            if fill_value_typ == '+inf':
                return np.inf
            else:
                return -np.inf
    else:
        if fill_value_typ is None:
            return tslibs.iNaT
        else:
            if fill_value_typ == '+inf':
                # need the max int here
                return _int64_max
            else:
                return tslibs.iNaT


def _get_values(values, skipna, fill_value=None, fill_value_typ=None,
                isfinite=False, copy=True, mask=None):
    """ utility to get the values view, mask, dtype
    if necessary copy and mask using the specified fill_value
    copy = True will force the copy
    """

    if is_datetime64tz_dtype(values):
        # com.values_from_object returns M8[ns] dtype instead of tz-aware,
        #  so this case must be handled separately from the rest
        dtype = values.dtype
        values = getattr(values, "_values", values)
    else:
        values = com.values_from_object(values)
        dtype = values.dtype

    if mask is None:
        if isfinite:
            mask = _isfinite(values)
        else:
            mask = isna(values)

    if is_datetime_or_timedelta_dtype(values) or is_datetime64tz_dtype(values):
        # changing timedelta64/datetime64 to int64 needs to happen after
        #  finding `mask` above
        values = getattr(values, "asi8", values)
        values = values.view(np.int64)

    dtype_ok = _na_ok_dtype(dtype)

    # get our fill value (in case we need to provide an alternative
    # dtype for it)
    fill_value = _get_fill_value(dtype, fill_value=fill_value,
                                 fill_value_typ=fill_value_typ)

    if skipna:
        if copy:
            values = values.copy()
        if dtype_ok:
            np.putmask(values, mask, fill_value)

        # promote if needed
        else:
            values, changed = maybe_upcast_putmask(values, mask, fill_value)

    elif copy:
        values = values.copy()

    # return a platform independent precision dtype
    dtype_max = dtype
    if is_integer_dtype(dtype) or is_bool_dtype(dtype):
        dtype_max = np.int64
    elif is_float_dtype(dtype):
        dtype_max = np.float64

    return values, mask, dtype, dtype_max, fill_value


def _isfinite(values):
    if is_datetime_or_timedelta_dtype(values):
        return isna(values)
    if (is_complex_dtype(values) or is_float_dtype(values) or
            is_integer_dtype(values) or is_bool_dtype(values)):
        return ~np.isfinite(values)
    return ~np.isfinite(values.astype('float64'))


def _na_ok_dtype(dtype):
    # TODO: what about datetime64tz?  PeriodDtype?
    return not issubclass(dtype.type,
                          (np.integer, np.timedelta64, np.datetime64))


def _wrap_results(result, dtype, fill_value=None):
    """ wrap our results if needed """

    if is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype):
        if fill_value is None:
            # GH#24293
            fill_value = iNaT
        if not isinstance(result, np.ndarray):
            tz = getattr(dtype, 'tz', None)
            assert not isna(fill_value), "Expected non-null fill_value"
            if result == fill_value:
                result = np.nan
            result = tslibs.Timestamp(result, tz=tz)
        else:
            result = result.view(dtype)
    elif is_timedelta64_dtype(dtype):
        if not isinstance(result, np.ndarray):
            if result == fill_value:
                result = np.nan

            # raise if we have a timedelta64[ns] which is too large
            if np.fabs(result) > _int64_max:
                raise ValueError("overflow in timedelta operation")

            result = tslibs.Timedelta(result, unit='ns')
        else:
            result = result.astype('i8').view(dtype)

    return result


def _na_for_min_count(values, axis):
    """Return the missing value for `values`

    Parameters
    ----------
    values : ndarray
    axis : int or None
        axis for the reduction

    Returns
    -------
    result : scalar or ndarray
        For 1-D values, returns a scalar of the correct missing type.
        For 2-D values, returns a 1-D array where each element is missing.
    """
    # we either return np.nan or pd.NaT
    if is_numeric_dtype(values):
        values = values.astype('float64')
    fill_value = na_value_for_dtype(values.dtype)

    if values.ndim == 1:
        return fill_value
    else:
        result_shape = (values.shape[:axis] +
                        values.shape[axis + 1:])
        result = np.empty(result_shape, dtype=values.dtype)
        result.fill(fill_value)
        return result


def nanany(values, axis=None, skipna=True, mask=None):
    """
    Check if any elements along an axis evaluate to True.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
Loading ...
agriconnect / pandas python

Version: 0.24.2

/ core / nanops.py

Products

About

Resources

Contact Gemfury