from distutils.version import LooseVersion
import functools
import itertools
import operator
import warnings
import numpy as np
from pandas._libs import iNaT, lib, tslibs
import pandas.compat as compat
from pandas.core.dtypes.cast import _int64_max, maybe_upcast_putmask
from pandas.core.dtypes.common import (
_get_dtype, is_any_int_dtype, is_bool_dtype, is_complex, is_complex_dtype,
is_datetime64_dtype, is_datetime64tz_dtype, is_datetime_or_timedelta_dtype,
is_float, is_float_dtype, is_integer, is_integer_dtype, is_numeric_dtype,
is_object_dtype, is_scalar, is_timedelta64_dtype, pandas_dtype)
from pandas.core.dtypes.dtypes import DatetimeTZDtype
from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna
import pandas.core.common as com
from pandas.core.config import get_option
_BOTTLENECK_INSTALLED = False
_MIN_BOTTLENECK_VERSION = '1.0.0'
try:
import bottleneck as bn
ver = bn.__version__
_BOTTLENECK_INSTALLED = (LooseVersion(ver) >=
LooseVersion(_MIN_BOTTLENECK_VERSION))
if not _BOTTLENECK_INSTALLED:
warnings.warn(
"The installed version of bottleneck {ver} is not supported "
"in pandas and will be not be used\nThe minimum supported "
"version is {min_ver}\n".format(
ver=ver, min_ver=_MIN_BOTTLENECK_VERSION), UserWarning)
except ImportError: # pragma: no cover
pass
_USE_BOTTLENECK = False
def set_use_bottleneck(v=True):
# set/unset to use bottleneck
global _USE_BOTTLENECK
if _BOTTLENECK_INSTALLED:
_USE_BOTTLENECK = v
set_use_bottleneck(get_option('compute.use_bottleneck'))
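
# Illustrative note (comments only, not executed): the ``compute.use_bottleneck``
# option is registered elsewhere in pandas with a callback that calls
# `set_use_bottleneck`, so the accelerated path can be toggled at runtime:
#
#   >>> import pandas as pd
#   >>> pd.set_option('compute.use_bottleneck', False)  # force numpy fallbacks
#   >>> pd.set_option('compute.use_bottleneck', True)   # re-enable if installed
#
# `_USE_BOTTLENECK` only ever becomes True when a supported bottleneck
# version was detected above.
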
class disallow(object):
def __init__(self, *dtypes):
super(disallow, self).__init__()
self.dtypes = tuple(pandas_dtype(dtype).type for dtype in dtypes)
def check(self, obj):
return hasattr(obj, 'dtype') and issubclass(obj.dtype.type,
self.dtypes)
def __call__(self, f):
@functools.wraps(f)
def _f(*args, **kwargs):
obj_iter = itertools.chain(args, compat.itervalues(kwargs))
if any(self.check(obj) for obj in obj_iter):
msg = 'reduction operation {name!r} not allowed for this dtype'
raise TypeError(msg.format(name=f.__name__.replace('nan', '')))
try:
with np.errstate(invalid='ignore'):
return f(*args, **kwargs)
except ValueError as e:
# we want to transform an object array
# ValueError message to the more typical TypeError
# e.g. this is normally a disallowed function on
# object arrays that contain strings
if is_object_dtype(args[0]):
raise TypeError(e)
raise
return _f
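
# Illustrative sketch (comments only, not executed): how the `disallow`
# decorator is applied.  ``nanexample`` is a hypothetical name used only for
# this illustration; the real nan* reductions defined later in this module are
# decorated the same way.
#
#   >>> @disallow('M8')
#   ... def nanexample(values, axis=None, skipna=True):
#   ...     return values.sum(axis=axis)
#   >>> nanexample(np.array([0, 1], dtype='M8[ns]'))
#   Traceback (most recent call last):
#     ...
#   TypeError: reduction operation 'example' not allowed for this dtype
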
class bottleneck_switch(object):
def __init__(self, **kwargs):
self.kwargs = kwargs
def __call__(self, alt):
bn_name = alt.__name__
try:
bn_func = getattr(bn, bn_name)
except (AttributeError, NameError): # pragma: no cover
bn_func = None
@functools.wraps(alt)
def f(values, axis=None, skipna=True, **kwds):
if len(self.kwargs) > 0:
for k, v in compat.iteritems(self.kwargs):
if k not in kwds:
kwds[k] = v
try:
if values.size == 0 and kwds.get('min_count') is None:
# We are empty, returning NA for our type
# Only applies for the default `min_count` of None
# since that affects how empty arrays are handled.
# TODO(GH-18976) update all the nanops methods to
# correctly handle empty inputs and remove this check.
# It *may* just be `var`
return _na_for_min_count(values, axis)
if (_USE_BOTTLENECK and skipna and
_bn_ok_dtype(values.dtype, bn_name)):
result = bn_func(values, axis=axis, **kwds)
# prefer to treat inf/-inf as NA, but must compute the func
# twice :(
if _has_infs(result):
result = alt(values, axis=axis, skipna=skipna, **kwds)
else:
result = alt(values, axis=axis, skipna=skipna, **kwds)
except Exception:
try:
result = alt(values, axis=axis, skipna=skipna, **kwds)
except ValueError as e:
# we want to transform an object array
# ValueError message to the more typical TypeError
# e.g. this is normally a disallowed function on
# object arrays that contain strings
if is_object_dtype(values):
raise TypeError(e)
raise
return result
return f
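
# Illustrative sketch (comments only, not executed): `bottleneck_switch` wraps
# a numpy-based reduction whose name matches a bottleneck function of the same
# name, e.g. (simplified, hypothetical body):
#
#   >>> @bottleneck_switch()
#   ... def nanmax(values, axis=None, skipna=True):
#   ...     ...  # pure-numpy fallback
#
# With bottleneck installed, skipna=True and a dtype accepted by
# `_bn_ok_dtype`, the call dispatches to ``bn.nanmax``; otherwise (or when the
# fast path fails, or its result contains +/-inf) the decorated fallback runs.
# Keyword defaults given to the decorator itself are injected into ``kwds``
# before dispatch.
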
def _bn_ok_dtype(dt, name):
# Bottleneck chokes on datetime64
if (not is_object_dtype(dt) and
not (is_datetime_or_timedelta_dtype(dt) or
is_datetime64tz_dtype(dt))):
# GH 15507
# bottleneck does not properly upcast during the sum
# so can overflow
# GH 9422
# further we also want to preserve NaN when all elements
        # are NaN, unlike bottleneck/numpy which consider this
# to be 0
if name in ['nansum', 'nanprod']:
return False
return True
return False
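
# Illustrative behaviour of the checks above (comments only, not executed):
#
#   >>> _bn_ok_dtype(np.dtype('float64'), 'nanmean')  # numeric, safe op
#   True
#   >>> _bn_ok_dtype(np.dtype('float64'), 'nansum')   # GH 15507 / GH 9422
#   False
#   >>> _bn_ok_dtype(np.dtype('M8[ns]'), 'nanmean')   # datetime64 not handled
#   False
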
def _has_infs(result):
if isinstance(result, np.ndarray):
if result.dtype == 'f8':
return lib.has_infs_f8(result.ravel())
elif result.dtype == 'f4':
return lib.has_infs_f4(result.ravel())
try:
return np.isinf(result).any()
except (TypeError, NotImplementedError):
# if it doesn't support infs, then it can't have infs
return False
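
# Illustrative behaviour (comments only, not executed):
#
#   >>> _has_infs(np.array([1.0, -np.inf]))       # float64 fast path via lib
#   True
#   >>> _has_infs(np.array(['a'], dtype=object))  # isinf unsupported -> False
#   False
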
def _get_fill_value(dtype, fill_value=None, fill_value_typ=None):
""" return the correct fill value for the dtype of the values """
if fill_value is not None:
return fill_value
if _na_ok_dtype(dtype):
if fill_value_typ is None:
return np.nan
else:
if fill_value_typ == '+inf':
return np.inf
else:
return -np.inf
else:
if fill_value_typ is None:
return tslibs.iNaT
else:
if fill_value_typ == '+inf':
# need the max int here
return _int64_max
else:
return tslibs.iNaT
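
# Illustrative behaviour (comments only, not executed):
#
#   >>> _get_fill_value(np.dtype('float64'))                         # NaN-capable
#   nan
#   >>> _get_fill_value(np.dtype('float64'), fill_value_typ='-inf')
#   -inf
#   >>> _get_fill_value(np.dtype('int64'))                           # not NaN-capable
#   -9223372036854775808
#
# The last value is ``iNaT``, the int64 sentinel used for datetime64 /
# timedelta64 (and other non-NaN-capable) data.
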
def _get_values(values, skipna, fill_value=None, fill_value_typ=None,
isfinite=False, copy=True, mask=None):
""" utility to get the values view, mask, dtype
if necessary copy and mask using the specified fill_value
copy = True will force the copy
"""
if is_datetime64tz_dtype(values):
# com.values_from_object returns M8[ns] dtype instead of tz-aware,
# so this case must be handled separately from the rest
dtype = values.dtype
values = getattr(values, "_values", values)
else:
values = com.values_from_object(values)
dtype = values.dtype
if mask is None:
if isfinite:
mask = _isfinite(values)
else:
mask = isna(values)
if is_datetime_or_timedelta_dtype(values) or is_datetime64tz_dtype(values):
# changing timedelta64/datetime64 to int64 needs to happen after
# finding `mask` above
values = getattr(values, "asi8", values)
values = values.view(np.int64)
dtype_ok = _na_ok_dtype(dtype)
# get our fill value (in case we need to provide an alternative
# dtype for it)
fill_value = _get_fill_value(dtype, fill_value=fill_value,
fill_value_typ=fill_value_typ)
if skipna:
if copy:
values = values.copy()
if dtype_ok:
np.putmask(values, mask, fill_value)
# promote if needed
else:
values, changed = maybe_upcast_putmask(values, mask, fill_value)
elif copy:
values = values.copy()
# return a platform independent precision dtype
dtype_max = dtype
if is_integer_dtype(dtype) or is_bool_dtype(dtype):
dtype_max = np.int64
elif is_float_dtype(dtype):
dtype_max = np.float64
return values, mask, dtype, dtype_max, fill_value
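
# Illustrative sketch (comments only, not executed): a max-style reduction
# would call this with fill_value_typ='-inf' so that missing entries cannot
# win the comparison:
#
#   >>> vals = np.array([1.0, np.nan, 3.0])
#   >>> values, mask, dtype, dtype_max, fill = _get_values(
#   ...     vals, skipna=True, fill_value_typ='-inf')
#   >>> mask
#   array([False,  True, False])
#   >>> values            # copy of ``vals`` with NaN replaced by -inf
#   array([  1., -inf,   3.])
#   >>> dtype_max         # platform-independent accumulation type
#   <class 'numpy.float64'>
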
def _isfinite(values):
if is_datetime_or_timedelta_dtype(values):
return isna(values)
if (is_complex_dtype(values) or is_float_dtype(values) or
is_integer_dtype(values) or is_bool_dtype(values)):
return ~np.isfinite(values)
return ~np.isfinite(values.astype('float64'))
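
# Illustrative behaviour (comments only, not executed): despite its name,
# `_isfinite` returns True where values are *not* finite, i.e. the mask of
# entries to treat as missing:
#
#   >>> _isfinite(np.array([1.0, np.nan, np.inf]))
#   array([False,  True,  True])
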
def _na_ok_dtype(dtype):
# TODO: what about datetime64tz? PeriodDtype?
return not issubclass(dtype.type,
(np.integer, np.timedelta64, np.datetime64))
def _wrap_results(result, dtype, fill_value=None):
""" wrap our results if needed """
if is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype):
if fill_value is None:
# GH#24293
fill_value = iNaT
if not isinstance(result, np.ndarray):
tz = getattr(dtype, 'tz', None)
assert not isna(fill_value), "Expected non-null fill_value"
if result == fill_value:
result = np.nan
result = tslibs.Timestamp(result, tz=tz)
else:
result = result.view(dtype)
elif is_timedelta64_dtype(dtype):
if not isinstance(result, np.ndarray):
if result == fill_value:
result = np.nan
# raise if we have a timedelta64[ns] which is too large
if np.fabs(result) > _int64_max:
raise ValueError("overflow in timedelta operation")
result = tslibs.Timedelta(result, unit='ns')
else:
result = result.astype('i8').view(dtype)
return result
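
# Illustrative behaviour (comments only, not executed): scalar results for
# datetime64/timedelta64 dtypes are re-boxed from their int64 nanosecond
# representation:
#
#   >>> _wrap_results(0, np.dtype('M8[ns]'))
#   Timestamp('1970-01-01 00:00:00')
#   >>> _wrap_results(10**9, np.dtype('m8[ns]'))
#   Timedelta('0 days 00:00:01')
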
def _na_for_min_count(values, axis):
"""Return the missing value for `values`
Parameters
----------
values : ndarray
axis : int or None
axis for the reduction
Returns
-------
result : scalar or ndarray
For 1-D values, returns a scalar of the correct missing type.
For 2-D values, returns a 1-D array where each element is missing.
"""
# we either return np.nan or pd.NaT
if is_numeric_dtype(values):
values = values.astype('float64')
fill_value = na_value_for_dtype(values.dtype)
if values.ndim == 1:
return fill_value
else:
result_shape = (values.shape[:axis] +
values.shape[axis + 1:])
result = np.empty(result_shape, dtype=values.dtype)
result.fill(fill_value)
return result
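
# Illustrative behaviour (comments only, not executed):
#
#   >>> _na_for_min_count(np.array([1, 2, 3]), axis=None)   # 1-D -> scalar NA
#   nan
#   >>> _na_for_min_count(np.ones((2, 3)), axis=0)          # 2-D -> 1-D of NA
#   array([nan, nan, nan])
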
def nanany(values, axis=None, skipna=True, mask=None):
"""
    Check if any elements along an axis evaluate to True.

    Parameters
----------
values : ndarray
axis : int, optional
skipna : bool, default True