core/arrays/numpy_.py · agriconnect/pandas

Learn more » Push, build, and install RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages
agriconnect / pandas python

Repository URL to install this package:
Version: 0.24.2

/ core / arrays / numpy_.py

import numbers

import numpy as np

from pandas._libs import lib
from pandas.compat.numpy import function as nv
from pandas.util._validators import validate_fillna_kwargs

from pandas.core.dtypes.dtypes import ExtensionDtype
from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
from pandas.core.dtypes.inference import is_array_like, is_list_like

from pandas import compat
from pandas.core import nanops
from pandas.core.missing import backfill_1d, pad_1d

from .base import ExtensionArray, ExtensionOpsMixin


class PandasDtype(ExtensionDtype):
    """
    A Pandas ExtensionDtype for NumPy dtypes.

    .. versionadded:: 0.24.0

    This is mostly for internal compatibility, and is not especially
    useful on its own.

    Parameters
    ----------
    dtype : numpy.dtype
    """
    _metadata = ('_dtype',)

    def __init__(self, dtype):
        dtype = np.dtype(dtype)
        self._dtype = dtype
        self._name = dtype.name
        self._type = dtype.type

    def __repr__(self):
        return "PandasDtype({!r})".format(self.name)

    @property
    def numpy_dtype(self):
        """The NumPy dtype this PandasDtype wraps."""
        return self._dtype

    @property
    def name(self):
        return self._name

    @property
    def type(self):
        return self._type

    @property
    def _is_numeric(self):
        # exclude object, str, unicode, void.
        return self.kind in set('biufc')

    @property
    def _is_boolean(self):
        return self.kind == 'b'

    @classmethod
    def construct_from_string(cls, string):
        return cls(np.dtype(string))

    def construct_array_type(cls):
        return PandasArray

    @property
    def kind(self):
        return self._dtype.kind

    @property
    def itemsize(self):
        """The element size of this data-type object."""
        return self._dtype.itemsize


# TODO(NumPy1.13): remove this
# Compat for NumPy 1.12, which doesn't provide NDArrayOperatorsMixin
# or __array_ufunc__, so those operations won't be available to people
# on older NumPys.
#
# We would normally write this as bases=(...), then "class Foo(*bases):
# but Python2 doesn't allow unpacking tuples in the class statement.
# So, we fall back to "object", to avoid writing a metaclass.
try:
    from numpy.lib.mixins import NDArrayOperatorsMixin
except ImportError:
    NDArrayOperatorsMixin = object


class PandasArray(ExtensionArray, ExtensionOpsMixin, NDArrayOperatorsMixin):
    """
    A pandas ExtensionArray for NumPy data.

    .. versionadded :: 0.24.0

    This is mostly for internal compatibility, and is not especially
    useful on its own.

    Parameters
    ----------
    values : ndarray
        The NumPy ndarray to wrap. Must be 1-dimensional.
    copy : bool, default False
        Whether to copy `values`.

    Notes
    -----
    Operations like ``+`` and applying ufuncs requires NumPy>=1.13.
    """
    # If you're wondering why pd.Series(cls) doesn't put the array in an
    # ExtensionBlock, search for `ABCPandasArray`. We check for
    # that _typ to ensure that that users don't unnecessarily use EAs inside
    # pandas internals, which turns off things like block consolidation.
    _typ = "npy_extension"
    __array_priority__ = 1000

    # ------------------------------------------------------------------------
    # Constructors

    def __init__(self, values, copy=False):
        if isinstance(values, type(self)):
            values = values._ndarray
        if not isinstance(values, np.ndarray):
            raise ValueError("'values' must be a NumPy array.")

        if values.ndim != 1:
            raise ValueError("PandasArray must be 1-dimensional.")

        if copy:
            values = values.copy()

        self._ndarray = values
        self._dtype = PandasDtype(values.dtype)

    @classmethod
    def _from_sequence(cls, scalars, dtype=None, copy=False):
        if isinstance(dtype, PandasDtype):
            dtype = dtype._dtype

        result = np.asarray(scalars, dtype=dtype)
        if copy and result is scalars:
            result = result.copy()
        return cls(result)

    @classmethod
    def _from_factorized(cls, values, original):
        return cls(values)

    @classmethod
    def _concat_same_type(cls, to_concat):
        return cls(np.concatenate(to_concat))

    # ------------------------------------------------------------------------
    # Data

    @property
    def dtype(self):
        return self._dtype

    # ------------------------------------------------------------------------
    # NumPy Array Interface

    def __array__(self, dtype=None):
        return np.asarray(self._ndarray, dtype=dtype)

    _HANDLED_TYPES = (np.ndarray, numbers.Number)

    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
        # Lightly modified version of
        # https://docs.scipy.org/doc/numpy-1.15.1/reference/generated/\
        # numpy.lib.mixins.NDArrayOperatorsMixin.html
        # The primary modification is not boxing scalar return values
        # in PandasArray, since pandas' ExtensionArrays are 1-d.
        out = kwargs.get('out', ())
        for x in inputs + out:
            # Only support operations with instances of _HANDLED_TYPES.
            # Use PandasArray instead of type(self) for isinstance to
            # allow subclasses that don't override __array_ufunc__ to
            # handle PandasArray objects.
            if not isinstance(x, self._HANDLED_TYPES + (PandasArray,)):
                return NotImplemented

        # Defer to the implementation of the ufunc on unwrapped values.
        inputs = tuple(x._ndarray if isinstance(x, PandasArray) else x
                       for x in inputs)
        if out:
            kwargs['out'] = tuple(
                x._ndarray if isinstance(x, PandasArray) else x
                for x in out)
        result = getattr(ufunc, method)(*inputs, **kwargs)

        if type(result) is tuple and len(result):
            # multiple return values
            if not lib.is_scalar(result[0]):
                # re-box array-like results
                return tuple(type(self)(x) for x in result)
            else:
                # but not scalar reductions
                return result
        elif method == 'at':
            # no return value
            return None
        else:
            # one return value
            if not lib.is_scalar(result):
                # re-box array-like results, but not scalar reductions
                result = type(self)(result)
            return result

    # ------------------------------------------------------------------------
    # Pandas ExtensionArray Interface

    def __getitem__(self, item):
        if isinstance(item, type(self)):
            item = item._ndarray

        result = self._ndarray[item]
        if not lib.is_scalar(item):
            result = type(self)(result)
        return result

    def __setitem__(self, key, value):
        from pandas.core.internals.arrays import extract_array

        value = extract_array(value, extract_numpy=True)

        if not lib.is_scalar(key) and is_list_like(key):
            key = np.asarray(key)

        if not lib.is_scalar(value):
            value = np.asarray(value)

        values = self._ndarray
        t = np.result_type(value, values)
        if t != self._ndarray.dtype:
            values = values.astype(t, casting='safe')
            values[key] = value
            self._dtype = PandasDtype(t)
            self._ndarray = values
        else:
            self._ndarray[key] = value

    def __len__(self):
        return len(self._ndarray)

    @property
    def nbytes(self):
        return self._ndarray.nbytes

    def isna(self):
        from pandas import isna

        return isna(self._ndarray)

    def fillna(self, value=None, method=None, limit=None):
        # TODO(_values_for_fillna): remove this
        value, method = validate_fillna_kwargs(value, method)

        mask = self.isna()

        if is_array_like(value):
            if len(value) != len(self):
                raise ValueError("Length of 'value' does not match. Got ({}) "
                                 " expected {}".format(len(value), len(self)))
            value = value[mask]

        if mask.any():
            if method is not None:
                func = pad_1d if method == 'pad' else backfill_1d
                new_values = func(self._ndarray, limit=limit,
                                  mask=mask)
                new_values = self._from_sequence(new_values, dtype=self.dtype)
            else:
                # fill with value
                new_values = self.copy()
                new_values[mask] = value
        else:
            new_values = self.copy()
        return new_values

    def take(self, indices, allow_fill=False, fill_value=None):
        from pandas.core.algorithms import take

        result = take(self._ndarray, indices, allow_fill=allow_fill,
                      fill_value=fill_value)
        return type(self)(result)

    def copy(self, deep=False):
        return type(self)(self._ndarray.copy())

    def _values_for_argsort(self):
        return self._ndarray

    def _values_for_factorize(self):
        return self._ndarray, -1

    def unique(self):
        from pandas import unique

        return type(self)(unique(self._ndarray))

    # ------------------------------------------------------------------------
    # Reductions

    def _reduce(self, name, skipna=True, **kwargs):
        meth = getattr(self, name, None)
        if meth:
            return meth(skipna=skipna, **kwargs)
        else:
            msg = (
                "'{}' does not implement reduction '{}'"
            )
            raise TypeError(msg.format(type(self).__name__, name))

    def any(self, axis=None, out=None, keepdims=False, skipna=True):
        nv.validate_any((), dict(out=out, keepdims=keepdims))
        return nanops.nanany(self._ndarray, axis=axis, skipna=skipna)

    def all(self, axis=None, out=None, keepdims=False, skipna=True):
        nv.validate_all((), dict(out=out, keepdims=keepdims))
        return nanops.nanall(self._ndarray, axis=axis, skipna=skipna)

    def min(self, axis=None, out=None, keepdims=False, skipna=True):
        nv.validate_min((), dict(out=out, keepdims=keepdims))
        return nanops.nanmin(self._ndarray, axis=axis, skipna=skipna)

    def max(self, axis=None, out=None, keepdims=False, skipna=True):
        nv.validate_max((), dict(out=out, keepdims=keepdims))
        return nanops.nanmax(self._ndarray, axis=axis, skipna=skipna)

    def sum(self, axis=None, dtype=None, out=None, keepdims=False,
            initial=None, skipna=True, min_count=0):
        nv.validate_sum((), dict(dtype=dtype, out=out, keepdims=keepdims,
                                 initial=initial))
        return nanops.nansum(self._ndarray, axis=axis, skipna=skipna,
                             min_count=min_count)

    def prod(self, axis=None, dtype=None, out=None, keepdims=False,
Loading ...
agriconnect / pandas python

Version: 0.24.2

/ core / arrays / numpy_.py

Products

About

Resources

Contact Gemfury