Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

agriconnect / pandas   python

Repository URL to install this package:

/ core / base.py

"""
Base and utility classes for pandas objects.
"""
import textwrap
import warnings

import numpy as np

import pandas._libs.lib as lib
import pandas.compat as compat
from pandas.compat import PYPY, OrderedDict, builtins, map, range
from pandas.compat.numpy import function as nv
from pandas.errors import AbstractMethodError
from pandas.util._decorators import Appender, Substitution, cache_readonly
from pandas.util._validators import validate_bool_kwarg

from pandas.core.dtypes.common import (
    is_datetime64_ns_dtype, is_datetime64tz_dtype, is_datetimelike,
    is_extension_array_dtype, is_extension_type, is_list_like, is_object_dtype,
    is_scalar, is_timedelta64_ns_dtype)
from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries
from pandas.core.dtypes.missing import isna

from pandas.core import algorithms, common as com
from pandas.core.accessor import DirNamesMixin
import pandas.core.nanops as nanops

# Module-level registry for shared docstring text; it starts out empty here.
_shared_docs = {}
# Keyword values describing IndexOpsMixin
# (presumably substituted into shared docstring templates -- confirm at the
# use sites, which are not visible in this part of the file).
_indexops_doc_kwargs = {
    'klass': 'IndexOpsMixin',
    'inplace': '',
    'unique': 'IndexOpsMixin',
    'duplicated': 'IndexOpsMixin',
}


class StringMixin(object):
    """
    Mixin that derives ``__str__``, ``__bytes__`` and ``__repr__`` from a
    single ``__unicode__`` method supplied by the concrete class.

    The Python 2 / Python 3 differences are dealt with here, so a subclass
    only has to provide the unicode rendering.
    """
    # side note - this could be turned into a metaclass if more than one
    #             object ever needs it

    # ----------------------------------------------------------------------
    # Formatting

    def __unicode__(self):
        """
        Return the unicode rendering of the object.

        This base version always raises AbstractMethodError; concrete
        classes are expected to override it.
        """
        raise AbstractMethodError(self)

    def __str__(self):
        """
        Return the string form of the object, as used by ``str(obj)``.

        On Python 3 this is the unicode text from ``__unicode__``; otherwise
        it is the encoded bytestring from ``__bytes__``.
        """
        return self.__unicode__() if compat.PY3 else self.__bytes__()

    def __bytes__(self):
        """
        Return the object's rendering encoded as a bytestring.

        The text from ``__unicode__`` is encoded with the
        ``display.encoding`` option; characters that cannot be encoded are
        substituted (``'replace'`` error handler) rather than raising.
        """
        from pandas.core.config import get_option

        # the option is looked up before the text is rendered
        target_encoding = get_option("display.encoding")
        text = self.__unicode__()
        return text.encode(target_encoding, 'replace')

    def __repr__(self):
        """
        Return the same value as ``str(self)``.

        That is a bytestring on Python 2 and a unicode string on Python 3.
        """
        return str(self)


class PandasObject(StringMixin, DirNamesMixin):

    """Common base class for the various pandas objects."""

    @property
    def _constructor(self):
        """Class used as constructor (for this class it is ``__class__``)."""
        return self.__class__

    def __unicode__(self):
        """
        Return the unicode rendering of the object.

        This default falls back to the plain ``object`` repr; subclasses
        are expected to override it with something more useful.
        """
        return object.__repr__(self)

    def _reset_cache(self, key=None):
        """
        Drop cached properties held in ``self._cache``.

        Parameters
        ----------
        key : object, default None
            When given, only that entry is removed (a missing entry is
            ignored); when None the whole cache is emptied.
        """
        cache = getattr(self, '_cache', None)
        if cache is None:
            # no cache has been created, so there is nothing to reset
            return
        if key is None:
            cache.clear()
            return
        cache.pop(key, None)

    def __sizeof__(self):
        """
        Return the memory footprint of the object as an int.

        When the object has a ``memory_usage`` attribute, it is called with
        ``deep=True``; a non-scalar result is summed to a single number.
        Otherwise the parent class ``__sizeof__`` is used.
        """
        if not hasattr(self, 'memory_usage'):
            # no memory_usage attribute: defer to the inherited sizeof
            return super(PandasObject, self).__sizeof__()

        usage = self.memory_usage(deep=True)
        total = usage if is_scalar(usage) else usage.sum()
        return int(total)


class NoNewAttributesMixin(object):
    """Mixin which prevents adding new attributes.

    Prevents additional attributes via xxx.attribute = "something" after a
    call to `self._freeze()`. Mainly used to prevent the user from using
    wrong attributes on an accessor (`Series.cat/.str/.dt`).

    Attributes that already exist when the object is frozen (whatever their
    current value, including None) can still be re-assigned afterwards.

    If you really want to add a new attribute at a later time, you need to use
    `object.__setattr__(self, key, value)`.
    """

    def _freeze(self):
        """Prevents setting additional attributes"""
        # stored through object.__setattr__ so the flag itself bypasses the
        # check in __setattr__ below
        object.__setattr__(self, "__frozen", True)

    # prevent adding any attribute via s.xxx.new_attribute = ...
    def __setattr__(self, key, value):
        """
        Set ``key`` to ``value``, refusing brand-new names once frozen.

        Raises
        ------
        AttributeError
            If the object is frozen and ``key`` is neither ``_cache``, nor
            defined on the class, nor an attribute that already exists.
        """
        if getattr(self, "__frozen", False):
            # A private sentinel (instead of None) is used as the getattr
            # default so that an existing attribute whose current value is
            # None is still recognised as existing.
            missing = object()

            # _cache is used by a decorator
            # We need to check both 1.) cls.__dict__ and 2.) getattr(self, key)
            # because
            # 1.) getattr is false for attributes that raise errors
            # 2.) cls.__dict__ doesn't traverse into base classes
            known = (key == "_cache" or
                     key in type(self).__dict__ or
                     getattr(self, key, missing) is not missing)
            if not known:
                raise AttributeError(
                    "You cannot add any new attribute '{key}'".format(key=key))
        object.__setattr__(self, key, value)


class GroupByError(Exception):
    """Base exception that DataError and SpecificationError derive from."""


class DataError(GroupByError):
    """
    GroupByError subclass adding no behaviour of its own.

    NOTE(review): presumably signals data that cannot be operated on --
    confirm against the raise sites, which are not in this part of the file.
    """


class SpecificationError(GroupByError):
    """
    GroupByError subclass adding no behaviour of its own.

    NOTE(review): presumably signals an invalid aggregation specification --
    confirm against the raise sites, which are not in this part of the file.
    """


class SelectionMixin(object):
    """
    mixin implementing the selection & aggregation interface on a group-like
    object; sub-classes need to define: obj, exclusions
    """
    # current column selection; None means nothing has been selected yet
    # (__getitem__ refuses a second selection once this is set)
    _selection = None
    # NOTE(review): not referenced in the visible part of this class;
    # presumably consumed by attribute-handling code elsewhere -- confirm.
    _internal_names = ['_cache', '__setstate__']
    _internal_names_set = set(_internal_names)

    # Python builtin reducers paired with their numpy counterparts.
    _builtin_table = OrderedDict((
        (builtins.sum, np.sum),
        (builtins.max, np.max),
        (builtins.min, np.min),
    ))

    # Callables (Python builtins and numpy functions) paired with a string
    # name.  A numpy function and its nan-aware variant share one name
    # (e.g. np.sum and np.nansum both give 'sum').
    # NOTE(review): the names presumably identify optimized implementations
    # looked up by the consumers of this table -- confirm at the use sites.
    _cython_table = OrderedDict((
        (builtins.sum, 'sum'),
        (builtins.max, 'max'),
        (builtins.min, 'min'),
        (np.all, 'all'),
        (np.any, 'any'),
        (np.sum, 'sum'),
        (np.nansum, 'sum'),
        (np.mean, 'mean'),
        (np.nanmean, 'mean'),
        (np.prod, 'prod'),
        (np.nanprod, 'prod'),
        (np.std, 'std'),
        (np.nanstd, 'std'),
        (np.var, 'var'),
        (np.nanvar, 'var'),
        (np.median, 'median'),
        (np.nanmedian, 'median'),
        (np.max, 'max'),
        (np.nanmax, 'max'),
        (np.min, 'min'),
        (np.nanmin, 'min'),
        (np.cumprod, 'cumprod'),
        (np.nancumprod, 'cumprod'),
        (np.cumsum, 'cumsum'),
        (np.nancumsum, 'cumsum'),
    ))

    @property
    def _selection_name(self):
        """
        return a name for myself; this would ideally be called
        the 'name' property, but we cannot conflict with the
        Series.name property which can be set
        """
        if self._selection is None:
            return None  # 'result'
        else:
            return self._selection

    @property
    def _selection_list(self):
        if not isinstance(self._selection, (list, tuple, ABCSeries,
                                            ABCIndexClass, np.ndarray)):
            return [self._selection]
        return self._selection

    @cache_readonly
    def _selected_obj(self):

        if self._selection is None or isinstance(self.obj, ABCSeries):
            return self.obj
        else:
            return self.obj[self._selection]

    @cache_readonly
    def ndim(self):
        return self._selected_obj.ndim

    @cache_readonly
    def _obj_with_exclusions(self):
        if self._selection is not None and isinstance(self.obj,
                                                      ABCDataFrame):
            return self.obj.reindex(columns=self._selection_list)

        if len(self.exclusions) > 0:
            return self.obj.drop(self.exclusions, axis=1)
        else:
            return self.obj

    def __getitem__(self, key):
        if self._selection is not None:
            raise IndexError('Column(s) {selection} already selected'
                             .format(selection=self._selection))

        if isinstance(key, (list, tuple, ABCSeries, ABCIndexClass,
                            np.ndarray)):
            if len(self.obj.columns.intersection(key)) != len(key):
                bad_keys = list(set(key).difference(self.obj.columns))
                raise KeyError("Columns not found: {missing}"
                               .format(missing=str(bad_keys)[1:-1]))
            return self._gotitem(list(key), ndim=2)

        elif not getattr(self, 'as_index', False):
            if key not in self.obj.columns:
                raise KeyError("Column not found: {key}".format(key=key))
            return self._gotitem(key, ndim=2)

        else:
            if key not in self.obj:
                raise KeyError("Column not found: {key}".format(key=key))
            return self._gotitem(key, ndim=1)

    def _gotitem(self, key, ndim, subset=None):
        """
        Return a sliced object; sub-classes are expected to define this.

        Parameters
        ----------
        key : string / list of selections
        ndim : 1,2
            requested ndim of result
        subset : object, default None
            subset to act on

        Raises
        ------
        AbstractMethodError
            Always, in this base implementation.
        """
        raise AbstractMethodError(self)

    def aggregate(self, func, *args, **kwargs):
        """
        Aggregation entry point; sub-classes are expected to override this.

        Raises
        ------
        AbstractMethodError
            Always, in this base implementation.
        """
        raise AbstractMethodError(self)

    # Short alias.  It is bound to the function object defined just above, so
    # a subclass that overrides ``aggregate`` has to rebind ``agg`` itself if
    # the alias should follow the override.
    agg = aggregate

    def _try_aggregate_string_function(self, arg, *args, **kwargs):
        """
        Resolve the string ``arg`` to an operation and apply it.

        Lookup order:
        - an attribute of this object with that name: called with
          ``*args`` / ``**kwargs`` when callable, returned as-is otherwise
        - a numpy function with that name, called with this object as its
          first argument
        - otherwise a ValueError is raised
        """
        assert isinstance(arg, compat.string_types)

        own_attr = getattr(self, arg, None)
        if own_attr is not None:
            if not callable(own_attr):
                # people may try to aggregate on a non-callable attribute
                # but don't let them think they can pass args to it
                assert not args
                assert all(name in ('axis', '_level') for name in kwargs)
                return own_attr
            return own_attr(*args, **kwargs)

        numpy_func = getattr(np, arg, None)
        if numpy_func is None:
            raise ValueError(
                "{arg} is an unknown string function".format(arg=arg))
        return numpy_func(self, *args, **kwargs)

    def _aggregate(self, arg, *args, **kwargs):
        """
        provide an implementation for the aggregators

        Parameters
        ----------
        arg : string, dict, function
        *args : args to pass on to the function
        **kwargs : kwargs to pass on to the function

        Returns
        -------
        tuple of result, how

        Notes
        -----
        how can be a string describe the required post-processing, or
        None if not required
        """
Loading ...