"""
Base and utility classes for pandas objects.
"""
import textwrap
import warnings
import numpy as np
import pandas._libs.lib as lib
import pandas.compat as compat
from pandas.compat import PYPY, OrderedDict, builtins, map, range
from pandas.compat.numpy import function as nv
from pandas.errors import AbstractMethodError
from pandas.util._decorators import Appender, Substitution, cache_readonly
from pandas.util._validators import validate_bool_kwarg
from pandas.core.dtypes.common import (
is_datetime64_ns_dtype, is_datetime64tz_dtype, is_datetimelike,
is_extension_array_dtype, is_extension_type, is_list_like, is_object_dtype,
is_scalar, is_timedelta64_ns_dtype)
from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries
from pandas.core.dtypes.missing import isna
from pandas.core import algorithms, common as com
from pandas.core.accessor import DirNamesMixin
import pandas.core.nanops as nanops
_shared_docs = dict()
_indexops_doc_kwargs = dict(klass='IndexOpsMixin', inplace='',
unique='IndexOpsMixin', duplicated='IndexOpsMixin')
class StringMixin(object):
"""implements string methods so long as object defines a `__unicode__`
method.
Handles Python2/3 compatibility transparently.
"""
# side note - this could be made into a metaclass if more than one
# object needs
# ----------------------------------------------------------------------
# Formatting
def __unicode__(self):
raise AbstractMethodError(self)
def __str__(self):
"""
Return a string representation for a particular Object
Invoked by str(df) in both py2/py3.
Yields Bytestring in Py2, Unicode String in py3.
"""
if compat.PY3:
return self.__unicode__()
return self.__bytes__()
def __bytes__(self):
"""
Return a string representation for a particular object.
Invoked by bytes(obj) in py3 only.
Yields a bytestring in both py2/py3.
"""
from pandas.core.config import get_option
encoding = get_option("display.encoding")
return self.__unicode__().encode(encoding, 'replace')
def __repr__(self):
"""
Return a string representation for a particular object.
Yields Bytestring in Py2, Unicode String in py3.
"""
return str(self)
class PandasObject(StringMixin, DirNamesMixin):
"""baseclass for various pandas objects"""
@property
def _constructor(self):
"""class constructor (for this class it's just `__class__`"""
return self.__class__
def __unicode__(self):
"""
Return a string representation for a particular object.
Invoked by unicode(obj) in py2 only. Yields a Unicode String in both
py2/py3.
"""
# Should be overwritten by base classes
return object.__repr__(self)
def _reset_cache(self, key=None):
"""
Reset cached properties. If ``key`` is passed, only clears that key.
"""
if getattr(self, '_cache', None) is None:
return
if key is None:
self._cache.clear()
else:
self._cache.pop(key, None)
def __sizeof__(self):
"""
Generates the total memory usage for an object that returns
either a value or Series of values
"""
if hasattr(self, 'memory_usage'):
mem = self.memory_usage(deep=True)
if not is_scalar(mem):
mem = mem.sum()
return int(mem)
# no memory_usage attribute, so fall back to
# object's 'sizeof'
return super(PandasObject, self).__sizeof__()
class NoNewAttributesMixin(object):
"""Mixin which prevents adding new attributes.
Prevents additional attributes via xxx.attribute = "something" after a
call to `self.__freeze()`. Mainly used to prevent the user from using
wrong attributes on a accessor (`Series.cat/.str/.dt`).
If you really want to add a new attribute at a later time, you need to use
`object.__setattr__(self, key, value)`.
"""
def _freeze(self):
"""Prevents setting additional attributes"""
object.__setattr__(self, "__frozen", True)
# prevent adding any attribute via s.xxx.new_attribute = ...
def __setattr__(self, key, value):
# _cache is used by a decorator
# We need to check both 1.) cls.__dict__ and 2.) getattr(self, key)
# because
# 1.) getattr is false for attributes that raise errors
# 2.) cls.__dict__ doesn't traverse into base classes
if (getattr(self, "__frozen", False) and not
(key == "_cache" or
key in type(self).__dict__ or
getattr(self, key, None) is not None)):
raise AttributeError("You cannot add any new attribute '{key}'".
format(key=key))
object.__setattr__(self, key, value)
class GroupByError(Exception):
pass
class DataError(GroupByError):
pass
class SpecificationError(GroupByError):
pass
class SelectionMixin(object):
"""
mixin implementing the selection & aggregation interface on a group-like
object sub-classes need to define: obj, exclusions
"""
_selection = None
_internal_names = ['_cache', '__setstate__']
_internal_names_set = set(_internal_names)
_builtin_table = OrderedDict((
(builtins.sum, np.sum),
(builtins.max, np.max),
(builtins.min, np.min),
))
_cython_table = OrderedDict((
(builtins.sum, 'sum'),
(builtins.max, 'max'),
(builtins.min, 'min'),
(np.all, 'all'),
(np.any, 'any'),
(np.sum, 'sum'),
(np.nansum, 'sum'),
(np.mean, 'mean'),
(np.nanmean, 'mean'),
(np.prod, 'prod'),
(np.nanprod, 'prod'),
(np.std, 'std'),
(np.nanstd, 'std'),
(np.var, 'var'),
(np.nanvar, 'var'),
(np.median, 'median'),
(np.nanmedian, 'median'),
(np.max, 'max'),
(np.nanmax, 'max'),
(np.min, 'min'),
(np.nanmin, 'min'),
(np.cumprod, 'cumprod'),
(np.nancumprod, 'cumprod'),
(np.cumsum, 'cumsum'),
(np.nancumsum, 'cumsum'),
))
@property
def _selection_name(self):
"""
return a name for myself; this would ideally be called
the 'name' property, but we cannot conflict with the
Series.name property which can be set
"""
if self._selection is None:
return None # 'result'
else:
return self._selection
@property
def _selection_list(self):
if not isinstance(self._selection, (list, tuple, ABCSeries,
ABCIndexClass, np.ndarray)):
return [self._selection]
return self._selection
@cache_readonly
def _selected_obj(self):
if self._selection is None or isinstance(self.obj, ABCSeries):
return self.obj
else:
return self.obj[self._selection]
@cache_readonly
def ndim(self):
return self._selected_obj.ndim
@cache_readonly
def _obj_with_exclusions(self):
if self._selection is not None and isinstance(self.obj,
ABCDataFrame):
return self.obj.reindex(columns=self._selection_list)
if len(self.exclusions) > 0:
return self.obj.drop(self.exclusions, axis=1)
else:
return self.obj
def __getitem__(self, key):
if self._selection is not None:
raise IndexError('Column(s) {selection} already selected'
.format(selection=self._selection))
if isinstance(key, (list, tuple, ABCSeries, ABCIndexClass,
np.ndarray)):
if len(self.obj.columns.intersection(key)) != len(key):
bad_keys = list(set(key).difference(self.obj.columns))
raise KeyError("Columns not found: {missing}"
.format(missing=str(bad_keys)[1:-1]))
return self._gotitem(list(key), ndim=2)
elif not getattr(self, 'as_index', False):
if key not in self.obj.columns:
raise KeyError("Column not found: {key}".format(key=key))
return self._gotitem(key, ndim=2)
else:
if key not in self.obj:
raise KeyError("Column not found: {key}".format(key=key))
return self._gotitem(key, ndim=1)
def _gotitem(self, key, ndim, subset=None):
"""
sub-classes to define
return a sliced object
Parameters
----------
key : string / list of selections
ndim : 1,2
requested ndim of result
subset : object, default None
subset to act on
"""
raise AbstractMethodError(self)
def aggregate(self, func, *args, **kwargs):
raise AbstractMethodError(self)
agg = aggregate
def _try_aggregate_string_function(self, arg, *args, **kwargs):
"""
if arg is a string, then try to operate on it:
- try to find a function (or attribute) on ourselves
- try to find a numpy function
- raise
"""
assert isinstance(arg, compat.string_types)
f = getattr(self, arg, None)
if f is not None:
if callable(f):
return f(*args, **kwargs)
# people may try to aggregate on a non-callable attribute
# but don't let them think they can pass args to it
assert len(args) == 0
assert len([kwarg for kwarg in kwargs
if kwarg not in ['axis', '_level']]) == 0
return f
f = getattr(np, arg, None)
if f is not None:
return f(self, *args, **kwargs)
raise ValueError("{arg} is an unknown string function".format(arg=arg))
def _aggregate(self, arg, *args, **kwargs):
"""
provide an implementation for the aggregators
Parameters
----------
arg : string, dict, function
*args : args to pass on to the function
**kwargs : kwargs to pass on to the function
Returns
-------
tuple of result, how
Notes
-----
how can be a string describe the required post-processing, or
None if not required
"""
Loading ...