# pylint: disable=W0231,E1101
import collections
from datetime import timedelta
import functools
import gc
import json
import operator
from textwrap import dedent
import warnings
import weakref
import numpy as np
from pandas._libs import Timestamp, iNaT, properties
import pandas.compat as compat
from pandas.compat import (
cPickle as pkl, isidentifier, lrange, lzip, map, set_function_name,
string_types, to_str, zip)
from pandas.compat.numpy import function as nv
from pandas.errors import AbstractMethodError
from pandas.util._decorators import (
Appender, Substitution, rewrite_axis_style_signature)
from pandas.util._validators import validate_bool_kwarg, validate_fillna_kwargs
from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask
from pandas.core.dtypes.common import (
ensure_int64, ensure_object, is_bool, is_bool_dtype,
is_datetime64_any_dtype, is_datetime64tz_dtype, is_dict_like,
is_extension_array_dtype, is_integer, is_list_like, is_number,
is_numeric_dtype, is_object_dtype, is_period_arraylike, is_re_compilable,
is_scalar, is_timedelta64_dtype, pandas_dtype)
from pandas.core.dtypes.generic import ABCDataFrame, ABCPanel, ABCSeries
from pandas.core.dtypes.inference import is_hashable
from pandas.core.dtypes.missing import isna, notna
import pandas as pd
from pandas.core import config, missing, nanops
import pandas.core.algorithms as algos
from pandas.core.base import PandasObject, SelectionMixin
import pandas.core.common as com
from pandas.core.index import (
Index, InvalidIndexError, MultiIndex, RangeIndex, ensure_index)
from pandas.core.indexes.datetimes import DatetimeIndex
from pandas.core.indexes.period import Period, PeriodIndex
import pandas.core.indexing as indexing
from pandas.core.internals import BlockManager
from pandas.core.ops import _align_method_FRAME
from pandas.io.formats.format import DataFrameFormatter, format_percentiles
from pandas.io.formats.printing import pprint_thing
from pandas.tseries.frequencies import to_offset
# goal is to be able to define the docs close to function, while still being
# able to share
_shared_docs = dict()
_shared_doc_kwargs = dict(
axes='keywords for axes', klass='NDFrame',
axes_single_arg='int or labels for object',
args_transpose='axes to permute (int or label for object)',
optional_by="""
by : str or list of str
Name or list of names to sort by""")
# sentinel value to use as kwarg in place of None when None has special meaning
# and needs to be distinguished from a user explicitly passing None.
sentinel = object()
def _single_replace(self, to_replace, method, inplace, limit):
"""
Replaces values in a Series using the fill method specified when no
replacement value is given in the replace method
"""
if self.ndim != 1:
raise TypeError('cannot replace {0} with method {1} on a {2}'
.format(to_replace, method, type(self).__name__))
orig_dtype = self.dtype
result = self if inplace else self.copy()
fill_f = missing.get_fill_func(method)
mask = missing.mask_missing(result.values, to_replace)
values = fill_f(result.values, limit=limit, mask=mask)
if values.dtype == orig_dtype and inplace:
return
result = pd.Series(values, index=self.index,
dtype=self.dtype).__finalize__(self)
if inplace:
self._update_inplace(result._data)
return
return result
class NDFrame(PandasObject, SelectionMixin):
"""
N-dimensional analogue of DataFrame. Store multi-dimensional in a
size-mutable, labeled data structure
Parameters
----------
data : BlockManager
axes : list
copy : boolean, default False
"""
_internal_names = ['_data', '_cacher', '_item_cache', '_cache', '_is_copy',
'_subtyp', '_name', '_index', '_default_kind',
'_default_fill_value', '_metadata', '__array_struct__',
'__array_interface__']
_internal_names_set = set(_internal_names)
_accessors = frozenset()
_deprecations = frozenset(['as_blocks', 'blocks',
'convert_objects', 'is_copy'])
_metadata = []
_is_copy = None
# dummy attribute so that datetime.__eq__(Series/DataFrame) defers
# by returning NotImplemented
timetuple = None
# ----------------------------------------------------------------------
# Constructors
def __init__(self, data, axes=None, copy=False, dtype=None,
fastpath=False):
if not fastpath:
if dtype is not None:
data = data.astype(dtype)
elif copy:
data = data.copy()
if axes is not None:
for i, ax in enumerate(axes):
data = data.reindex_axis(ax, axis=i)
object.__setattr__(self, '_is_copy', None)
object.__setattr__(self, '_data', data)
object.__setattr__(self, '_item_cache', {})
def _init_mgr(self, mgr, axes=None, dtype=None, copy=False):
""" passed a manager and a axes dict """
for a, axe in axes.items():
if axe is not None:
mgr = mgr.reindex_axis(axe,
axis=self._get_block_manager_axis(a),
copy=False)
# make a copy if explicitly requested
if copy:
mgr = mgr.copy()
if dtype is not None:
# avoid further copies if we can
if len(mgr.blocks) > 1 or mgr.blocks[0].values.dtype != dtype:
mgr = mgr.astype(dtype=dtype)
return mgr
# ----------------------------------------------------------------------
@property
def is_copy(self):
"""
Return the copy.
"""
warnings.warn("Attribute 'is_copy' is deprecated and will be removed "
"in a future version.", FutureWarning, stacklevel=2)
return self._is_copy
@is_copy.setter
def is_copy(self, msg):
warnings.warn("Attribute 'is_copy' is deprecated and will be removed "
"in a future version.", FutureWarning, stacklevel=2)
self._is_copy = msg
def _validate_dtype(self, dtype):
""" validate the passed dtype """
if dtype is not None:
dtype = pandas_dtype(dtype)
# a compound dtype
if dtype.kind == 'V':
raise NotImplementedError("compound dtypes are not implemented"
" in the {0} constructor"
.format(self.__class__.__name__))
return dtype
# ----------------------------------------------------------------------
# Construction
@property
def _constructor(self):
"""Used when a manipulation result has the same dimensions as the
original.
"""
raise AbstractMethodError(self)
@property
def _constructor_sliced(self):
"""Used when a manipulation result has one lower dimension(s) as the
original, such as DataFrame single columns slicing.
"""
raise AbstractMethodError(self)
@property
def _constructor_expanddim(self):
"""Used when a manipulation result has one higher dimension as the
original, such as Series.to_frame() and DataFrame.to_panel()
"""
raise NotImplementedError
# ----------------------------------------------------------------------
# Axis
@classmethod
def _setup_axes(cls, axes, info_axis=None, stat_axis=None, aliases=None,
slicers=None, axes_are_reversed=False, build_axes=True,
ns=None, docs=None):
"""Provide axes setup for the major PandasObjects.
Parameters
----------
axes : the names of the axes in order (lowest to highest)
info_axis_num : the axis of the selector dimension (int)
stat_axis_num : the number of axis for the default stats (int)
aliases : other names for a single axis (dict)
slicers : how axes slice to others (dict)
axes_are_reversed : boolean whether to treat passed axes as
reversed (DataFrame)
build_axes : setup the axis properties (default True)
"""
cls._AXIS_ORDERS = axes
cls._AXIS_NUMBERS = {a: i for i, a in enumerate(axes)}
cls._AXIS_LEN = len(axes)
cls._AXIS_ALIASES = aliases or dict()
cls._AXIS_IALIASES = {v: k for k, v in cls._AXIS_ALIASES.items()}
cls._AXIS_NAMES = dict(enumerate(axes))
cls._AXIS_SLICEMAP = slicers or None
cls._AXIS_REVERSED = axes_are_reversed
# typ
setattr(cls, '_typ', cls.__name__.lower())
# indexing support
cls._ix = None
if info_axis is not None:
cls._info_axis_number = info_axis
cls._info_axis_name = axes[info_axis]
if stat_axis is not None:
cls._stat_axis_number = stat_axis
cls._stat_axis_name = axes[stat_axis]
# setup the actual axis
if build_axes:
def set_axis(a, i):
setattr(cls, a, properties.AxisProperty(i, docs.get(a, a)))
cls._internal_names_set.add(a)
if axes_are_reversed:
m = cls._AXIS_LEN - 1
for i, a in cls._AXIS_NAMES.items():
set_axis(a, m - i)
else:
for i, a in cls._AXIS_NAMES.items():
set_axis(a, i)
assert not isinstance(ns, dict)
def _construct_axes_dict(self, axes=None, **kwargs):
"""Return an axes dictionary for myself."""
d = {a: self._get_axis(a) for a in (axes or self._AXIS_ORDERS)}
d.update(kwargs)
return d
@staticmethod
def _construct_axes_dict_from(self, axes, **kwargs):
"""Return an axes dictionary for the passed axes."""
d = {a: ax for a, ax in zip(self._AXIS_ORDERS, axes)}
d.update(kwargs)
return d
def _construct_axes_dict_for_slice(self, axes=None, **kwargs):
"""Return an axes dictionary for myself."""
d = {self._AXIS_SLICEMAP[a]: self._get_axis(a)
for a in (axes or self._AXIS_ORDERS)}
d.update(kwargs)
return d
def _construct_axes_from_arguments(
self, args, kwargs, require_all=False, sentinel=None):
"""Construct and returns axes if supplied in args/kwargs.
If require_all, raise if all axis arguments are not supplied
return a tuple of (axes, kwargs).
sentinel specifies the default parameter when an axis is not
supplied; useful to distinguish when a user explicitly passes None
in scenarios where None has special meaning.
"""
# construct the args
args = list(args)
for a in self._AXIS_ORDERS:
# if we have an alias for this axis
alias = self._AXIS_IALIASES.get(a)
if alias is not None:
if a in kwargs:
if alias in kwargs:
raise TypeError("arguments are mutually exclusive "
"for [%s,%s]" % (a, alias))
continue
if alias in kwargs:
kwargs[a] = kwargs.pop(alias)
continue
# look for a argument by position
if a not in kwargs:
try:
kwargs[a] = args.pop(0)
except IndexError:
if require_all:
raise TypeError("not enough/duplicate arguments "
"specified!")
axes = {a: kwargs.pop(a, sentinel) for a in self._AXIS_ORDERS}
return axes, kwargs
@classmethod
def _from_axes(cls, data, axes, **kwargs):
# for construction from BlockManager
if isinstance(data, BlockManager):
return cls(data, **kwargs)
else:
if cls._AXIS_REVERSED:
axes = axes[::-1]
d = cls._construct_axes_dict_from(cls, axes, copy=False)
Loading ...