"""
Contains data structures designed for manipulating panel (3-dimensional) data
"""
# pylint: disable=E1103,W0231,W0212,W0621
from __future__ import division
import warnings
import numpy as np
import pandas.compat as compat
from pandas.compat import OrderedDict, map, range, u, zip
from pandas.compat.numpy import function as nv
from pandas.util._decorators import Appender, Substitution, deprecate_kwarg
from pandas.util._validators import validate_axis_style_args
from pandas.core.dtypes.cast import (
cast_scalar_to_array, infer_dtype_from_scalar, maybe_cast_item)
from pandas.core.dtypes.common import (
is_integer, is_list_like, is_scalar, is_string_like)
from pandas.core.dtypes.missing import notna
import pandas.core.common as com
from pandas.core.frame import DataFrame
from pandas.core.generic import NDFrame, _shared_docs
from pandas.core.index import (
Index, MultiIndex, _get_objs_combined_axis, ensure_index)
import pandas.core.indexes.base as ibase
from pandas.core.indexing import maybe_droplevels
from pandas.core.internals import (
BlockManager, create_block_manager_from_arrays,
create_block_manager_from_blocks)
import pandas.core.ops as ops
from pandas.core.reshape.util import cartesian_product
from pandas.core.series import Series
from pandas.io.formats.printing import pprint_thing
# Keyword substitutions describing Panel (axis names, class name, accepted
# axis spellings). Presumably consumed by shared docstring templates; the
# consumers are not visible in this part of the file.
_shared_doc_kwargs = {
    'axes': 'items, major_axis, minor_axis',
    'klass': "Panel",
    'axes_single_arg': "{0, 1, 2, 'items', 'major_axis', 'minor_axis'}",
    'optional_mapper': '',
    'optional_axis': '',
    'optional_labels': '',
}
# ``args_transpose`` embeds the accepted axis spellings defined just above.
_shared_doc_kwargs.update(
    args_transpose=(
        "three positional arguments: each one of\n{ax_single}"
    ).format(ax_single=_shared_doc_kwargs['axes_single_arg']))
def _ensure_like_indices(time, panels):
    """
    Expand ``time`` and ``panels`` so they can be paired element-wise.

    An input is only expanded when it contains no duplicates:

    * a duplicate-free ``time`` is replaced by its sorted unique values
      tiled once per unique panel;
    * a duplicate-free ``panels`` is replaced by its sorted unique values,
      each repeated once per unique time.

    An input that already contains repeats is returned unchanged.

    Returns
    -------
    tuple
        ``(time, panels)``
    """
    time_len = len(time)
    panels_len = len(panels)
    # np.unique returns *sorted* unique values, so expanded outputs follow
    # sorted order rather than the order of the inputs.
    unique_panels = np.unique(panels)
    unique_time = np.unique(time)

    time_has_no_repeats = len(unique_time) == time_len
    panels_have_no_repeats = len(unique_panels) == panels_len

    if time_has_no_repeats:
        time = np.tile(unique_time, len(unique_panels))
    if panels_have_no_repeats:
        panels = np.repeat(unique_panels, len(unique_time))
    return time, panels
def panel_index(time, panels, names=None):
    """
    Build a two-level MultiIndex for a panel-like DataFrame.

    Parameters
    ----------
    time : array-like
        Time index, does not have to repeat
    panels : array-like
        Panel index, does not have to repeat
    names : list, optional
        Names of the two index levels; ``['time', 'panel']`` is used
        when omitted.

    Returns
    -------
    multi_index : MultiIndex
        Time index is the first level, the panels are the second level.

    Examples
    --------
    >>> years = range(1960,1963)
    >>> panels = ['A', 'B', 'C']
    >>> panel_idx = panel_index(years, panels)
    >>> panel_idx
    MultiIndex([(1960, 'A'), (1961, 'A'), (1962, 'A'), (1960, 'B'),
                (1961, 'B'), (1962, 'B'), (1960, 'C'), (1961, 'C'),
                (1962, 'C')], dtype=object)

    or

    >>> years = np.repeat(range(1960,1963), 3)
    >>> panels = np.tile(['A', 'B', 'C'], 3)
    >>> panel_idx = panel_index(years, panels)
    >>> panel_idx
    MultiIndex([(1960, 'A'), (1960, 'B'), (1960, 'C'), (1961, 'A'),
                (1961, 'B'), (1961, 'C'), (1962, 'A'), (1962, 'B'),
                (1962, 'C')], dtype=object)
    """
    # Non-repeating inputs are expanded so both arrays line up pairwise.
    time, panels = _ensure_like_indices(time, panels)
    level_names = ['time', 'panel'] if names is None else names
    level_arrays = [time, panels]
    return MultiIndex.from_arrays(level_arrays, sortorder=None,
                                  names=level_names)
class Panel(NDFrame):
"""
Represents wide format panel data, stored as 3-dimensional array.
.. deprecated:: 0.20.0
The recommended way to represent 3-D data is with a MultiIndex on a
DataFrame via the :meth:`~Panel.to_frame` method or with the
`xarray package <http://xarray.pydata.org/en/stable/>`__.
Pandas provides a :meth:`~Panel.to_xarray` method to automate this
conversion.
Parameters
----------
data : ndarray (items x major x minor), or dict of DataFrames
items : Index or array-like
axis=0
major_axis : Index or array-like
axis=1
minor_axis : Index or array-like
axis=2
copy : boolean, default False
Copy data from inputs. Only affects DataFrame / 2d ndarray input
dtype : dtype, default None
Data type to force, otherwise infer
"""
@property
def _constructor(self):
    """Return the runtime class of ``self``, used to build new objects."""
    klass = type(self)
    return klass

# Container type for a single item; ``_init_dict`` wraps nested dict values
# with it and uses it to recognise values that need reindexing.
_constructor_sliced = DataFrame
def __init__(self, data=None, items=None, major_axis=None, minor_axis=None,
             copy=False, dtype=None):
    """
    Emit the Panel deprecation warning, then delegate to ``_init_data``.

    All arguments are forwarded to ``_init_data`` by keyword.
    """
    # deprecation GH13563
    deprecation_message = (
        "\nPanel is deprecated and will be removed in a "
        "future version.\nThe recommended way to represent "
        "these types of 3-dimensional data are with a "
        "MultiIndex on a DataFrame, via the "
        "Panel.to_frame() method\n"
        "Alternatively, you can use the xarray package "
        "http://xarray.pydata.org/en/stable/.\n"
        "Pandas provides a `.to_xarray()` method to help "
        "automate this conversion.\n"
    )
    # stacklevel=3 attributes the warning two frames above this method.
    warnings.warn(deprecation_message, FutureWarning, stacklevel=3)

    axis_kwargs = dict(items=items, major_axis=major_axis,
                       minor_axis=minor_axis)
    self._init_data(data=data, copy=copy, dtype=dtype, **axis_kwargs)
def _init_data(self, data, copy, dtype, **kwargs):
    """
    Generate ND initialization; axes are passed
    as required objects to __init__.

    Parameters
    ----------
    data : BlockManager, dict, ndarray, list, scalar or None
        ``None`` is treated as an empty dict. A scalar is only accepted
        when every axis is supplied.
    copy : boolean
    dtype : dtype or None
        Validated with ``self._validate_dtype`` when not None.
    **kwargs
        One optional entry per name in ``self._AXIS_ORDERS``.

    Raises
    ------
    TypeError
        If ``kwargs`` contains a name not in ``self._AXIS_ORDERS``.
    ValueError
        If ``data`` matches none of the supported input kinds.
    """
    data = {} if data is None else data
    if dtype is not None:
        dtype = self._validate_dtype(dtype)

    # Collect one (possibly None) axis per known axis name; whatever is
    # still left in kwargs afterwards is an unknown keyword.
    passed_axes = []
    for axis_name in self._AXIS_ORDERS:
        passed_axes.append(kwargs.pop(axis_name, None))

    if kwargs:
        unexpected = next(iter(kwargs))
        raise TypeError('_init_data() got an unexpected keyword '
                        'argument "{0}"'.format(unexpected))

    axes = None
    if isinstance(data, BlockManager):
        mgr = data
        if com._any_not_none(*passed_axes):
            # Explicitly passed axes win; the manager's own axes fill gaps.
            axes = [given if given is not None else existing
                    for given, existing in zip(passed_axes, data.axes)]
    elif isinstance(data, dict):
        mgr = self._init_dict(data, passed_axes, dtype=dtype)
        # copy/dtype were already handed to the helper above.
        copy, dtype = False, None
    elif isinstance(data, (np.ndarray, list)):
        mgr = self._init_matrix(data, passed_axes, dtype=dtype, copy=copy)
        copy, dtype = False, None
    elif is_scalar(data) and com._all_not_none(*passed_axes):
        target_shape = [len(ax) for ax in passed_axes]
        values = cast_scalar_to_array(target_shape, data, dtype=dtype)
        mgr = self._init_matrix(values, passed_axes, dtype=values.dtype,
                                copy=False)
        copy = False
    else:  # pragma: no cover
        raise ValueError('Panel constructor not properly called!')

    NDFrame.__init__(self, mgr, axes=axes, copy=copy, dtype=dtype)
def _init_dict(self, data, axes, dtype=None):
    """
    Build the block manager for dict input.

    Parameters
    ----------
    data : dict
        Maps info-axis labels to values. Nested dict values are wrapped
        with ``self._constructor_sliced``. The mapping passed in is never
        modified; a new ``OrderedDict`` is built instead.
    axes : list
        One entry per axis, ``None`` where the axis was not supplied.
        The entry at ``self._info_axis_number`` is popped from this list.
    dtype : dtype, optional
        When given, each ``_constructor_sliced`` value is cast to it and
        it is used for the NaN filler of missing labels.

    Returns
    -------
    The result of ``self._init_arrays`` for the collected arrays.
    """
    haxis = axes.pop(self._info_axis_number)

    # prefilter if haxis passed
    if haxis is not None:
        haxis = ensure_index(haxis)
        pairs = ((k, v) for k, v in compat.iteritems(data) if k in haxis)
    else:
        haxis = Index(com.dict_keys_to_ordered_list(data))
        pairs = compat.iteritems(data)

    # Always rebuild the mapping. Previously, when no info axis was passed,
    # nested dicts were replaced *inside the caller's dict*, a side effect
    # that the prefiltered branch never had.
    data = OrderedDict(
        (k, self._constructor_sliced(v) if isinstance(v, dict) else v)
        for k, v in pairs)

    # extract axis for remaining axes & create the slicemap
    raxes = [self._extract_axis(self, data, axis=i) if a is None else a
             for i, a in enumerate(axes)]
    raxes_sm = self._extract_axes_for_slice(self, raxes)

    # shallow copy
    arrays = []
    haxis_shape = [len(a) for a in raxes]
    for h in haxis:
        values = data.get(h)
        if values is None:
            # Label requested but absent from the data: all-NaN filler.
            values = np.empty(haxis_shape, dtype=dtype)
            values.fill(np.nan)
        elif isinstance(values, self._constructor_sliced):
            reindex_kwargs = raxes_sm.copy()
            reindex_kwargs['copy'] = False

            aligned = values.reindex(**reindex_kwargs)
            if dtype is not None:
                aligned = aligned.astype(dtype)
            values = aligned.values
        arrays.append(values)

    return self._init_arrays(arrays, haxis, [haxis] + raxes)
def _init_arrays(self, arrays, arr_names, axes):
    """
    Hand ``arrays``, ``arr_names`` and ``axes`` straight to
    ``create_block_manager_from_arrays`` and return its result.
    """
    manager = create_block_manager_from_arrays(arrays, arr_names, axes)
    return manager
@classmethod
def from_dict(cls, data, intersect=False, orient='items', dtype=None):
    """
    Construct Panel from dict of DataFrame objects.

    Parameters
    ----------
    data : dict
        {field : DataFrame}
    intersect : boolean
        Intersect indexes of input DataFrames
    orient : {'items', 'minor'}, default 'items'
        The "orientation" of the data. If the keys of the passed dict
        should be the items of the result panel, pass 'items'
        (default). Otherwise if the columns of the values of the passed
        DataFrame objects should be the items (which in the case of
        mixed-dtype data you should do), instead pass 'minor'
    dtype : dtype, default None
        Data type to force, otherwise infer

    Returns
    -------
    Panel

    Raises
    ------
    ValueError
        If ``orient`` (compared case-insensitively) is neither 'items'
        nor 'minor'.
    """
    from collections import defaultdict

    orient = orient.lower()
    if orient not in ('items', 'minor'):  # pragma: no cover
        raise ValueError('Orientation must be one of {items, minor}.')

    if orient == 'minor':
        # Swap the two key levels: {col: {item: s}} -> {item: {col: s}}.
        regrouped = defaultdict(OrderedDict)
        for col, df in compat.iteritems(data):
            for item, s in compat.iteritems(df):
                regrouped[item][col] = s
        data = regrouped

    init_kwargs = cls._homogenize_dict(cls, data, intersect=intersect,
                                       dtype=dtype)
    homogenized = init_kwargs['data']
    labels = list(homogenized.keys())
    # Only an OrderedDict carries a meaningful key order; otherwise sort.
    if not isinstance(homogenized, OrderedDict):
        labels = sorted(labels)
    init_kwargs[cls._info_axis_name] = Index(labels)
    return cls(**init_kwargs)
def __getitem__(self, key):
    """
    Look up ``key`` on the info axis.

    A callable key is first resolved through ``com.apply_if_callable``.
    With a MultiIndex info axis the lookup goes to
    ``_getitem_multilevel``; otherwise list-like and slice keys are
    routed through ``.loc`` and any other key through the parent class.
    """
    key = com.apply_if_callable(key, self)

    if isinstance(self._info_axis, MultiIndex):
        return self._getitem_multilevel(key)

    if is_list_like(key) or isinstance(key, slice):
        return self.loc[key]
    return super(Panel, self).__getitem__(key)
def _getitem_multilevel(self, key):
    """
    Select ``key`` from a MultiIndex info axis.

    When ``get_loc`` resolves ``key`` to a slice or an ndarray, a new
    object is built from the selected values, with the info axis
    replaced by the result of ``maybe_droplevels`` on the matched
    labels. Any other location falls back to ``_get_item_cache``.
    """
    info = self._info_axis
    loc = info.get_loc(key)
    if not isinstance(loc, (slice, np.ndarray)):
        return self._get_item_cache(key)

    new_index = info[loc]
    result_index = maybe_droplevels(new_index, key)

    # Index with a tuple, not a list: NumPy deprecated treating a list of
    # slices as a multidimensional index (1.15) and later removed it.
    indexer = (loc,) + (slice(None),) * (self._AXIS_LEN - 1)
    new_values = self.values[indexer]

    d = self._construct_axes_dict(self._AXIS_ORDERS[1:])
    d[self._info_axis_name] = result_index
    return self._constructor(new_values, **d)
def _init_matrix(self, data, axes, dtype=None, copy=False):
    """
    Build a block manager from array-like ``data``.

    ``data`` is first passed through ``self._prep_ndarray`` (defined
    outside the visible source) and optionally cast to ``dtype``. Each
    axis that is ``None`` gets a default index sized from the matching
    dimension of the values; the others go through ``ensure_index``.

    Raises
    ------
    ValueError
        If casting the values to ``dtype`` fails for any reason.
    """
    values = self._prep_ndarray(self, data, copy=copy)

    if dtype is not None:
        try:
            values = values.astype(dtype)
        except Exception:
            message = 'failed to cast to {datatype}'.format(datatype=dtype)
            raise ValueError(message)

    shape = values.shape
    fixed_axes = [
        ensure_index(ax) if ax is not None
        else ibase.default_index(shape[i])
        for i, ax in enumerate(axes)
    ]

    return create_block_manager_from_blocks([values], fixed_axes)
# ----------------------------------------------------------------------
# Comparison methods
def _compare_constructor(self, other, func):
    """
    Apply ``func`` item by item to ``self`` and ``other``.

    Both objects must pass ``self._indexed_same(other)``; otherwise an
    ``Exception`` is raised. The per-item results are assembled into a
    new object via ``self._constructor`` using this object's axes.
    """
    if not self._indexed_same(other):
        raise Exception('Can only compare identically-labeled '
                        'same type objects')

    compared = {}
    for col in self._info_axis:
        compared[col] = func(self[col], other[col])

    axes_kwargs = self._construct_axes_dict(copy=False)
    return self._constructor(data=compared, **axes_kwargs)
# ----------------------------------------------------------------------
Loading ...