# -*- coding: utf-8 -*-
from datetime import date, datetime, timedelta
import functools
import inspect
import re
import warnings
import numpy as np
from pandas._libs import internals as libinternals, lib, tslib, tslibs
from pandas._libs.tslibs import Timedelta, conversion, is_null_datetimelike
import pandas.compat as compat
from pandas.compat import range, zip
from pandas.util._validators import validate_bool_kwarg
from pandas.core.dtypes.cast import (
astype_nansafe, find_common_type, infer_dtype_from,
infer_dtype_from_scalar, maybe_convert_objects, maybe_downcast_to_dtype,
maybe_infer_dtype_type, maybe_promote, maybe_upcast, soft_convert_objects)
from pandas.core.dtypes.common import (
_NS_DTYPE, _TD_DTYPE, ensure_platform_int, is_bool_dtype, is_categorical,
is_categorical_dtype, is_datetime64_dtype, is_datetime64tz_dtype,
is_dtype_equal, is_extension_array_dtype, is_extension_type,
is_float_dtype, is_integer, is_integer_dtype, is_interval_dtype,
is_list_like, is_numeric_v_string_like, is_object_dtype, is_period_dtype,
is_re, is_re_compilable, is_sparse, is_timedelta64_dtype, pandas_dtype)
import pandas.core.dtypes.concat as _concat
from pandas.core.dtypes.dtypes import (
CategoricalDtype, ExtensionDtype, PandasExtensionDtype)
from pandas.core.dtypes.generic import (
ABCDataFrame, ABCDatetimeIndex, ABCExtensionArray, ABCIndexClass,
ABCSeries)
from pandas.core.dtypes.missing import (
_isna_compat, array_equivalent, isna, notna)
import pandas.core.algorithms as algos
from pandas.core.arrays import (
Categorical, DatetimeArray, ExtensionArray, TimedeltaArray)
from pandas.core.base import PandasObject
import pandas.core.common as com
from pandas.core.indexes.datetimes import DatetimeIndex
from pandas.core.indexing import check_setitem_lengths
from pandas.core.internals.arrays import extract_array
import pandas.core.missing as missing
from pandas.core.nanops import nanpercentile
from pandas.io.formats.printing import pprint_thing
class Block(PandasObject):
"""
Canonical n-dimensional unit of homogeneous dtype contained in a pandas
data structure
Index-ignorant; let the container take care of that
"""
__slots__ = ['_mgr_locs', 'values', 'ndim']
is_numeric = False
is_float = False
is_integer = False
is_complex = False
is_datetime = False
is_datetimetz = False
is_timedelta = False
is_bool = False
is_object = False
is_categorical = False
is_sparse = False
is_extension = False
_box_to_block_values = True
_can_hold_na = False
_can_consolidate = True
_verify_integrity = True
_validate_ndim = True
_ftype = 'dense'
_concatenator = staticmethod(np.concatenate)
def __init__(self, values, placement, ndim=None):
self.ndim = self._check_ndim(values, ndim)
self.mgr_locs = placement
self.values = values
if (self._validate_ndim and self.ndim and
len(self.mgr_locs) != len(self.values)):
raise ValueError(
'Wrong number of items passed {val}, placement implies '
'{mgr}'.format(val=len(self.values), mgr=len(self.mgr_locs)))
def _check_ndim(self, values, ndim):
"""ndim inference and validation.
Infers ndim from 'values' if not provided to __init__.
Validates that values.ndim and ndim are consistent if and only if
the class variable '_validate_ndim' is True.
Parameters
----------
values : array-like
ndim : int or None
Returns
-------
ndim : int
Raises
------
ValueError : the number of dimensions do not match
"""
if ndim is None:
ndim = values.ndim
if self._validate_ndim and values.ndim != ndim:
msg = ("Wrong number of dimensions. values.ndim != ndim "
"[{} != {}]")
raise ValueError(msg.format(values.ndim, ndim))
return ndim
@property
def _holder(self):
"""The array-like that can hold the underlying values.
None for 'Block', overridden by subclasses that don't
use an ndarray.
"""
return None
@property
def _consolidate_key(self):
return (self._can_consolidate, self.dtype.name)
@property
def _is_single_block(self):
return self.ndim == 1
@property
def is_view(self):
""" return a boolean if I am possibly a view """
return self.values.base is not None
@property
def is_datelike(self):
""" return True if I am a non-datelike """
return self.is_datetime or self.is_timedelta
def is_categorical_astype(self, dtype):
"""
validate that we have a astypeable to categorical,
returns a boolean if we are a categorical
"""
if dtype is Categorical or dtype is CategoricalDtype:
# this is a pd.Categorical, but is not
# a valid type for astypeing
raise TypeError("invalid type {0} for astype".format(dtype))
elif is_categorical_dtype(dtype):
return True
return False
def external_values(self, dtype=None):
""" return an outside world format, currently just the ndarray """
return self.values
def internal_values(self, dtype=None):
""" return an internal format, currently just the ndarray
this should be the pure internal API format
"""
return self.values
def formatting_values(self):
"""Return the internal values used by the DataFrame/SeriesFormatter"""
return self.internal_values()
def get_values(self, dtype=None):
"""
return an internal format, currently just the ndarray
this is often overridden to handle to_dense like operations
"""
if is_object_dtype(dtype):
return self.values.astype(object)
return self.values
def to_dense(self):
return self.values.view()
@property
def _na_value(self):
return np.nan
@property
def fill_value(self):
return np.nan
@property
def mgr_locs(self):
return self._mgr_locs
@mgr_locs.setter
def mgr_locs(self, new_mgr_locs):
if not isinstance(new_mgr_locs, libinternals.BlockPlacement):
new_mgr_locs = libinternals.BlockPlacement(new_mgr_locs)
self._mgr_locs = new_mgr_locs
@property
def array_dtype(self):
""" the dtype to return if I want to construct this block as an
array
"""
return self.dtype
def make_block(self, values, placement=None, ndim=None):
"""
Create a new block, with type inference propagate any values that are
not specified
"""
if placement is None:
placement = self.mgr_locs
if ndim is None:
ndim = self.ndim
return make_block(values, placement=placement, ndim=ndim)
def make_block_same_class(self, values, placement=None, ndim=None,
dtype=None):
""" Wrap given values in a block of same type as self. """
if dtype is not None:
# issue 19431 fastparquet is passing this
warnings.warn("dtype argument is deprecated, will be removed "
"in a future release.", DeprecationWarning)
if placement is None:
placement = self.mgr_locs
return make_block(values, placement=placement, ndim=ndim,
klass=self.__class__, dtype=dtype)
def __unicode__(self):
# don't want to print out all of the items here
name = pprint_thing(self.__class__.__name__)
if self._is_single_block:
result = '{name}: {len} dtype: {dtype}'.format(
name=name, len=len(self), dtype=self.dtype)
else:
shape = ' x '.join(pprint_thing(s) for s in self.shape)
result = '{name}: {index}, {shape}, dtype: {dtype}'.format(
name=name, index=pprint_thing(self.mgr_locs.indexer),
shape=shape, dtype=self.dtype)
return result
def __len__(self):
return len(self.values)
def __getstate__(self):
return self.mgr_locs.indexer, self.values
def __setstate__(self, state):
self.mgr_locs = libinternals.BlockPlacement(state[0])
self.values = state[1]
self.ndim = self.values.ndim
def _slice(self, slicer):
""" return a slice of my values """
return self.values[slicer]
def reshape_nd(self, labels, shape, ref_items):
"""
Parameters
----------
labels : list of new axis labels
shape : new shape
ref_items : new ref_items
return a new block that is transformed to a nd block
"""
return _block2d_to_blocknd(values=self.get_values().T,
placement=self.mgr_locs, shape=shape,
labels=labels, ref_items=ref_items)
def getitem_block(self, slicer, new_mgr_locs=None):
"""
Perform __getitem__-like, return result as block.
As of now, only supports slices that preserve dimensionality.
"""
if new_mgr_locs is None:
if isinstance(slicer, tuple):
axis0_slicer = slicer[0]
else:
axis0_slicer = slicer
new_mgr_locs = self.mgr_locs[axis0_slicer]
new_values = self._slice(slicer)
if self._validate_ndim and new_values.ndim != self.ndim:
raise ValueError("Only same dim slicing is allowed")
return self.make_block_same_class(new_values, new_mgr_locs)
@property
def shape(self):
return self.values.shape
@property
def dtype(self):
return self.values.dtype
@property
def ftype(self):
if getattr(self.values, '_pandas_ftype', False):
dtype = self.dtype.subtype
else:
dtype = self.dtype
return "{dtype}:{ftype}".format(dtype=dtype, ftype=self._ftype)
def merge(self, other):
return _merge_blocks([self, other])
def concat_same_type(self, to_concat, placement=None):
"""
Concatenate list of single blocks of the same type.
"""
values = self._concatenator([blk.values for blk in to_concat],
axis=self.ndim - 1)
return self.make_block_same_class(
values, placement=placement or slice(0, len(values), 1))
def iget(self, i):
return self.values[i]
def set(self, locs, values):
"""
Modify Block in-place with new item value
Returns
-------
None
"""
self.values[locs] = values
def delete(self, loc):
Loading ...