Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

agriconnect / pandas   python

Repository URL to install this package:

/ core / internals / blocks.py

# -*- coding: utf-8 -*-
from datetime import date, datetime, timedelta
import functools
import inspect
import re
import warnings

import numpy as np

from pandas._libs import internals as libinternals, lib, tslib, tslibs
from pandas._libs.tslibs import Timedelta, conversion, is_null_datetimelike
import pandas.compat as compat
from pandas.compat import range, zip
from pandas.util._validators import validate_bool_kwarg

from pandas.core.dtypes.cast import (
    astype_nansafe, find_common_type, infer_dtype_from,
    infer_dtype_from_scalar, maybe_convert_objects, maybe_downcast_to_dtype,
    maybe_infer_dtype_type, maybe_promote, maybe_upcast, soft_convert_objects)
from pandas.core.dtypes.common import (
    _NS_DTYPE, _TD_DTYPE, ensure_platform_int, is_bool_dtype, is_categorical,
    is_categorical_dtype, is_datetime64_dtype, is_datetime64tz_dtype,
    is_dtype_equal, is_extension_array_dtype, is_extension_type,
    is_float_dtype, is_integer, is_integer_dtype, is_interval_dtype,
    is_list_like, is_numeric_v_string_like, is_object_dtype, is_period_dtype,
    is_re, is_re_compilable, is_sparse, is_timedelta64_dtype, pandas_dtype)
import pandas.core.dtypes.concat as _concat
from pandas.core.dtypes.dtypes import (
    CategoricalDtype, ExtensionDtype, PandasExtensionDtype)
from pandas.core.dtypes.generic import (
    ABCDataFrame, ABCDatetimeIndex, ABCExtensionArray, ABCIndexClass,
    ABCSeries)
from pandas.core.dtypes.missing import (
    _isna_compat, array_equivalent, isna, notna)

import pandas.core.algorithms as algos
from pandas.core.arrays import (
    Categorical, DatetimeArray, ExtensionArray, TimedeltaArray)
from pandas.core.base import PandasObject
import pandas.core.common as com
from pandas.core.indexes.datetimes import DatetimeIndex
from pandas.core.indexing import check_setitem_lengths
from pandas.core.internals.arrays import extract_array
import pandas.core.missing as missing
from pandas.core.nanops import nanpercentile

from pandas.io.formats.printing import pprint_thing


class Block(PandasObject):
    """
    Canonical n-dimensional unit of homogeneous dtype contained in a pandas
    data structure

    Index-ignorant; let the container take care of that
    """
    __slots__ = ['_mgr_locs', 'values', 'ndim']
    is_numeric = False
    is_float = False
    is_integer = False
    is_complex = False
    is_datetime = False
    is_datetimetz = False
    is_timedelta = False
    is_bool = False
    is_object = False
    is_categorical = False
    is_sparse = False
    is_extension = False
    _box_to_block_values = True
    _can_hold_na = False
    _can_consolidate = True
    _verify_integrity = True
    _validate_ndim = True
    _ftype = 'dense'
    _concatenator = staticmethod(np.concatenate)

    def __init__(self, values, placement, ndim=None):
        self.ndim = self._check_ndim(values, ndim)
        self.mgr_locs = placement
        self.values = values

        if (self._validate_ndim and self.ndim and
                len(self.mgr_locs) != len(self.values)):
            raise ValueError(
                'Wrong number of items passed {val}, placement implies '
                '{mgr}'.format(val=len(self.values), mgr=len(self.mgr_locs)))

    def _check_ndim(self, values, ndim):
        """ndim inference and validation.

        Infers ndim from 'values' if not provided to __init__.
        Validates that values.ndim and ndim are consistent if and only if
        the class variable '_validate_ndim' is True.

        Parameters
        ----------
        values : array-like
        ndim : int or None

        Returns
        -------
        ndim : int

        Raises
        ------
        ValueError : the number of dimensions do not match
        """
        if ndim is None:
            ndim = values.ndim

        if self._validate_ndim and values.ndim != ndim:
            msg = ("Wrong number of dimensions. values.ndim != ndim "
                   "[{} != {}]")
            raise ValueError(msg.format(values.ndim, ndim))

        return ndim

    @property
    def _holder(self):
        """The array-like that can hold the underlying values.

        None for 'Block', overridden by subclasses that don't
        use an ndarray.
        """
        return None

    @property
    def _consolidate_key(self):
        return (self._can_consolidate, self.dtype.name)

    @property
    def _is_single_block(self):
        return self.ndim == 1

    @property
    def is_view(self):
        """ return a boolean if I am possibly a view """
        return self.values.base is not None

    @property
    def is_datelike(self):
        """ return True if I am a non-datelike """
        return self.is_datetime or self.is_timedelta

    def is_categorical_astype(self, dtype):
        """
        validate that we have a astypeable to categorical,
        returns a boolean if we are a categorical
        """
        if dtype is Categorical or dtype is CategoricalDtype:
            # this is a pd.Categorical, but is not
            # a valid type for astypeing
            raise TypeError("invalid type {0} for astype".format(dtype))

        elif is_categorical_dtype(dtype):
            return True

        return False

    def external_values(self, dtype=None):
        """ return an outside world format, currently just the ndarray """
        return self.values

    def internal_values(self, dtype=None):
        """ return an internal format, currently just the ndarray
        this should be the pure internal API format
        """
        return self.values

    def formatting_values(self):
        """Return the internal values used by the DataFrame/SeriesFormatter"""
        return self.internal_values()

    def get_values(self, dtype=None):
        """
        return an internal format, currently just the ndarray
        this is often overridden to handle to_dense like operations
        """
        if is_object_dtype(dtype):
            return self.values.astype(object)
        return self.values

    def to_dense(self):
        return self.values.view()

    @property
    def _na_value(self):
        return np.nan

    @property
    def fill_value(self):
        return np.nan

    @property
    def mgr_locs(self):
        return self._mgr_locs

    @mgr_locs.setter
    def mgr_locs(self, new_mgr_locs):
        if not isinstance(new_mgr_locs, libinternals.BlockPlacement):
            new_mgr_locs = libinternals.BlockPlacement(new_mgr_locs)

        self._mgr_locs = new_mgr_locs

    @property
    def array_dtype(self):
        """ the dtype to return if I want to construct this block as an
        array
        """
        return self.dtype

    def make_block(self, values, placement=None, ndim=None):
        """
        Create a new block, with type inference propagate any values that are
        not specified
        """
        if placement is None:
            placement = self.mgr_locs
        if ndim is None:
            ndim = self.ndim

        return make_block(values, placement=placement, ndim=ndim)

    def make_block_same_class(self, values, placement=None, ndim=None,
                              dtype=None):
        """ Wrap given values in a block of same type as self. """
        if dtype is not None:
            # issue 19431 fastparquet is passing this
            warnings.warn("dtype argument is deprecated, will be removed "
                          "in a future release.", DeprecationWarning)
        if placement is None:
            placement = self.mgr_locs
        return make_block(values, placement=placement, ndim=ndim,
                          klass=self.__class__, dtype=dtype)

    def __unicode__(self):

        # don't want to print out all of the items here
        name = pprint_thing(self.__class__.__name__)
        if self._is_single_block:

            result = '{name}: {len} dtype: {dtype}'.format(
                name=name, len=len(self), dtype=self.dtype)

        else:

            shape = ' x '.join(pprint_thing(s) for s in self.shape)
            result = '{name}: {index}, {shape}, dtype: {dtype}'.format(
                name=name, index=pprint_thing(self.mgr_locs.indexer),
                shape=shape, dtype=self.dtype)

        return result

    def __len__(self):
        return len(self.values)

    def __getstate__(self):
        return self.mgr_locs.indexer, self.values

    def __setstate__(self, state):
        self.mgr_locs = libinternals.BlockPlacement(state[0])
        self.values = state[1]
        self.ndim = self.values.ndim

    def _slice(self, slicer):
        """ return a slice of my values """
        return self.values[slicer]

    def reshape_nd(self, labels, shape, ref_items):
        """
        Parameters
        ----------
        labels : list of new axis labels
        shape : new shape
        ref_items : new ref_items

        return a new block that is transformed to a nd block
        """
        return _block2d_to_blocknd(values=self.get_values().T,
                                   placement=self.mgr_locs, shape=shape,
                                   labels=labels, ref_items=ref_items)

    def getitem_block(self, slicer, new_mgr_locs=None):
        """
        Perform __getitem__-like, return result as block.

        As of now, only supports slices that preserve dimensionality.
        """
        if new_mgr_locs is None:
            if isinstance(slicer, tuple):
                axis0_slicer = slicer[0]
            else:
                axis0_slicer = slicer
            new_mgr_locs = self.mgr_locs[axis0_slicer]

        new_values = self._slice(slicer)

        if self._validate_ndim and new_values.ndim != self.ndim:
            raise ValueError("Only same dim slicing is allowed")

        return self.make_block_same_class(new_values, new_mgr_locs)

    @property
    def shape(self):
        return self.values.shape

    @property
    def dtype(self):
        return self.values.dtype

    @property
    def ftype(self):
        if getattr(self.values, '_pandas_ftype', False):
            dtype = self.dtype.subtype
        else:
            dtype = self.dtype
        return "{dtype}:{ftype}".format(dtype=dtype, ftype=self._ftype)

    def merge(self, other):
        return _merge_blocks([self, other])

    def concat_same_type(self, to_concat, placement=None):
        """
        Concatenate list of single blocks of the same type.
        """
        values = self._concatenator([blk.values for blk in to_concat],
                                    axis=self.ndim - 1)
        return self.make_block_same_class(
            values, placement=placement or slice(0, len(values), 1))

    def iget(self, i):
        return self.values[i]

    def set(self, locs, values):
        """
        Modify Block in-place with new item value

        Returns
        -------
        None
        """
        self.values[locs] = values

    def delete(self, loc):
Loading ...