Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

agriconnect / pandas   python

Repository URL to install this package:

/ core / indexing.py

# pylint: disable=W0223
import textwrap
import warnings

import numpy as np

from pandas._libs.indexing import _NDFrameIndexerBase
import pandas.compat as compat
from pandas.compat import range, zip
from pandas.errors import AbstractMethodError
from pandas.util._decorators import Appender

from pandas.core.dtypes.common import (
    ensure_platform_int, is_float, is_integer, is_integer_dtype, is_iterator,
    is_list_like, is_scalar, is_sequence, is_sparse)
from pandas.core.dtypes.generic import ABCDataFrame, ABCPanel, ABCSeries
from pandas.core.dtypes.missing import _infer_fill_value, isna

import pandas.core.common as com
from pandas.core.index import Index, MultiIndex


# the supported indexers
def get_indexers_list():
    """
    Return the supported indexers as ``(name, indexer class)`` pairs.

    Returns
    -------
    list of tuple
        One ``(str, class)`` 2-tuple per indexer, in the order
        ``ix``, ``iloc``, ``loc``, ``at``, ``iat``.
    """
    names = ('ix', 'iloc', 'loc', 'at', 'iat')
    classes = (_IXIndexer, _iLocIndexer, _LocIndexer, _AtIndexer,
               _iAtIndexer)
    return list(zip(names, classes))


# "null slice"
_NS = slice(None, None)


# the public IndexSlicerMaker
class _IndexSlice(object):
    """
    Create an object to more easily perform multi-index slicing.

    Indexing an instance returns the key unchanged, so the ``a:b`` slice
    syntax (only legal inside square brackets) can be used to build the
    tuples of slices that are passed to ``.loc``.

    See Also
    --------
    MultiIndex.remove_unused_levels : New MultiIndex with no unused levels.

    Notes
    -----
    See :ref:`Defined Levels <advanced.shown_levels>`
    for further info on slicing a MultiIndex.

    Examples
    --------

    >>> midx = pd.MultiIndex.from_product([['A0','A1'], ['B0','B1','B2','B3']])
    >>> columns = ['foo', 'bar']
    >>> dfmi = pd.DataFrame(np.arange(16).reshape((len(midx), len(columns))),
    ...                     index=midx, columns=columns)

    Using the default slice command:

    >>> dfmi.loc[(slice(None), slice('B0', 'B1')), :]
               foo  bar
        A0 B0    0    1
           B1    2    3
        A1 B0    8    9
           B1   10   11

    Using the IndexSlice class for a more intuitive command:

    >>> idx = pd.IndexSlice
    >>> dfmi.loc[idx[:, 'B0':'B1'], :]
               foo  bar
        A0 B0    0    1
           B1    2    3
        A1 B0    8    9
           B1   10   11
    """

    def __getitem__(self, arg):
        # Python has already turned ``a:b`` into ``slice(a, b)`` and a
        # comma-separated key into a tuple; hand it straight back.
        return arg


# module-level instance, so callers can write ``IndexSlice[...]`` directly
IndexSlice = _IndexSlice()


class IndexingError(Exception):
    """Raised by the indexers in this module, e.g. for too many indexers."""


class _NDFrameIndexer(_NDFrameIndexerBase):
    # Formatted into the ValueError message raised by ``_has_valid_tuple``.
    # NOTE(review): presumably overridden by concrete indexers -- confirm.
    _valid_types = None
    # NOTE(review): not read by the methods visible in this part of the
    # file; presumably the lookup-failure exception type -- confirm.
    _exception = KeyError
    # Axis number selected through ``__call__``; None when none was chosen.
    axis = None

    def __call__(self, axis=None):
        # we need to return a copy of ourselves
        new_self = self.__class__(self.name, self.obj)

        if axis is not None:
            axis = self.obj._get_axis_number(axis)
        new_self.axis = axis
        return new_self

    def __iter__(self):
        raise NotImplementedError('ix is not iterable')

    def __getitem__(self, key):
        """
        Look up ``key`` on the wrapped object.

        Callable keys (or callable elements of a tuple key) are first
        resolved with ``com.apply_if_callable`` against ``self.obj``.

        A plain tuple is first tried as a direct ``self.obj._get_value``
        lookup; if that yields a scalar it is returned, otherwise (or on
        any error) the key goes to ``self._getitem_tuple``. Any other key
        is dispatched to ``self._getitem_axis`` on ``self.axis or 0``.
        """
        if type(key) is not tuple:
            # a single key addresses exactly one axis (the 0th by default)
            axis = self.axis or 0
            key = com.apply_if_callable(key, self.obj)
            return self._getitem_axis(key, axis=axis)

        key = tuple(com.apply_if_callable(part, self.obj)
                    for part in key)

        # best-effort fast path; any failure falls through to the
        # general tuple handling below
        try:
            scalar = self.obj._get_value(*key)
            if is_scalar(scalar):
                return scalar
        except Exception:
            pass

        return self._getitem_tuple(key)

    def _get_label(self, label, axis=None):
        if axis is None:
            axis = self.axis or 0

        if self.ndim == 1:
            # for perf reasons we want to try _xs first
            # as its basically direct indexing
            # but will fail when the index is not present
            # see GH5667
            return self.obj._xs(label, axis=axis)
        elif isinstance(label, tuple) and isinstance(label[axis], slice):
            raise IndexingError('no slices here, handle elsewhere')

        return self.obj._xs(label, axis=axis)

    def _get_loc(self, key, axis=None):
        if axis is None:
            axis = self.axis
        return self.obj._ixs(key, axis=axis)

    def _slice(self, obj, axis=None, kind=None):
        if axis is None:
            axis = self.axis
        return self.obj._slice(obj, axis=axis, kind=kind)

    def _get_setitem_indexer(self, key):
        if self.axis is not None:
            return self._convert_tuple(key, is_setter=True)

        axis = self.obj._get_axis(0)

        if isinstance(axis, MultiIndex) and self.name != 'iloc':
            try:
                return axis.get_loc(key)
            except Exception:
                pass

        if isinstance(key, tuple):
            try:
                return self._convert_tuple(key, is_setter=True)
            except IndexingError:
                pass

        if isinstance(key, range):
            return self._convert_range(key, is_setter=True)

        try:
            return self._convert_to_indexer(key, is_setter=True)
        except TypeError as e:

            # invalid indexer type vs 'other' indexing errors
            if 'cannot do' in str(e):
                raise
            raise IndexingError(key)

    def __setitem__(self, key, value):
        """
        Assign ``value`` at ``key``.

        Callable keys (or callable elements of a tuple key) are resolved
        with ``com.apply_if_callable`` against ``self.obj``; the result is
        converted by ``_get_setitem_indexer`` and applied through
        ``_setitem_with_indexer``.
        """
        if not isinstance(key, tuple):
            key = com.apply_if_callable(key, self.obj)
        else:
            key = tuple(com.apply_if_callable(part, self.obj)
                        for part in key)
        self._setitem_with_indexer(self._get_setitem_indexer(key), value)

    def _validate_key(self, key, axis):
        """
        Ensure that key is valid for current indexer.

        Parameters
        ----------
        key : scalar, slice or list-like
            The key requested

        axis : int
            Dimension on which the indexing is being made

        Raises
        ------
        TypeError
            If the key (or some element of it) has wrong type

        IndexError
            If the key (or some element of it) is out of bounds

        KeyError
            If the key was not found
        """
        raise AbstractMethodError()

    def _has_valid_tuple(self, key):
        """ check the key for valid keys across my indexer """
        for i, k in enumerate(key):
            if i >= self.obj.ndim:
                raise IndexingError('Too many indexers')
            try:
                self._validate_key(k, i)
            except ValueError:
                raise ValueError("Location based indexing can only have "
                                 "[{types}] types"
                                 .format(types=self._valid_types))

    def _is_nested_tuple_indexer(self, tup):
        """
        Return True if ``is_nested_tuple(tup, ax)`` holds for any axis.

        The check only runs when at least one axis of ``self.obj`` is a
        ``MultiIndex``; otherwise the result is False.
        """
        has_multi = any(isinstance(ax, MultiIndex) for ax in self.obj.axes)
        if not has_multi:
            return False
        return any(is_nested_tuple(tup, ax) for ax in self.obj.axes)

    def _convert_tuple(self, key, is_setter=False):
        keyidx = []
        if self.axis is not None:
            axis = self.obj._get_axis_number(self.axis)
            for i in range(self.ndim):
                if i == axis:
                    keyidx.append(self._convert_to_indexer(
                        key, axis=axis, is_setter=is_setter))
                else:
                    keyidx.append(slice(None))
        else:
            for i, k in enumerate(key):
                if i >= self.obj.ndim:
                    raise IndexingError('Too many indexers')
                idx = self._convert_to_indexer(k, axis=i, is_setter=is_setter)
                keyidx.append(idx)
        return tuple(keyidx)

    def _convert_range(self, key, is_setter=False):
        """ convert a range argument """
        return list(key)

    def _convert_scalar_indexer(self, key, axis):
        # if we are accessing via lowered dim, use the last dim
        if axis is None:
            axis = 0
        ax = self.obj._get_axis(min(axis, self.ndim - 1))
        # a scalar
        return ax._convert_scalar_indexer(key, kind=self.name)

    def _convert_slice_indexer(self, key, axis):
        # if we are accessing via lowered dim, use the last dim
        ax = self.obj._get_axis(min(axis, self.ndim - 1))
        return ax._convert_slice_indexer(key, kind=self.name)

    def _has_valid_setitem_indexer(self, indexer):
        return True

    def _has_valid_positional_setitem_indexer(self, indexer):
        """ validate that an positional indexer cannot enlarge its target
        will raise if needed, does not modify the indexer externally
        """
        if isinstance(indexer, dict):
            raise IndexError("{0} cannot enlarge its target object"
                             .format(self.name))
        else:
            if not isinstance(indexer, tuple):
                indexer = self._tuplify(indexer)
            for ax, i in zip(self.obj.axes, indexer):
                if isinstance(i, slice):
                    # should check the stop slice?
                    pass
                elif is_list_like_indexer(i):
                    # should check the elements?
                    pass
                elif is_integer(i):
                    if i >= len(ax):
                        raise IndexError("{name} cannot enlarge its target "
                                         "object".format(name=self.name))
                elif isinstance(i, dict):
                    raise IndexError("{name} cannot enlarge its target object"
                                     .format(name=self.name))

        return True

    def _setitem_with_indexer(self, indexer, value):
        self._has_valid_setitem_indexer(indexer)

        # also has the side effect of consolidating in-place
        from pandas import Series
        info_axis = self.obj._info_axis_number

        # maybe partial set
        take_split_path = self.obj._is_mixed_type

        # if there is only one block/type, still have to take split path
        # unless the block is one-dimensional or it can hold the value
        if not take_split_path and self.obj._data.blocks:
            blk, = self.obj._data.blocks
            if 1 < blk.ndim:  # in case of dict, keys are indices
                val = list(value.values()) if isinstance(value,
                                                         dict) else value
                take_split_path = not blk._can_hold_element(val)

        if isinstance(indexer, tuple) and len(indexer) == len(self.obj.axes):

            for i, ax in zip(indexer, self.obj.axes):

                # if we have any multi-indexes that have non-trivial slices
                # (not null slices) then we must take the split path, xref
                # GH 10360
                if (isinstance(ax, MultiIndex) and
                        not (is_integer(i) or com.is_null_slice(i))):
                    take_split_path = True
                    break

        if isinstance(indexer, tuple):
            nindexer = []
            for i, idx in enumerate(indexer):
                if isinstance(idx, dict):

                    # reindex the axis to the new value
                    # and set inplace
                    key, _ = convert_missing_indexer(idx)

                    # if this is the items axes, then take the main missing
                    # path first
                    # this correctly sets the dtype and avoids cache issues
                    # essentially this separates out the block that is needed
                    # to possibly be modified
                    if self.ndim > 1 and i == self.obj._info_axis_number:
Loading ...