Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

aaronreidsmith / pandas   python

Repository URL to install this package:

Version: 0.25.3 

/ tests / indexes / test_common.py

"""
Collection of tests asserting things that should be true for
any index subclass. Makes use of the `indices` fixture defined
in pandas/tests/indexes/conftest.py.
"""
import re

import numpy as np
import pytest

from pandas._libs.tslibs import iNaT

from pandas.core.dtypes.common import needs_i8_conversion

import pandas as pd
from pandas import CategoricalIndex, MultiIndex, RangeIndex
import pandas.util.testing as tm


class TestCommon:
    def test_droplevel(self, indices):
        """Check ``droplevel`` on a flat (non-MultiIndex) index.

        Dropping an empty list of levels must give back an equal index,
        dropping the index's own name (bare or wrapped in a list) must raise
        ``ValueError``, and asking for the unknown level ``"wrong"`` must
        raise ``KeyError``.
        """
        # GH 21115
        if isinstance(indices, MultiIndex):
            # Report a skip, like the other tests in this class, instead of a
            # silent pass for a case that is not exercised here.
            pytest.skip("MultiIndex is tested separately in test_multi.py")

        assert indices.droplevel([]).equals(indices)

        for level in indices.name, [indices.name]:
            if isinstance(indices.name, tuple) and level is indices.name:
                # GH 21121 : droplevel with tuple name
                continue
            with pytest.raises(ValueError):
                indices.droplevel(level)

        for level in "wrong", ["wrong"]:
            with pytest.raises(
                KeyError,
                match=r"'Requested level \(wrong\) does not match index name \(None\)'",
            ):
                indices.droplevel(level)

    def test_constructor_non_hashable_name(self, indices):
        """Giving a flat index an unhashable name must raise ``TypeError``."""
        # GH 20527
        if isinstance(indices, MultiIndex):
            pytest.skip("multiindex handled in test_multi.py")

        unhashable = [["1"]]
        expected_msg = "Index.name must be a hashable type"

        # Both renaming entry points have to reject the name:
        # first .rename(), then .set_names()
        attempts = (
            lambda: indices.rename(name=unhashable),
            lambda: indices.set_names(names=unhashable),
        )
        for attempt in attempts:
            with pytest.raises(TypeError, match=expected_msg):
                attempt()

    def test_constructor_unwraps_index(self, indices):
        """Rebuilding an index from itself must give the same ``_data``."""
        if isinstance(indices, pd.MultiIndex):
            # pytest.skip() raises by itself, so no ``raise`` goes in front.
            pytest.skip("MultiIndex has no ._data")
        a = indices
        # Feed the index to its own constructor and compare the backing data.
        b = type(a)(a)
        tm.assert_equal(a._data, b._data)

    @pytest.mark.parametrize("itm", [101, "no_int"])
    # FutureWarning from non-tuple sequence of nd indexing
    @pytest.mark.filterwarnings("ignore::FutureWarning")
    def test_getitem_error(self, indices, itm):
        """Indexing with each parametrized ``itm`` must raise ``IndexError``."""
        expected_error = IndexError
        with pytest.raises(expected_error):
            indices[itm]

    @pytest.mark.parametrize(
        "fname, sname, expected_name",
        [
            ("A", "A", "A"),
            ("A", "B", None),
            ("A", None, None),
            (None, "B", None),
            (None, None, None),
        ],
    )
    def test_corner_union(self, indices, fname, sname, expected_name):
        """Check the name of a union for full/empty operand combinations.

        ``fname`` and ``sname`` are the names given to the two operands and
        ``expected_name`` is the name the union has to carry.
        """
        # GH 9943 9862
        # Do not test MultiIndex or repeats
        if isinstance(indices, MultiIndex) or not indices.is_unique:
            pytest.skip("Not for MultiIndex or repeated indices")

        def full():
            return indices.copy()

        def empty():
            return indices.drop(indices)

        # (first operand, second operand, expected result)
        cases = (
            (full, full, full),  # copy.union(copy)
            (full, empty, full),  # copy.union(empty)
            (empty, full, full),  # empty.union(copy)
            (empty, empty, empty),  # empty.union(empty)
        )
        for make_first, make_second, make_expected in cases:
            first = make_first().set_names(fname)
            second = make_second().set_names(sname)
            union = first.union(second)
            expected = make_expected().set_names(expected_name)
            tm.assert_index_equal(union, expected)

    def test_to_flat_index(self, indices):
        """``to_flat_index`` on a non-MultiIndex must return an equal index."""
        # 22866
        if isinstance(indices, MultiIndex):
            pytest.skip("Separate expectation for MultiIndex")

        flattened = indices.to_flat_index()
        tm.assert_index_equal(flattened, indices)

    def test_wrong_number_names(self, indices):
        """Assigning three names to ``.names`` must raise ``ValueError``."""
        three_names = ["apple", "banana", "carrot"]
        with pytest.raises(ValueError, match="^Length"):
            indices.names = three_names

    def test_set_name_methods(self, indices):
        """Check ``set_names`` and in-place ``rename`` on a flat index.

        ``set_names`` must return a renamed object and leave ``indices``
        untouched, while ``rename(..., inplace=True)`` must return ``None``
        and change ``indices`` itself.  The name ``indices`` had on entry is
        put back before the test exits, so the in-place renames performed
        here do not stay on the object handed in by the fixture.
        """
        new_name = "This is the new name for this index"

        # don't test a MultiIndex here (as it is tested separately)
        if isinstance(indices, MultiIndex):
            pytest.skip("Skip check for MultiIndex")
        original_name = indices.name
        try:
            new_ind = indices.set_names([new_name])
            assert new_ind.name == new_name
            assert indices.name == original_name
            res = indices.rename(new_name, inplace=True)

            # should return None
            assert res is None
            assert indices.name == new_name
            assert indices.names == [new_name]
            # FIXME: dont leave commented-out
            # with pytest.raises(TypeError, match="list-like"):
            #    # should still fail even if it would be the right length
            #    ind.set_names("a")
            with pytest.raises(ValueError, match="Level must be None"):
                indices.set_names("a", level=0)

            # rename in place just leaves tuples and other containers alone
            name = ("A", "B")
            indices.rename(name, inplace=True)
            assert indices.name == name
            assert indices.names == [name]
        finally:
            # Undo the in-place renames, whether or not an assertion failed.
            indices.name = original_name

    def test_dtype_str(self, indices):
        """``dtype_str`` must warn and equal ``str(indices.dtype)``."""
        with tm.assert_produces_warning(FutureWarning):
            reported = indices.dtype_str
            assert isinstance(reported, str)
            expected = str(indices.dtype)
            assert reported == expected

    def test_hash_error(self, indices):
        """Hashing an index must raise ``TypeError`` naming its class."""
        pattern = "unhashable type: {0.__name__!r}".format(type(indices))
        with pytest.raises(TypeError, match=pattern):
            hash(indices)

    def test_copy_and_deepcopy(self, indices):
        from copy import copy, deepcopy

        if isinstance(indices, MultiIndex):
            pytest.skip("Skip check for MultiIndex")

        for func in (copy, deepcopy):
            idx_copy = func(indices)
            assert idx_copy is not indices
            assert idx_copy.equals(indices)

        new_copy = indices.copy(deep=True, name="banana")
        assert new_copy.name == "banana"

    def test_unique(self, indices):
        """Check ``unique(level=...)`` for valid and invalid levels."""
        # MultiIndex is tested separately; CategoricalIndex is left out
        # because categories change (GH 18291)
        if isinstance(indices, (MultiIndex, CategoricalIndex)):
            pytest.skip("Skip check for MultiIndex/CategoricalIndex")

        # GH 17896
        deduplicated = indices.drop_duplicates()
        for level in (0, indices.name, None):
            tm.assert_index_equal(indices.unique(level=level), deduplicated)

        too_deep = "Too many levels: Index has only 1 level, not 4"
        with pytest.raises(IndexError, match=too_deep):
            indices.unique(level=3)

        # The current name is interpolated into the expected message, escaped
        # so that it is matched literally.
        name_repr = re.escape(repr(indices.name))
        template = r"Requested level \(wrong\) does not match index name \({}\)"
        with pytest.raises(KeyError, match=template.format(name_repr)):
            indices.unique(level="wrong")

    def test_get_unique_index(self, indices):
        """Check ``_get_unique_index`` without and with a missing value."""
        # MultiIndex tested separately
        if len(indices) == 0 or isinstance(indices, MultiIndex):
            pytest.skip("Skip check for empty Index and MultiIndex")

        first_five_times = [0] * 5
        idx = indices[first_five_times]
        idx_unique = indices[[0]]

        # `idx_unique` is the reference result, so confirm up front that it is
        # unique and doesn't contain nans.
        assert idx_unique.is_unique is True
        try:
            assert idx_unique.hasnans is False
        except NotImplementedError:
            pass

        for dropna in (False, True):
            result = idx._get_unique_index(dropna=dropna)
            tm.assert_index_equal(result, idx_unique)

        # nans:
        if not indices._can_hold_na:
            pytest.skip("Skip na-check if index cannot hold na")

        # Pick the raw values and the matching missing-value marker, then put
        # the marker in the first slot of the repeated values.
        if needs_i8_conversion(indices):
            raw, missing = indices.asi8, iNaT
        else:
            raw, missing = indices.values, np.nan
        vals = raw[first_five_times]
        vals[0] = missing

        idx_nan = indices._shallow_copy(vals)
        idx_unique_nan = indices._shallow_copy(vals[:2])
        assert idx_unique_nan.is_unique is True

        assert idx_nan.dtype == indices.dtype
        assert idx_unique_nan.dtype == indices.dtype

        expectations = ((False, idx_unique_nan), (True, idx_unique))
        for dropna, expected in expectations:
            for candidate in (idx_nan, idx_unique_nan):
                result = candidate._get_unique_index(dropna=dropna)
                tm.assert_index_equal(result, expected)

    def test_sort(self, indices):
        """Calling ``sort`` on an index must raise ``TypeError``."""
        reason = "cannot sort an Index object in-place, use sort_values instead"
        with pytest.raises(TypeError, match=reason):
            indices.sort()

    def test_mutability(self, indices):
        """Item assignment on a non-empty index must raise ``TypeError``."""
        if len(indices) == 0:
            pytest.skip("Skip check for empty Index")
        reason = "Index does not support mutable operations"
        with pytest.raises(TypeError, match=reason):
            first = indices[0]
            indices[0] = first

    def test_view(self, indices):
        assert indices.view().name == indices.name

    def test_compat(self, indices):
        assert indices.tolist() == list(indices)

    def test_searchsorted_monotonic(self, indices):
        """Check ``_searchsorted_monotonic``/``searchsorted`` for the first element.

        Monotonic indexes must report the expected "left" and "right"
        positions; non-monotonic ones must raise ``ValueError``.
        """
        # GH17271
        # not implemented for tuple searches in MultiIndex
        # or Intervals searches in IntervalIndex
        if isinstance(indices, (MultiIndex, pd.IntervalIndex)):
            pytest.skip("Skip check for MultiIndex/IntervalIndex")

        # nothing to test if the index is empty
        if indices.empty:
            pytest.skip("Skip check for empty Index")
        value = indices[0]

        # "left" is expected at position 0; "right" is expected at the first
        # position that differs from `value` (handles dupes). An argmin of 0
        # means all values are the same, so "right" is the length instead.
        first_mismatch = (indices == value).argmin()
        right = len(indices) if first_mismatch == 0 else first_mismatch
        expected_by_side = (("left", 0), ("right", right))

        increasing = indices.is_monotonic_increasing
        if not increasing and not indices.is_monotonic_decreasing:
            # non-monotonic should raise.
            with pytest.raises(ValueError):
                indices._searchsorted_monotonic(value, side="left")
            return

        # _searchsorted_monotonic is tested for both monotonic directions
        for side, expected in expected_by_side:
            assert expected == indices._searchsorted_monotonic(value, side=side)

        # searchsorted is tested only for increasing
        if increasing:
            for side, expected in expected_by_side:
                assert expected == indices.searchsorted(value, side=side)

    def test_pickle(self, indices):
        """A named index must equal its pickle round-tripped copy.

        The index is renamed to ``"foo"`` for the round trip.  The original
        name is restored in a ``finally`` clause so that a failing assertion
        cannot leave the temporary name on the object.
        """
        original_name, indices.name = indices.name, "foo"
        try:
            unpickled = tm.round_trip_pickle(indices)
            assert indices.equals(unpickled)
        finally:
            indices.name = original_name

    @pytest.mark.parametrize("keep", ["first", "last", False])
    def test_duplicated(self, indices, keep):
        """Compare ``Index.duplicated`` with ``Series.duplicated``.

        A duplicate-free version of the index is resampled with repetition at
        random positions; the duplicate mask of the resampled index must match
        the mask ``Series.duplicated`` gives for those positions.
        """
        if len(indices) == 0 or isinstance(indices, (MultiIndex, RangeIndex)):
            # MultiIndex tested separately in:
            # tests/indexes/multi/test_unique_and_duplicates
            pytest.skip("Skip check for empty Index, MultiIndex, RangeIndex")

        index_type = type(indices)

        base = index_type(indices)
        if base.has_duplicates:
            # The expected mask is derived from the sampled positions, which
            # only works when every position holds a distinct value, so any
            # existing duplicates are removed first. This is seemingly
            # circular, as drop_duplicates invokes duplicated, but the result
            # is cross-checked with Series.duplicated, which is tested
            # separately.
            base = base.drop_duplicates()

        repeats = 10
        size = len(base)
        positions = np.random.choice(size, repeats * size)
        expected = pd.Series(positions).duplicated(keep=keep).values
        sampled = index_type(base.values[positions])

        result = sampled.duplicated(keep=keep)
        tm.assert_numpy_array_equal(result, expected)

    def test_has_duplicates(self, indices):
        holder = type(indices)
        if not len(indices) or isinstance(indices, (MultiIndex, RangeIndex)):
            # MultiIndex tested separately in:
            #   tests/indexes/multi/test_unique_and_duplicates.
            # RangeIndex is unique by definition.
            pytest.skip("Skip check for empty Index, MultiIndex, and RangeIndex")

        idx = holder([indices[0]] * 5)
        assert idx.is_unique is False
        assert idx.has_duplicates is True