Why Gemfury? Push, build, and install RubyGems, npm packages, Python packages, Maven artifacts, PHP packages, Go Modules, Bower components, Debian packages, RPM packages, NuGet packages

aaronreidsmith / pandas   python

Repository URL to install this package:

Version: 0.25.3 

/ tests / arrays / categorical / test_indexing.py

import numpy as np
import pytest

import pandas as pd
from pandas import Categorical, CategoricalIndex, Index, PeriodIndex, Series
import pandas.core.common as com
from pandas.tests.arrays.categorical.common import TestCategorical
import pandas.util.testing as tm


class TestCategoricalIndexingWithFactor(TestCategorical):
    """
    ``__getitem__`` / ``__setitem__`` tests for Categorical.

    Several tests read ``self.factor``, which is not defined in this class
    (presumably supplied by the ``TestCategorical`` base class).
    """

    def test_getitem(self):
        """Scalar, positional-list and boolean-mask lookups on the factor."""
        factor = self.factor
        assert factor[0] == "a"
        assert factor[-1] == "c"

        # positional list indexer
        picked = factor[[0, 1, 2]]
        expected_codes = np.array([0, 1, 1], dtype=np.int8)
        tm.assert_numpy_array_equal(picked._codes, expected_codes)

        # boolean mask selecting every "c"
        is_c = np.asarray(factor) == "c"
        picked = factor[is_c]
        expected_codes = np.array([2, 2, 2], dtype=np.int8)
        tm.assert_numpy_array_equal(picked._codes, expected_codes)

    def test_setitem(self):
        """Scalar assignment by position and through a boolean mask."""
        # int/positional
        cat = self.factor.copy()
        cat[0] = "b"
        assert cat[0] == "b"
        cat[-1] = "a"
        assert cat[-1] == "a"

        # boolean: flag only the first and the last position
        cat = self.factor.copy()
        mask = np.zeros(len(cat), dtype="bool")
        mask[[0, -1]] = True
        cat[mask] = "c"

        expected = Categorical(list("cbbaaccc"), ordered=True)
        tm.assert_categorical_equal(cat, expected)

    @pytest.mark.parametrize(
        "other",
        [
            Categorical(["b", "a"]),
            Categorical(["b", "a"], categories=["b", "a"]),
        ],
    )
    def test_setitem_same_but_unordered(self, other):
        """Unordered categoricals with the same category set can be assigned."""
        # GH-24142
        selector = np.array([True, False])
        target = Categorical(["a", "b"], categories=["a", "b"])
        target[selector] = other[selector]

        expected = Categorical(["b", "b"], categories=["a", "b"])
        tm.assert_categorical_equal(target, expected)

    @pytest.mark.parametrize(
        "other",
        [
            Categorical(["b", "a"], categories=["b", "a", "c"]),
            Categorical(["b", "a"], categories=["a", "b", "c"]),
            Categorical(["a", "a"], categories=["a"]),
            Categorical(["b", "b"], categories=["b"]),
        ],
    )
    def test_setitem_different_unordered_raises(self, other):
        """Assigning from a categorical with a different category set raises."""
        # GH-24142
        selector = np.array([True, False])
        target = Categorical(["a", "b"], categories=["a", "b"])
        with pytest.raises(ValueError):
            target[selector] = other[selector]

    @pytest.mark.parametrize(
        "other",
        [
            Categorical(["b", "a"]),
            Categorical(["b", "a"], categories=["b", "a"], ordered=True),
            Categorical(["b", "a"], categories=["a", "b", "c"], ordered=True),
        ],
    )
    def test_setitem_same_ordered_rasies(self, other):
        """Assigning into an ordered target from each parametrized ``other`` raises."""
        # GH-24142
        selector = np.array([True, False])
        target = Categorical(["a", "b"], categories=["a", "b"], ordered=True)
        with pytest.raises(ValueError):
            target[selector] = other[selector]


class TestCategoricalIndexing:
    """Indexing-related tests for Categorical that need no shared fixture data."""

    def test_getitem_listlike(self):
        """Indexing with an int64 array agrees with indexing the codes directly."""
        # GH 9469
        # properly coerce the input indexers
        np.random.seed(1)
        values = np.random.randint(0, 5, size=150000).astype(np.int8)
        cat = Categorical(values)
        indexer = np.array([100000]).astype(np.int64)

        result = cat.codes[indexer]
        expected = cat[indexer].codes
        tm.assert_numpy_array_equal(result, expected)

    def test_periodindex(self):
        """Check codes and categories of Categoricals built from a PeriodIndex."""
        # duplicated periods, already in ascending order
        periods = PeriodIndex(
            ["2014-01", "2014-01", "2014-02", "2014-02", "2014-03", "2014-03"], freq="M"
        )
        cat = Categorical(periods)
        str(cat)  # building the string form must not raise
        expected_codes = np.array([0, 0, 1, 1, 2, 2], dtype=np.int8)
        expected_cats = PeriodIndex(["2014-01", "2014-02", "2014-03"], freq="M")
        tm.assert_numpy_array_equal(cat._codes, expected_codes)
        tm.assert_index_equal(cat.categories, expected_cats)

        # duplicated periods in arbitrary order, ordered categorical
        periods = PeriodIndex(
            ["2014-03", "2014-03", "2014-02", "2014-01", "2014-03", "2014-01"], freq="M"
        )
        cat = Categorical(periods, ordered=True)
        str(cat)  # building the string form must not raise
        expected_codes = np.array([2, 2, 1, 0, 2, 0], dtype=np.int8)
        expected_cats = PeriodIndex(["2014-01", "2014-02", "2014-03"], freq="M")
        tm.assert_numpy_array_equal(cat._codes, expected_codes)
        tm.assert_index_equal(cat.categories, expected_cats)

        # unique periods in descending order, ordered categorical
        descending = ["2013-12", "2013-11", "2013-10", "2013-09"]
        descending += ["2013-08", "2013-07", "2013-05"]
        cat = Categorical(PeriodIndex(descending, freq="M"), ordered=True)
        expected_codes = np.array([6, 5, 4, 3, 2, 1, 0], dtype=np.int8)
        ascending = ["2013-05", "2013-07", "2013-08", "2013-09"]
        ascending += ["2013-10", "2013-11", "2013-12"]
        expected_cats = PeriodIndex(ascending, freq="M")
        tm.assert_numpy_array_equal(cat._codes, expected_codes)
        tm.assert_index_equal(cat.categories, expected_cats)

    def test_categories_assigments(self):
        """Setting ``categories`` relabels values; a wrong-length list raises."""
        cat = Categorical(["a", "b", "c", "a"])
        expected_values = np.array([1, 2, 3, 1], dtype=np.int64)
        cat.categories = [1, 2, 3]
        tm.assert_numpy_array_equal(cat.__array__(), expected_values)
        tm.assert_index_equal(cat.categories, Index([1, 2, 3]))

        # lengthen, then shorten: both must be rejected
        for wrong_length in ([1, 2, 3, 4], [1, 2]):
            with pytest.raises(ValueError):
                cat.categories = wrong_length

    # Combinations of sorted/unique:
    @pytest.mark.parametrize(
        "idx_values", [[1, 2, 3, 4], [1, 3, 2, 4], [1, 3, 3, 4], [1, 2, 2, 4]]
    )
    # Combinations of missing/unique
    @pytest.mark.parametrize("key_values", [[1, 2], [1, 5], [1, 1], [5, 5]])
    @pytest.mark.parametrize("key_class", [Categorical, CategoricalIndex])
    def test_get_indexer_non_unique(self, idx_values, key_values, key_class):
        """A categorical key gives the same indexer as the raw key values."""
        # GH 21448
        key = key_class(key_values, categories=range(1, 5))
        # Test for flat index and CategoricalIndex with same/different cats:
        for index_dtype in (None, "category", key.dtype):
            index = Index(idx_values, dtype=index_dtype)
            want_indexer, want_missing = index.get_indexer_non_unique(key_values)
            got_indexer, got_missing = index.get_indexer_non_unique(key)

            tm.assert_numpy_array_equal(want_indexer, got_indexer)
            tm.assert_numpy_array_equal(want_missing, got_missing)

    def test_where_unobserved_nan(self):
        """``where`` without ``other`` leaves missing values and keeps categories."""
        cases = [
            ([True, False], ["a", None]),
            # all NA
            ([False, False], [None, None]),
        ]
        for cond, kept in cases:
            ser = Series(Categorical(["a", "b"]))
            result = ser.where(cond)
            expected = Series(Categorical(kept, categories=["a", "b"]))
            tm.assert_series_equal(result, expected)

    def test_where_unobserved_categories(self):
        """A scalar ``other`` that is an existing category keeps the dtype."""
        cat = Categorical(["a", "b", "c"], categories=["d", "c", "b", "a"])
        ser = Series(cat)
        result = ser.where([True, True, False], other="b")

        expected_cat = Categorical(["a", "b", "b"], categories=ser.cat.categories)
        tm.assert_series_equal(result, Series(expected_cat))

    def test_where_other_categorical(self):
        """A categorical ``other`` with reordered categories keeps ``ser``'s dtype."""
        ser = Series(Categorical(["a", "b", "c"], categories=["d", "c", "b", "a"]))
        replacement = Categorical(["b", "c", "a"], categories=["a", "c", "b", "d"])
        result = ser.where([True, False, True], replacement)

        expected_cat = Categorical(["a", "c", "c"], dtype=ser.dtype)
        tm.assert_series_equal(result, Series(expected_cat))

    def test_where_warns(self):
        """A scalar ``other`` outside the categories warns and yields object dtype."""
        ser = Series(Categorical(["a", "b", "c"]))
        with tm.assert_produces_warning(FutureWarning):
            result = ser.where([True, False, True], "d")

        expected_values = np.array(["a", "d", "c"], dtype="object")
        tm.assert_series_equal(result, Series(expected_values))

    def test_where_ordered_differs_rasies(self):
        """Ordered categoricals with differing order: warns, yields object dtype."""
        # NOTE: despite the method name, this checks for a FutureWarning,
        # not for a raised exception.
        ser = Series(
            Categorical(["a", "b", "c"], categories=["d", "c", "b", "a"], ordered=True)
        )
        replacement = Categorical(
            ["b", "c", "a"], categories=["a", "c", "b", "d"], ordered=True
        )
        with tm.assert_produces_warning(FutureWarning):
            result = ser.where([True, False, True], replacement)

        expected_values = np.array(["a", "c", "c"], dtype=object)
        tm.assert_series_equal(result, Series(expected_values))


@pytest.mark.parametrize("index", [True, False])
def test_mask_with_boolean(index):
    """A boolean Categorical / CategoricalIndex selects like its object-dtype form."""
    bool_cat = Categorical([True, False, True])
    mask = CategoricalIndex(bool_cat) if index else bool_cat
    ser = Series(range(3))

    assert com.is_bool_indexer(mask)
    tm.assert_series_equal(ser[mask], ser[mask.astype("object")])


@pytest.mark.parametrize("index", [True, False])
def test_mask_with_boolean_raises(index):
    """A boolean categorical mask that contains a missing value is rejected."""
    bool_cat = Categorical([True, False, None])
    mask = CategoricalIndex(bool_cat) if index else bool_cat
    ser = Series(range(3))

    with pytest.raises(ValueError, match="NA / NaN"):
        ser[mask]


@pytest.fixture
def non_coercible_categorical(monkeypatch):
    """
    Replace Categorical.__array__ with a function that always raises.

    The patch is applied inside a ``monkeypatch.context()`` block, so it is
    active only while the requesting test runs.

    Raises
    ------
    ValueError
        When Categorical.__array__ is called.
    """
    # TODO(Categorical): identify other places where this may be
    # useful and move to a conftest.py
    def _refuse_conversion(self, dtype=None):
        raise ValueError("I cannot be converted.")

    with monkeypatch.context() as patcher:
        patcher.setattr(Categorical, "__array__", _refuse_conversion)
        yield


def test_series_at(non_coercible_categorical):
    """``Series.at`` returns a scalar while Categorical.__array__ is patched to raise."""
    ser = Series(Categorical(["a", "b", "c"]))
    value = ser.at[0]
    assert value == "a"