import numpy as np
import pytest
import pandas as pd
from pandas import Categorical, CategoricalIndex, Index, PeriodIndex, Series
import pandas.core.common as com
from pandas.tests.arrays.categorical.common import TestCategorical
import pandas.util.testing as tm
class TestCategoricalIndexingWithFactor(TestCategorical):
def test_getitem(self):
assert self.factor[0] == "a"
assert self.factor[-1] == "c"
subf = self.factor[[0, 1, 2]]
tm.assert_numpy_array_equal(subf._codes, np.array([0, 1, 1], dtype=np.int8))
subf = self.factor[np.asarray(self.factor) == "c"]
tm.assert_numpy_array_equal(subf._codes, np.array([2, 2, 2], dtype=np.int8))
def test_setitem(self):
# int/positional
c = self.factor.copy()
c[0] = "b"
assert c[0] == "b"
c[-1] = "a"
assert c[-1] == "a"
# boolean
c = self.factor.copy()
indexer = np.zeros(len(c), dtype="bool")
indexer[0] = True
indexer[-1] = True
c[indexer] = "c"
expected = Categorical(["c", "b", "b", "a", "a", "c", "c", "c"], ordered=True)
tm.assert_categorical_equal(c, expected)
@pytest.mark.parametrize(
"other",
[pd.Categorical(["b", "a"]), pd.Categorical(["b", "a"], categories=["b", "a"])],
)
def test_setitem_same_but_unordered(self, other):
# GH-24142
target = pd.Categorical(["a", "b"], categories=["a", "b"])
mask = np.array([True, False])
target[mask] = other[mask]
expected = pd.Categorical(["b", "b"], categories=["a", "b"])
tm.assert_categorical_equal(target, expected)
@pytest.mark.parametrize(
"other",
[
pd.Categorical(["b", "a"], categories=["b", "a", "c"]),
pd.Categorical(["b", "a"], categories=["a", "b", "c"]),
pd.Categorical(["a", "a"], categories=["a"]),
pd.Categorical(["b", "b"], categories=["b"]),
],
)
def test_setitem_different_unordered_raises(self, other):
# GH-24142
target = pd.Categorical(["a", "b"], categories=["a", "b"])
mask = np.array([True, False])
with pytest.raises(ValueError):
target[mask] = other[mask]
@pytest.mark.parametrize(
"other",
[
pd.Categorical(["b", "a"]),
pd.Categorical(["b", "a"], categories=["b", "a"], ordered=True),
pd.Categorical(["b", "a"], categories=["a", "b", "c"], ordered=True),
],
)
def test_setitem_same_ordered_rasies(self, other):
# Gh-24142
target = pd.Categorical(["a", "b"], categories=["a", "b"], ordered=True)
mask = np.array([True, False])
with pytest.raises(ValueError):
target[mask] = other[mask]
class TestCategoricalIndexing:
def test_getitem_listlike(self):
# GH 9469
# properly coerce the input indexers
np.random.seed(1)
c = Categorical(np.random.randint(0, 5, size=150000).astype(np.int8))
result = c.codes[np.array([100000]).astype(np.int64)]
expected = c[np.array([100000]).astype(np.int64)].codes
tm.assert_numpy_array_equal(result, expected)
def test_periodindex(self):
idx1 = PeriodIndex(
["2014-01", "2014-01", "2014-02", "2014-02", "2014-03", "2014-03"], freq="M"
)
cat1 = Categorical(idx1)
str(cat1)
exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.int8)
exp_idx = PeriodIndex(["2014-01", "2014-02", "2014-03"], freq="M")
tm.assert_numpy_array_equal(cat1._codes, exp_arr)
tm.assert_index_equal(cat1.categories, exp_idx)
idx2 = PeriodIndex(
["2014-03", "2014-03", "2014-02", "2014-01", "2014-03", "2014-01"], freq="M"
)
cat2 = Categorical(idx2, ordered=True)
str(cat2)
exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.int8)
exp_idx2 = PeriodIndex(["2014-01", "2014-02", "2014-03"], freq="M")
tm.assert_numpy_array_equal(cat2._codes, exp_arr)
tm.assert_index_equal(cat2.categories, exp_idx2)
idx3 = PeriodIndex(
[
"2013-12",
"2013-11",
"2013-10",
"2013-09",
"2013-08",
"2013-07",
"2013-05",
],
freq="M",
)
cat3 = Categorical(idx3, ordered=True)
exp_arr = np.array([6, 5, 4, 3, 2, 1, 0], dtype=np.int8)
exp_idx = PeriodIndex(
[
"2013-05",
"2013-07",
"2013-08",
"2013-09",
"2013-10",
"2013-11",
"2013-12",
],
freq="M",
)
tm.assert_numpy_array_equal(cat3._codes, exp_arr)
tm.assert_index_equal(cat3.categories, exp_idx)
def test_categories_assigments(self):
s = Categorical(["a", "b", "c", "a"])
exp = np.array([1, 2, 3, 1], dtype=np.int64)
s.categories = [1, 2, 3]
tm.assert_numpy_array_equal(s.__array__(), exp)
tm.assert_index_equal(s.categories, Index([1, 2, 3]))
# lengthen
with pytest.raises(ValueError):
s.categories = [1, 2, 3, 4]
# shorten
with pytest.raises(ValueError):
s.categories = [1, 2]
# Combinations of sorted/unique:
@pytest.mark.parametrize(
"idx_values", [[1, 2, 3, 4], [1, 3, 2, 4], [1, 3, 3, 4], [1, 2, 2, 4]]
)
# Combinations of missing/unique
@pytest.mark.parametrize("key_values", [[1, 2], [1, 5], [1, 1], [5, 5]])
@pytest.mark.parametrize("key_class", [Categorical, CategoricalIndex])
def test_get_indexer_non_unique(self, idx_values, key_values, key_class):
# GH 21448
key = key_class(key_values, categories=range(1, 5))
# Test for flat index and CategoricalIndex with same/different cats:
for dtype in None, "category", key.dtype:
idx = Index(idx_values, dtype=dtype)
expected, exp_miss = idx.get_indexer_non_unique(key_values)
result, res_miss = idx.get_indexer_non_unique(key)
tm.assert_numpy_array_equal(expected, result)
tm.assert_numpy_array_equal(exp_miss, res_miss)
def test_where_unobserved_nan(self):
ser = pd.Series(pd.Categorical(["a", "b"]))
result = ser.where([True, False])
expected = pd.Series(pd.Categorical(["a", None], categories=["a", "b"]))
tm.assert_series_equal(result, expected)
# all NA
ser = pd.Series(pd.Categorical(["a", "b"]))
result = ser.where([False, False])
expected = pd.Series(pd.Categorical([None, None], categories=["a", "b"]))
tm.assert_series_equal(result, expected)
def test_where_unobserved_categories(self):
ser = pd.Series(Categorical(["a", "b", "c"], categories=["d", "c", "b", "a"]))
result = ser.where([True, True, False], other="b")
expected = pd.Series(
Categorical(["a", "b", "b"], categories=ser.cat.categories)
)
tm.assert_series_equal(result, expected)
def test_where_other_categorical(self):
ser = pd.Series(Categorical(["a", "b", "c"], categories=["d", "c", "b", "a"]))
other = Categorical(["b", "c", "a"], categories=["a", "c", "b", "d"])
result = ser.where([True, False, True], other)
expected = pd.Series(Categorical(["a", "c", "c"], dtype=ser.dtype))
tm.assert_series_equal(result, expected)
def test_where_warns(self):
ser = pd.Series(Categorical(["a", "b", "c"]))
with tm.assert_produces_warning(FutureWarning):
result = ser.where([True, False, True], "d")
expected = pd.Series(np.array(["a", "d", "c"], dtype="object"))
tm.assert_series_equal(result, expected)
def test_where_ordered_differs_rasies(self):
ser = pd.Series(
Categorical(["a", "b", "c"], categories=["d", "c", "b", "a"], ordered=True)
)
other = Categorical(
["b", "c", "a"], categories=["a", "c", "b", "d"], ordered=True
)
with tm.assert_produces_warning(FutureWarning):
result = ser.where([True, False, True], other)
expected = pd.Series(np.array(["a", "c", "c"], dtype=object))
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("index", [True, False])
def test_mask_with_boolean(index):
s = Series(range(3))
idx = Categorical([True, False, True])
if index:
idx = CategoricalIndex(idx)
assert com.is_bool_indexer(idx)
result = s[idx]
expected = s[idx.astype("object")]
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("index", [True, False])
def test_mask_with_boolean_raises(index):
s = Series(range(3))
idx = Categorical([True, False, None])
if index:
idx = CategoricalIndex(idx)
with pytest.raises(ValueError, match="NA / NaN"):
s[idx]
@pytest.fixture
def non_coercible_categorical(monkeypatch):
"""
Monkeypatch Categorical.__array__ to ensure no implicit conversion.
Raises
------
ValueError
When Categorical.__array__ is called.
"""
# TODO(Categorical): identify other places where this may be
# useful and move to a conftest.py
def array(self, dtype=None):
raise ValueError("I cannot be converted.")
with monkeypatch.context() as m:
m.setattr(Categorical, "__array__", array)
yield
def test_series_at(non_coercible_categorical):
arr = Categorical(["a", "b", "c"])
ser = Series(arr)
result = ser.at[0]
assert result == "a"