tests/indexing/test_categorical.py · aaronreidsmith/pandas

aaronreidsmith / pandas python

Repository URL to install this package:
Version: 0.25.3

/ tests / indexing / test_categorical.py

import numpy as np
import pytest

from pandas.core.dtypes.common import is_categorical_dtype
from pandas.core.dtypes.dtypes import CategoricalDtype

import pandas as pd
from pandas import (
    Categorical,
    CategoricalIndex,
    DataFrame,
    Index,
    Interval,
    Series,
    Timestamp,
)
from pandas.api.types import CategoricalDtype as CDT
from pandas.util import testing as tm
from pandas.util.testing import assert_frame_equal, assert_series_equal


class TestCategoricalIndex:
    def setup_method(self, method):

        self.df = DataFrame(
            {
                "A": np.arange(6, dtype="int64"),
                "B": Series(list("aabbca")).astype(CDT(list("cab"))),
            }
        ).set_index("B")
        self.df2 = DataFrame(
            {
                "A": np.arange(6, dtype="int64"),
                "B": Series(list("aabbca")).astype(CDT(list("cabe"))),
            }
        ).set_index("B")
        self.df3 = DataFrame(
            {
                "A": np.arange(6, dtype="int64"),
                "B": (Series([1, 1, 2, 1, 3, 2]).astype(CDT([3, 2, 1], ordered=True))),
            }
        ).set_index("B")
        self.df4 = DataFrame(
            {
                "A": np.arange(6, dtype="int64"),
                "B": (Series([1, 1, 2, 1, 3, 2]).astype(CDT([3, 2, 1], ordered=False))),
            }
        ).set_index("B")

    def test_loc_scalar(self):
        result = self.df.loc["a"]
        expected = DataFrame(
            {"A": [0, 1, 5], "B": (Series(list("aaa")).astype(CDT(list("cab"))))}
        ).set_index("B")
        assert_frame_equal(result, expected)

        df = self.df.copy()
        df.loc["a"] = 20
        expected = DataFrame(
            {
                "A": [20, 20, 2, 3, 4, 20],
                "B": (Series(list("aabbca")).astype(CDT(list("cab")))),
            }
        ).set_index("B")
        assert_frame_equal(df, expected)

        # value not in the categories
        with pytest.raises(KeyError, match=r"^'d'$"):
            df.loc["d"]

        msg = "cannot append a non-category item to a CategoricalIndex"
        with pytest.raises(TypeError, match=msg):
            df.loc["d"] = 10

        msg = (
            "cannot insert an item into a CategoricalIndex that is not"
            " already an existing category"
        )
        with pytest.raises(TypeError, match=msg):
            df.loc["d", "A"] = 10
        with pytest.raises(TypeError, match=msg):
            df.loc["d", "C"] = 10

    def test_getitem_scalar(self):

        cats = Categorical([Timestamp("12-31-1999"), Timestamp("12-31-2000")])

        s = Series([1, 2], index=cats)

        expected = s.iloc[0]
        result = s[cats[0]]
        assert result == expected

    def test_slicing_directly(self):
        cat = Categorical(["a", "b", "c", "d", "a", "b", "c"])
        sliced = cat[3]
        assert sliced == "d"
        sliced = cat[3:5]
        expected = Categorical(["d", "a"], categories=["a", "b", "c", "d"])
        tm.assert_numpy_array_equal(sliced._codes, expected._codes)
        tm.assert_index_equal(sliced.categories, expected.categories)

    def test_slicing(self):
        cat = Series(Categorical([1, 2, 3, 4]))
        reversed = cat[::-1]
        exp = np.array([4, 3, 2, 1], dtype=np.int64)
        tm.assert_numpy_array_equal(reversed.__array__(), exp)

        df = DataFrame({"value": (np.arange(100) + 1).astype("int64")})
        df["D"] = pd.cut(df.value, bins=[0, 25, 50, 75, 100])

        expected = Series([11, Interval(0, 25)], index=["value", "D"], name=10)
        result = df.iloc[10]
        tm.assert_series_equal(result, expected)

        expected = DataFrame(
            {"value": np.arange(11, 21).astype("int64")},
            index=np.arange(10, 20).astype("int64"),
        )
        expected["D"] = pd.cut(expected.value, bins=[0, 25, 50, 75, 100])
        result = df.iloc[10:20]
        tm.assert_frame_equal(result, expected)

        expected = Series([9, Interval(0, 25)], index=["value", "D"], name=8)
        result = df.loc[8]
        tm.assert_series_equal(result, expected)

    def test_slicing_and_getting_ops(self):

        # systematically test the slicing operations:
        #  for all slicing ops:
        #   - returning a dataframe
        #   - returning a column
        #   - returning a row
        #   - returning a single value

        cats = Categorical(
            ["a", "c", "b", "c", "c", "c", "c"], categories=["a", "b", "c"]
        )
        idx = Index(["h", "i", "j", "k", "l", "m", "n"])
        values = [1, 2, 3, 4, 5, 6, 7]
        df = DataFrame({"cats": cats, "values": values}, index=idx)

        # the expected values
        cats2 = Categorical(["b", "c"], categories=["a", "b", "c"])
        idx2 = Index(["j", "k"])
        values2 = [3, 4]

        # 2:4,: | "j":"k",:
        exp_df = DataFrame({"cats": cats2, "values": values2}, index=idx2)

        # :,"cats" | :,0
        exp_col = Series(cats, index=idx, name="cats")

        # "j",: | 2,:
        exp_row = Series(["b", 3], index=["cats", "values"], dtype="object", name="j")

        # "j","cats | 2,0
        exp_val = "b"

        # iloc
        # frame
        res_df = df.iloc[2:4, :]
        tm.assert_frame_equal(res_df, exp_df)
        assert is_categorical_dtype(res_df["cats"])

        # row
        res_row = df.iloc[2, :]
        tm.assert_series_equal(res_row, exp_row)
        assert isinstance(res_row["cats"], str)

        # col
        res_col = df.iloc[:, 0]
        tm.assert_series_equal(res_col, exp_col)
        assert is_categorical_dtype(res_col)

        # single value
        res_val = df.iloc[2, 0]
        assert res_val == exp_val

        # loc
        # frame
        res_df = df.loc["j":"k", :]
        tm.assert_frame_equal(res_df, exp_df)
        assert is_categorical_dtype(res_df["cats"])

        # row
        res_row = df.loc["j", :]
        tm.assert_series_equal(res_row, exp_row)
        assert isinstance(res_row["cats"], str)

        # col
        res_col = df.loc[:, "cats"]
        tm.assert_series_equal(res_col, exp_col)
        assert is_categorical_dtype(res_col)

        # single value
        res_val = df.loc["j", "cats"]
        assert res_val == exp_val

        # ix
        # frame
        # res_df = df.loc["j":"k",[0,1]] # doesn't work?
        res_df = df.loc["j":"k", :]
        tm.assert_frame_equal(res_df, exp_df)
        assert is_categorical_dtype(res_df["cats"])

        # row
        res_row = df.loc["j", :]
        tm.assert_series_equal(res_row, exp_row)
        assert isinstance(res_row["cats"], str)

        # col
        res_col = df.loc[:, "cats"]
        tm.assert_series_equal(res_col, exp_col)
        assert is_categorical_dtype(res_col)

        # single value
        res_val = df.loc["j", df.columns[0]]
        assert res_val == exp_val

        # iat
        res_val = df.iat[2, 0]
        assert res_val == exp_val

        # at
        res_val = df.at["j", "cats"]
        assert res_val == exp_val

        # fancy indexing
        exp_fancy = df.iloc[[2]]

        res_fancy = df[df["cats"] == "b"]
        tm.assert_frame_equal(res_fancy, exp_fancy)
        res_fancy = df[df["values"] == 3]
        tm.assert_frame_equal(res_fancy, exp_fancy)

        # get_value
        res_val = df.at["j", "cats"]
        assert res_val == exp_val

        # i : int, slice, or sequence of integers
        res_row = df.iloc[2]
        tm.assert_series_equal(res_row, exp_row)
        assert isinstance(res_row["cats"], str)

        res_df = df.iloc[slice(2, 4)]
        tm.assert_frame_equal(res_df, exp_df)
        assert is_categorical_dtype(res_df["cats"])

        res_df = df.iloc[[2, 3]]
        tm.assert_frame_equal(res_df, exp_df)
        assert is_categorical_dtype(res_df["cats"])

        res_col = df.iloc[:, 0]
        tm.assert_series_equal(res_col, exp_col)
        assert is_categorical_dtype(res_col)

        res_df = df.iloc[:, slice(0, 2)]
        tm.assert_frame_equal(res_df, df)
        assert is_categorical_dtype(res_df["cats"])

        res_df = df.iloc[:, [0, 1]]
        tm.assert_frame_equal(res_df, df)
        assert is_categorical_dtype(res_df["cats"])

    def test_slicing_doc_examples(self):

        # GH 7918
        cats = Categorical(
            ["a", "b", "b", "b", "c", "c", "c"], categories=["a", "b", "c"]
        )
        idx = Index(["h", "i", "j", "k", "l", "m", "n"])
        values = [1, 2, 2, 2, 3, 4, 5]
        df = DataFrame({"cats": cats, "values": values}, index=idx)

        result = df.iloc[2:4, :]
        expected = DataFrame(
            {
                "cats": Categorical(["b", "b"], categories=["a", "b", "c"]),
                "values": [2, 2],
            },
            index=["j", "k"],
        )
        tm.assert_frame_equal(result, expected)

        result = df.iloc[2:4, :].dtypes
        expected = Series(["category", "int64"], ["cats", "values"])
        tm.assert_series_equal(result, expected)

        result = df.loc["h":"j", "cats"]
        expected = Series(
            Categorical(["a", "b", "b"], categories=["a", "b", "c"]),
            index=["h", "i", "j"],
            name="cats",
        )
        tm.assert_series_equal(result, expected)

        result = df.loc["h":"j", df.columns[0:1]]
        expected = DataFrame(
            {"cats": Categorical(["a", "b", "b"], categories=["a", "b", "c"])},
            index=["h", "i", "j"],
        )
        tm.assert_frame_equal(result, expected)

    def test_getitem_category_type(self):
        # GH 14580
        # test iloc() on Series with Categorical data

        s = Series([1, 2, 3]).astype("category")

        # get slice
        result = s.iloc[0:2]
        expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3]))
        tm.assert_series_equal(result, expected)

        # get list of indexes
        result = s.iloc[[0, 1]]
        expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3]))
        tm.assert_series_equal(result, expected)

        # get boolean array
        result = s.iloc[[True, False, False]]
        expected = Series([1]).astype(CategoricalDtype([1, 2, 3]))
        tm.assert_series_equal(result, expected)

    def test_loc_listlike(self):

        # list of labels
        result = self.df.loc[["c", "a"]]
        expected = self.df.iloc[[4, 0, 1, 5]]
        assert_frame_equal(result, expected, check_index_type=True)

        result = self.df2.loc[["a", "b", "e"]]
        exp_index = CategoricalIndex(list("aaabbe"), categories=list("cabe"), name="B")
        expected = DataFrame({"A": [0, 1, 5, 2, 3, np.nan]}, index=exp_index)
        assert_frame_equal(result, expected, check_index_type=True)

        # element in the categories but not in the values
        with pytest.raises(KeyError, match=r"^'e'$"):
            self.df2.loc["e"]

        # assign is ok
        df = self.df2.copy()
        df.loc["e"] = 20
        result = df.loc[["a", "b", "e"]]
        exp_index = CategoricalIndex(list("aaabbe"), categories=list("cabe"), name="B")
        expected = DataFrame({"A": [0, 1, 5, 2, 3, 20]}, index=exp_index)
        assert_frame_equal(result, expected)

        df = self.df2.copy()
        result = df.loc[["a", "b", "e"]]
        exp_index = CategoricalIndex(list("aaabbe"), categories=list("cabe"), name="B")
        expected = DataFrame({"A": [0, 1, 5, 2, 3, np.nan]}, index=exp_index)
        assert_frame_equal(result, expected, check_index_type=True)

        # not all labels in the categories
        with pytest.raises(
            KeyError,
            match="'a list-indexer must only include values that are in the"
            " categories'",
        ):
            self.df2.loc[["a", "d"]]

    def test_loc_listlike_dtypes(self):
        # GH 11586

        # unique categories and codes
        index = CategoricalIndex(["a", "b", "c"])
        df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=index)

        # unique slice
        res = df.loc[["a", "b"]]
        exp_index = CategoricalIndex(["a", "b"], categories=index.categories)
        exp = DataFrame({"A": [1, 2], "B": [4, 5]}, index=exp_index)
        tm.assert_frame_equal(res, exp, check_index_type=True)

        # duplicated slice
        res = df.loc[["a", "a", "b"]]

        exp_index = CategoricalIndex(["a", "a", "b"], categories=index.categories)
        exp = DataFrame({"A": [1, 1, 2], "B": [4, 4, 5]}, index=exp_index)
        tm.assert_frame_equal(res, exp, check_index_type=True)

        msg = "a list-indexer must only include values that are in the categories"
        with pytest.raises(KeyError, match=msg):
            df.loc[["a", "x"]]

        # duplicated categories and codes
        index = CategoricalIndex(["a", "b", "a"])
        df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=index)

        # unique slice
        res = df.loc[["a", "b"]]
        exp = DataFrame(
            {"A": [1, 3, 2], "B": [4, 6, 5]}, index=CategoricalIndex(["a", "a", "b"])
        )
        tm.assert_frame_equal(res, exp, check_index_type=True)

        # duplicated slice
        res = df.loc[["a", "a", "b"]]
        exp = DataFrame(
            {"A": [1, 3, 1, 3, 2], "B": [4, 6, 4, 6, 5]},
            index=CategoricalIndex(["a", "a", "a", "a", "b"]),
        )
        tm.assert_frame_equal(res, exp, check_index_type=True)

        msg = "a list-indexer must only include values that are in the categories"
        with pytest.raises(KeyError, match=msg):
            df.loc[["a", "x"]]

        # contains unused category
        index = CategoricalIndex(["a", "b", "a", "c"], categories=list("abcde"))
        df = DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]}, index=index)

        res = df.loc[["a", "b"]]
        exp = DataFrame(
            {"A": [1, 3, 2], "B": [5, 7, 6]},
            index=CategoricalIndex(["a", "a", "b"], categories=list("abcde")),
        )
        tm.assert_frame_equal(res, exp, check_index_type=True)

        res = df.loc[["a", "e"]]
        exp = DataFrame(
            {"A": [1, 3, np.nan], "B": [5, 7, np.nan]},
            index=CategoricalIndex(["a", "a", "e"], categories=list("abcde")),
        )
        tm.assert_frame_equal(res, exp, check_index_type=True)

        # duplicated slice
        res = df.loc[["a", "a", "b"]]
        exp = DataFrame(
            {"A": [1, 3, 1, 3, 2], "B": [5, 7, 5, 7, 6]},
            index=CategoricalIndex(["a", "a", "a", "a", "b"], categories=list("abcde")),
        )
        tm.assert_frame_equal(res, exp, check_index_type=True)

        msg = "a list-indexer must only include values that are in the categories"
        with pytest.raises(KeyError, match=msg):
            df.loc[["a", "x"]]

    def test_get_indexer_array(self):
        arr = np.array(
            [Timestamp("1999-12-31 00:00:00"), Timestamp("2000-12-31 00:00:00")],
            dtype=object,
        )
        cats = [Timestamp("1999-12-31 00:00:00"), Timestamp("2000-12-31 00:00:00")]
        ci = CategoricalIndex(cats, categories=cats, ordered=False, dtype="category")
        result = ci.get_indexer(arr)
        expected = np.array([0, 1], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

    def test_get_indexer_same_categories_same_order(self):
        ci = CategoricalIndex(["a", "b"], categories=["a", "b"])

        result = ci.get_indexer(CategoricalIndex(["b", "b"], categories=["a", "b"]))
        expected = np.array([1, 1], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

    def test_get_indexer_same_categories_different_order(self):
        # https://github.com/pandas-dev/pandas/issues/19551
        ci = CategoricalIndex(["a", "b"], categories=["a", "b"])

        result = ci.get_indexer(CategoricalIndex(["b", "b"], categories=["b", "a"]))
        expected = np.array([1, 1], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

    def test_getitem_with_listlike(self):
        # GH 16115
        cats = Categorical([Timestamp("12-31-1999"), Timestamp("12-31-2000")])

        expected = DataFrame(
            [[1, 0], [0, 1]], dtype="uint8", index=[0, 1], columns=cats
        )
        dummies = pd.get_dummies(cats)
        result = dummies[[c for c in dummies.columns]]
        assert_frame_equal(result, expected)

    def test_setitem_listlike(self):

        # GH 9469
        # properly coerce the input indexers
        np.random.seed(1)
        c = Categorical(
            np.random.randint(0, 5, size=150000).astype(np.int8)
        ).add_categories([-1000])
        indexer = np.array([100000]).astype(np.int64)
        c[indexer] = -1000

        # we are asserting the code result here
        # which maps to the -1000 category
        result = c.codes[np.array([100000]).astype(np.int64)]
        tm.assert_numpy_array_equal(result, np.array([5], dtype="int8"))

    def test_ix_categorical_index(self):
        # GH 12531
        df = DataFrame(np.random.randn(3, 3), index=list("ABC"), columns=list("XYZ"))
        cdf = df.copy()
        cdf.index = CategoricalIndex(df.index)
        cdf.columns = CategoricalIndex(df.columns)

        expect = Series(df.loc["A", :], index=cdf.columns, name="A")
        assert_series_equal(cdf.loc["A", :], expect)

        expect = Series(df.loc[:, "X"], index=cdf.index, name="X")
        assert_series_equal(cdf.loc[:, "X"], expect)

        exp_index = CategoricalIndex(list("AB"), categories=["A", "B", "C"])
        expect = DataFrame(df.loc[["A", "B"], :], columns=cdf.columns, index=exp_index)
        assert_frame_equal(cdf.loc[["A", "B"], :], expect)

        exp_columns = CategoricalIndex(list("XY"), categories=["X", "Y", "Z"])
        expect = DataFrame(df.loc[:, ["X", "Y"]], index=cdf.index, columns=exp_columns)
        assert_frame_equal(cdf.loc[:, ["X", "Y"]], expect)

        # non-unique
        df = DataFrame(np.random.randn(3, 3), index=list("ABA"), columns=list("XYX"))
        cdf = df.copy()
        cdf.index = CategoricalIndex(df.index)
        cdf.columns = CategoricalIndex(df.columns)

        exp_index = CategoricalIndex(list("AA"), categories=["A", "B"])
        expect = DataFrame(df.loc["A", :], columns=cdf.columns, index=exp_index)
        assert_frame_equal(cdf.loc["A", :], expect)

        exp_columns = CategoricalIndex(list("XX"), categories=["X", "Y"])
        expect = DataFrame(df.loc[:, "X"], index=cdf.index, columns=exp_columns)
        assert_frame_equal(cdf.loc[:, "X"], expect)

        expect = DataFrame(
            df.loc[["A", "B"], :],
            columns=cdf.columns,
            index=CategoricalIndex(list("AAB")),
        )
        assert_frame_equal(cdf.loc[["A", "B"], :], expect)

        expect = DataFrame(
            df.loc[:, ["X", "Y"]],
            index=cdf.index,
            columns=CategoricalIndex(list("XXY")),
        )
        assert_frame_equal(cdf.loc[:, ["X", "Y"]], expect)

    def test_read_only_source(self):
        # GH 10043
        rw_array = np.eye(10)
        rw_df = DataFrame(rw_array)

        ro_array = np.eye(10)
        ro_array.setflags(write=False)
        ro_df = DataFrame(ro_array)

        assert_frame_equal(rw_df.iloc[[1, 2, 3]], ro_df.iloc[[1, 2, 3]])
        assert_frame_equal(rw_df.iloc[[1]], ro_df.iloc[[1]])
        assert_series_equal(rw_df.iloc[1], ro_df.iloc[1])
        assert_frame_equal(rw_df.iloc[1:3], ro_df.iloc[1:3])

        assert_frame_equal(rw_df.loc[[1, 2, 3]], ro_df.loc[[1, 2, 3]])
        assert_frame_equal(rw_df.loc[[1]], ro_df.loc[[1]])
        assert_series_equal(rw_df.loc[1], ro_df.loc[1])
        assert_frame_equal(rw_df.loc[1:3], ro_df.loc[1:3])

    def test_reindexing(self):

        # reindexing
        # convert to a regular index
        result = self.df2.reindex(["a", "b", "e"])
        expected = DataFrame(
            {"A": [0, 1, 5, 2, 3, np.nan], "B": Series(list("aaabbe"))}
        ).set_index("B")
        assert_frame_equal(result, expected, check_index_type=True)

        result = self.df2.reindex(["a", "b"])
        expected = DataFrame(
            {"A": [0, 1, 5, 2, 3], "B": Series(list("aaabb"))}
        ).set_index("B")
        assert_frame_equal(result, expected, check_index_type=True)

        result = self.df2.reindex(["e"])
        expected = DataFrame({"A": [np.nan], "B": Series(["e"])}).set_index("B")
        assert_frame_equal(result, expected, check_index_type=True)

        result = self.df2.reindex(["d"])
        expected = DataFrame({"A": [np.nan], "B": Series(["d"])}).set_index("B")
        assert_frame_equal(result, expected, check_index_type=True)

        # since we are actually reindexing with a Categorical
        # then return a Categorical
        cats = list("cabe")

        result = self.df2.reindex(Categorical(["a", "d"], categories=cats))
        expected = DataFrame(
            {"A": [0, 1, 5, np.nan], "B": Series(list("aaad")).astype(CDT(cats))}
        ).set_index("B")
        assert_frame_equal(result, expected, check_index_type=True)

        result = self.df2.reindex(Categorical(["a"], categories=cats))
        expected = DataFrame(
            {"A": [0, 1, 5], "B": Series(list("aaa")).astype(CDT(cats))}
        ).set_index("B")
        assert_frame_equal(result, expected, check_index_type=True)

        result = self.df2.reindex(["a", "b", "e"])
        expected = DataFrame(
            {"A": [0, 1, 5, 2, 3, np.nan], "B": Series(list("aaabbe"))}
        ).set_index("B")
        assert_frame_equal(result, expected, check_index_type=True)

        result = self.df2.reindex(["a", "b"])
        expected = DataFrame(
            {"A": [0, 1, 5, 2, 3], "B": Series(list("aaabb"))}
        ).set_index("B")
        assert_frame_equal(result, expected, check_index_type=True)

        result = self.df2.reindex(["e"])
        expected = DataFrame({"A": [np.nan], "B": Series(["e"])}).set_index("B")
        assert_frame_equal(result, expected, check_index_type=True)

        # give back the type of categorical that we received
        result = self.df2.reindex(
            Categorical(["a", "d"], categories=cats, ordered=True)
        )
        expected = DataFrame(
            {
                "A": [0, 1, 5, np.nan],
                "B": Series(list("aaad")).astype(CDT(cats, ordered=True)),
            }
        ).set_index("B")
        assert_frame_equal(result, expected, check_index_type=True)

        result = self.df2.reindex(Categorical(["a", "d"], categories=["a", "d"]))
        expected = DataFrame(
            {"A": [0, 1, 5, np.nan], "B": Series(list("aaad")).astype(CDT(["a", "d"]))}
        ).set_index("B")
        assert_frame_equal(result, expected, check_index_type=True)

        # passed duplicate indexers are not allowed
        msg = "cannot reindex with a non-unique indexer"
        with pytest.raises(ValueError, match=msg):
            self.df2.reindex(["a", "a"])

        # args NotImplemented ATM
        msg = r"argument {} is not implemented for CategoricalIndex\.reindex"
        with pytest.raises(NotImplementedError, match=msg.format("method")):
            self.df2.reindex(["a"], method="ffill")
        with pytest.raises(NotImplementedError, match=msg.format("level")):
            self.df2.reindex(["a"], level=1)
        with pytest.raises(NotImplementedError, match=msg.format("limit")):
            self.df2.reindex(["a"], limit=2)

    def test_loc_slice(self):
        # slicing
        # not implemented ATM
        # GH9748

        msg = (
            "cannot do slice indexing on {klass} with these "
            r"indexers \[1\] of {kind}".format(
                klass=str(CategoricalIndex), kind=str(int)
            )
        )
        with pytest.raises(TypeError, match=msg):
            self.df.loc[1:5]

        # result = df.loc[1:5]
        # expected = df.iloc[[1,2,3,4]]
        # assert_frame_equal(result, expected)

    def test_loc_and_at_with_categorical_index(self):
        # GH 20629
        s = Series([1, 2, 3], index=pd.CategoricalIndex(["A", "B", "C"]))
        assert s.loc["A"] == 1
        assert s.at["A"] == 1
        df = DataFrame(
            [[1, 2], [3, 4], [5, 6]], index=pd.CategoricalIndex(["A", "B", "C"])
        )
        assert df.loc["B", 1] == 4
        assert df.at["B", 1] == 4

    def test_boolean_selection(self):

        df3 = self.df3
        df4 = self.df4

        result = df3[df3.index == "a"]
        expected = df3.iloc[[]]
        assert_frame_equal(result, expected)

        result = df4[df4.index == "a"]
        expected = df4.iloc[[]]
        assert_frame_equal(result, expected)

        result = df3[df3.index == 1]
        expected = df3.iloc[[0, 1, 3]]
        assert_frame_equal(result, expected)

        result = df4[df4.index == 1]
        expected = df4.iloc[[0, 1, 3]]
        assert_frame_equal(result, expected)

        # since we have an ordered categorical

        # CategoricalIndex([1, 1, 2, 1, 3, 2],
        #         categories=[3, 2, 1],
        #         ordered=True,
        #         name='B')
        result = df3[df3.index < 2]
        expected = df3.iloc[[4]]
        assert_frame_equal(result, expected)

        result = df3[df3.index > 1]
        expected = df3.iloc[[]]
        assert_frame_equal(result, expected)

        # unordered
        # cannot be compared

        # CategoricalIndex([1, 1, 2, 1, 3, 2],
        #         categories=[3, 2, 1],
        #         ordered=False,
        #         name='B')
        msg = "Unordered Categoricals can only compare equality or not"
        with pytest.raises(TypeError, match=msg):
            df4[df4.index < 2]
        with pytest.raises(TypeError, match=msg):
            df4[df4.index > 1]

    def test_indexing_with_category(self):

        # https://github.com/pandas-dev/pandas/issues/12564
        # consistent result if comparing as Dataframe

        cat = DataFrame({"A": ["foo", "bar", "baz"]})
        exp = DataFrame({"A": [True, False, False]})

        res = cat[["A"]] == "foo"
        tm.assert_frame_equal(res, exp)

        cat["A"] = cat["A"].astype("category")

        res = cat[["A"]] == "foo"
        tm.assert_frame_equal(res, exp)

    def test_map_with_dict_or_series(self):
        orig_values = ["a", "B", 1, "a"]
        new_values = ["one", 2, 3.0, "one"]
        cur_index = pd.CategoricalIndex(orig_values, name="XXX")
        expected = pd.CategoricalIndex(
            new_values, name="XXX", categories=[3.0, 2, "one"]
        )

        mapper = pd.Series(new_values[:-1], index=orig_values[:-1])
        output = cur_index.map(mapper)
        # Order of categories in output can be different
        tm.assert_index_equal(expected, output)

        mapper = {o: n for o, n in zip(orig_values[:-1], new_values[:-1])}
        output = cur_index.map(mapper)
        # Order of categories in output can be different
        tm.assert_index_equal(expected, output)
aaronreidsmith / pandas python

Version: 0.25.3

/ tests / indexing / test_categorical.py

Products

About

Resources

Contact Gemfury