Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

alkaline-ml / pandas   python

Repository URL to install this package:

Version: 1.1.1 

/ tests / indexes / period / test_ops.py

import numpy as np
import pytest

import pandas as pd
from pandas import Index, NaT, PeriodIndex, Series
import pandas._testing as tm


class TestPeriodIndexOps:
    @pytest.mark.parametrize(
        "freq,expected",
        [
            ("A", "year"),
            ("Q", "quarter"),
            ("M", "month"),
            ("D", "day"),
            ("H", "hour"),
            ("T", "minute"),
            ("S", "second"),
            ("L", "millisecond"),
            ("U", "microsecond"),
        ],
    )
    def test_resolution(self, freq, expected):
        idx = pd.period_range(start="2013-04-01", periods=30, freq=freq)
        assert idx.resolution == expected

    def test_value_counts_unique(self):
        # GH 7735
        idx = pd.period_range("2011-01-01 09:00", freq="H", periods=10)
        # create repeated values, 'n'th element is repeated by n+1 times
        idx = PeriodIndex(np.repeat(idx._values, range(1, len(idx) + 1)), freq="H")

        exp_idx = PeriodIndex(
            [
                "2011-01-01 18:00",
                "2011-01-01 17:00",
                "2011-01-01 16:00",
                "2011-01-01 15:00",
                "2011-01-01 14:00",
                "2011-01-01 13:00",
                "2011-01-01 12:00",
                "2011-01-01 11:00",
                "2011-01-01 10:00",
                "2011-01-01 09:00",
            ],
            freq="H",
        )
        expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64")

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        expected = pd.period_range("2011-01-01 09:00", freq="H", periods=10)
        tm.assert_index_equal(idx.unique(), expected)

        idx = PeriodIndex(
            [
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 08:00",
                "2013-01-01 08:00",
                NaT,
            ],
            freq="H",
        )

        exp_idx = PeriodIndex(["2013-01-01 09:00", "2013-01-01 08:00"], freq="H")
        expected = Series([3, 2], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        exp_idx = PeriodIndex(["2013-01-01 09:00", "2013-01-01 08:00", NaT], freq="H")
        expected = Series([3, 2, 1], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(dropna=False), expected)

        tm.assert_index_equal(idx.unique(), exp_idx)

    @pytest.mark.parametrize("freq", ["D", "3D", "H", "2H", "T", "2T", "S", "3S"])
    def test_drop_duplicates_metadata(self, freq):
        # GH 10115
        idx = pd.period_range("2011-01-01", periods=10, freq=freq, name="idx")
        result = idx.drop_duplicates()
        tm.assert_index_equal(idx, result)
        assert idx.freq == result.freq

        idx_dup = idx.append(idx)  # freq will not be reset
        result = idx_dup.drop_duplicates()
        tm.assert_index_equal(idx, result)
        assert idx.freq == result.freq

    @pytest.mark.parametrize("freq", ["D", "3D", "H", "2H", "T", "2T", "S", "3S"])
    @pytest.mark.parametrize(
        "keep, expected, index",
        [
            ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)),
            ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)),
            (
                False,
                np.concatenate(([True] * 5, [False] * 5, [True] * 5)),
                np.arange(5, 10),
            ),
        ],
    )
    def test_drop_duplicates(self, freq, keep, expected, index):
        # to check Index/Series compat
        idx = pd.period_range("2011-01-01", periods=10, freq=freq, name="idx")
        idx = idx.append(idx[:5])

        tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected)
        expected = idx[~expected]

        result = idx.drop_duplicates(keep=keep)
        tm.assert_index_equal(result, expected)

        result = Series(idx).drop_duplicates(keep=keep)
        tm.assert_series_equal(result, Series(expected, index=index))

    def test_order_compat(self):
        def _check_freq(index, expected_index):
            if isinstance(index, PeriodIndex):
                assert index.freq == expected_index.freq

        pidx = PeriodIndex(["2011", "2012", "2013"], name="pidx", freq="A")
        # for compatibility check
        iidx = Index([2011, 2012, 2013], name="idx")
        for idx in [pidx, iidx]:
            ordered = idx.sort_values()
            tm.assert_index_equal(ordered, idx)
            _check_freq(ordered, idx)

            ordered = idx.sort_values(ascending=False)
            tm.assert_index_equal(ordered, idx[::-1])
            _check_freq(ordered, idx[::-1])

            ordered, indexer = idx.sort_values(return_indexer=True)
            tm.assert_index_equal(ordered, idx)
            tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False)
            _check_freq(ordered, idx)

            ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
            tm.assert_index_equal(ordered, idx[::-1])
            tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]), check_dtype=False)
            _check_freq(ordered, idx[::-1])

        pidx = PeriodIndex(
            ["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="A"
        )
        pexpected = PeriodIndex(
            ["2011", "2011", "2012", "2013", "2015"], name="pidx", freq="A"
        )
        # for compatibility check
        iidx = Index([2011, 2013, 2015, 2012, 2011], name="idx")
        iexpected = Index([2011, 2011, 2012, 2013, 2015], name="idx")
        for idx, expected in [(pidx, pexpected), (iidx, iexpected)]:
            ordered = idx.sort_values()
            tm.assert_index_equal(ordered, expected)
            _check_freq(ordered, idx)

            ordered = idx.sort_values(ascending=False)
            tm.assert_index_equal(ordered, expected[::-1])
            _check_freq(ordered, idx)

            ordered, indexer = idx.sort_values(return_indexer=True)
            tm.assert_index_equal(ordered, expected)

            exp = np.array([0, 4, 3, 1, 2])
            tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
            _check_freq(ordered, idx)

            ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
            tm.assert_index_equal(ordered, expected[::-1])

            exp = np.array([2, 1, 3, 4, 0])
            tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
            _check_freq(ordered, idx)

        pidx = PeriodIndex(["2011", "2013", "NaT", "2011"], name="pidx", freq="D")

        result = pidx.sort_values()
        expected = PeriodIndex(["NaT", "2011", "2011", "2013"], name="pidx", freq="D")
        tm.assert_index_equal(result, expected)
        assert result.freq == "D"

        result = pidx.sort_values(ascending=False)
        expected = PeriodIndex(["2013", "2011", "2011", "NaT"], name="pidx", freq="D")
        tm.assert_index_equal(result, expected)
        assert result.freq == "D"

    def test_order(self):
        for freq in ["D", "2D", "4D"]:
            idx = PeriodIndex(
                ["2011-01-01", "2011-01-02", "2011-01-03"], freq=freq, name="idx"
            )

            ordered = idx.sort_values()
            tm.assert_index_equal(ordered, idx)
            assert ordered.freq == idx.freq

            ordered = idx.sort_values(ascending=False)
            expected = idx[::-1]
            tm.assert_index_equal(ordered, expected)
            assert ordered.freq == expected.freq
            assert ordered.freq == freq

            ordered, indexer = idx.sort_values(return_indexer=True)
            tm.assert_index_equal(ordered, idx)
            tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False)
            assert ordered.freq == idx.freq
            assert ordered.freq == freq

            ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
            expected = idx[::-1]
            tm.assert_index_equal(ordered, expected)
            tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]), check_dtype=False)
            assert ordered.freq == expected.freq
            assert ordered.freq == freq

        idx1 = PeriodIndex(
            ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"],
            freq="D",
            name="idx1",
        )
        exp1 = PeriodIndex(
            ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"],
            freq="D",
            name="idx1",
        )

        idx2 = PeriodIndex(
            ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"],
            freq="D",
            name="idx2",
        )
        exp2 = PeriodIndex(
            ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"],
            freq="D",
            name="idx2",
        )

        idx3 = PeriodIndex(
            [NaT, "2011-01-03", "2011-01-05", "2011-01-02", NaT], freq="D", name="idx3"
        )
        exp3 = PeriodIndex(
            [NaT, NaT, "2011-01-02", "2011-01-03", "2011-01-05"], freq="D", name="idx3"
        )

        for idx, expected in [(idx1, exp1), (idx2, exp2), (idx3, exp3)]:
            ordered = idx.sort_values()
            tm.assert_index_equal(ordered, expected)
            assert ordered.freq == "D"

            ordered = idx.sort_values(ascending=False)
            tm.assert_index_equal(ordered, expected[::-1])
            assert ordered.freq == "D"

            ordered, indexer = idx.sort_values(return_indexer=True)
            tm.assert_index_equal(ordered, expected)

            exp = np.array([0, 4, 3, 1, 2])
            tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
            assert ordered.freq == "D"

            ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
            tm.assert_index_equal(ordered, expected[::-1])

            exp = np.array([2, 1, 3, 4, 0])
            tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
            assert ordered.freq == "D"

    def test_nat(self):
        assert pd.PeriodIndex._na_value is NaT
        assert pd.PeriodIndex([], freq="M")._na_value is NaT

        idx = pd.PeriodIndex(["2011-01-01", "2011-01-02"], freq="D")
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
        assert idx.hasnans is False
        tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp))

        idx = pd.PeriodIndex(["2011-01-01", "NaT"], freq="D")
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
        assert idx.hasnans is True
        tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp))

    @pytest.mark.parametrize("freq", ["D", "M"])
    def test_equals(self, freq):
        # GH#13107
        idx = pd.PeriodIndex(["2011-01-01", "2011-01-02", "NaT"], freq=freq)
        assert idx.equals(idx)
        assert idx.equals(idx.copy())
        assert idx.equals(idx.astype(object))
        assert idx.astype(object).equals(idx)
        assert idx.astype(object).equals(idx.astype(object))
        assert not idx.equals(list(idx))
        assert not idx.equals(pd.Series(idx))

        idx2 = pd.PeriodIndex(["2011-01-01", "2011-01-02", "NaT"], freq="H")
        assert not idx.equals(idx2)
        assert not idx.equals(idx2.copy())
        assert not idx.equals(idx2.astype(object))
        assert not idx.astype(object).equals(idx2)
        assert not idx.equals(list(idx2))
        assert not idx.equals(pd.Series(idx2))

        # same internal, different tz
        idx3 = pd.PeriodIndex._simple_new(
            idx._values._simple_new(idx._values.asi8, freq="H")
        )
        tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
        assert not idx.equals(idx3)
        assert not idx.equals(idx3.copy())
        assert not idx.equals(idx3.astype(object))
        assert not idx.astype(object).equals(idx3)
        assert not idx.equals(list(idx3))
        assert not idx.equals(pd.Series(idx3))

    def test_freq_setter_deprecated(self):
        # GH 20678
        idx = pd.period_range("2018Q1", periods=4, freq="Q")

        # no warning for getter
        with tm.assert_produces_warning(None):
            idx.freq

        # warning for setter
        with pytest.raises(AttributeError, match="can't set attribute"):
            idx.freq = pd.offsets.Day()