import numpy as np
import pytest
import pandas as pd
from pandas import DatetimeIndex, Index, NaT, PeriodIndex, Series
from pandas.core.arrays import PeriodArray
from pandas.tests.test_base import Ops
import pandas.util.testing as tm
class TestPeriodIndexOps(Ops):
def setup_method(self, method):
super().setup_method(method)
mask = lambda x: (isinstance(x, DatetimeIndex) or isinstance(x, PeriodIndex))
self.is_valid_objs = [o for o in self.objs if mask(o)]
self.not_valid_objs = [o for o in self.objs if not mask(o)]
def test_ops_properties(self):
f = lambda x: isinstance(x, PeriodIndex)
self.check_ops_properties(PeriodArray._field_ops, f)
self.check_ops_properties(PeriodArray._object_ops, f)
self.check_ops_properties(PeriodArray._bool_ops, f)
def test_resolution(self):
for freq, expected in zip(
["A", "Q", "M", "D", "H", "T", "S", "L", "U"],
[
"day",
"day",
"day",
"day",
"hour",
"minute",
"second",
"millisecond",
"microsecond",
],
):
idx = pd.period_range(start="2013-04-01", periods=30, freq=freq)
assert idx.resolution == expected
def test_value_counts_unique(self):
# GH 7735
idx = pd.period_range("2011-01-01 09:00", freq="H", periods=10)
# create repeated values, 'n'th element is repeated by n+1 times
idx = PeriodIndex(np.repeat(idx._values, range(1, len(idx) + 1)), freq="H")
exp_idx = PeriodIndex(
[
"2011-01-01 18:00",
"2011-01-01 17:00",
"2011-01-01 16:00",
"2011-01-01 15:00",
"2011-01-01 14:00",
"2011-01-01 13:00",
"2011-01-01 12:00",
"2011-01-01 11:00",
"2011-01-01 10:00",
"2011-01-01 09:00",
],
freq="H",
)
expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64")
for obj in [idx, Series(idx)]:
tm.assert_series_equal(obj.value_counts(), expected)
expected = pd.period_range("2011-01-01 09:00", freq="H", periods=10)
tm.assert_index_equal(idx.unique(), expected)
idx = PeriodIndex(
[
"2013-01-01 09:00",
"2013-01-01 09:00",
"2013-01-01 09:00",
"2013-01-01 08:00",
"2013-01-01 08:00",
NaT,
],
freq="H",
)
exp_idx = PeriodIndex(["2013-01-01 09:00", "2013-01-01 08:00"], freq="H")
expected = Series([3, 2], index=exp_idx)
for obj in [idx, Series(idx)]:
tm.assert_series_equal(obj.value_counts(), expected)
exp_idx = PeriodIndex(["2013-01-01 09:00", "2013-01-01 08:00", NaT], freq="H")
expected = Series([3, 2, 1], index=exp_idx)
for obj in [idx, Series(idx)]:
tm.assert_series_equal(obj.value_counts(dropna=False), expected)
tm.assert_index_equal(idx.unique(), exp_idx)
def test_drop_duplicates_metadata(self):
# GH 10115
idx = pd.period_range("2011-01-01", "2011-01-31", freq="D", name="idx")
result = idx.drop_duplicates()
tm.assert_index_equal(idx, result)
assert idx.freq == result.freq
idx_dup = idx.append(idx) # freq will not be reset
result = idx_dup.drop_duplicates()
tm.assert_index_equal(idx, result)
assert idx.freq == result.freq
def test_drop_duplicates(self):
# to check Index/Series compat
base = pd.period_range("2011-01-01", "2011-01-31", freq="D", name="idx")
idx = base.append(base[:5])
res = idx.drop_duplicates()
tm.assert_index_equal(res, base)
res = Series(idx).drop_duplicates()
tm.assert_series_equal(res, Series(base))
res = idx.drop_duplicates(keep="last")
exp = base[5:].append(base[:5])
tm.assert_index_equal(res, exp)
res = Series(idx).drop_duplicates(keep="last")
tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36)))
res = idx.drop_duplicates(keep=False)
tm.assert_index_equal(res, base[5:])
res = Series(idx).drop_duplicates(keep=False)
tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31)))
def test_order_compat(self):
def _check_freq(index, expected_index):
if isinstance(index, PeriodIndex):
assert index.freq == expected_index.freq
pidx = PeriodIndex(["2011", "2012", "2013"], name="pidx", freq="A")
# for compatibility check
iidx = Index([2011, 2012, 2013], name="idx")
for idx in [pidx, iidx]:
ordered = idx.sort_values()
tm.assert_index_equal(ordered, idx)
_check_freq(ordered, idx)
ordered = idx.sort_values(ascending=False)
tm.assert_index_equal(ordered, idx[::-1])
_check_freq(ordered, idx[::-1])
ordered, indexer = idx.sort_values(return_indexer=True)
tm.assert_index_equal(ordered, idx)
tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False)
_check_freq(ordered, idx)
ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
tm.assert_index_equal(ordered, idx[::-1])
tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]), check_dtype=False)
_check_freq(ordered, idx[::-1])
pidx = PeriodIndex(
["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="A"
)
pexpected = PeriodIndex(
["2011", "2011", "2012", "2013", "2015"], name="pidx", freq="A"
)
# for compatibility check
iidx = Index([2011, 2013, 2015, 2012, 2011], name="idx")
iexpected = Index([2011, 2011, 2012, 2013, 2015], name="idx")
for idx, expected in [(pidx, pexpected), (iidx, iexpected)]:
ordered = idx.sort_values()
tm.assert_index_equal(ordered, expected)
_check_freq(ordered, idx)
ordered = idx.sort_values(ascending=False)
tm.assert_index_equal(ordered, expected[::-1])
_check_freq(ordered, idx)
ordered, indexer = idx.sort_values(return_indexer=True)
tm.assert_index_equal(ordered, expected)
exp = np.array([0, 4, 3, 1, 2])
tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
_check_freq(ordered, idx)
ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
tm.assert_index_equal(ordered, expected[::-1])
exp = np.array([2, 1, 3, 4, 0])
tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
_check_freq(ordered, idx)
pidx = PeriodIndex(["2011", "2013", "NaT", "2011"], name="pidx", freq="D")
result = pidx.sort_values()
expected = PeriodIndex(["NaT", "2011", "2011", "2013"], name="pidx", freq="D")
tm.assert_index_equal(result, expected)
assert result.freq == "D"
result = pidx.sort_values(ascending=False)
expected = PeriodIndex(["2013", "2011", "2011", "NaT"], name="pidx", freq="D")
tm.assert_index_equal(result, expected)
assert result.freq == "D"
def test_order(self):
for freq in ["D", "2D", "4D"]:
idx = PeriodIndex(
["2011-01-01", "2011-01-02", "2011-01-03"], freq=freq, name="idx"
)
ordered = idx.sort_values()
tm.assert_index_equal(ordered, idx)
assert ordered.freq == idx.freq
ordered = idx.sort_values(ascending=False)
expected = idx[::-1]
tm.assert_index_equal(ordered, expected)
assert ordered.freq == expected.freq
assert ordered.freq == freq
ordered, indexer = idx.sort_values(return_indexer=True)
tm.assert_index_equal(ordered, idx)
tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False)
assert ordered.freq == idx.freq
assert ordered.freq == freq
ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
expected = idx[::-1]
tm.assert_index_equal(ordered, expected)
tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]), check_dtype=False)
assert ordered.freq == expected.freq
assert ordered.freq == freq
idx1 = PeriodIndex(
["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"],
freq="D",
name="idx1",
)
exp1 = PeriodIndex(
["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"],
freq="D",
name="idx1",
)
idx2 = PeriodIndex(
["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"],
freq="D",
name="idx2",
)
exp2 = PeriodIndex(
["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"],
freq="D",
name="idx2",
)
idx3 = PeriodIndex(
[NaT, "2011-01-03", "2011-01-05", "2011-01-02", NaT], freq="D", name="idx3"
)
exp3 = PeriodIndex(
[NaT, NaT, "2011-01-02", "2011-01-03", "2011-01-05"], freq="D", name="idx3"
)
for idx, expected in [(idx1, exp1), (idx2, exp2), (idx3, exp3)]:
ordered = idx.sort_values()
tm.assert_index_equal(ordered, expected)
assert ordered.freq == "D"
ordered = idx.sort_values(ascending=False)
tm.assert_index_equal(ordered, expected[::-1])
assert ordered.freq == "D"
ordered, indexer = idx.sort_values(return_indexer=True)
tm.assert_index_equal(ordered, expected)
exp = np.array([0, 4, 3, 1, 2])
tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
assert ordered.freq == "D"
ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
tm.assert_index_equal(ordered, expected[::-1])
exp = np.array([2, 1, 3, 4, 0])
tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
assert ordered.freq == "D"
def test_shift(self):
# This is tested in test_arithmetic
pass
def test_nat(self):
assert pd.PeriodIndex._na_value is NaT
assert pd.PeriodIndex([], freq="M")._na_value is NaT
idx = pd.PeriodIndex(["2011-01-01", "2011-01-02"], freq="D")
assert idx._can_hold_na
tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
assert idx.hasnans is False
tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp))
idx = pd.PeriodIndex(["2011-01-01", "NaT"], freq="D")
assert idx._can_hold_na
tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
assert idx.hasnans is True
tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp))
@pytest.mark.parametrize("freq", ["D", "M"])
def test_equals(self, freq):
# GH#13107
idx = pd.PeriodIndex(["2011-01-01", "2011-01-02", "NaT"], freq=freq)
assert idx.equals(idx)
assert idx.equals(idx.copy())
assert idx.equals(idx.astype(object))
assert idx.astype(object).equals(idx)
assert idx.astype(object).equals(idx.astype(object))
assert not idx.equals(list(idx))
assert not idx.equals(pd.Series(idx))
idx2 = pd.PeriodIndex(["2011-01-01", "2011-01-02", "NaT"], freq="H")
assert not idx.equals(idx2)
assert not idx.equals(idx2.copy())
assert not idx.equals(idx2.astype(object))
assert not idx.astype(object).equals(idx2)
assert not idx.equals(list(idx2))
assert not idx.equals(pd.Series(idx2))
# same internal, different tz
idx3 = pd.PeriodIndex._simple_new(
idx._values._simple_new(idx._values.asi8, freq="H")
)
tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
assert not idx.equals(idx3)
assert not idx.equals(idx3.copy())
assert not idx.equals(idx3.astype(object))
assert not idx.astype(object).equals(idx3)
assert not idx.equals(list(idx3))
assert not idx.equals(pd.Series(idx3))
def test_freq_setter_deprecated(self):
# GH 20678
idx = pd.period_range("2018Q1", periods=4, freq="Q")
# no warning for getter
with tm.assert_produces_warning(None):
idx.freq
# warning for setter
with tm.assert_produces_warning(FutureWarning):
idx.freq = pd.offsets.Day()