from datetime import datetime, time, timedelta
import numpy as np
import pytest
import pytz
import pandas as pd
from pandas import DatetimeIndex, Index, Timestamp, date_range, notna
import pandas.util.testing as tm
from pandas.tseries.offsets import BDay, CDay
START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)
class TestGetItem:
def test_ellipsis(self):
# GH#21282
idx = pd.date_range(
"2011-01-01", "2011-01-31", freq="D", tz="Asia/Tokyo", name="idx"
)
result = idx[...]
assert result.equals(idx)
assert result is not idx
def test_getitem(self):
idx1 = pd.date_range("2011-01-01", "2011-01-31", freq="D", name="idx")
idx2 = pd.date_range(
"2011-01-01", "2011-01-31", freq="D", tz="Asia/Tokyo", name="idx"
)
for idx in [idx1, idx2]:
result = idx[0]
assert result == Timestamp("2011-01-01", tz=idx.tz)
result = idx[0:5]
expected = pd.date_range(
"2011-01-01", "2011-01-05", freq="D", tz=idx.tz, name="idx"
)
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
result = idx[0:10:2]
expected = pd.date_range(
"2011-01-01", "2011-01-09", freq="2D", tz=idx.tz, name="idx"
)
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
result = idx[-20:-5:3]
expected = pd.date_range(
"2011-01-12", "2011-01-24", freq="3D", tz=idx.tz, name="idx"
)
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
result = idx[4::-1]
expected = DatetimeIndex(
["2011-01-05", "2011-01-04", "2011-01-03", "2011-01-02", "2011-01-01"],
freq="-1D",
tz=idx.tz,
name="idx",
)
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
def test_dti_business_getitem(self):
rng = pd.bdate_range(START, END)
smaller = rng[:5]
exp = DatetimeIndex(rng.view(np.ndarray)[:5])
tm.assert_index_equal(smaller, exp)
assert smaller.freq == rng.freq
sliced = rng[::5]
assert sliced.freq == BDay() * 5
fancy_indexed = rng[[4, 3, 2, 1, 0]]
assert len(fancy_indexed) == 5
assert isinstance(fancy_indexed, DatetimeIndex)
assert fancy_indexed.freq is None
# 32-bit vs. 64-bit platforms
assert rng[4] == rng[np.int_(4)]
def test_dti_business_getitem_matplotlib_hackaround(self):
rng = pd.bdate_range(START, END)
values = rng[:, None]
expected = rng.values[:, None]
tm.assert_numpy_array_equal(values, expected)
def test_dti_custom_getitem(self):
rng = pd.bdate_range(START, END, freq="C")
smaller = rng[:5]
exp = DatetimeIndex(rng.view(np.ndarray)[:5])
tm.assert_index_equal(smaller, exp)
assert smaller.freq == rng.freq
sliced = rng[::5]
assert sliced.freq == CDay() * 5
fancy_indexed = rng[[4, 3, 2, 1, 0]]
assert len(fancy_indexed) == 5
assert isinstance(fancy_indexed, DatetimeIndex)
assert fancy_indexed.freq is None
# 32-bit vs. 64-bit platforms
assert rng[4] == rng[np.int_(4)]
def test_dti_custom_getitem_matplotlib_hackaround(self):
rng = pd.bdate_range(START, END, freq="C")
values = rng[:, None]
expected = rng.values[:, None]
tm.assert_numpy_array_equal(values, expected)
class TestWhere:
def test_where_other(self):
# other is ndarray or Index
i = pd.date_range("20130101", periods=3, tz="US/Eastern")
for arr in [np.nan, pd.NaT]:
result = i.where(notna(i), other=np.nan)
expected = i
tm.assert_index_equal(result, expected)
i2 = i.copy()
i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist())
result = i.where(notna(i2), i2)
tm.assert_index_equal(result, i2)
i2 = i.copy()
i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist())
result = i.where(notna(i2), i2.values)
tm.assert_index_equal(result, i2)
def test_where_tz(self):
i = pd.date_range("20130101", periods=3, tz="US/Eastern")
result = i.where(notna(i))
expected = i
tm.assert_index_equal(result, expected)
i2 = i.copy()
i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist())
result = i.where(notna(i2))
expected = i2
tm.assert_index_equal(result, expected)
class TestTake:
def test_take(self):
# GH#10295
idx1 = pd.date_range("2011-01-01", "2011-01-31", freq="D", name="idx")
idx2 = pd.date_range(
"2011-01-01", "2011-01-31", freq="D", tz="Asia/Tokyo", name="idx"
)
for idx in [idx1, idx2]:
result = idx.take([0])
assert result == Timestamp("2011-01-01", tz=idx.tz)
result = idx.take([0, 1, 2])
expected = pd.date_range(
"2011-01-01", "2011-01-03", freq="D", tz=idx.tz, name="idx"
)
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
result = idx.take([0, 2, 4])
expected = pd.date_range(
"2011-01-01", "2011-01-05", freq="2D", tz=idx.tz, name="idx"
)
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
result = idx.take([7, 4, 1])
expected = pd.date_range(
"2011-01-08", "2011-01-02", freq="-3D", tz=idx.tz, name="idx"
)
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
result = idx.take([3, 2, 5])
expected = DatetimeIndex(
["2011-01-04", "2011-01-03", "2011-01-06"],
freq=None,
tz=idx.tz,
name="idx",
)
tm.assert_index_equal(result, expected)
assert result.freq is None
result = idx.take([-3, 2, 5])
expected = DatetimeIndex(
["2011-01-29", "2011-01-03", "2011-01-06"],
freq=None,
tz=idx.tz,
name="idx",
)
tm.assert_index_equal(result, expected)
assert result.freq is None
def test_take_invalid_kwargs(self):
idx = pd.date_range("2011-01-01", "2011-01-31", freq="D", name="idx")
indices = [1, 6, 5, 9, 10, 13, 15, 3]
msg = r"take\(\) got an unexpected keyword argument 'foo'"
with pytest.raises(TypeError, match=msg):
idx.take(indices, foo=2)
msg = "the 'out' parameter is not supported"
with pytest.raises(ValueError, match=msg):
idx.take(indices, out=indices)
msg = "the 'mode' parameter is not supported"
with pytest.raises(ValueError, match=msg):
idx.take(indices, mode="clip")
# TODO: This method came from test_datetime; de-dup with version above
@pytest.mark.parametrize("tz", [None, "US/Eastern", "Asia/Tokyo"])
def test_take2(self, tz):
dates = [
datetime(2010, 1, 1, 14),
datetime(2010, 1, 1, 15),
datetime(2010, 1, 1, 17),
datetime(2010, 1, 1, 21),
]
idx = pd.date_range(
start="2010-01-01 09:00",
end="2010-02-01 09:00",
freq="H",
tz=tz,
name="idx",
)
expected = DatetimeIndex(dates, freq=None, name="idx", tz=tz)
taken1 = idx.take([5, 6, 8, 12])
taken2 = idx[[5, 6, 8, 12]]
for taken in [taken1, taken2]:
tm.assert_index_equal(taken, expected)
assert isinstance(taken, DatetimeIndex)
assert taken.freq is None
assert taken.tz == expected.tz
assert taken.name == expected.name
def test_take_fill_value(self):
# GH#12631
idx = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx")
result = idx.take(np.array([1, 0, -1]))
expected = pd.DatetimeIndex(
["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx"
)
tm.assert_index_equal(result, expected)
# fill_value
result = idx.take(np.array([1, 0, -1]), fill_value=True)
expected = pd.DatetimeIndex(["2011-02-01", "2011-01-01", "NaT"], name="xxx")
tm.assert_index_equal(result, expected)
# allow_fill=False
result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
expected = pd.DatetimeIndex(
["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx"
)
tm.assert_index_equal(result, expected)
msg = (
"When allow_fill=True and fill_value is not None, "
"all indices must be >= -1"
)
with pytest.raises(ValueError, match=msg):
idx.take(np.array([1, 0, -2]), fill_value=True)
with pytest.raises(ValueError, match=msg):
idx.take(np.array([1, 0, -5]), fill_value=True)
with pytest.raises(IndexError):
idx.take(np.array([1, -5]))
def test_take_fill_value_with_timezone(self):
idx = pd.DatetimeIndex(
["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx", tz="US/Eastern"
)
result = idx.take(np.array([1, 0, -1]))
expected = pd.DatetimeIndex(
["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx", tz="US/Eastern"
)
tm.assert_index_equal(result, expected)
# fill_value
result = idx.take(np.array([1, 0, -1]), fill_value=True)
expected = pd.DatetimeIndex(
["2011-02-01", "2011-01-01", "NaT"], name="xxx", tz="US/Eastern"
)
tm.assert_index_equal(result, expected)
# allow_fill=False
result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
expected = pd.DatetimeIndex(
["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx", tz="US/Eastern"
)
tm.assert_index_equal(result, expected)
msg = (
"When allow_fill=True and fill_value is not None, "
"all indices must be >= -1"
)
with pytest.raises(ValueError, match=msg):
idx.take(np.array([1, 0, -2]), fill_value=True)
with pytest.raises(ValueError, match=msg):
idx.take(np.array([1, 0, -5]), fill_value=True)
with pytest.raises(IndexError):
idx.take(np.array([1, -5]))
class TestDatetimeIndex:
@pytest.mark.parametrize("null", [None, np.nan, pd.NaT])
@pytest.mark.parametrize("tz", [None, "UTC", "US/Eastern"])
def test_insert_nat(self, tz, null):
# GH#16537, GH#18295 (test missing)
idx = pd.DatetimeIndex(["2017-01-01"], tz=tz)
expected = pd.DatetimeIndex(["NaT", "2017-01-01"], tz=tz)
res = idx.insert(0, null)
tm.assert_index_equal(res, expected)
def test_insert(self):
idx = DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"], name="idx")
result = idx.insert(2, datetime(2000, 1, 5))
exp = DatetimeIndex(
["2000-01-04", "2000-01-01", "2000-01-05", "2000-01-02"], name="idx"
)
tm.assert_index_equal(result, exp)
# insertion of non-datetime should coerce to object index
result = idx.insert(1, "inserted")
expected = Index(
[
datetime(2000, 1, 4),
"inserted",
datetime(2000, 1, 1),
datetime(2000, 1, 2),
],
name="idx",
)
assert not isinstance(result, DatetimeIndex)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
idx = date_range("1/1/2000", periods=3, freq="M", name="idx")
# preserve freq
expected_0 = DatetimeIndex(
["1999-12-31", "2000-01-31", "2000-02-29", "2000-03-31"],
name="idx",
freq="M",
)
expected_3 = DatetimeIndex(
["2000-01-31", "2000-02-29", "2000-03-31", "2000-04-30"],
name="idx",
freq="M",
)
# reset freq to None
expected_1_nofreq = DatetimeIndex(
["2000-01-31", "2000-01-31", "2000-02-29", "2000-03-31"],
name="idx",
freq=None,
)
expected_3_nofreq = DatetimeIndex(
["2000-01-31", "2000-02-29", "2000-03-31", "2000-01-02"],
name="idx",
freq=None,
)
cases = [
(0, datetime(1999, 12, 31), expected_0),
(-3, datetime(1999, 12, 31), expected_0),
(3, datetime(2000, 4, 30), expected_3),
(1, datetime(2000, 1, 31), expected_1_nofreq),
(3, datetime(2000, 1, 2), expected_3_nofreq),
]
for n, d, expected in cases:
result = idx.insert(n, d)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq == expected.freq
# reset freq to None
result = idx.insert(3, datetime(2000, 1, 2))
expected = DatetimeIndex(
["2000-01-31", "2000-02-29", "2000-03-31", "2000-01-02"],
name="idx",
freq=None,
)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq is None
# see gh-7299
idx = date_range("1/1/2000", periods=3, freq="D", tz="Asia/Tokyo", name="idx")
with pytest.raises(ValueError):
idx.insert(3, pd.Timestamp("2000-01-04"))
with pytest.raises(ValueError):
idx.insert(3, datetime(2000, 1, 4))
with pytest.raises(ValueError):
idx.insert(3, pd.Timestamp("2000-01-04", tz="US/Eastern"))
with pytest.raises(ValueError):
idx.insert(3, datetime(2000, 1, 4, tzinfo=pytz.timezone("US/Eastern")))
for tz in ["US/Pacific", "Asia/Singapore"]:
idx = date_range("1/1/2000 09:00", periods=6, freq="H", tz=tz, name="idx")
# preserve freq
expected = date_range(
"1/1/2000 09:00", periods=7, freq="H", tz=tz, name="idx"
)
for d in [
pd.Timestamp("2000-01-01 15:00", tz=tz),
pytz.timezone(tz).localize(datetime(2000, 1, 1, 15)),
]:
result = idx.insert(6, d)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq == expected.freq
assert result.tz == expected.tz
expected = DatetimeIndex(
[
"2000-01-01 09:00",
"2000-01-01 10:00",
"2000-01-01 11:00",
"2000-01-01 12:00",
"2000-01-01 13:00",
"2000-01-01 14:00",
"2000-01-01 10:00",
],
name="idx",
tz=tz,
freq=None,
)
# reset freq to None
for d in [
pd.Timestamp("2000-01-01 10:00", tz=tz),
pytz.timezone(tz).localize(datetime(2000, 1, 1, 10)),
]:
result = idx.insert(6, d)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.tz == expected.tz
assert result.freq is None
def test_delete(self):
idx = date_range(start="2000-01-01", periods=5, freq="M", name="idx")
# prserve freq
expected_0 = date_range(start="2000-02-01", periods=4, freq="M", name="idx")
expected_4 = date_range(start="2000-01-01", periods=4, freq="M", name="idx")
# reset freq to None
expected_1 = DatetimeIndex(
["2000-01-31", "2000-03-31", "2000-04-30", "2000-05-31"],
freq=None,
name="idx",
)
cases = {
0: expected_0,
-5: expected_0,
-1: expected_4,
4: expected_4,
1: expected_1,
}
for n, expected in cases.items():
result = idx.delete(n)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq == expected.freq
with pytest.raises((IndexError, ValueError)):
# either depending on numpy version
idx.delete(5)
for tz in [None, "Asia/Tokyo", "US/Pacific"]:
idx = date_range(
start="2000-01-01 09:00", periods=10, freq="H", name="idx", tz=tz
)
expected = date_range(
start="2000-01-01 10:00", periods=9, freq="H", name="idx", tz=tz
)
result = idx.delete(0)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freqstr == "H"
assert result.tz == expected.tz
expected = date_range(
start="2000-01-01 09:00", periods=9, freq="H", name="idx", tz=tz
)
result = idx.delete(-1)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freqstr == "H"
assert result.tz == expected.tz
def test_delete_slice(self):
idx = date_range(start="2000-01-01", periods=10, freq="D", name="idx")
# prserve freq
expected_0_2 = date_range(start="2000-01-04", periods=7, freq="D", name="idx")
expected_7_9 = date_range(start="2000-01-01", periods=7, freq="D", name="idx")
# reset freq to None
expected_3_5 = DatetimeIndex(
[
"2000-01-01",
"2000-01-02",
"2000-01-03",
"2000-01-07",
"2000-01-08",
"2000-01-09",
"2000-01-10",
],
freq=None,
name="idx",
)
cases = {
(0, 1, 2): expected_0_2,
(7, 8, 9): expected_7_9,
(3, 4, 5): expected_3_5,
}
for n, expected in cases.items():
result = idx.delete(n)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq == expected.freq
result = idx.delete(slice(n[0], n[-1] + 1))
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq == expected.freq
for tz in [None, "Asia/Tokyo", "US/Pacific"]:
ts = pd.Series(
1,
index=pd.date_range(
"2000-01-01 09:00", periods=10, freq="H", name="idx", tz=tz
),
)
# preserve freq
result = ts.drop(ts.index[:5]).index
expected = pd.date_range(
"2000-01-01 14:00", periods=5, freq="H", name="idx", tz=tz
)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq == expected.freq
assert result.tz == expected.tz
# reset freq to None
result = ts.drop(ts.index[[1, 3, 5, 7, 9]]).index
expected = DatetimeIndex(
[
"2000-01-01 09:00",
"2000-01-01 11:00",
"2000-01-01 13:00",
"2000-01-01 15:00",
"2000-01-01 17:00",
],
freq=None,
name="idx",
tz=tz,
)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq == expected.freq
assert result.tz == expected.tz
def test_get_loc(self):
idx = pd.date_range("2000-01-01", periods=3)
for method in [None, "pad", "backfill", "nearest"]:
assert idx.get_loc(idx[1], method) == 1
assert idx.get_loc(idx[1].to_pydatetime(), method) == 1
assert idx.get_loc(str(idx[1]), method) == 1
if method is not None:
assert (
idx.get_loc(idx[1], method, tolerance=pd.Timedelta("0 days")) == 1
)
assert idx.get_loc("2000-01-01", method="nearest") == 0
assert idx.get_loc("2000-01-01T12", method="nearest") == 1
assert idx.get_loc("2000-01-01T12", method="nearest", tolerance="1 day") == 1
assert (
idx.get_loc("2000-01-01T12", method="nearest", tolerance=pd.Timedelta("1D"))
== 1
)
assert (
idx.get_loc(
"2000-01-01T12", method="nearest", tolerance=np.timedelta64(1, "D")
)
== 1
)
assert (
idx.get_loc("2000-01-01T12", method="nearest", tolerance=timedelta(1)) == 1
)
with pytest.raises(ValueError, match="unit abbreviation w/o a number"):
idx.get_loc("2000-01-01T12", method="nearest", tolerance="foo")
with pytest.raises(KeyError, match="'2000-01-01T03'"):
idx.get_loc("2000-01-01T03", method="nearest", tolerance="2 hours")
with pytest.raises(
ValueError, match="tolerance size must match target index size"
):
idx.get_loc(
"2000-01-01",
method="nearest",
tolerance=[
pd.Timedelta("1day").to_timedelta64(),
pd.Timedelta("1day").to_timedelta64(),
],
)
assert idx.get_loc("2000", method="nearest") == slice(0, 3)
assert idx.get_loc("2000-01", method="nearest") == slice(0, 3)
assert idx.get_loc("1999", method="nearest") == 0
assert idx.get_loc("2001", method="nearest") == 2
with pytest.raises(KeyError, match="'1999'"):
idx.get_loc("1999", method="pad")
with pytest.raises(KeyError, match="'2001'"):
idx.get_loc("2001", method="backfill")
with pytest.raises(KeyError, match="'foobar'"):
idx.get_loc("foobar")
with pytest.raises(TypeError):
idx.get_loc(slice(2))
idx = pd.to_datetime(["2000-01-01", "2000-01-04"])
assert idx.get_loc("2000-01-02", method="nearest") == 0
assert idx.get_loc("2000-01-03", method="nearest") == 1
assert idx.get_loc("2000-01", method="nearest") == slice(0, 2)
# time indexing
idx = pd.date_range("2000-01-01", periods=24, freq="H")
tm.assert_numpy_array_equal(
idx.get_loc(time(12)), np.array([12]), check_dtype=False
)
tm.assert_numpy_array_equal(
idx.get_loc(time(12, 30)), np.array([]), check_dtype=False
)
with pytest.raises(NotImplementedError):
idx.get_loc(time(12, 30), method="pad")
def test_get_indexer(self):
idx = pd.date_range("2000-01-01", periods=3)
exp = np.array([0, 1, 2], dtype=np.intp)
tm.assert_numpy_array_equal(idx.get_indexer(idx), exp)
target = idx[0] + pd.to_timedelta(["-1 hour", "12 hours", "1 day 1 hour"])
tm.assert_numpy_array_equal(
idx.get_indexer(target, "pad"), np.array([-1, 0, 1], dtype=np.intp)
)
tm.assert_numpy_array_equal(
idx.get_indexer(target, "backfill"), np.array([0, 1, 2], dtype=np.intp)
)
tm.assert_numpy_array_equal(
idx.get_indexer(target, "nearest"), np.array([0, 1, 1], dtype=np.intp)
)
tm.assert_numpy_array_equal(
idx.get_indexer(target, "nearest", tolerance=pd.Timedelta("1 hour")),
np.array([0, -1, 1], dtype=np.intp),
)
tol_raw = [
pd.Timedelta("1 hour"),
pd.Timedelta("1 hour"),
pd.Timedelta("1 hour").to_timedelta64(),
]
tm.assert_numpy_array_equal(
idx.get_indexer(
target, "nearest", tolerance=[np.timedelta64(x) for x in tol_raw]
),
np.array([0, -1, 1], dtype=np.intp),
)
tol_bad = [
pd.Timedelta("2 hour").to_timedelta64(),
pd.Timedelta("1 hour").to_timedelta64(),
"foo",
]
with pytest.raises(ValueError, match="abbreviation w/o a number"):
idx.get_indexer(target, "nearest", tolerance=tol_bad)
with pytest.raises(ValueError):
idx.get_indexer(idx[[0]], method="nearest", tolerance="foo")
def test_reasonable_key_error(self):
# GH#1062
index = DatetimeIndex(["1/3/2000"])
with pytest.raises(KeyError, match="2000"):
index.get_loc("1/1/2000")
@pytest.mark.parametrize("key", [pd.Timedelta(0), pd.Timedelta(1), timedelta(0)])
def test_timedelta_invalid_key(self, key):
# GH#20464
dti = pd.date_range("1970-01-01", periods=10)
with pytest.raises(TypeError):
dti.get_loc(key)
def test_get_loc_nat(self):
# GH#20464
index = DatetimeIndex(["1/3/2000", "NaT"])
assert index.get_loc(pd.NaT) == 1