from textwrap import dedent
import numpy as np
import pandas as pd
from pandas import DataFrame, Series, Timestamp
from pandas.core.indexes.datetimes import date_range
import pandas.util.testing as tm
from pandas.util.testing import assert_frame_equal, assert_series_equal
test_frame = DataFrame(
{"A": [1] * 20 + [2] * 12 + [3] * 8, "B": np.arange(40)},
index=date_range("1/1/2000", freq="s", periods=40),
)
def test_tab_complete_ipython6_warning(ip):
from IPython.core.completer import provisionalcompleter
code = dedent(
"""\
import pandas.util.testing as tm
s = tm.makeTimeSeries()
rs = s.resample("D")
"""
)
ip.run_code(code)
with tm.assert_produces_warning(None):
with provisionalcompleter("ignore"):
list(ip.Completer.completions("rs.", 1))
def test_deferred_with_groupby():
# GH 12486
# support deferred resample ops with groupby
data = [
["2010-01-01", "A", 2],
["2010-01-02", "A", 3],
["2010-01-05", "A", 8],
["2010-01-10", "A", 7],
["2010-01-13", "A", 3],
["2010-01-01", "B", 5],
["2010-01-03", "B", 2],
["2010-01-04", "B", 1],
["2010-01-11", "B", 7],
["2010-01-14", "B", 3],
]
df = DataFrame(data, columns=["date", "id", "score"])
df.date = pd.to_datetime(df.date)
def f(x):
return x.set_index("date").resample("D").asfreq()
expected = df.groupby("id").apply(f)
result = df.set_index("date").groupby("id").resample("D").asfreq()
assert_frame_equal(result, expected)
df = DataFrame(
{
"date": pd.date_range(start="2016-01-01", periods=4, freq="W"),
"group": [1, 1, 2, 2],
"val": [5, 6, 7, 8],
}
).set_index("date")
def f(x):
return x.resample("1D").ffill()
expected = df.groupby("group").apply(f)
result = df.groupby("group").resample("1D").ffill()
assert_frame_equal(result, expected)
def test_getitem():
g = test_frame.groupby("A")
expected = g.B.apply(lambda x: x.resample("2s").mean())
result = g.resample("2s").B.mean()
assert_series_equal(result, expected)
result = g.B.resample("2s").mean()
assert_series_equal(result, expected)
result = g.resample("2s").mean().B
assert_series_equal(result, expected)
def test_getitem_multiple():
# GH 13174
# multiple calls after selection causing an issue with aliasing
data = [{"id": 1, "buyer": "A"}, {"id": 2, "buyer": "B"}]
df = DataFrame(data, index=pd.date_range("2016-01-01", periods=2))
r = df.groupby("id").resample("1D")
result = r["buyer"].count()
expected = Series(
[1, 1],
index=pd.MultiIndex.from_tuples(
[(1, Timestamp("2016-01-01")), (2, Timestamp("2016-01-02"))],
names=["id", None],
),
name="buyer",
)
assert_series_equal(result, expected)
result = r["buyer"].count()
assert_series_equal(result, expected)
def test_groupby_resample_on_api_with_getitem():
# GH 17813
df = pd.DataFrame(
{"id": list("aabbb"), "date": pd.date_range("1-1-2016", periods=5), "data": 1}
)
exp = df.set_index("date").groupby("id").resample("2D")["data"].sum()
result = df.groupby("id").resample("2D", on="date")["data"].sum()
assert_series_equal(result, exp)
def test_nearest():
# GH 17496
# Resample nearest
index = pd.date_range("1/1/2000", periods=3, freq="T")
result = Series(range(3), index=index).resample("20s").nearest()
expected = Series(
[0, 0, 1, 1, 1, 2, 2],
index=pd.DatetimeIndex(
[
"2000-01-01 00:00:00",
"2000-01-01 00:00:20",
"2000-01-01 00:00:40",
"2000-01-01 00:01:00",
"2000-01-01 00:01:20",
"2000-01-01 00:01:40",
"2000-01-01 00:02:00",
],
dtype="datetime64[ns]",
freq="20S",
),
)
assert_series_equal(result, expected)
def test_methods():
g = test_frame.groupby("A")
r = g.resample("2s")
for f in ["first", "last", "median", "sem", "sum", "mean", "min", "max"]:
result = getattr(r, f)()
expected = g.apply(lambda x: getattr(x.resample("2s"), f)())
assert_frame_equal(result, expected)
for f in ["size"]:
result = getattr(r, f)()
expected = g.apply(lambda x: getattr(x.resample("2s"), f)())
assert_series_equal(result, expected)
for f in ["count"]:
result = getattr(r, f)()
expected = g.apply(lambda x: getattr(x.resample("2s"), f)())
assert_frame_equal(result, expected)
# series only
for f in ["nunique"]:
result = getattr(r.B, f)()
expected = g.B.apply(lambda x: getattr(x.resample("2s"), f)())
assert_series_equal(result, expected)
for f in ["nearest", "backfill", "ffill", "asfreq"]:
result = getattr(r, f)()
expected = g.apply(lambda x: getattr(x.resample("2s"), f)())
assert_frame_equal(result, expected)
result = r.ohlc()
expected = g.apply(lambda x: x.resample("2s").ohlc())
assert_frame_equal(result, expected)
for f in ["std", "var"]:
result = getattr(r, f)(ddof=1)
expected = g.apply(lambda x: getattr(x.resample("2s"), f)(ddof=1))
assert_frame_equal(result, expected)
def test_apply():
g = test_frame.groupby("A")
r = g.resample("2s")
# reduction
expected = g.resample("2s").sum()
def f(x):
return x.resample("2s").sum()
result = r.apply(f)
assert_frame_equal(result, expected)
def f(x):
return x.resample("2s").apply(lambda y: y.sum())
result = g.apply(f)
assert_frame_equal(result, expected)
def test_apply_with_mutated_index():
# GH 15169
index = pd.date_range("1-1-2015", "12-31-15", freq="D")
df = DataFrame(data={"col1": np.random.rand(len(index))}, index=index)
def f(x):
s = Series([1, 2], index=["a", "b"])
return s
expected = df.groupby(pd.Grouper(freq="M")).apply(f)
result = df.resample("M").apply(f)
assert_frame_equal(result, expected)
# A case for series
expected = df["col1"].groupby(pd.Grouper(freq="M")).apply(f)
result = df["col1"].resample("M").apply(f)
assert_series_equal(result, expected)
def test_resample_groupby_with_label():
# GH 13235
index = date_range("2000-01-01", freq="2D", periods=5)
df = DataFrame(index=index, data={"col0": [0, 0, 1, 1, 2], "col1": [1, 1, 1, 1, 1]})
result = df.groupby("col0").resample("1W", label="left").sum()
mi = [
np.array([0, 0, 1, 2]),
pd.to_datetime(
np.array(["1999-12-26", "2000-01-02", "2000-01-02", "2000-01-02"])
),
]
mindex = pd.MultiIndex.from_arrays(mi, names=["col0", None])
expected = DataFrame(
data={"col0": [0, 0, 2, 2], "col1": [1, 1, 2, 1]}, index=mindex
)
assert_frame_equal(result, expected)
def test_consistency_with_window():
# consistent return values with window
df = test_frame
expected = pd.Int64Index([1, 2, 3], name="A")
result = df.groupby("A").resample("2s").mean()
assert result.index.nlevels == 2
tm.assert_index_equal(result.index.levels[0], expected)
result = df.groupby("A").rolling(20).mean()
assert result.index.nlevels == 2
tm.assert_index_equal(result.index.levels[0], expected)
def test_median_duplicate_columns():
# GH 14233
df = DataFrame(
np.random.randn(20, 3),
columns=list("aaa"),
index=pd.date_range("2012-01-01", periods=20, freq="s"),
)
df2 = df.copy()
df2.columns = ["a", "b", "c"]
expected = df2.resample("5s").median()
result = df.resample("5s").median()
expected.columns = result.columns
assert_frame_equal(result, expected)