tests/resample/test_datetime_index.py · aaronreidsmith/pandas

aaronreidsmith / pandas python

Repository URL to install this package:
Version: 0.25.3

/ tests / resample / test_datetime_index.py

from datetime import datetime, timedelta
from functools import partial
from io import StringIO

import numpy as np
import pytest
import pytz

from pandas.errors import UnsupportedFunctionCall

import pandas as pd
from pandas import DataFrame, Series, Timedelta, Timestamp, isna, notna
from pandas.core.groupby.grouper import Grouper
from pandas.core.indexes.datetimes import date_range
from pandas.core.indexes.period import Period, period_range
from pandas.core.resample import DatetimeIndex, _get_timestamp_range_edges
import pandas.util.testing as tm
from pandas.util.testing import (
    assert_almost_equal,
    assert_frame_equal,
    assert_series_equal,
)

import pandas.tseries.offsets as offsets
from pandas.tseries.offsets import BDay, Minute


@pytest.fixture()
def _index_factory():
    return date_range


@pytest.fixture
def _index_freq():
    return "Min"


@pytest.fixture
def _static_values(index):
    return np.random.rand(len(index))


def test_custom_grouper(index):

    dti = index
    s = Series(np.array([1] * len(dti)), index=dti, dtype="int64")

    b = Grouper(freq=Minute(5))
    g = s.groupby(b)

    # check all cython functions work
    funcs = ["add", "mean", "prod", "ohlc", "min", "max", "var"]
    for f in funcs:
        g._cython_agg_general(f)

    b = Grouper(freq=Minute(5), closed="right", label="right")
    g = s.groupby(b)
    # check all cython functions work
    funcs = ["add", "mean", "prod", "ohlc", "min", "max", "var"]
    for f in funcs:
        g._cython_agg_general(f)

    assert g.ngroups == 2593
    assert notna(g.mean()).all()

    # construct expected val
    arr = [1] + [5] * 2592
    idx = dti[0:-1:5]
    idx = idx.append(dti[-1:])
    expect = Series(arr, index=idx)

    # GH2763 - return in put dtype if we can
    result = g.agg(np.sum)
    assert_series_equal(result, expect)

    df = DataFrame(np.random.rand(len(dti), 10), index=dti, dtype="float64")
    r = df.groupby(b).agg(np.sum)

    assert len(r.columns) == 10
    assert len(r.index) == 2593


@pytest.mark.parametrize(
    "_index_start,_index_end,_index_name",
    [("1/1/2000 00:00:00", "1/1/2000 00:13:00", "index")],
)
@pytest.mark.parametrize(
    "closed, expected",
    [
        (
            "right",
            lambda s: Series(
                [s[0], s[1:6].mean(), s[6:11].mean(), s[11:].mean()],
                index=date_range("1/1/2000", periods=4, freq="5min", name="index"),
            ),
        ),
        (
            "left",
            lambda s: Series(
                [s[:5].mean(), s[5:10].mean(), s[10:].mean()],
                index=date_range(
                    "1/1/2000 00:05", periods=3, freq="5min", name="index"
                ),
            ),
        ),
    ],
)
def test_resample_basic(series, closed, expected):
    s = series
    expected = expected(s)
    result = s.resample("5min", closed=closed, label="right").mean()
    assert_series_equal(result, expected)


def test_resample_integerarray():
    # GH 25580, resample on IntegerArray
    ts = pd.Series(
        range(9), index=pd.date_range("1/1/2000", periods=9, freq="T"), dtype="Int64"
    )
    result = ts.resample("3T").sum()
    expected = Series(
        [3, 12, 21],
        index=pd.date_range("1/1/2000", periods=3, freq="3T"),
        dtype="Int64",
    )
    assert_series_equal(result, expected)

    result = ts.resample("3T").mean()
    expected = Series(
        [1, 4, 7], index=pd.date_range("1/1/2000", periods=3, freq="3T"), dtype="Int64"
    )
    assert_series_equal(result, expected)


def test_resample_basic_grouper(series):
    s = series
    result = s.resample("5Min").last()
    grouper = Grouper(freq=Minute(5), closed="left", label="left")
    expected = s.groupby(grouper).agg(lambda x: x[-1])
    assert_series_equal(result, expected)


@pytest.mark.parametrize(
    "_index_start,_index_end,_index_name",
    [("1/1/2000 00:00:00", "1/1/2000 00:13:00", "index")],
)
@pytest.mark.parametrize(
    "keyword,value",
    [("label", "righttt"), ("closed", "righttt"), ("convention", "starttt")],
)
def test_resample_string_kwargs(series, keyword, value):
    # see gh-19303
    # Check that wrong keyword argument strings raise an error
    msg = "Unsupported value {value} for `{keyword}`".format(
        value=value, keyword=keyword
    )
    with pytest.raises(ValueError, match=msg):
        series.resample("5min", **({keyword: value}))


@pytest.mark.parametrize(
    "_index_start,_index_end,_index_name",
    [("1/1/2000 00:00:00", "1/1/2000 00:13:00", "index")],
)
def test_resample_how(series, downsample_method):
    if downsample_method == "ohlc":
        pytest.skip("covered by test_resample_how_ohlc")

    s = series
    grouplist = np.ones_like(s)
    grouplist[0] = 0
    grouplist[1:6] = 1
    grouplist[6:11] = 2
    grouplist[11:] = 3
    expected = s.groupby(grouplist).agg(downsample_method)
    expected.index = date_range("1/1/2000", periods=4, freq="5min", name="index")

    result = getattr(
        s.resample("5min", closed="right", label="right"), downsample_method
    )()
    assert_series_equal(result, expected)


@pytest.mark.parametrize(
    "_index_start,_index_end,_index_name",
    [("1/1/2000 00:00:00", "1/1/2000 00:13:00", "index")],
)
def test_resample_how_ohlc(series):
    s = series
    grouplist = np.ones_like(s)
    grouplist[0] = 0
    grouplist[1:6] = 1
    grouplist[6:11] = 2
    grouplist[11:] = 3

    def _ohlc(group):
        if isna(group).all():
            return np.repeat(np.nan, 4)
        return [group[0], group.max(), group.min(), group[-1]]

    expected = DataFrame(
        s.groupby(grouplist).agg(_ohlc).values.tolist(),
        index=date_range("1/1/2000", periods=4, freq="5min", name="index"),
        columns=["open", "high", "low", "close"],
    )

    result = s.resample("5min", closed="right", label="right").ohlc()
    assert_frame_equal(result, expected)


@pytest.mark.parametrize("func", ["min", "max", "sum", "prod", "mean", "var", "std"])
def test_numpy_compat(func):
    # see gh-12811
    s = Series([1, 2, 3, 4, 5], index=date_range("20130101", periods=5, freq="s"))
    r = s.resample("2s")

    msg = "numpy operations are not valid with resample"

    with pytest.raises(UnsupportedFunctionCall, match=msg):
        getattr(r, func)(func, 1, 2, 3)
    with pytest.raises(UnsupportedFunctionCall, match=msg):
        getattr(r, func)(axis=1)


def test_resample_how_callables():
    # GH#7929
    data = np.arange(5, dtype=np.int64)
    ind = date_range(start="2014-01-01", periods=len(data), freq="d")
    df = DataFrame({"A": data, "B": data}, index=ind)

    def fn(x, a=1):
        return str(type(x))

    class FnClass:
        def __call__(self, x):
            return str(type(x))

    df_standard = df.resample("M").apply(fn)
    df_lambda = df.resample("M").apply(lambda x: str(type(x)))
    df_partial = df.resample("M").apply(partial(fn))
    df_partial2 = df.resample("M").apply(partial(fn, a=2))
    df_class = df.resample("M").apply(FnClass())

    assert_frame_equal(df_standard, df_lambda)
    assert_frame_equal(df_standard, df_partial)
    assert_frame_equal(df_standard, df_partial2)
    assert_frame_equal(df_standard, df_class)


def test_resample_rounding():
    # GH 8371
    # odd results when rounding is needed

    data = """date,time,value
11-08-2014,00:00:01.093,1
11-08-2014,00:00:02.159,1
11-08-2014,00:00:02.667,1
11-08-2014,00:00:03.175,1
11-08-2014,00:00:07.058,1
11-08-2014,00:00:07.362,1
11-08-2014,00:00:08.324,1
11-08-2014,00:00:08.830,1
11-08-2014,00:00:08.982,1
11-08-2014,00:00:09.815,1
11-08-2014,00:00:10.540,1
11-08-2014,00:00:11.061,1
11-08-2014,00:00:11.617,1
11-08-2014,00:00:13.607,1
11-08-2014,00:00:14.535,1
11-08-2014,00:00:15.525,1
11-08-2014,00:00:17.960,1
11-08-2014,00:00:20.674,1
11-08-2014,00:00:21.191,1"""

    df = pd.read_csv(
        StringIO(data),
        parse_dates={"timestamp": ["date", "time"]},
        index_col="timestamp",
    )
    df.index.name = None
    result = df.resample("6s").sum()
    expected = DataFrame(
        {"value": [4, 9, 4, 2]}, index=date_range("2014-11-08", freq="6s", periods=4)
    )
    assert_frame_equal(result, expected)

    result = df.resample("7s").sum()
    expected = DataFrame(
        {"value": [4, 10, 4, 1]}, index=date_range("2014-11-08", freq="7s", periods=4)
    )
    assert_frame_equal(result, expected)

    result = df.resample("11s").sum()
    expected = DataFrame(
        {"value": [11, 8]}, index=date_range("2014-11-08", freq="11s", periods=2)
    )
    assert_frame_equal(result, expected)

    result = df.resample("13s").sum()
    expected = DataFrame(
        {"value": [13, 6]}, index=date_range("2014-11-08", freq="13s", periods=2)
    )
    assert_frame_equal(result, expected)

    result = df.resample("17s").sum()
    expected = DataFrame(
        {"value": [16, 3]}, index=date_range("2014-11-08", freq="17s", periods=2)
    )
    assert_frame_equal(result, expected)


def test_resample_basic_from_daily():
    # from daily
    dti = date_range(
        start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="D", name="index"
    )

    s = Series(np.random.rand(len(dti)), dti)

    # to weekly
    result = s.resample("w-sun").last()

    assert len(result) == 3
    assert (result.index.dayofweek == [6, 6, 6]).all()
    assert result.iloc[0] == s["1/2/2005"]
    assert result.iloc[1] == s["1/9/2005"]
    assert result.iloc[2] == s.iloc[-1]

    result = s.resample("W-MON").last()
    assert len(result) == 2
    assert (result.index.dayofweek == [0, 0]).all()
    assert result.iloc[0] == s["1/3/2005"]
    assert result.iloc[1] == s["1/10/2005"]

    result = s.resample("W-TUE").last()
    assert len(result) == 2
    assert (result.index.dayofweek == [1, 1]).all()
    assert result.iloc[0] == s["1/4/2005"]
    assert result.iloc[1] == s["1/10/2005"]

    result = s.resample("W-WED").last()
    assert len(result) == 2
    assert (result.index.dayofweek == [2, 2]).all()
    assert result.iloc[0] == s["1/5/2005"]
    assert result.iloc[1] == s["1/10/2005"]

    result = s.resample("W-THU").last()
    assert len(result) == 2
    assert (result.index.dayofweek == [3, 3]).all()
    assert result.iloc[0] == s["1/6/2005"]
    assert result.iloc[1] == s["1/10/2005"]

    result = s.resample("W-FRI").last()
    assert len(result) == 2
    assert (result.index.dayofweek == [4, 4]).all()
    assert result.iloc[0] == s["1/7/2005"]
    assert result.iloc[1] == s["1/10/2005"]

    # to biz day
    result = s.resample("B").last()
    assert len(result) == 7
    assert (result.index.dayofweek == [4, 0, 1, 2, 3, 4, 0]).all()

    assert result.iloc[0] == s["1/2/2005"]
    assert result.iloc[1] == s["1/3/2005"]
    assert result.iloc[5] == s["1/9/2005"]
    assert result.index.name == "index"


def test_resample_upsampling_picked_but_not_correct():

    # Test for issue #3020
    dates = date_range("01-Jan-2014", "05-Jan-2014", freq="D")
    series = Series(1, index=dates)

    result = series.resample("D").mean()
    assert result.index[0] == dates[0]

    # GH 5955
    # incorrect deciding to upsample when the axis frequency matches the
    # resample frequency

    s = Series(
        np.arange(1.0, 6), index=[datetime(1975, 1, i, 12, 0) for i in range(1, 6)]
    )
    expected = Series(
        np.arange(1.0, 6), index=date_range("19750101", periods=5, freq="D")
    )

    result = s.resample("D").count()
    assert_series_equal(result, Series(1, index=expected.index))

    result1 = s.resample("D").sum()
    result2 = s.resample("D").mean()
    assert_series_equal(result1, expected)
    assert_series_equal(result2, expected)


def test_resample_frame_basic():
    df = tm.makeTimeDataFrame()

    b = Grouper(freq="M")
    g = df.groupby(b)

    # check all cython functions work
    funcs = ["add", "mean", "prod", "min", "max", "var"]
    for f in funcs:
        g._cython_agg_general(f)

    result = df.resample("A").mean()
    assert_series_equal(result["A"], df["A"].resample("A").mean())

    result = df.resample("M").mean()
    assert_series_equal(result["A"], df["A"].resample("M").mean())

    df.resample("M", kind="period").mean()
    df.resample("W-WED", kind="period").mean()


@pytest.mark.parametrize(
    "loffset", [timedelta(minutes=1), "1min", Minute(1), np.timedelta64(1, "m")]
)
def test_resample_loffset(loffset):
    # GH 7687
    rng = date_range("1/1/2000 00:00:00", "1/1/2000 00:13:00", freq="min")
    s = Series(np.random.randn(14), index=rng)

    result = s.resample("5min", closed="right", label="right", loffset=loffset).mean()
    idx = date_range("1/1/2000", periods=4, freq="5min")
    expected = Series(
        [s[0], s[1:6].mean(), s[6:11].mean(), s[11:].mean()],
        index=idx + timedelta(minutes=1),
    )
    assert_series_equal(result, expected)
    assert result.index.freq == Minute(5)

    # from daily
    dti = date_range(start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="D")
    ser = Series(np.random.rand(len(dti)), dti)

    # to weekly
    result = ser.resample("w-sun").last()
    business_day_offset = BDay()
    expected = ser.resample("w-sun", loffset=-business_day_offset).last()
    assert result.index[0] - business_day_offset == expected.index[0]


def test_resample_loffset_upsample():
    # GH 20744
    rng = date_range("1/1/2000 00:00:00", "1/1/2000 00:13:00", freq="min")
    s = Series(np.random.randn(14), index=rng)

    result = s.resample(
        "5min", closed="right", label="right", loffset=timedelta(minutes=1)
    ).ffill()
    idx = date_range("1/1/2000", periods=4, freq="5min")
    expected = Series([s[0], s[5], s[10], s[-1]], index=idx + timedelta(minutes=1))

    assert_series_equal(result, expected)


def test_resample_loffset_count():
    # GH 12725
    start_time = "1/1/2000 00:00:00"
    rng = date_range(start_time, periods=100, freq="S")
    ts = Series(np.random.randn(len(rng)), index=rng)

    result = ts.resample("10S", loffset="1s").count()

    expected_index = date_range(start_time, periods=10, freq="10S") + timedelta(
        seconds=1
    )
    expected = Series(10, index=expected_index)

    assert_series_equal(result, expected)

    # Same issue should apply to .size() since it goes through
    #   same code path
    result = ts.resample("10S", loffset="1s").size()

    assert_series_equal(result, expected)


def test_resample_upsample():
    # from daily
    dti = date_range(
        start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="D", name="index"
    )

    s = Series(np.random.rand(len(dti)), dti)

    # to minutely, by padding
    result = s.resample("Min").pad()
    assert len(result) == 12961
    assert result[0] == s[0]
    assert result[-1] == s[-1]

    assert result.index.name == "index"


def test_resample_how_method():
    # GH9915
    s = Series(
        [11, 22],
        index=[
            Timestamp("2015-03-31 21:48:52.672000"),
            Timestamp("2015-03-31 21:49:52.739000"),
        ],
    )
    expected = Series(
        [11, np.NaN, np.NaN, np.NaN, np.NaN, np.NaN, 22],
        index=[
            Timestamp("2015-03-31 21:48:50"),
            Timestamp("2015-03-31 21:49:00"),
            Timestamp("2015-03-31 21:49:10"),
            Timestamp("2015-03-31 21:49:20"),
            Timestamp("2015-03-31 21:49:30"),
            Timestamp("2015-03-31 21:49:40"),
            Timestamp("2015-03-31 21:49:50"),
        ],
    )
    assert_series_equal(s.resample("10S").mean(), expected)


def test_resample_extra_index_point():
    # GH#9756
    index = date_range(start="20150101", end="20150331", freq="BM")
    expected = DataFrame({"A": Series([21, 41, 63], index=index)})

    index = date_range(start="20150101", end="20150331", freq="B")
    df = DataFrame({"A": Series(range(len(index)), index=index)}, dtype="int64")
    result = df.resample("BM").last()
    assert_frame_equal(result, expected)


def test_upsample_with_limit():
    rng = date_range("1/1/2000", periods=3, freq="5t")
    ts = Series(np.random.randn(len(rng)), rng)

    result = ts.resample("t").ffill(limit=2)
    expected = ts.reindex(result.index, method="ffill", limit=2)
    assert_series_equal(result, expected)


def test_nearest_upsample_with_limit():
    rng = date_range("1/1/2000", periods=3, freq="5t")
    ts = Series(np.random.randn(len(rng)), rng)

    result = ts.resample("t").nearest(limit=2)
    expected = ts.reindex(result.index, method="nearest", limit=2)
    assert_series_equal(result, expected)


def test_resample_ohlc(series):
    s = series

    grouper = Grouper(freq=Minute(5))
    expect = s.groupby(grouper).agg(lambda x: x[-1])
    result = s.resample("5Min").ohlc()

    assert len(result) == len(expect)
    assert len(result.columns) == 4

    xs = result.iloc[-2]
    assert xs["open"] == s[-6]
    assert xs["high"] == s[-6:-1].max()
    assert xs["low"] == s[-6:-1].min()
    assert xs["close"] == s[-2]

    xs = result.iloc[0]
    assert xs["open"] == s[0]
    assert xs["high"] == s[:5].max()
    assert xs["low"] == s[:5].min()
    assert xs["close"] == s[4]


def test_resample_ohlc_result():

    # GH 12332
    index = pd.date_range("1-1-2000", "2-15-2000", freq="h")
    index = index.union(pd.date_range("4-15-2000", "5-15-2000", freq="h"))
    s = Series(range(len(index)), index=index)

    a = s.loc[:"4-15-2000"].resample("30T").ohlc()
    assert isinstance(a, DataFrame)

    b = s.loc[:"4-14-2000"].resample("30T").ohlc()
    assert isinstance(b, DataFrame)

    # GH12348
    # raising on odd period
    rng = date_range("2013-12-30", "2014-01-07")
    index = rng.drop(
        [
            Timestamp("2014-01-01"),
            Timestamp("2013-12-31"),
            Timestamp("2014-01-04"),
            Timestamp("2014-01-05"),
        ]
    )
    df = DataFrame(data=np.arange(len(index)), index=index)
    result = df.resample("B").mean()
    expected = df.reindex(index=date_range(rng[0], rng[-1], freq="B"))
    assert_frame_equal(result, expected)


def test_resample_ohlc_dataframe():
    df = (
        DataFrame(
            {
                "PRICE": {
                    Timestamp("2011-01-06 10:59:05", tz=None): 24990,
                    Timestamp("2011-01-06 12:43:33", tz=None): 25499,
                    Timestamp("2011-01-06 12:54:09", tz=None): 25499,
                },
                "VOLUME": {
                    Timestamp("2011-01-06 10:59:05", tz=None): 1500000000,
                    Timestamp("2011-01-06 12:43:33", tz=None): 5000000000,
                    Timestamp("2011-01-06 12:54:09", tz=None): 100000000,
                },
            }
        )
    ).reindex(["VOLUME", "PRICE"], axis=1)
    res = df.resample("H").ohlc()
    exp = pd.concat(
        [df["VOLUME"].resample("H").ohlc(), df["PRICE"].resample("H").ohlc()],
        axis=1,
        keys=["VOLUME", "PRICE"],
    )
    assert_frame_equal(exp, res)

    df.columns = [["a", "b"], ["c", "d"]]
    res = df.resample("H").ohlc()
    exp.columns = pd.MultiIndex.from_tuples(
        [
            ("a", "c", "open"),
            ("a", "c", "high"),
            ("a", "c", "low"),
            ("a", "c", "close"),
            ("b", "d", "open"),
            ("b", "d", "high"),
            ("b", "d", "low"),
            ("b", "d", "close"),
        ]
    )
    assert_frame_equal(exp, res)

    # dupe columns fail atm
    # df.columns = ['PRICE', 'PRICE']


def test_resample_dup_index():

    # GH 4812
    # dup columns with resample raising
    df = DataFrame(
        np.random.randn(4, 12),
        index=[2000, 2000, 2000, 2000],
        columns=[Period(year=2000, month=i + 1, freq="M") for i in range(12)],
    )
    df.iloc[3, :] = np.nan
    result = df.resample("Q", axis=1).mean()
    expected = df.groupby(lambda x: int((x.month - 1) / 3), axis=1).mean()
    expected.columns = [Period(year=2000, quarter=i + 1, freq="Q") for i in range(4)]
    assert_frame_equal(result, expected)


def test_resample_reresample():
    dti = date_range(start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="D")
    s = Series(np.random.rand(len(dti)), dti)
    bs = s.resample("B", closed="right", label="right").mean()
    result = bs.resample("8H").mean()
    assert len(result) == 22
    assert isinstance(result.index.freq, offsets.DateOffset)
    assert result.index.freq == offsets.Hour(8)


def test_resample_timestamp_to_period(simple_date_range_series):
    ts = simple_date_range_series("1/1/1990", "1/1/2000")

    result = ts.resample("A-DEC", kind="period").mean()
    expected = ts.resample("A-DEC").mean()
    expected.index = period_range("1990", "2000", freq="a-dec")
    assert_series_equal(result, expected)

    result = ts.resample("A-JUN", kind="period").mean()
    expected = ts.resample("A-JUN").mean()
    expected.index = period_range("1990", "2000", freq="a-jun")
    assert_series_equal(result, expected)

    result = ts.resample("M", kind="period").mean()
    expected = ts.resample("M").mean()
    expected.index = period_range("1990-01", "2000-01", freq="M")
    assert_series_equal(result, expected)

    result = ts.resample("M", kind="period").mean()
    expected = ts.resample("M").mean()
    expected.index = period_range("1990-01", "2000-01", freq="M")
    assert_series_equal(result, expected)


def test_ohlc_5min():
    def _ohlc(group):
        if isna(group).all():
            return np.repeat(np.nan, 4)
        return [group[0], group.max(), group.min(), group[-1]]

    rng = date_range("1/1/2000 00:00:00", "1/1/2000 5:59:50", freq="10s")
    ts = Series(np.random.randn(len(rng)), index=rng)

    resampled = ts.resample("5min", closed="right", label="right").ohlc()

    assert (resampled.loc["1/1/2000 00:00"] == ts[0]).all()

    exp = _ohlc(ts[1:31])
    assert (resampled.loc["1/1/2000 00:05"] == exp).all()

    exp = _ohlc(ts["1/1/2000 5:55:01":])
    assert (resampled.loc["1/1/2000 6:00:00"] == exp).all()


def test_downsample_non_unique():
    rng = date_range("1/1/2000", "2/29/2000")
    rng2 = rng.repeat(5).values
    ts = Series(np.random.randn(len(rng2)), index=rng2)

    result = ts.resample("M").mean()

    expected = ts.groupby(lambda x: x.month).mean()
    assert len(result) == 2
    assert_almost_equal(result[0], expected[1])
    assert_almost_equal(result[1], expected[2])


def test_asfreq_non_unique():
    # GH #1077
    rng = date_range("1/1/2000", "2/29/2000")
    rng2 = rng.repeat(2).values
    ts = Series(np.random.randn(len(rng2)), index=rng2)

    msg = "cannot reindex from a duplicate axis"
    with pytest.raises(ValueError, match=msg):
        ts.asfreq("B")


def test_resample_axis1():
    rng = date_range("1/1/2000", "2/29/2000")
    df = DataFrame(np.random.randn(3, len(rng)), columns=rng, index=["a", "b", "c"])

    result = df.resample("M", axis=1).mean()
    expected = df.T.resample("M").mean().T
    tm.assert_frame_equal(result, expected)


def test_resample_anchored_ticks():
    # If a fixed delta (5 minute, 4 hour) evenly divides a day, we should
    # "anchor" the origin at midnight so we get regular intervals rather
    # than starting from the first timestamp which might start in the
    # middle of a desired interval

    rng = date_range("1/1/2000 04:00:00", periods=86400, freq="s")
    ts = Series(np.random.randn(len(rng)), index=rng)
    ts[:2] = np.nan  # so results are the same

    freqs = ["t", "5t", "15t", "30t", "4h", "12h"]
    for freq in freqs:
        result = ts[2:].resample(freq, closed="left", label="left").mean()
        expected = ts.resample(freq, closed="left", label="left").mean()
        assert_series_equal(result, expected)


def test_resample_single_group():
    mysum = lambda x: x.sum()

    rng = date_range("2000-1-1", "2000-2-10", freq="D")
    ts = Series(np.random.randn(len(rng)), index=rng)
    assert_series_equal(ts.resample("M").sum(), ts.resample("M").apply(mysum))

    rng = date_range("2000-1-1", "2000-1-10", freq="D")
    ts = Series(np.random.randn(len(rng)), index=rng)
    assert_series_equal(ts.resample("M").sum(), ts.resample("M").apply(mysum))

    # GH 3849
    s = Series(
        [30.1, 31.6],
        index=[Timestamp("20070915 15:30:00"), Timestamp("20070915 15:40:00")],
    )
    expected = Series([0.75], index=[Timestamp("20070915")])
    result = s.resample("D").apply(lambda x: np.std(x))
    assert_series_equal(result, expected)


def test_resample_base():
    rng = date_range("1/1/2000 00:00:00", "1/1/2000 02:00", freq="s")
    ts = Series(np.random.randn(len(rng)), index=rng)

    resampled = ts.resample("5min", base=2).mean()
    exp_rng = date_range("12/31/1999 23:57:00", "1/1/2000 01:57", freq="5min")
    tm.assert_index_equal(resampled.index, exp_rng)


def test_resample_float_base():
    # GH25161
    dt = pd.to_datetime(
        ["2018-11-26 16:17:43.51", "2018-11-26 16:17:44.51", "2018-11-26 16:17:45.51"]
    )
    s = Series(np.arange(3), index=dt)

    base = 17 + 43.51 / 60
    result = s.resample("3min", base=base).size()
    expected = Series(3, index=pd.DatetimeIndex(["2018-11-26 16:17:43.51"]))
    assert_series_equal(result, expected)


def test_resample_daily_anchored():
    rng = date_range("1/1/2000 0:00:00", periods=10000, freq="T")
    ts = Series(np.random.randn(len(rng)), index=rng)
    ts[:2] = np.nan  # so results are the same

    result = ts[2:].resample("D", closed="left", label="left").mean()
    expected = ts.resample("D", closed="left", label="left").mean()
    assert_series_equal(result, expected)


def test_resample_to_period_monthly_buglet():
    # GH #1259

    rng = date_range("1/1/2000", "12/31/2000")
    ts = Series(np.random.randn(len(rng)), index=rng)

    result = ts.resample("M", kind="period").mean()
    exp_index = period_range("Jan-2000", "Dec-2000", freq="M")
    tm.assert_index_equal(result.index, exp_index)


def test_period_with_agg():

    # aggregate a period resampler with a lambda
    s2 = Series(
        np.random.randint(0, 5, 50),
        index=pd.period_range("2012-01-01", freq="H", periods=50),
        dtype="float64",
    )

    expected = s2.to_timestamp().resample("D").mean().to_period()
    result = s2.resample("D").agg(lambda x: x.mean())
    assert_series_equal(result, expected)


def test_resample_segfault():
    # GH 8573
    # segfaulting in older versions
    all_wins_and_wagers = [
        (1, datetime(2013, 10, 1, 16, 20), 1, 0),
        (2, datetime(2013, 10, 1, 16, 10), 1, 0),
        (2, datetime(2013, 10, 1, 18, 15), 1, 0),
        (2, datetime(2013, 10, 1, 16, 10, 31), 1, 0),
    ]

    df = DataFrame.from_records(
        all_wins_and_wagers, columns=("ID", "timestamp", "A", "B")
    ).set_index("timestamp")
    result = df.groupby("ID").resample("5min").sum()
    expected = df.groupby("ID").apply(lambda x: x.resample("5min").sum())
    assert_frame_equal(result, expected)


def test_resample_dtype_preservation():

    # GH 12202
    # validation tests for dtype preservation

    df = DataFrame(
        {
            "date": pd.date_range(start="2016-01-01", periods=4, freq="W"),
            "group": [1, 1, 2, 2],
            "val": Series([5, 6, 7, 8], dtype="int32"),
        }
    ).set_index("date")

    result = df.resample("1D").ffill()
    assert result.val.dtype == np.int32

    result = df.groupby("group").resample("1D").ffill()
    assert result.val.dtype == np.int32


def test_resample_dtype_coerceion():

    pytest.importorskip("scipy.interpolate")

    # GH 16361
    df = {"a": [1, 3, 1, 4]}
    df = DataFrame(df, index=pd.date_range("2017-01-01", "2017-01-04"))

    expected = df.astype("float64").resample("H").mean()["a"].interpolate("cubic")

    result = df.resample("H")["a"].mean().interpolate("cubic")
    tm.assert_series_equal(result, expected)

    result = df.resample("H").mean()["a"].interpolate("cubic")
    tm.assert_series_equal(result, expected)


def test_weekly_resample_buglet():
    # #1327
    rng = date_range("1/1/2000", freq="B", periods=20)
    ts = Series(np.random.randn(len(rng)), index=rng)

    resampled = ts.resample("W").mean()
    expected = ts.resample("W-SUN").mean()
    assert_series_equal(resampled, expected)


def test_monthly_resample_error():
    # #1451
    dates = date_range("4/16/2012 20:00", periods=5000, freq="h")
    ts = Series(np.random.randn(len(dates)), index=dates)
    # it works!
    ts.resample("M")


def test_nanosecond_resample_error():
    # GH 12307 - Values falls after last bin when
    # Resampling using pd.tseries.offsets.Nano as period
    start = 1443707890427
    exp_start = 1443707890400
    indx = pd.date_range(start=pd.to_datetime(start), periods=10, freq="100n")
    ts = Series(range(len(indx)), index=indx)
    r = ts.resample(pd.tseries.offsets.Nano(100))
    result = r.agg("mean")

    exp_indx = pd.date_range(start=pd.to_datetime(exp_start), periods=10, freq="100n")
    exp = Series(range(len(exp_indx)), index=exp_indx)

    assert_series_equal(result, exp)


def test_resample_anchored_intraday(simple_date_range_series):
    # #1471, #1458

    rng = date_range("1/1/2012", "4/1/2012", freq="100min")
    df = DataFrame(rng.month, index=rng)

    result = df.resample("M").mean()
    expected = df.resample("M", kind="period").mean().to_timestamp(how="end")
    expected.index += Timedelta(1, "ns") - Timedelta(1, "D")
    tm.assert_frame_equal(result, expected)

    result = df.resample("M", closed="left").mean()
    exp = df.tshift(1, freq="D").resample("M", kind="period").mean()
    exp = exp.to_timestamp(how="end")

    exp.index = exp.index + Timedelta(1, "ns") - Timedelta(1, "D")
    tm.assert_frame_equal(result, exp)

    rng = date_range("1/1/2012", "4/1/2012", freq="100min")
    df = DataFrame(rng.month, index=rng)

    result = df.resample("Q").mean()
    expected = df.resample("Q", kind="period").mean().to_timestamp(how="end")
    expected.index += Timedelta(1, "ns") - Timedelta(1, "D")
    tm.assert_frame_equal(result, expected)

    result = df.resample("Q", closed="left").mean()
    expected = df.tshift(1, freq="D").resample("Q", kind="period", closed="left").mean()
    expected = expected.to_timestamp(how="end")
    expected.index += Timedelta(1, "ns") - Timedelta(1, "D")
    tm.assert_frame_equal(result, expected)

    ts = simple_date_range_series("2012-04-29 23:00", "2012-04-30 5:00", freq="h")
    resampled = ts.resample("M").mean()
    assert len(resampled) == 1


def test_resample_anchored_monthstart(simple_date_range_series):
    ts = simple_date_range_series("1/1/2000", "12/31/2002")

    freqs = ["MS", "BMS", "QS-MAR", "AS-DEC", "AS-JUN"]

    for freq in freqs:
        ts.resample(freq).mean()


def test_resample_anchored_multiday():
    # When resampling a range spanning multiple days, ensure that the
    # start date gets used to determine the offset.  Fixes issue where
    # a one day period is not a multiple of the frequency.
    #
    # See: https://github.com/pandas-dev/pandas/issues/8683

    index = pd.date_range(
        "2014-10-14 23:06:23.206", periods=3, freq="400L"
    ) | pd.date_range("2014-10-15 23:00:00", periods=2, freq="2200L")

    s = Series(np.random.randn(5), index=index)

    # Ensure left closing works
    result = s.resample("2200L").mean()
    assert result.index[-1] == Timestamp("2014-10-15 23:00:02.000")

    # Ensure right closing works
    result = s.resample("2200L", label="right").mean()
    assert result.index[-1] == Timestamp("2014-10-15 23:00:04.200")


def test_corner_cases(simple_period_range_series, simple_date_range_series):
    # miscellaneous test coverage

    rng = date_range("1/1/2000", periods=12, freq="t")
    ts = Series(np.random.randn(len(rng)), index=rng)

    result = ts.resample("5t", closed="right", label="left").mean()
    ex_index = date_range("1999-12-31 23:55", periods=4, freq="5t")
    tm.assert_index_equal(result.index, ex_index)

    len0pts = simple_period_range_series("2007-01", "2010-05", freq="M")[:0]
    # it works
    result = len0pts.resample("A-DEC").mean()
    assert len(result) == 0

    # resample to periods
    ts = simple_date_range_series("2000-04-28", "2000-04-30 11:00", freq="h")
    result = ts.resample("M", kind="period").mean()
    assert len(result) == 1
    assert result.index[0] == Period("2000-04", freq="M")


def test_anchored_lowercase_buglet():
    dates = date_range("4/16/2012 20:00", periods=50000, freq="s")
    ts = Series(np.random.randn(len(dates)), index=dates)
    # it works!
    ts.resample("d").mean()


def test_upsample_apply_functions():
    # #1596
    rng = pd.date_range("2012-06-12", periods=4, freq="h")

    ts = Series(np.random.randn(len(rng)), index=rng)

    result = ts.resample("20min").aggregate(["mean", "sum"])
    assert isinstance(result, DataFrame)


def test_resample_not_monotonic():
    rng = pd.date_range("2012-06-12", periods=200, freq="h")
    ts = Series(np.random.randn(len(rng)), index=rng)

    ts = ts.take(np.random.permutation(len(ts)))

    result = ts.resample("D").sum()
    exp = ts.sort_index().resample("D").sum()
    assert_series_equal(result, exp)


def test_resample_median_bug_1688():

    for dtype in ["int64", "int32", "float64", "float32"]:
        df = DataFrame(
            [1, 2],
            index=[datetime(2012, 1, 1, 0, 0, 0), datetime(2012, 1, 1, 0, 5, 0)],
            dtype=dtype,
        )

        result = df.resample("T").apply(lambda x: x.mean())
        exp = df.asfreq("T")
        tm.assert_frame_equal(result, exp)

        result = df.resample("T").median()
        exp = df.asfreq("T")
        tm.assert_frame_equal(result, exp)


def test_how_lambda_functions(simple_date_range_series):

    ts = simple_date_range_series("1/1/2000", "4/1/2000")

    result = ts.resample("M").apply(lambda x: x.mean())
    exp = ts.resample("M").mean()
    tm.assert_series_equal(result, exp)

    foo_exp = ts.resample("M").mean()
    foo_exp.name = "foo"
    bar_exp = ts.resample("M").std()
    bar_exp.name = "bar"

    result = ts.resample("M").apply([lambda x: x.mean(), lambda x: x.std(ddof=1)])
    result.columns = ["foo", "bar"]
    tm.assert_series_equal(result["foo"], foo_exp)
    tm.assert_series_equal(result["bar"], bar_exp)

    # this is a MI Series, so comparing the names of the results
    # doesn't make sense
    result = ts.resample("M").aggregate(
        {"foo": lambda x: x.mean(), "bar": lambda x: x.std(ddof=1)}
    )
    tm.assert_series_equal(result["foo"], foo_exp, check_names=False)
    tm.assert_series_equal(result["bar"], bar_exp, check_names=False)


def test_resample_unequal_times():
    # #1772
    start = datetime(1999, 3, 1, 5)
    # end hour is less than start
    end = datetime(2012, 7, 31, 4)
    bad_ind = date_range(start, end, freq="30min")
    df = DataFrame({"close": 1}, index=bad_ind)

    # it works!
    df.resample("AS").sum()


def test_resample_consistency():

    # GH 6418
    # resample with bfill / limit / reindex consistency

    i30 = pd.date_range("2002-02-02", periods=4, freq="30T")
    s = Series(np.arange(4.0), index=i30)
    s[2] = np.NaN

    # Upsample by factor 3 with reindex() and resample() methods:
    i10 = pd.date_range(i30[0], i30[-1], freq="10T")

    s10 = s.reindex(index=i10, method="bfill")
    s10_2 = s.reindex(index=i10, method="bfill", limit=2)
    rl = s.reindex_like(s10, method="bfill", limit=2)
    r10_2 = s.resample("10Min").bfill(limit=2)
    r10 = s.resample("10Min").bfill()

    # s10_2, r10, r10_2, rl should all be equal
    assert_series_equal(s10_2, r10)
    assert_series_equal(s10_2, r10_2)
    assert_series_equal(s10_2, rl)


def test_resample_timegrouper():
    # GH 7227
    dates1 = [
        datetime(2014, 10, 1),
        datetime(2014, 9, 3),
        datetime(2014, 11, 5),
        datetime(2014, 9, 5),
        datetime(2014, 10, 8),
        datetime(2014, 7, 15),
    ]

    dates2 = dates1[:2] + [pd.NaT] + dates1[2:4] + [pd.NaT] + dates1[4:]
    dates3 = [pd.NaT] + dates1 + [pd.NaT]

    for dates in [dates1, dates2, dates3]:
        df = DataFrame(dict(A=dates, B=np.arange(len(dates))))
        result = df.set_index("A").resample("M").count()
        exp_idx = pd.DatetimeIndex(
            ["2014-07-31", "2014-08-31", "2014-09-30", "2014-10-31", "2014-11-30"],
            freq="M",
            name="A",
        )
        expected = DataFrame({"B": [1, 0, 2, 2, 1]}, index=exp_idx)
        assert_frame_equal(result, expected)

        result = df.groupby(pd.Grouper(freq="M", key="A")).count()
        assert_frame_equal(result, expected)

        df = DataFrame(dict(A=dates, B=np.arange(len(dates)), C=np.arange(len(dates))))
        result = df.set_index("A").resample("M").count()
        expected = DataFrame(
            {"B": [1, 0, 2, 2, 1], "C": [1, 0, 2, 2, 1]},
            index=exp_idx,
            columns=["B", "C"],
        )
        assert_frame_equal(result, expected)

        result = df.groupby(pd.Grouper(freq="M", key="A")).count()
        assert_frame_equal(result, expected)


def test_resample_nunique():

    # GH 12352
    df = DataFrame(
        {
            "ID": {
                Timestamp("2015-06-05 00:00:00"): "0010100903",
                Timestamp("2015-06-08 00:00:00"): "0010150847",
            },
            "DATE": {
                Timestamp("2015-06-05 00:00:00"): "2015-06-05",
                Timestamp("2015-06-08 00:00:00"): "2015-06-08",
            },
        }
    )
    r = df.resample("D")
    g = df.groupby(pd.Grouper(freq="D"))
    expected = df.groupby(pd.Grouper(freq="D")).ID.apply(lambda x: x.nunique())
    assert expected.name == "ID"

    for t in [r, g]:
        result = r.ID.nunique()
        assert_series_equal(result, expected)

    result = df.ID.resample("D").nunique()
    assert_series_equal(result, expected)

    result = df.ID.groupby(pd.Grouper(freq="D")).nunique()
    assert_series_equal(result, expected)


def test_resample_nunique_preserves_column_level_names():
    # see gh-23222
    df = tm.makeTimeDataFrame(freq="1D").abs()
    df.columns = pd.MultiIndex.from_arrays(
        [df.columns.tolist()] * 2, names=["lev0", "lev1"]
    )
    result = df.resample("1h").nunique()
    tm.assert_index_equal(df.columns, result.columns)


def test_resample_nunique_with_date_gap():
    # GH 13453
    index = pd.date_range("1-1-2000", "2-15-2000", freq="h")
    index2 = pd.date_range("4-15-2000", "5-15-2000", freq="h")
    index3 = index.append(index2)
    s = Series(range(len(index3)), index=index3, dtype="int64")
    r = s.resample("M")

    # Since all elements are unique, these should all be the same
    results = [r.count(), r.nunique(), r.agg(Series.nunique), r.agg("nunique")]

    assert_series_equal(results[0], results[1])
    assert_series_equal(results[0], results[2])
    assert_series_equal(results[0], results[3])


@pytest.mark.parametrize("n", [10000, 100000])
@pytest.mark.parametrize("k", [10, 100, 1000])
def test_resample_group_info(n, k):
    # GH10914

    # use a fixed seed to always have the same uniques
    prng = np.random.RandomState(1234)

    dr = date_range(start="2015-08-27", periods=n // 10, freq="T")
    ts = Series(prng.randint(0, n // k, n).astype("int64"), index=prng.choice(dr, n))

    left = ts.resample("30T").nunique()
    ix = date_range(start=ts.index.min(), end=ts.index.max(), freq="30T")

    vals = ts.values
    bins = np.searchsorted(ix.values, ts.index, side="right")

    sorter = np.lexsort((vals, bins))
    vals, bins = vals[sorter], bins[sorter]

    mask = np.r_[True, vals[1:] != vals[:-1]]
    mask |= np.r_[True, bins[1:] != bins[:-1]]

    arr = np.bincount(bins[mask] - 1, minlength=len(ix)).astype("int64", copy=False)
    right = Series(arr, index=ix)

    assert_series_equal(left, right)


def test_resample_size():
    n = 10000
    dr = date_range("2015-09-19", periods=n, freq="T")
    ts = Series(np.random.randn(n), index=np.random.choice(dr, n))

    left = ts.resample("7T").size()
    ix = date_range(start=left.index.min(), end=ts.index.max(), freq="7T")

    bins = np.searchsorted(ix.values, ts.index.values, side="right")
    val = np.bincount(bins, minlength=len(ix) + 1)[1:].astype("int64", copy=False)

    right = Series(val, index=ix)
    assert_series_equal(left, right)


def test_resample_across_dst():
    # The test resamples a DatetimeIndex with values before and after a
    # DST change
    # Issue: 14682

    # The DatetimeIndex we will start with
    # (note that DST happens at 03:00+02:00 -> 02:00+01:00)
    # 2016-10-30 02:23:00+02:00, 2016-10-30 02:23:00+01:00
    df1 = DataFrame([1477786980, 1477790580], columns=["ts"])
    dti1 = DatetimeIndex(
        pd.to_datetime(df1.ts, unit="s")
        .dt.tz_localize("UTC")
        .dt.tz_convert("Europe/Madrid")
    )

    # The expected DatetimeIndex after resampling.
    # 2016-10-30 02:00:00+02:00, 2016-10-30 02:00:00+01:00
    df2 = DataFrame([1477785600, 1477789200], columns=["ts"])
    dti2 = DatetimeIndex(
        pd.to_datetime(df2.ts, unit="s")
        .dt.tz_localize("UTC")
        .dt.tz_convert("Europe/Madrid")
    )
    df = DataFrame([5, 5], index=dti1)

    result = df.resample(rule="H").sum()
    expected = DataFrame([5, 5], index=dti2)

    assert_frame_equal(result, expected)


def test_groupby_with_dst_time_change():
    # GH 24972
    index = pd.DatetimeIndex(
        [1478064900001000000, 1480037118776792000], tz="UTC"
    ).tz_convert("America/Chicago")

    df = pd.DataFrame([1, 2], index=index)
    result = df.groupby(pd.Grouper(freq="1d")).last()
    expected_index_values = pd.date_range(
        "2016-11-02", "2016-11-24", freq="d", tz="America/Chicago"
    )

    index = pd.DatetimeIndex(expected_index_values)
    expected = pd.DataFrame([1.0] + ([np.nan] * 21) + [2.0], index=index)
    assert_frame_equal(result, expected)


def test_resample_dst_anchor():
    # 5172
    dti = DatetimeIndex([datetime(2012, 11, 4, 23)], tz="US/Eastern")
    df = DataFrame([5], index=dti)
    assert_frame_equal(
        df.resample(rule="D").sum(), DataFrame([5], index=df.index.normalize())
    )
    df.resample(rule="MS").sum()
    assert_frame_equal(
        df.resample(rule="MS").sum(),
        DataFrame([5], index=DatetimeIndex([datetime(2012, 11, 1)], tz="US/Eastern")),
    )

    dti = date_range("2013-09-30", "2013-11-02", freq="30Min", tz="Europe/Paris")
    values = range(dti.size)
    df = DataFrame({"a": values, "b": values, "c": values}, index=dti, dtype="int64")
    how = {"a": "min", "b": "max", "c": "count"}

    assert_frame_equal(
        df.resample("W-MON").agg(how)[["a", "b", "c"]],
        DataFrame(
            {
                "a": [0, 48, 384, 720, 1056, 1394],
                "b": [47, 383, 719, 1055, 1393, 1586],
                "c": [48, 336, 336, 336, 338, 193],
            },
            index=date_range("9/30/2013", "11/4/2013", freq="W-MON", tz="Europe/Paris"),
        ),
        "W-MON Frequency",
    )

    assert_frame_equal(
        df.resample("2W-MON").agg(how)[["a", "b", "c"]],
        DataFrame(
            {
                "a": [0, 48, 720, 1394],
                "b": [47, 719, 1393, 1586],
                "c": [48, 672, 674, 193],
            },
            index=date_range(
                "9/30/2013", "11/11/2013", freq="2W-MON", tz="Europe/Paris"
            ),
        ),
        "2W-MON Frequency",
    )

    assert_frame_equal(
        df.resample("MS").agg(how)[["a", "b", "c"]],
        DataFrame(
            {"a": [0, 48, 1538], "b": [47, 1537, 1586], "c": [48, 1490, 49]},
            index=date_range("9/1/2013", "11/1/2013", freq="MS", tz="Europe/Paris"),
        ),
        "MS Frequency",
    )

    assert_frame_equal(
        df.resample("2MS").agg(how)[["a", "b", "c"]],
        DataFrame(
            {"a": [0, 1538], "b": [1537, 1586], "c": [1538, 49]},
            index=date_range("9/1/2013", "11/1/2013", freq="2MS", tz="Europe/Paris"),
        ),
        "2MS Frequency",
    )

    df_daily = df["10/26/2013":"10/29/2013"]
    assert_frame_equal(
        df_daily.resample("D").agg({"a": "min", "b": "max", "c": "count"})[
            ["a", "b", "c"]
        ],
        DataFrame(
            {
                "a": [1248, 1296, 1346, 1394],
                "b": [1295, 1345, 1393, 1441],
                "c": [48, 50, 48, 48],
            },
            index=date_range("10/26/2013", "10/29/2013", freq="D", tz="Europe/Paris"),
        ),
        "D Frequency",
    )


def test_downsample_across_dst():
    # GH 8531
    tz = pytz.timezone("Europe/Berlin")
    dt = datetime(2014, 10, 26)
    dates = date_range(tz.localize(dt), periods=4, freq="2H")
    result = Series(5, index=dates).resample("H").mean()
    expected = Series(
        [5.0, np.nan] * 3 + [5.0],
        index=date_range(tz.localize(dt), periods=7, freq="H"),
    )
    tm.assert_series_equal(result, expected)


def test_downsample_across_dst_weekly():
    # GH 9119, GH 21459
    df = DataFrame(
        index=DatetimeIndex(
            ["2017-03-25", "2017-03-26", "2017-03-27", "2017-03-28", "2017-03-29"],
            tz="Europe/Amsterdam",
        ),
        data=[11, 12, 13, 14, 15],
    )
    result = df.resample("1W").sum()
    expected = DataFrame(
        [23, 42],
        index=pd.DatetimeIndex(["2017-03-26", "2017-04-02"], tz="Europe/Amsterdam"),
    )
    tm.assert_frame_equal(result, expected)

    idx = pd.date_range("2013-04-01", "2013-05-01", tz="Europe/London", freq="H")
    s = Series(index=idx)
    result = s.resample("W").mean()
    expected = Series(
        index=pd.date_range("2013-04-07", freq="W", periods=5, tz="Europe/London")
    )
    tm.assert_series_equal(result, expected)


def test_resample_with_nat():
    # GH 13020
    index = DatetimeIndex(
        [
            pd.NaT,
            "1970-01-01 00:00:00",
            pd.NaT,
            "1970-01-01 00:00:01",
            "1970-01-01 00:00:02",
        ]
    )
    frame = DataFrame([2, 3, 5, 7, 11], index=index)

    index_1s = DatetimeIndex(
        ["1970-01-01 00:00:00", "1970-01-01 00:00:01", "1970-01-01 00:00:02"]
    )
    frame_1s = DataFrame([3, 7, 11], index=index_1s)
    assert_frame_equal(frame.resample("1s").mean(), frame_1s)

    index_2s = DatetimeIndex(["1970-01-01 00:00:00", "1970-01-01 00:00:02"])
    frame_2s = DataFrame([5, 11], index=index_2s)
    assert_frame_equal(frame.resample("2s").mean(), frame_2s)

    index_3s = DatetimeIndex(["1970-01-01 00:00:00"])
    frame_3s = DataFrame([7], index=index_3s)
    assert_frame_equal(frame.resample("3s").mean(), frame_3s)

    assert_frame_equal(frame.resample("60s").mean(), frame_3s)


def test_resample_datetime_values():
    # GH 13119
    # check that datetime dtype is preserved when NaT values are
    # introduced by the resampling

    dates = [datetime(2016, 1, 15), datetime(2016, 1, 19)]
    df = DataFrame({"timestamp": dates}, index=dates)

    exp = Series(
        [datetime(2016, 1, 15), pd.NaT, datetime(2016, 1, 19)],
        index=date_range("2016-01-15", periods=3, freq="2D"),
        name="timestamp",
    )

    res = df.resample("2D").first()["timestamp"]
    tm.assert_series_equal(res, exp)
    res = df["timestamp"].resample("2D").first()
    tm.assert_series_equal(res, exp)


def test_resample_apply_with_additional_args(series):
    # GH 14615
    def f(data, add_arg):
        return np.mean(data) * add_arg

    multiplier = 10
    result = series.resample("D").apply(f, multiplier)
    expected = series.resample("D").mean().multiply(multiplier)
    tm.assert_series_equal(result, expected)

    # Testing as kwarg
    result = series.resample("D").apply(f, add_arg=multiplier)
    expected = series.resample("D").mean().multiply(multiplier)
    tm.assert_series_equal(result, expected)

    # Testing dataframe
    df = pd.DataFrame({"A": 1, "B": 2}, index=pd.date_range("2017", periods=10))
    result = df.groupby("A").resample("D").agg(f, multiplier)
    expected = df.groupby("A").resample("D").mean().multiply(multiplier)
    assert_frame_equal(result, expected)


@pytest.mark.parametrize("k", [1, 2, 3])
@pytest.mark.parametrize(
    "n1, freq1, n2, freq2",
    [
        (30, "S", 0.5, "Min"),
        (60, "S", 1, "Min"),
        (3600, "S", 1, "H"),
        (60, "Min", 1, "H"),
        (21600, "S", 0.25, "D"),
        (86400, "S", 1, "D"),
        (43200, "S", 0.5, "D"),
        (1440, "Min", 1, "D"),
        (12, "H", 0.5, "D"),
        (24, "H", 1, "D"),
    ],
)
def test_resample_equivalent_offsets(n1, freq1, n2, freq2, k):
    # GH 24127
    n1_ = n1 * k
    n2_ = n2 * k
    s = pd.Series(
        0, index=pd.date_range("19910905 13:00", "19911005 07:00", freq=freq1)
    )
    s = s + range(len(s))

    result1 = s.resample(str(n1_) + freq1).mean()
    result2 = s.resample(str(n2_) + freq2).mean()
    assert_series_equal(result1, result2)


@pytest.mark.parametrize(
    "first,last,offset,exp_first,exp_last",
    [
        ("19910905", "19920406", "D", "19910905", "19920407"),
        ("19910905 00:00", "19920406 06:00", "D", "19910905", "19920407"),
        ("19910905 06:00", "19920406 06:00", "H", "19910905 06:00", "19920406 07:00"),
        ("19910906", "19920406", "M", "19910831", "19920430"),
        ("19910831", "19920430", "M", "19910831", "19920531"),
        ("1991-08", "1992-04", "M", "19910831", "19920531"),
    ],
)
def test_get_timestamp_range_edges(first, last, offset, exp_first, exp_last):
    first = pd.Period(first)
    first = first.to_timestamp(first.freq)
    last = pd.Period(last)
    last = last.to_timestamp(last.freq)

    exp_first = pd.Timestamp(exp_first, freq=offset)
    exp_last = pd.Timestamp(exp_last, freq=offset)

    offset = pd.tseries.frequencies.to_offset(offset)
    result = _get_timestamp_range_edges(first, last, offset)
    expected = (exp_first, exp_last)
    assert result == expected
aaronreidsmith / pandas python

Version: 0.25.3

/ tests / resample / test_datetime_index.py

Products

About

Resources

Contact Gemfury