from datetime import datetime
import numpy as np
import numpy.testing as npt
import pandas as pd
import pytest
from statsmodels.tsa.base.tsa_model import TimeSeriesModel
from statsmodels.tools.testing import assert_equal
from statsmodels.tools.sm_exceptions import ValueWarning
def test_pandas_nodates_index():
    """Accept an integer-derived DatetimeIndex that carries no frequency.

    The index is built from raw integers, which pandas coerces into
    nanosecond-resolution timestamps.  The model is expected to emit a
    ``ValueWarning`` matching "No frequency information" on construction,
    and a prediction request for positions 0 through 4 must produce five
    prediction dates.
    """
    values = [988, 819, 964]
    # Not a "real" date index, but it can be coerced into a nanosecond
    # DatetimeIndex, so model construction must not raise.
    index = pd.to_datetime([100, 101, 102])
    series = pd.Series(values, index=index)

    # Sanity check on the coercion: 100 is read as 100ns past the epoch.
    actual_str = (index[0].strftime('%Y-%m-%d %H:%M:%S.%f') +
                  str(index[0].value))
    assert_equal(actual_str, '1970-01-01 00:00:00.000000100')

    with pytest.warns(ValueWarning, match="No frequency information"):
        mod = TimeSeriesModel(series)

    # The return value is not needed here; the test only inspects
    # ``mod.data.predict_dates`` afterwards (presumably set by this call).
    mod._get_prediction_index(0, 4)
    assert_equal(len(mod.data.predict_dates), 5)
def test_predict_freq():
    """Predicted dates must keep the same frequency as the model index."""
    # "A-APR" is used instead of "AS-APR" because of a YearBegin bug in
    # pandas up to 0.10.2 (per the original author's note).
    annual_index = pd.date_range("1972-4-30", "2006-4-30", freq="A-APR")
    values = np.arange(1, 36.)
    model = TimeSeriesModel(pd.Series(values, index=annual_index))
    assert_equal(model._index.freqstr, "A-APR")

    # Only the 4-tuple shape of the result matters here; the dates under
    # test are read from ``model.data.predict_dates`` below.
    _start, _end, _out_of_sample, _extra = model._get_prediction_index(
        "2006-4-30", "2016-4-30")

    expected_dates = pd.date_range("2006-4-30", "2016-4-30", freq="A-APR")
    assert_equal(model.data.predict_dates, expected_dates)
def test_keyerror_start_date():
    """A prediction start date earlier than the index raises ``KeyError``."""
    annual_index = pd.date_range("1972-4-30", "2006-4-30", freq="A-APR")
    endog = pd.Series(np.arange(1, 36.), index=annual_index)
    model = TimeSeriesModel(endog)

    # "1970-4-30" precedes the first index entry ("1972-4-30").
    with npt.assert_raises(KeyError):
        model._get_prediction_index("1970-4-30", None)
def test_period_index():
    """A monthly PeriodIndex frequency is visible on the model.

    Referenced as "test 1285" by the original author.
    """
    monthly = pd.period_range(start="1/1/1990", periods=20, freq="M")
    values = np.arange(1, 21.)

    # Frequency as reported by the model's internal index.
    index_model = TimeSeriesModel(pd.Series(values, index=monthly))
    assert_equal(index_model._index.freqstr, "M")

    # Frequency as reported by the model's data wrapper.
    # NOTE(review): this construction is identical to the one above; it may
    # have been meant to exercise a different container -- confirm.
    data_model = TimeSeriesModel(pd.Series(values, index=monthly))
    npt.assert_(data_model.data.freq == "M")
def test_pandas_dates():
    """Model dates match the DatetimeIndex of the pandas input column."""
    prices = [988, 819, 964]
    stamps = [
        '2016-01-01 12:00:00',
        '2016-02-01 12:00:00',
        '2016-03-01 12:00:00',
    ]

    # Expected: the same timestamps parsed without an explicit frequency.
    expected = pd.Series(
        data=prices, index=pd.to_datetime(stamps), name='price')

    # Input: a DataFrame column whose index declares month-start frequency.
    month_start_index = pd.DatetimeIndex(stamps, freq='MS')
    frame = pd.DataFrame(data={'price': prices}, index=month_start_index)

    model = TimeSeriesModel(frame['price'])
    assert_equal(model.data.dates, expected.index)
def test_get_predict_start_end():
    """Equivalent start/end specifications resolve to the same result.

    Each start and end is given as an integer position, a date string, a
    ``datetime`` and an element of the pandas index; every combination must
    yield the same first three items from ``_get_prediction_index``.
    """
    full_index = pd.date_range(start='1970-01-01', end='1990-01-01', freq='AS')
    # The model only sees the first ten years of the full index.
    endog = pd.Series(np.zeros(10), full_index[:10])
    model = TimeSeriesModel(endog)

    starts = (1, '1971-01-01', datetime(1971, 1, 1), full_index[1])
    ends = (20, '1990-01-01', datetime(1990, 1, 1), full_index[-1])
    expected = (1, 9, 11)

    combos = [(first, last) for first in starts for last in ends]
    for first, last in combos:
        outcome = model._get_prediction_index(first, last)
        assert_equal(outcome[:3], expected)