import numpy as np
import pytest
from numpy.testing import (
assert_, assert_allclose, assert_warns, assert_raises)
from statsmodels.tsa.innovations.arma_innovations import arma_innovations
from statsmodels.tsa.statespace import sarimax
from statsmodels.tsa.arima.datasets.brockwell_davis_2002 import (
dowj, lake, oshorts)
from statsmodels.tsa.arima.estimators.burg import burg
from statsmodels.tsa.arima.estimators.hannan_rissanen import hannan_rissanen
from statsmodels.tsa.arima.estimators.innovations import (
innovations, innovations_mle)
@pytest.mark.low_precision('Test against Example 5.1.5 in Brockwell and Davis'
' (2016)')
def test_brockwell_davis_example_515():
    # Difference the series (demeaning is handled via `demean=True` below)
endog = dowj.diff().iloc[1:]
    # Innovations algorithm (MA)
p, _ = innovations(endog, ma_order=17, demean=True)
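    # (`innovations` returns one set of parameter estimates for each MA order
    # from 0 through ma_order, so p[17] below is the MA(17) fit)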
    # First, BD show the MA(2) coefficients that result from the m=17
    # computation
assert_allclose(p[17].ma_params[:2], [.4269, .2704], atol=1e-4)
assert_allclose(p[17].sigma2, 0.1122, atol=1e-4)
# Then they separately show the full MA(17) coefficients
desired = [.4269, .2704, .1183, .1589, .1355, .1568, .1284, -.0060, .0148,
-.0017, .1974, -.0463, .2023, .1285, -.0213, -.2575, .0760]
assert_allclose(p[17].ma_params, desired, atol=1e-4)
def check_innovations_ma_itsmr(lake):
# Test against R itsmr::ia; see results/results_innovations.R
ia, _ = innovations(lake, 10, demean=True)
desired = [
1.0816255264, 0.7781248438, 0.5367164430, 0.3291559246, 0.3160039850,
0.2513754550, 0.2051536531, 0.1441070313, 0.3431868340, 0.1827400798]
assert_allclose(ia[10].ma_params, desired)
# itsmr::ia returns the innovations algorithm estimate of the variance
u, v = arma_innovations(np.array(lake) - np.mean(lake),
ma_params=ia[10].ma_params, sigma2=1)
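    # With sigma2 fixed at 1, u holds the one-step-ahead prediction errors and
    # v their scaled variances, so sum(u**2 / v) / n reproduces the variance
    # estimate that itsmr::ia reports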
desired_sigma2 = 0.4523684344
assert_allclose(np.sum(u**2 / v) / len(u), desired_sigma2)
def test_innovations_ma_itsmr():
# Note: apparently itsmr automatically demeans (there is no option to
# control this)
endog = lake.copy()
check_innovations_ma_itsmr(endog) # Pandas series
check_innovations_ma_itsmr(endog.values) # Numpy array
check_innovations_ma_itsmr(endog.tolist()) # Python list
def test_innovations_ma_invalid():
    endog = np.arange(2)
    # Too few observations for the requested order
    assert_raises(ValueError, innovations, endog, ma_order=2)
    # The MA order must be a non-negative integer
    assert_raises(ValueError, innovations, endog, ma_order=-1)
    assert_raises(ValueError, innovations, endog, ma_order=1.5)
    endog = np.arange(10)
    # Non-consecutive MA lag polynomials are not supported
    assert_raises(ValueError, innovations, endog, ma_order=[1, 3])
@pytest.mark.low_precision('Test against Example 5.2.4 in Brockwell and Davis'
' (2016)')
def test_brockwell_davis_example_524():
    # Difference the series (demeaning is handled via `demean=True` below)
endog = dowj.diff().iloc[1:]
# Use Burg method to get initial coefficients for MLE
initial, _ = burg(endog, ar_order=1, demean=True)
# Fit MLE via innovations algorithm
p, _ = innovations_mle(endog, order=(1, 0, 0), demean=True,
start_params=initial.params)
assert_allclose(p.ar_params, 0.4471, atol=1e-4)
@pytest.mark.low_precision('Test against Example 5.2.4 in Brockwell and Davis'
' (2016)')
@pytest.mark.xfail(reason='Suspicious result reported in Brockwell and Davis'
' (2016).')
def test_brockwell_davis_example_524_variance():
# See `test_brockwell_davis_example_524` for the main test
# TODO: the test for sigma2 fails, but the value reported by BD (0.02117)
# is suspicious. For example, the Burg results have an AR coefficient of
# 0.4371 and sigma2 = 0.1423. It seems unlikely that the small difference
    # in AR coefficient would result in an order of magnitude reduction in
# sigma2 (see test_burg::test_brockwell_davis_example_513). Should run
# this in the ITSM program to check its output.
endog = dowj.diff().iloc[1:]
# Use Burg method to get initial coefficients for MLE
initial, _ = burg(endog, ar_order=1, demean=True)
# Fit MLE via innovations algorithm
p, _ = innovations_mle(endog, order=(1, 0, 0), demean=True,
start_params=initial.params)
assert_allclose(p.sigma2, 0.02117, atol=1e-4)
@pytest.mark.low_precision('Test against Example 5.2.5 in Brockwell and Davis'
' (2016)')
def test_brockwell_davis_example_525():
    # Copy the series (demeaning is handled via `demean=True` below)
endog = lake.copy()
# Use HR method to get initial coefficients for MLE
initial, _ = hannan_rissanen(endog, ar_order=1, ma_order=1, demean=True)
# Fit MLE via innovations algorithm
p, _ = innovations_mle(endog, order=(1, 0, 1), demean=True,
start_params=initial.params)
assert_allclose(p.params, [0.7446, 0.3213, 0.4750], atol=1e-4)
# Fit MLE via innovations algorithm, with default starting parameters
p, _ = innovations_mle(endog, order=(1, 0, 1), demean=True)
assert_allclose(p.params, [0.7446, 0.3213, 0.4750], atol=1e-4)
@pytest.mark.low_precision('Test against Example 5.4.1 in Brockwell and Davis'
' (2016)')
def test_brockwell_davis_example_541():
    # Copy the series (demeaning is handled via `demean=True` below)
endog = oshorts.copy()
# Use innovations MA method to get initial coefficients for MLE
initial, _ = innovations(endog, ma_order=1, demean=True)
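    # (initial[0] is the trivial MA(0) fit, so initial[1] below is the desired
    # MA(1) estimate)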
# Fit MLE via innovations algorithm
p, _ = innovations_mle(endog, order=(0, 0, 1), demean=True,
start_params=initial[1].params)
assert_allclose(p.ma_params, -0.818, atol=1e-3)
# TODO: the test for sigma2 fails; we get 2040.85 whereas BD reports
    # 2040.75. It is unclear whether this is due to the optimizers finding
    # different maxima, or to a reporting error by BD (i.e. a typo in which the
    # 8 was reported as a 7). This should be checked against the ITSM program.
    # NB: the state space approach also finds 2040.85 as
# the MLE value.
# assert_allclose(p.sigma2, 2040.75, atol=1e-2)
def test_innovations_mle_statespace():
# Test innovations output against state-space output.
endog = lake.copy()
endog = endog - endog.mean()
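    # Starting parameters: [AR coefficient, MA coefficient, innovation variance]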
start_params = [0, 0, np.var(endog)]
p, mleres = innovations_mle(endog, order=(1, 0, 1), demean=False,
start_params=start_params)
mod = sarimax.SARIMAX(endog, order=(1, 0, 1))
# Test that the maximized log-likelihood found via applications of the
# innovations algorithm matches the log-likelihood found by the Kalman
# filter at the same parameters
res = mod.filter(p.params)
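    # (minimize_results.fun is the minimized negative log-likelihood, hence the
    # sign flip below)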
assert_allclose(-mleres.minimize_results.fun, res.llf)
# Test MLE fitting
# To avoid small numerical differences with MLE fitting, start at the
# parameters found from innovations_mle
res2 = mod.fit(start_params=p.params, disp=0)
# Test that the state space approach confirms the MLE values found by
# innovations_mle
assert_allclose(p.params, res2.params)
# Test that starting parameter estimation succeeds and isn't terrible
# (i.e. leads to the same MLE)
p2, _ = innovations_mle(endog, order=(1, 0, 1), demean=False)
    # (this doesn't need to be a high-precision test, since it's okay if
    # different starting parameters give a slightly different MLE)
assert_allclose(p.params, p2.params, atol=1e-5)
def test_innovations_mle_statespace_seasonal():
# Test innovations output against state-space output.
endog = lake.copy()
endog = endog - endog.mean()
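    # Starting parameters: [seasonal AR coefficient, innovation variance]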
start_params = [0, np.var(endog)]
p, mleres = innovations_mle(endog, seasonal_order=(1, 0, 0, 4),
demean=False, start_params=start_params)
mod = sarimax.SARIMAX(endog, order=(0, 0, 0), seasonal_order=(1, 0, 0, 4))
# Test that the maximized log-likelihood found via applications of the
# innovations algorithm matches the log-likelihood found by the Kalman
# filter at the same parameters
res = mod.filter(p.params)
assert_allclose(-mleres.minimize_results.fun, res.llf)
# Test MLE fitting
# To avoid small numerical differences with MLE fitting, start at the
# parameters found from innovations_mle
res2 = mod.fit(start_params=p.params, disp=0)
# Test that the state space approach confirms the MLE values found by
# innovations_mle
assert_allclose(p.params, res2.params)
# Test that starting parameter estimation succeeds and isn't terrible
# (i.e. leads to the same MLE)
p2, _ = innovations_mle(endog, seasonal_order=(1, 0, 0, 4), demean=False)
    # (this doesn't need to be a high-precision test, since it's okay if
    # different starting parameters give a slightly different MLE)
assert_allclose(p.params, p2.params, atol=1e-5)
def test_innovations_mle_statespace_nonconsecutive():
# Test innovations output against state-space output.
endog = lake.copy()
endog = endog - endog.mean()
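    # order=([0, 1], 0, [0, 1]) includes only the lag-2 AR and lag-2 MA terms,
    # so the parameters are [lag-2 AR coefficient, lag-2 MA coefficient,
    # innovation variance]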
start_params = [0, 0, np.var(endog)]
p, mleres = innovations_mle(endog, order=([0, 1], 0, [0, 1]),
demean=False, start_params=start_params)
mod = sarimax.SARIMAX(endog, order=([0, 1], 0, [0, 1]))
# Test that the maximized log-likelihood found via applications of the
# innovations algorithm matches the log-likelihood found by the Kalman
# filter at the same parameters
res = mod.filter(p.params)
assert_allclose(-mleres.minimize_results.fun, res.llf)
# Test MLE fitting
# To avoid small numerical differences with MLE fitting, start at the
# parameters found from innovations_mle
res2 = mod.fit(start_params=p.params, disp=0)
# Test that the state space approach confirms the MLE values found by
# innovations_mle
assert_allclose(p.params, res2.params)
# Test that starting parameter estimation succeeds and isn't terrible
# (i.e. leads to the same MLE)
p2, _ = innovations_mle(endog, order=([0, 1], 0, [0, 1]), demean=False)
    # (this doesn't need to be a high-precision test, since it's okay if
    # different starting parameters give a slightly different MLE)
assert_allclose(p.params, p2.params, atol=1e-5)
def test_innovations_mle_integrated():
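    # Integrate the lake data (with a zero initial value) so that first
    # differencing recovers the original series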
endog = np.r_[0, np.cumsum(lake.copy())]
start_params = [0, np.var(lake.copy())]
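    # innovations_mle is expected to emit a UserWarning here, presumably
    # because d=1 requires it to difference the data prior to estimation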
with assert_warns(UserWarning):
p, mleres = innovations_mle(endog, order=(1, 1, 0),
demean=False, start_params=start_params)
mod = sarimax.SARIMAX(endog, order=(1, 1, 0),
simple_differencing=True)
# Test that the maximized log-likelihood found via applications of the
# innovations algorithm matches the log-likelihood found by the Kalman
# filter at the same parameters
res = mod.filter(p.params)
assert_allclose(-mleres.minimize_results.fun, res.llf)
# Test MLE fitting
# To avoid small numerical differences with MLE fitting, start at the
# parameters found from innovations_mle
res2 = mod.fit(start_params=p.params, disp=0)
# Test that the state space approach confirms the MLE values found by
# innovations_mle
# Note: atol is required only due to precision issues on Windows
assert_allclose(p.params, res2.params, atol=1e-6)
# Test that the result is equivalent to order=(1, 0, 0) on the differenced
# data
p2, _ = innovations_mle(lake.copy(), order=(1, 0, 0), demean=False,
start_params=start_params)
    # (this doesn't need to be a high-precision test, since it's okay if
    # different starting parameters give a slightly different MLE)
assert_allclose(p.params, p2.params, atol=1e-5)
def test_innovations_mle_misc():
endog = np.arange(20)**2 * 1.0
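    # (a deterministic quadratic trend, chosen so that the preliminary
    # estimates computed below lie outside the stationary / invertible region)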
    # Check that when Hannan-Rissanen estimates a non-stationary starting
    # parameter, innovations_mle resets it to zero
hr, _ = hannan_rissanen(endog, ar_order=1, demean=False)
assert_(hr.ar_params[0] > 1)
_, res = innovations_mle(endog, order=(1, 0, 0))
assert_allclose(res.start_params[0], 0)
    # Check that when Hannan-Rissanen estimates a non-invertible starting
    # parameter, innovations_mle resets it to zero
hr, _ = hannan_rissanen(endog, ma_order=1, demean=False)
assert_(hr.ma_params[0] > 1)
_, res = innovations_mle(endog, order=(0, 0, 1))
assert_allclose(res.start_params[0], 0)
def test_innovations_mle_invalid():
    endog = np.arange(2) * 1.0
    # Too few observations for the requested order
    assert_raises(ValueError, innovations_mle, endog, order=(0, 0, 2))
    # The MA order must be a non-negative integer
    assert_raises(ValueError, innovations_mle, endog, order=(0, 0, -1))
    assert_raises(ValueError, innovations_mle, endog, order=(0, 0, 1.5))
    endog = lake.copy()
    # Non-stationary AR and non-invertible MA starting parameters are invalid
    assert_raises(ValueError, innovations_mle, endog, order=(1, 0, 0),
                  start_params=[1., 1.])
    assert_raises(ValueError, innovations_mle, endog, order=(0, 0, 1),
                  start_params=[1., 1.])