"""
Tests for recursive least squares models
Author: Chad Fulton
License: Simplified-BSD
"""
import numpy as np
import pandas as pd
import pytest
from scipy.stats import norm
import os
from statsmodels.datasets import macrodata
from statsmodels.regression.linear_model import OLS
from statsmodels.genmod.api import GLM
from statsmodels.tools.eval_measures import aic, bic
from statsmodels.regression.recursive_ls import RecursiveLS
from statsmodels.stats.diagnostic import recursive_olsresiduals
from statsmodels.tools import add_constant
from statsmodels.tools.sm_exceptions import ValueWarning
from numpy.testing import assert_equal, assert_raises, assert_allclose
current_path = os.path.dirname(os.path.abspath(__file__))
results_R_path = 'results' + os.sep + 'results_rls_R.csv'
results_R = pd.read_csv(current_path + os.sep + results_R_path)
results_stata_path = 'results' + os.sep + 'results_rls_stata.csv'
results_stata = pd.read_csv(current_path + os.sep + results_stata_path)
dta = macrodata.load_pandas().data
dta.index = pd.date_range(start='1959-01-01', end='2009-07-01', freq='QS')
endog = dta['cpi']
exog = add_constant(dta['m1'])
def test_endog():
# Tests for numpy input
mod = RecursiveLS(endog.values, exog.values)
res = mod.fit()
# Test the RLS estimates against OLS estimates
mod_ols = OLS(endog, exog)
res_ols = mod_ols.fit()
assert_allclose(res.params, res_ols.params)
# Tests for 1-dim exog
mod = RecursiveLS(endog, dta['m1'].values)
res = mod.fit()
# Test the RLS estimates against OLS estimates
mod_ols = OLS(endog, dta['m1'])
res_ols = mod_ols.fit()
assert_allclose(res.params, res_ols.params)
def test_ols():
# More comprehensive tests against OLS estimates
mod = RecursiveLS(endog, dta['m1'])
res = mod.fit()
mod_ols = OLS(endog, dta['m1'])
res_ols = mod_ols.fit()
# Regression coefficients, standard errors, and estimated scale
assert_allclose(res.params, res_ols.params)
assert_allclose(res.bse, res_ols.bse)
# Note: scale here is computed according to Harvey, 1989, 4.2.5, and is
# the called the ML estimator and sometimes (e.g. later in section 5)
# denoted \tilde \sigma_*^2
assert_allclose(res.filter_results.obs_cov[0, 0], res_ols.scale)
# OLS residuals are equivalent to smoothed forecast errors
# (the latter are defined as e_t|T by Harvey, 1989, 5.4.5)
# (this follows since the smoothed state simply contains the
# full-information estimates of the regression coefficients)
actual = (mod.endog[:, 0] -
np.sum(mod['design', 0, :, :] * res.smoothed_state, axis=0))
assert_allclose(actual, res_ols.resid)
# Given the estimate of scale as `sum(v_t^2 / f_t) / (T - d)` (see
# Harvey, 1989, 4.2.5 on p. 183), then llf_recursive is equivalent to the
# full OLS loglikelihood (i.e. without the scale concentrated out).
desired = mod_ols.loglike(res_ols.params, scale=res_ols.scale)
assert_allclose(res.llf_recursive, desired)
# Alternatively, we can constrcut the concentrated OLS loglikelihood
# by computing the scale term with `nobs` in the denominator rather than
# `nobs - d`.
scale_alternative = np.sum((
res.standardized_forecasts_error[0, 1:] *
res.filter_results.obs_cov[0, 0]**0.5)**2) / mod.nobs
llf_alternative = np.log(norm.pdf(res.resid_recursive, loc=0,
scale=scale_alternative**0.5)).sum()
assert_allclose(llf_alternative, res_ols.llf)
# Prediction
actual = res.forecast(10, design=np.ones((1, 1, 10)))
assert_allclose(actual, res_ols.predict(np.ones((10, 1))))
# Sums of squares, R^2
assert_allclose(res.ess, res_ols.ess)
assert_allclose(res.ssr, res_ols.ssr)
assert_allclose(res.centered_tss, res_ols.centered_tss)
assert_allclose(res.uncentered_tss, res_ols.uncentered_tss)
assert_allclose(res.rsquared, res_ols.rsquared)
# Mean squares
assert_allclose(res.mse_model, res_ols.mse_model)
assert_allclose(res.mse_resid, res_ols.mse_resid)
assert_allclose(res.mse_total, res_ols.mse_total)
# Hypothesis tests
actual = res.t_test('m1 = 0')
desired = res_ols.t_test('m1 = 0')
assert_allclose(actual.statistic, desired.statistic)
assert_allclose(actual.pvalue, desired.pvalue, atol=1e-15)
actual = res.f_test('m1 = 0')
desired = res_ols.f_test('m1 = 0')
assert_allclose(actual.statistic, desired.statistic)
assert_allclose(actual.pvalue, desired.pvalue, atol=1e-15)
# Information criteria
# Note: the llf and llf_obs given in the results are based on the Kalman
# filter and so the ic given in results will not be identical to the
# OLS versions. Additionally, llf_recursive is comparable to the
# non-concentrated llf, and not the concentrated llf that is by default
# used in OLS. Compute new ic based on llf_alternative to compare.
actual_aic = aic(llf_alternative, res.nobs_effective, res.df_model)
assert_allclose(actual_aic, res_ols.aic)
actual_bic = bic(llf_alternative, res.nobs_effective, res.df_model)
assert_allclose(actual_bic, res_ols.bic)
def test_glm(constraints=None):
# More comprehensive tests against GLM estimates (this is sort of redundant
# given `test_ols`, but this is mostly to complement the tests in
# `test_glm_constrained`)
endog = dta.infl
exog = add_constant(dta[['unemp', 'm1']])
mod = RecursiveLS(endog, exog, constraints=constraints)
res = mod.fit()
mod_glm = GLM(endog, exog)
if constraints is None:
res_glm = mod_glm.fit()
else:
res_glm = mod_glm.fit_constrained(constraints=constraints)
# Regression coefficients, standard errors, and estimated scale
assert_allclose(res.params, res_glm.params)
assert_allclose(res.bse, res_glm.bse, atol=1e-6)
# Note: scale here is computed according to Harvey, 1989, 4.2.5, and is
# the called the ML estimator and sometimes (e.g. later in section 5)
# denoted \tilde \sigma_*^2
assert_allclose(res.filter_results.obs_cov[0, 0], res_glm.scale)
# DoF
# Note: GLM does not include intercept in DoF, so modify by -1
assert_equal(res.df_model - 1, res_glm.df_model)
# OLS residuals are equivalent to smoothed forecast errors
# (the latter are defined as e_t|T by Harvey, 1989, 5.4.5)
# (this follows since the smoothed state simply contains the
# full-information estimates of the regression coefficients)
actual = (mod.endog[:, 0] -
np.sum(mod['design', 0, :, :] * res.smoothed_state, axis=0))
assert_allclose(actual, res_glm.resid_response, atol=1e-7)
# Given the estimate of scale as `sum(v_t^2 / f_t) / (T - d)` (see
# Harvey, 1989, 4.2.5 on p. 183), then llf_recursive is equivalent to the
# full OLS loglikelihood (i.e. without the scale concentrated out).
desired = mod_glm.loglike(res_glm.params, scale=res_glm.scale)
assert_allclose(res.llf_recursive, desired)
# Alternatively, we can construct the concentrated OLS loglikelihood
# by computing the scale term with `nobs` in the denominator rather than
# `nobs - d`.
scale_alternative = np.sum((
res.standardized_forecasts_error[0, 1:] *
res.filter_results.obs_cov[0, 0]**0.5)**2) / mod.nobs
llf_alternative = np.log(norm.pdf(res.resid_recursive, loc=0,
scale=scale_alternative**0.5)).sum()
assert_allclose(llf_alternative, res_glm.llf)
# Prediction
# TODO: prediction in this case is not working.
if constraints is None:
design = np.ones((1, 3, 10))
actual = res.forecast(10, design=design)
assert_allclose(actual, res_glm.predict(np.ones((10, 3))))
else:
design = np.ones((2, 3, 10))
assert_raises(NotImplementedError, res.forecast, 10, design=design)
# Hypothesis tests
actual = res.t_test('m1 = 0')
desired = res_glm.t_test('m1 = 0')
assert_allclose(actual.statistic, desired.statistic)
assert_allclose(actual.pvalue, desired.pvalue, atol=1e-15)
actual = res.f_test('m1 = 0')
desired = res_glm.f_test('m1 = 0')
assert_allclose(actual.statistic, desired.statistic)
assert_allclose(actual.pvalue, desired.pvalue)
# Information criteria
# Note: the llf and llf_obs given in the results are based on the Kalman
# filter and so the ic given in results will not be identical to the
# OLS versions. Additionally, llf_recursive is comparable to the
# non-concentrated llf, and not the concentrated llf that is by default
# used in OLS. Compute new ic based on llf_alternative to compare.
actual_aic = aic(llf_alternative, res.nobs_effective, res.df_model)
assert_allclose(actual_aic, res_glm.aic)
# See gh#1733 for details on why the BIC does not match while AIC does
# actual_bic = bic(llf_alternative, res.nobs_effective, res.df_model)
# assert_allclose(actual_bic, res_glm.bic)
def test_glm_constrained():
test_glm(constraints='m1 + unemp = 1')
def test_filter():
# Basic test for filtering
mod = RecursiveLS(endog, exog)
res = mod.filter()
# Test the RLS estimates against OLS estimates
mod_ols = OLS(endog, exog)
res_ols = mod_ols.fit()
assert_allclose(res.params, res_ols.params)
def test_estimates():
mod = RecursiveLS(endog, exog)
res = mod.fit()
# Test for start_params
assert_equal(mod.start_params, 0)
# Test the RLS coefficient estimates against those from R (quantreg)
# Due to initialization issues, we get more agreement as we get
# farther from the initial values.
assert_allclose(res.recursive_coefficients.filtered[:, 2:10].T,
results_R.iloc[:8][['beta1', 'beta2']], rtol=1e-5)
assert_allclose(res.recursive_coefficients.filtered[:, 9:20].T,
results_R.iloc[7:18][['beta1', 'beta2']])
assert_allclose(res.recursive_coefficients.filtered[:, 19:].T,
results_R.iloc[17:][['beta1', 'beta2']])
# Test the RLS estimates against OLS estimates
mod_ols = OLS(endog, exog)
res_ols = mod_ols.fit()
assert_allclose(res.params, res_ols.params)
@pytest.mark.matplotlib
def test_plots(close_figures):
exog = add_constant(dta[['m1', 'pop']])
mod = RecursiveLS(endog, exog)
res = mod.fit()
# Basic plot
try:
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
except ImportError:
pass
fig = res.plot_recursive_coefficient()
# Specific variable
fig = res.plot_recursive_coefficient(variables=['m1'])
# All variables
fig = res.plot_recursive_coefficient(variables=[0, 'm1', 'pop'])
# Basic plot
fig = res.plot_cusum()
# Other alphas
for alpha in [0.01, 0.10]:
fig = res.plot_cusum(alpha=alpha)
# Invalid alpha
assert_raises(ValueError, res.plot_cusum, alpha=0.123)
# Basic plot
fig = res.plot_cusum_squares()
# Numpy input (no dates)
mod = RecursiveLS(endog.values, exog.values)
res = mod.fit()
# Basic plot
fig = res.plot_recursive_coefficient()
# Basic plot
fig = res.plot_cusum()
# Basic plot
fig = res.plot_cusum_squares()
def test_from_formula():
with pytest.warns(ValueWarning, match="No frequency information"):
mod = RecursiveLS.from_formula('cpi ~ m1', data=dta)
res = mod.fit()
# Test the RLS estimates against OLS estimates
mod_ols = OLS.from_formula('cpi ~ m1', data=dta)
res_ols = mod_ols.fit()
assert_allclose(res.params, res_ols.params)
def test_resid_recursive():
mod = RecursiveLS(endog, exog)
res = mod.fit()
# Test the recursive residuals against those from R (strucchange)
assert_allclose(res.resid_recursive[2:10].T,
results_R.iloc[:8]['rec_resid'])
assert_allclose(res.resid_recursive[9:20].T,
results_R.iloc[7:18]['rec_resid'])
assert_allclose(res.resid_recursive[19:].T,
results_R.iloc[17:]['rec_resid'])
# Test the RLS estimates against those from Stata (cusum6)
assert_allclose(res.resid_recursive[3:],
results_stata.iloc[3:]['rr'], atol=1e-5, rtol=1e-5)
# Test the RLS estimates against statsmodels estimates
mod_ols = OLS(endog, exog)
res_ols = mod_ols.fit()
desired_resid_recursive = recursive_olsresiduals(res_ols)[4][2:]
assert_allclose(res.resid_recursive[2:], desired_resid_recursive)
def test_cusum():
mod = RecursiveLS(endog, exog)
res = mod.fit()
# Test the cusum statistics against those from R (strucchange)
Loading ...