"""
Tests for recursive least squares models

Author: Chad Fulton
License: Simplified-BSD
"""

import os

import numpy as np
import pandas as pd
import pytest
from scipy.stats import norm
from numpy.testing import assert_equal, assert_raises, assert_allclose

from statsmodels.datasets import macrodata
from statsmodels.regression.linear_model import OLS
from statsmodels.genmod.api import GLM
from statsmodels.tools.eval_measures import aic, bic
from statsmodels.regression.recursive_ls import RecursiveLS
from statsmodels.stats.diagnostic import recursive_olsresiduals
from statsmodels.tools import add_constant
from statsmodels.tools.sm_exceptions import ValueWarning

current_path = os.path.dirname(os.path.abspath(__file__))

results_R_path = os.path.join(current_path, 'results', 'results_rls_R.csv')
results_R = pd.read_csv(results_R_path)

results_stata_path = os.path.join(current_path, 'results',
                                  'results_rls_stata.csv')
results_stata = pd.read_csv(results_stata_path)

dta = macrodata.load_pandas().data
dta.index = pd.date_range(start='1959-01-01', end='2009-07-01', freq='QS')

endog = dta['cpi']
exog = add_constant(dta['m1'])


def test_endog():
    # Tests for numpy input
    mod = RecursiveLS(endog.values, exog.values)
    res = mod.fit()

    # Test the RLS estimates against OLS estimates
    mod_ols = OLS(endog, exog)
    res_ols = mod_ols.fit()
    assert_allclose(res.params, res_ols.params)

    # Tests for 1-dim exog
    mod = RecursiveLS(endog, dta['m1'].values)
    res = mod.fit()

    # Test the RLS estimates against OLS estimates
    mod_ols = OLS(endog, dta['m1'])
    res_ols = mod_ols.fit()
    assert_allclose(res.params, res_ols.params)


def test_ols():
    # More comprehensive tests against OLS estimates
    mod = RecursiveLS(endog, dta['m1'])
    res = mod.fit()

    mod_ols = OLS(endog, dta['m1'])
    res_ols = mod_ols.fit()

    # Regression coefficients, standard errors, and estimated scale
    assert_allclose(res.params, res_ols.params)
    assert_allclose(res.bse, res_ols.bse)
    # Note: scale here is computed according to Harvey, 1989, 4.2.5, and is
    # there called the ML estimator; it is sometimes (e.g. later in section 5)
    # denoted \tilde \sigma_*^2
    assert_allclose(res.filter_results.obs_cov[0, 0], res_ols.scale)

    # OLS residuals are equivalent to smoothed forecast errors
    # (the latter are defined as e_t|T by Harvey, 1989, 5.4.5)
    # (this follows since the smoothed state simply contains the
    # full-information estimates of the regression coefficients)
    actual = (mod.endog[:, 0] -
              np.sum(mod['design', 0, :, :] * res.smoothed_state, axis=0))
    assert_allclose(actual, res_ols.resid)
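
    # As a direct check of that claim, each column of the smoothed state
    # should reproduce the full-sample OLS coefficient (a minimal sketch of
    # the equivalence noted above, assuming the smoothed state is constant
    # across time):
    desired = np.asarray(res_ols.params)[:, None] * np.ones(mod.nobs)
    assert_allclose(res.smoothed_state, desired)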

    # Given the estimate of scale as `sum(v_t^2 / f_t) / (T - d)` (see
    # Harvey, 1989, 4.2.5 on p. 183), llf_recursive is equivalent to the
    # full OLS loglikelihood (i.e. without the scale concentrated out).
    desired = mod_ols.loglike(res_ols.params, scale=res_ols.scale)
    assert_allclose(res.llf_recursive, desired)
    # Alternatively, we can construct the concentrated OLS loglikelihood
    # by computing the scale term with `nobs` in the denominator rather than
    # `nobs - d`.
    scale_alternative = np.sum((
        res.standardized_forecasts_error[0, 1:] *
        res.filter_results.obs_cov[0, 0]**0.5)**2) / mod.nobs
    llf_alternative = np.log(norm.pdf(res.resid_recursive, loc=0,
                                      scale=scale_alternative**0.5)).sum()
    assert_allclose(llf_alternative, res_ols.llf)
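
    # Cross-check: the concentrated Gaussian loglikelihood has the closed
    # form -n/2 * (log(2*pi) + log(scale) + 1); a sketch assuming
    # `scale_alternative` is the biased (1/n) variance estimate:
    llf_closed_form = -0.5 * mod.nobs * (
        np.log(2 * np.pi) + np.log(scale_alternative) + 1)
    assert_allclose(llf_alternative, llf_closed_form)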

    # Prediction
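    # (the design matrix must be extended over the forecast period; for
    # time-varying matrices its shape is (k_endog, k_states, nforecast))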
    actual = res.forecast(10, design=np.ones((1, 1, 10)))
    assert_allclose(actual, res_ols.predict(np.ones((10, 1))))

    # Sums of squares, R^2
    assert_allclose(res.ess, res_ols.ess)
    assert_allclose(res.ssr, res_ols.ssr)
    assert_allclose(res.centered_tss, res_ols.centered_tss)
    assert_allclose(res.uncentered_tss, res_ols.uncentered_tss)
    assert_allclose(res.rsquared, res_ols.rsquared)

    # Mean squares
    assert_allclose(res.mse_model, res_ols.mse_model)
    assert_allclose(res.mse_resid, res_ols.mse_resid)
    assert_allclose(res.mse_total, res_ols.mse_total)

    # Hypothesis tests
    actual = res.t_test('m1 = 0')
    desired = res_ols.t_test('m1 = 0')
    assert_allclose(actual.statistic, desired.statistic)
    assert_allclose(actual.pvalue, desired.pvalue, atol=1e-15)

    actual = res.f_test('m1 = 0')
    desired = res_ols.f_test('m1 = 0')
    assert_allclose(actual.statistic, desired.statistic)
    assert_allclose(actual.pvalue, desired.pvalue, atol=1e-15)

    # Information criteria
    # Note: the llf and llf_obs given in the results are based on the Kalman
    # filter and so the ic given in results will not be identical to the
    # OLS versions. Additionally, llf_recursive is comparable to the
    # non-concentrated llf, and not the concentrated llf that is by default
    # used in OLS. Compute new ic based on llf_alternative to compare.
    actual_aic = aic(llf_alternative, res.nobs_effective, res.df_model)
    assert_allclose(actual_aic, res_ols.aic)
    actual_bic = bic(llf_alternative, res.nobs_effective, res.df_model)
    assert_allclose(actual_bic, res_ols.bic)


def test_glm(constraints=None):
    # More comprehensive tests against GLM estimates (somewhat redundant
    # given `test_ols`, but included mainly to complement the tests in
    # `test_glm_constrained`)
    endog = dta.infl
    exog = add_constant(dta[['unemp', 'm1']])

    mod = RecursiveLS(endog, exog, constraints=constraints)
    res = mod.fit()

    mod_glm = GLM(endog, exog)
    if constraints is None:
        res_glm = mod_glm.fit()
    else:
        res_glm = mod_glm.fit_constrained(constraints=constraints)

    # Regression coefficients, standard errors, and estimated scale
    assert_allclose(res.params, res_glm.params)
    assert_allclose(res.bse, res_glm.bse, atol=1e-6)
    # Note: scale here is computed according to Harvey, 1989, 4.2.5, and is
    # there called the ML estimator; it is sometimes (e.g. later in section 5)
    # denoted \tilde \sigma_*^2
    assert_allclose(res.filter_results.obs_cov[0, 0], res_glm.scale)

    # DoF
    # Note: GLM does not include intercept in DoF, so modify by -1
    assert_equal(res.df_model - 1, res_glm.df_model)

    # OLS residuals are equivalent to smoothed forecast errors
    # (the latter are defined as e_t|T by Harvey, 1989, 5.4.5)
    # (this follows since the smoothed state simply contains the
    # full-information estimates of the regression coefficients)
    actual = (mod.endog[:, 0] -
              np.sum(mod['design', 0, :, :] * res.smoothed_state, axis=0))
    assert_allclose(actual, res_glm.resid_response, atol=1e-7)

    # Given the estimate of scale as `sum(v_t^2 / f_t) / (T - d)` (see
    # Harvey, 1989, 4.2.5 on p. 183), llf_recursive is equivalent to the
    # full OLS loglikelihood (i.e. without the scale concentrated out).
    desired = mod_glm.loglike(res_glm.params, scale=res_glm.scale)
    assert_allclose(res.llf_recursive, desired)
    # Alternatively, we can construct the concentrated OLS loglikelihood
    # by computing the scale term with `nobs` in the denominator rather than
    # `nobs - d`.
    scale_alternative = np.sum((
        res.standardized_forecasts_error[0, 1:] *
        res.filter_results.obs_cov[0, 0]**0.5)**2) / mod.nobs
    llf_alternative = np.log(norm.pdf(res.resid_recursive, loc=0,
                                      scale=scale_alternative**0.5)).sum()
    assert_allclose(llf_alternative, res_glm.llf)

    # Prediction
    # TODO: prediction in this case is not working.
    if constraints is None:
        design = np.ones((1, 3, 10))
        actual = res.forecast(10, design=design)
        assert_allclose(actual, res_glm.predict(np.ones((10, 3))))
    else:
        design = np.ones((2, 3, 10))
        assert_raises(NotImplementedError, res.forecast, 10, design=design)

    # Hypothesis tests
    actual = res.t_test('m1 = 0')
    desired = res_glm.t_test('m1 = 0')
    assert_allclose(actual.statistic, desired.statistic)
    assert_allclose(actual.pvalue, desired.pvalue, atol=1e-15)

    actual = res.f_test('m1 = 0')
    desired = res_glm.f_test('m1 = 0')
    assert_allclose(actual.statistic, desired.statistic)
    assert_allclose(actual.pvalue, desired.pvalue)

    # Information criteria
    # Note: the llf and llf_obs given in the results are based on the Kalman
    # filter and so the ic given in results will not be identical to the
    # OLS versions. Additionally, llf_recursive is comparable to the
    # non-concentrated llf, and not the concentrated llf that is by default
    # used in OLS. Compute new ic based on llf_alternative to compare.
    actual_aic = aic(llf_alternative, res.nobs_effective, res.df_model)
    assert_allclose(actual_aic, res_glm.aic)
    # See gh#1733 for details on why the BIC does not match while AIC does
    # actual_bic = bic(llf_alternative, res.nobs_effective, res.df_model)
    # assert_allclose(actual_bic, res_glm.bic)


def test_glm_constrained():
    test_glm(constraints='m1 + unemp = 1')


def test_filter():
    # Basic test for filtering
    mod = RecursiveLS(endog, exog)
    res = mod.filter()

    # Test the RLS estimates against OLS estimates
    mod_ols = OLS(endog, exog)
    res_ols = mod_ols.fit()
    assert_allclose(res.params, res_ols.params)


def test_estimates():
    mod = RecursiveLS(endog, exog)
    res = mod.fit()

    # Test for start_params
    assert_equal(mod.start_params, 0)

    # Test the RLS coefficient estimates against those from R (quantreg)
    # Due to initialization effects, agreement improves as we move farther
    # from the initial values.
    assert_allclose(res.recursive_coefficients.filtered[:, 2:10].T,
                    results_R.iloc[:8][['beta1', 'beta2']], rtol=1e-5)
    assert_allclose(res.recursive_coefficients.filtered[:, 9:20].T,
                    results_R.iloc[7:18][['beta1', 'beta2']])
    assert_allclose(res.recursive_coefficients.filtered[:, 19:].T,
                    results_R.iloc[17:][['beta1', 'beta2']])

    # Test the RLS estimates against OLS estimates
    mod_ols = OLS(endog, exog)
    res_ols = mod_ols.fit()
    assert_allclose(res.params, res_ols.params)


@pytest.mark.matplotlib
def test_plots(close_figures):
    exog = add_constant(dta[['m1', 'pop']])
    mod = RecursiveLS(endog, exog)
    res = mod.fit()

    # Register matplotlib converters for the pandas date index, if available
    try:
        from pandas.plotting import register_matplotlib_converters
        register_matplotlib_converters()
    except ImportError:
        pass

    # Basic plot
    fig = res.plot_recursive_coefficient()

    # Specific variable
    fig = res.plot_recursive_coefficient(variables=['m1'])

    # All variables
    fig = res.plot_recursive_coefficient(variables=[0, 'm1', 'pop'])

    # Basic plot
    fig = res.plot_cusum()

    # Other alphas
    for alpha in [0.01, 0.10]:
        fig = res.plot_cusum(alpha=alpha)

    # Invalid alpha
    assert_raises(ValueError, res.plot_cusum, alpha=0.123)

    # Basic plot
    fig = res.plot_cusum_squares()

    # Numpy input (no dates)
    mod = RecursiveLS(endog.values, exog.values)
    res = mod.fit()

    # Basic plot
    fig = res.plot_recursive_coefficient()

    # Basic plot
    fig = res.plot_cusum()

    # Basic plot
    fig = res.plot_cusum_squares()


def test_from_formula():
    with pytest.warns(ValueWarning, match="No frequency information"):
        mod = RecursiveLS.from_formula('cpi ~ m1', data=dta)

    res = mod.fit()

    # Test the RLS estimates against OLS estimates
    mod_ols = OLS.from_formula('cpi ~ m1', data=dta)
    res_ols = mod_ols.fit()
    assert_allclose(res.params, res_ols.params)


def test_resid_recursive():
    mod = RecursiveLS(endog, exog)
    res = mod.fit()

    # Test the recursive residuals against those from R (strucchange)
    assert_allclose(res.resid_recursive[2:10].T,
                    results_R.iloc[:8]['rec_resid'])
    assert_allclose(res.resid_recursive[9:20].T,
                    results_R.iloc[7:18]['rec_resid'])
    assert_allclose(res.resid_recursive[19:].T,
                    results_R.iloc[17:]['rec_resid'])

    # Test the recursive residuals against those from Stata (cusum6)
    assert_allclose(res.resid_recursive[3:],
                    results_stata.iloc[3:]['rr'], atol=1e-5, rtol=1e-5)

    # Test the recursive residuals against the statsmodels
    # recursive_olsresiduals implementation (the element at index 4 of the
    # returned tuple contains the recursive residuals compared here)
    mod_ols = OLS(endog, exog)
    res_ols = mod_ols.fit()
    desired_resid_recursive = recursive_olsresiduals(res_ols)[4][2:]
    assert_allclose(res.resid_recursive[2:], desired_resid_recursive)
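
    # For reference, the recursive residuals can also be computed directly
    # from the textbook recursion
    #     w_t = (y_t - x_t' b_{t-1}) / sqrt(1 + x_t' (X_{t-1}'X_{t-1})^{-1} x_t)
    # where b_{t-1} is the OLS estimate on the first t-1 observations.
    # A minimal sketch of that definition (`y`, `X`, and `w` are local names
    # introduced here for illustration):
    y = np.asarray(endog)
    X = np.asarray(exog)
    k = X.shape[1]
    w = []
    for t in range(k, len(y)):
        b = np.linalg.lstsq(X[:t], y[:t], rcond=None)[0]
        xtx_inv = np.linalg.inv(X[:t].T @ X[:t])
        forecast_error = y[t] - X[t] @ b
        w.append(forecast_error / np.sqrt(1 + X[t] @ xtx_inv @ X[t]))
    assert_allclose(res.resid_recursive[k:], w)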


def test_cusum():
    mod = RecursiveLS(endog, exog)
    res = mod.fit()

    # Test the cusum statistics against those from R (strucchange)