# -*- coding: utf-8 -*-
"""Testing OLS robust covariance matrices against STATA
Created on Mon Oct 28 15:25:14 2013
Author: Josef Perktold
"""
import pytest
import numpy as np
from scipy import stats
from numpy.testing import (assert_allclose, assert_equal, assert_warns,
assert_raises)
from statsmodels.regression.linear_model import OLS, WLS
import statsmodels.stats.sandwich_covariance as sw
from statsmodels.tools.tools import add_constant
from statsmodels.datasets import macrodata
from statsmodels.tools.sm_exceptions import InvalidTestWarning
from .results import results_macro_ols_robust as res
from .results import results_grunfeld_ols_robust_cluster as res2
# TODO: implement test_hac_simple
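# Conventions used in the comparisons below:
# - HC0 is the White/sandwich heteroscedasticity-robust covariance without a
#   small-sample correction; HC1 scales HC0 by nobs / df_resid and is what
#   Stata's `regress, vce(robust)` reports.
# - HAC is the Newey-West heteroscedasticity- and autocorrelation-consistent
#   covariance, used here with 4 lags.
# - `small=True` means the reference results use t / F (small-sample)
#   distributions, `small=False` means normal / chi2 (large-sample) ones.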
class CheckOLSRobust(object):
def test_basic(self):
res1 = self.res1
res2 = self.res2
rtol = getattr(self, 'rtol', 1e-10)
assert_allclose(res1.params, res2.params, rtol=rtol)
assert_allclose(self.bse_robust, res2.bse, rtol=rtol)
assert_allclose(self.cov_robust, res2.cov, rtol=rtol)
@pytest.mark.smoke
def test_t_test_summary(self):
res1 = self.res1
mat = np.eye(len(res1.params))
# TODO: if the t_test call is expensive, possibly make it a fixture?
tt = res1.t_test(mat, cov_p=self.cov_robust)
tt.summary()
@pytest.mark.smoke
def test_t_test_summary_frame(self):
res1 = self.res1
mat = np.eye(len(res1.params))
tt = res1.t_test(mat, cov_p=self.cov_robust)
tt.summary_frame()
@pytest.mark.smoke
def test_f_test_summary(self):
res1 = self.res1
mat = np.eye(len(res1.params))
ft = res1.f_test(mat[:-1], cov_p=self.cov_robust)
ft.summary()
def test_tests(self): # TODO: break into well-scoped tests
        # Note: `small` results use the t distribution and a ddof correction,
        # `large` results use the normal distribution.
        # The large-sample F statistic has no ddof correction but is still
        # reported against an F distribution (?)
res1 = self.res1
res2 = self.res2
rtol = getattr(self, 'rtol', 1e-10)
rtolh = getattr(self, 'rtolh', 1e-12)
mat = np.eye(len(res1.params))
tt = res1.t_test(mat, cov_p=self.cov_robust)
# has 'effect', 'pvalue', 'sd', 'tvalue'
# TODO confint missing
assert_allclose(tt.effect, res2.params, rtol=rtol)
assert_allclose(tt.sd, res2.bse, rtol=rtol)
assert_allclose(tt.tvalue, res2.tvalues, rtol=rtol)
if self.small:
assert_allclose(tt.pvalue, res2.pvalues, rtol=5 * rtol)
else:
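            # tt.pvalue is based on the t distribution; recompute two-sided
            # normal p-values for the large-sample comparison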
pval = stats.norm.sf(np.abs(tt.tvalue)) * 2
assert_allclose(pval, res2.pvalues, rtol=5 * rtol, atol=1e-25)
ft = res1.f_test(mat[:-1], cov_p=self.cov_robust)
if self.small:
#'df_denom', 'df_num', 'fvalue', 'pvalue'
assert_allclose(ft.fvalue, res2.F, rtol=rtol)
# f-pvalue is not directly available in Stata results, but is in ivreg2
if hasattr(res2, 'Fp'):
assert_allclose(ft.pvalue, res2.Fp, rtol=rtol)
else:
if not getattr(self, 'skip_f', False):
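                # The reference F appears to include the nobs / df_resid
                # small-sample factor even in the large case, so rescale the
                # statistic computed from the uncorrected covariance.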
dof_corr = res1.df_resid * 1. / res1.nobs
assert_allclose(ft.fvalue * dof_corr, res2.F, rtol=rtol)
if hasattr(res2, 'df_r'):
assert_equal(ft.df_num, res2.df_m)
assert_equal(ft.df_denom, res2.df_r)
else:
# ivreg2
assert_equal(ft.df_num, res2.Fdf1)
assert_equal(ft.df_denom, res2.Fdf2)
class TestOLSRobust1(CheckOLSRobust):
# compare with regress robust
def setup(self):
res_ols = self.res1
self.bse_robust = res_ols.HC1_se
self.cov_robust = res_ols.cov_HC1
self.small = True
self.res2 = res.results_hc0
@classmethod
def setup_class(cls):
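        # Quarterly macro data: regress annualized real investment growth on a
        # constant, annualized real GDP growth and the real interest rate
        # (first nobs - 1 observations, aligned with the differenced series).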
d2 = macrodata.load_pandas().data
g_gdp = 400*np.diff(np.log(d2['realgdp'].values))
g_inv = 400*np.diff(np.log(d2['realinv'].values))
exogg = add_constant(np.c_[g_gdp, d2['realint'][:-1].values], prepend=False)
cls.res1 = res_ols = OLS(g_inv, exogg).fit()
class TestOLSRobust2(TestOLSRobust1):
# compare with ivreg robust small
def setup(self):
res_ols = self.res1
self.bse_robust = res_ols.HC1_se
self.cov_robust = res_ols.cov_HC1
self.small = True
self.res2 = res.results_ivhc0_small
class TestOLSRobust3(TestOLSRobust1):
# compare with ivreg robust (not small)
def setup(self):
res_ols = self.res1
self.bse_robust = res_ols.HC0_se
self.cov_robust = res_ols.cov_HC0
self.small = False
self.res2 = res.results_ivhc0_large
class TestOLSRobustHacSmall(TestOLSRobust1):
    # compare with ivreg HAC small
def setup(self):
res_ols = self.res1
cov1 = sw.cov_hac_simple(res_ols, nlags=4, use_correction=True)
se1 = sw.se_cov(cov1)
self.bse_robust = se1
self.cov_robust = cov1
self.small = True
self.res2 = res.results_ivhac4_small
class TestOLSRobustHacLarge(TestOLSRobust1):
    # compare with ivreg HAC (not small)
def setup(self):
res_ols = self.res1
cov1 = sw.cov_hac_simple(res_ols, nlags=4, use_correction=False)
se1 = sw.se_cov(cov1)
self.bse_robust = se1
self.cov_robust = cov1
self.small = False
self.res2 = res.results_ivhac4_large
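# The `*New` classes below use the results-instance interface in which the
# robust covariance becomes the default covariance of the results object.
# A minimal sketch of the equivalent fit-time usage (not executed here):
#     res = OLS(endog, exog).fit(cov_type='HC1', use_t=True)
# which should agree with calling get_robustcov_results('HC1', use_t=True)
# on a plain OLS fit.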
class CheckOLSRobustNewMixin(object):
# This uses the robust covariance as default covariance
def test_compare(self):
rtol = getattr(self, 'rtol', 1e-10)
assert_allclose(self.cov_robust, self.cov_robust2, rtol=rtol)
assert_allclose(self.bse_robust, self.bse_robust2, rtol=rtol)
def test_fvalue(self):
if not getattr(self, 'skip_f', False):
rtol = getattr(self, 'rtol', 1e-10)
assert_allclose(self.res1.fvalue, self.res2.F, rtol=rtol)
if hasattr(self.res2, 'Fp'):
# only available with ivreg2
assert_allclose(self.res1.f_pvalue, self.res2.Fp, rtol=rtol)
else:
            pytest.skip("TODO: document why this test is skipped")
def test_confint(self):
rtol = getattr(self, 'rtol', 1e-10)
ci1 = self.res1.conf_int()
ci2 = self.res2.params_table[:,4:6]
assert_allclose(ci1, ci2, rtol=rtol)
# check critical value
crit1 = np.diff(ci1, 1).ravel() / 2 / self.res1.bse
        crit2 = np.diff(ci2, 1).ravel() / 2 / self.res2.bse
        assert_allclose(crit1, crit2, rtol=rtol)
def test_ttest(self):
res1 = self.res1
res2 = self.res2
rtol = getattr(self, 'rtol', 1e-10)
        rtolh = getattr(self, 'rtolh', 1e-12)
mat = np.eye(len(res1.params))
tt = res1.t_test(mat, cov_p=self.cov_robust)
# has 'effect', 'pvalue', 'sd', 'tvalue'
# TODO confint missing
assert_allclose(tt.effect, res2.params, rtol=rtolh)
assert_allclose(tt.sd, res2.bse, rtol=rtol)
assert_allclose(tt.tvalue, res2.tvalues, rtol=rtolh)
assert_allclose(tt.pvalue, res2.pvalues, rtol=5 * rtol)
ci1 = tt.conf_int()
ci2 = self.res2.params_table[:,4:6]
assert_allclose(ci1, ci2, rtol=rtol)
def test_scale(self):
res1 = self.res1
res2 = self.res2
rtol = 1e-5
        # Note: statsmodels always uses df_resid for scale.
        # Stata uses either nobs or df_resid for rmse, and rmse is not always
        # available in the stored Stata results.
#assert_allclose(res1.scale, res2.rmse**2 * res2.N / (res2.N - res2.df_m - 1), rtol=rtol)
skip = False
if hasattr(res2, 'rss'):
scale = res2.rss / (res2.N - res2.df_m - 1)
elif hasattr(res2, 'rmse'):
scale = res2.rmse**2
else:
skip = True
if isinstance(res1.model, WLS):
skip = True
            # Stata uses a different scaling and the unweighted resid for rmse
if not skip:
assert_allclose(res1.scale, scale, rtol=rtol)
        if res2.vcetype != 'Newey-West':
            # Stata's newey does not report an R-squared
            r2 = res2.r2 if hasattr(res2, 'r2') else res2.r2c
assert_allclose(res1.rsquared, r2, rtol=rtol, err_msg=str(skip))
# consistency checks, not against Stata
df_resid = res1.nobs - res1.df_model - 1
assert_equal(res1.df_resid, df_resid)
# variance of resid_pearson is 1, with ddof, and loc=0
psum = (res1.resid_pearson**2).sum()
assert_allclose(psum, df_resid, rtol=1e-13)
@pytest.mark.smoke
def test_summary(self):
self.res1.summary()
class TestOLSRobust2SmallNew(TestOLSRobust1, CheckOLSRobustNewMixin):
# compare with ivreg robust small
def setup(self):
res_ols = self.res1.get_robustcov_results('HC1', use_t=True)
self.res3 = self.res1
self.res1 = res_ols
self.bse_robust = res_ols.bse
self.cov_robust = res_ols.cov_params()
self.bse_robust2 = res_ols.HC1_se
self.cov_robust2 = res_ols.cov_HC1
self.small = True
self.res2 = res.results_ivhc0_small
def test_compare(self):
        # check that we get a warning from the nested model compare methods
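        # compare_lr_test and compare_f_test assume the default nonrobust
        # covariance, so calling them on results that use a robust cov_type
        # should emit InvalidTestWarning while still returning the standard
        # LR / F comparison values.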
res1 = self.res1
endog = res1.model.endog
exog = res1.model.exog[:, [0, 2]] # drop one variable
res_ols2 = OLS(endog, exog).fit()
# results from Stata
r_pval = .0307306938402991
r_chi2 = 4.667944083588736
r_df = 1
assert_warns(InvalidTestWarning, res1.compare_lr_test, res_ols2)
import warnings
with warnings.catch_warnings():
warnings.simplefilter("ignore")
chi2, pval, df = res1.compare_lr_test(res_ols2)
assert_allclose(chi2, r_chi2, rtol=1e-11)
assert_allclose(pval, r_pval, rtol=1e-11)
assert_equal(df, r_df)
assert_warns(InvalidTestWarning, res1.compare_f_test, res_ols2)
#fva, pval, df = res1.compare_f_test(res_ols2)
class TestOLSRobustHACSmallNew(TestOLSRobust1, CheckOLSRobustNewMixin):
    # compare with ivreg HAC small
def setup(self):
res_ols = self.res1.get_robustcov_results('HAC', maxlags=4,
use_correction=True,
use_t=True)
self.res3 = self.res1
self.res1 = res_ols
self.bse_robust = res_ols.bse
self.cov_robust = res_ols.cov_params()
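        # recompute the same HAC covariance directly; use_correction=True applies
        # the nobs / df_resid small-sample factor, matching the `small` results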
cov1 = sw.cov_hac_simple(res_ols, nlags=4, use_correction=True)
se1 = sw.se_cov(cov1)
self.bse_robust2 = se1
self.cov_robust2 = cov1
self.small = True
self.res2 = res.results_ivhac4_small
class TestOLSRobust2LargeNew(TestOLSRobust1, CheckOLSRobustNewMixin):
    # compare with ivreg robust (not small)
def setup(self):
res_ols = self.res1.get_robustcov_results('HC0')
res_ols.use_t = False
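        # use_t=False switches inference to the normal / chi2 (large-sample)
        # distributions, matching the `large` reference results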
self.res3 = self.res1
self.res1 = res_ols
self.bse_robust = res_ols.bse
self.cov_robust = res_ols.cov_params()
self.bse_robust2 = res_ols.HC0_se
self.cov_robust2 = res_ols.cov_HC0
self.small = False
self.res2 = res.results_ivhc0_large
@pytest.mark.skip(reason="not refactored yet for `large`")
def test_fvalue(self):
super(TestOLSRobust2LargeNew, self).test_fvalue()
@pytest.mark.skip(reason="not refactored yet for `large`")
def test_confint(self):
super(TestOLSRobust2LargeNew, self).test_confint()