"""
Test functions for models.regression
"""
# TODO: Test for LM
from statsmodels.compat.python import lrange
import warnings
import pandas
import numpy as np
from numpy.testing import (assert_almost_equal, assert_,
assert_raises, assert_equal, assert_allclose)
import pytest
from scipy.linalg import toeplitz
from statsmodels.tools.tools import add_constant, categorical
from statsmodels.regression.linear_model import (OLS, WLS, GLS, yule_walker,
burg)
from statsmodels.datasets import longley
from scipy.stats import t as student_t
DECIMAL_4 = 4
DECIMAL_3 = 3
DECIMAL_2 = 2
DECIMAL_1 = 1
DECIMAL_7 = 7
DECIMAL_0 = 0
try:
import cvxopt # noqa:F401
has_cvxopt = True
except ImportError:
has_cvxopt = False
class CheckRegressionResults(object):
"""
res2 contains results from Rmodelwrap or were obtained from a statistical
packages such as R, Stata, or SAS and were written to model_results
"""
decimal_params = DECIMAL_4
def test_params(self):
assert_almost_equal(self.res1.params, self.res2.params,
self.decimal_params)
decimal_standarderrors = DECIMAL_4
def test_standarderrors(self):
assert_almost_equal(self.res1.bse, self.res2.bse,
self.decimal_standarderrors)
decimal_confidenceintervals = DECIMAL_4
def test_confidenceintervals(self):
# NOTE: stata rounds residuals (at least) to sig digits so approx_equal
conf1 = self.res1.conf_int()
conf2 = self.res2.conf_int()
for i in range(len(conf1)):
assert_allclose(conf1[i][0], conf2[i][0],
rtol=10**-self.decimal_confidenceintervals)
assert_allclose(conf1[i][1], conf2[i][1],
rtol=10**-self.decimal_confidenceintervals)
decimal_conf_int_subset = DECIMAL_4
def test_conf_int_subset(self):
if len(self.res1.params) > 1:
ci1 = self.res1.conf_int(cols=(1, 2))
ci2 = self.res1.conf_int()[1:3]
assert_almost_equal(ci1, ci2, self.decimal_conf_int_subset)
else:
pass
decimal_scale = DECIMAL_4
def test_scale(self):
assert_almost_equal(self.res1.scale, self.res2.scale,
self.decimal_scale)
decimal_rsquared = DECIMAL_4
def test_rsquared(self):
assert_almost_equal(self.res1.rsquared, self.res2.rsquared,
self.decimal_rsquared)
decimal_rsquared_adj = DECIMAL_4
def test_rsquared_adj(self):
assert_almost_equal(self.res1.rsquared_adj, self.res2.rsquared_adj,
self.decimal_rsquared_adj)
def test_degrees(self):
assert_equal(self.res1.model.df_model, self.res2.df_model)
assert_equal(self.res1.model.df_resid, self.res2.df_resid)
decimal_ess = DECIMAL_4
def test_ess(self):
# Explained Sum of Squares
assert_almost_equal(self.res1.ess, self.res2.ess,
self.decimal_ess)
decimal_ssr = DECIMAL_4
def test_sumof_squaredresids(self):
assert_almost_equal(self.res1.ssr, self.res2.ssr, self.decimal_ssr)
decimal_mse_resid = DECIMAL_4
def test_mse_resid(self):
# Mean squared error of residuals
assert_almost_equal(self.res1.mse_model, self.res2.mse_model,
self.decimal_mse_resid)
decimal_mse_model = DECIMAL_4
def test_mse_model(self):
assert_almost_equal(self.res1.mse_resid, self.res2.mse_resid,
self.decimal_mse_model)
decimal_mse_total = DECIMAL_4
def test_mse_total(self):
assert_almost_equal(self.res1.mse_total, self.res2.mse_total,
self.decimal_mse_total,
err_msg="Test class %s" % self)
decimal_fvalue = DECIMAL_4
def test_fvalue(self):
# did not change this, not sure it should complain -inf not equal -inf
# if not (np.isinf(self.res1.fvalue) and np.isinf(self.res2.fvalue)):
assert_almost_equal(self.res1.fvalue, self.res2.fvalue,
self.decimal_fvalue)
decimal_loglike = DECIMAL_4
def test_loglike(self):
assert_almost_equal(self.res1.llf, self.res2.llf, self.decimal_loglike)
decimal_aic = DECIMAL_4
def test_aic(self):
assert_almost_equal(self.res1.aic, self.res2.aic, self.decimal_aic)
decimal_bic = DECIMAL_4
def test_bic(self):
assert_almost_equal(self.res1.bic, self.res2.bic, self.decimal_bic)
decimal_pvalues = DECIMAL_4
def test_pvalues(self):
assert_almost_equal(self.res1.pvalues, self.res2.pvalues,
self.decimal_pvalues)
decimal_wresid = DECIMAL_4
def test_wresid(self):
assert_almost_equal(self.res1.wresid, self.res2.wresid,
self.decimal_wresid)
decimal_resids = DECIMAL_4
def test_resids(self):
assert_almost_equal(self.res1.resid, self.res2.resid,
self.decimal_resids)
decimal_norm_resids = DECIMAL_4
def test_norm_resids(self):
assert_almost_equal(self.res1.resid_pearson, self.res2.resid_pearson,
self.decimal_norm_resids)
# TODO: test fittedvalues and what else?
class TestOLS(CheckRegressionResults):
@classmethod
def setup_class(cls):
from .results.results_regression import Longley
data = longley.load(as_pandas=False)
data.exog = add_constant(data.exog, prepend=False)
res1 = OLS(data.endog, data.exog).fit()
res2 = Longley()
res2.wresid = res1.wresid # workaround hack
cls.res1 = res1
cls.res2 = res2
res_qr = OLS(data.endog, data.exog).fit(method="qr")
model_qr = OLS(data.endog, data.exog)
Q, R = np.linalg.qr(data.exog)
model_qr.exog_Q, model_qr.exog_R = Q, R
model_qr.normalized_cov_params = np.linalg.inv(np.dot(R.T, R))
model_qr.rank = np.linalg.matrix_rank(R)
res_qr2 = model_qr.fit(method="qr")
cls.res_qr = res_qr
cls.res_qr_manual = res_qr2
def test_eigenvalues(self):
eigenval_perc_diff = (self.res_qr.eigenvals -
self.res_qr_manual.eigenvals)
eigenval_perc_diff /= self.res_qr.eigenvals
zeros = np.zeros_like(eigenval_perc_diff)
assert_almost_equal(eigenval_perc_diff, zeros, DECIMAL_7)
# Robust error tests. Compare values computed with SAS
def test_HC0_errors(self):
# They are split up because the copied results do not have any
# DECIMAL_4 places for the last place.
assert_almost_equal(self.res1.HC0_se[:-1],
self.res2.HC0_se[:-1], DECIMAL_4)
assert_allclose(np.round(self.res1.HC0_se[-1]),
self.res2.HC0_se[-1])
def test_HC1_errors(self):
assert_almost_equal(self.res1.HC1_se[:-1],
self.res2.HC1_se[:-1], DECIMAL_4)
# Note: tolerance is tight; rtol=3e-7 fails while 4e-7 passes
assert_allclose(self.res1.HC1_se[-1], self.res2.HC1_se[-1],
rtol=4e-7)
def test_HC2_errors(self):
assert_almost_equal(self.res1.HC2_se[:-1],
self.res2.HC2_se[:-1], DECIMAL_4)
# Note: tolerance is tight; rtol=4e-7 fails while 5e-7 passes
assert_allclose(self.res1.HC2_se[-1], self.res2.HC2_se[-1],
rtol=5e-7)
def test_HC3_errors(self):
assert_almost_equal(self.res1.HC3_se[:-1],
self.res2.HC3_se[:-1], DECIMAL_4)
# Note: tolerance is tight; rtol=1e-7 fails while 1.5e-7 passes
assert_allclose(self.res1.HC3_se[-1], self.res2.HC3_se[-1],
rtol=1.5e-7)
def test_qr_params(self):
assert_almost_equal(self.res1.params,
self.res_qr.params, 6)
def test_qr_normalized_cov_params(self):
# todo: need assert_close
assert_almost_equal(np.ones_like(self.res1.normalized_cov_params),
self.res1.normalized_cov_params /
self.res_qr.normalized_cov_params, 5)
def test_missing(self):
data = longley.load(as_pandas=False)
data.exog = add_constant(data.exog, prepend=False)
data.endog[[3, 7, 14]] = np.nan
mod = OLS(data.endog, data.exog, missing='drop')
assert_equal(mod.endog.shape[0], 13)
assert_equal(mod.exog.shape[0], 13)
def test_rsquared_adj_overfit(self):
# Test that if df_resid = 0, rsquared_adj = 0.
# This is a regression test for user issue:
# https://github.com/statsmodels/statsmodels/issues/868
with warnings.catch_warnings(record=True):
x = np.random.randn(5)
y = np.random.randn(5, 6)
results = OLS(x, y).fit()
rsquared_adj = results.rsquared_adj
assert_equal(rsquared_adj, np.nan)
def test_qr_alternatives(self):
assert_allclose(self.res_qr.params, self.res_qr_manual.params,
rtol=5e-12)
def test_norm_resid(self):
resid = self.res1.wresid
norm_resid = resid / np.sqrt(np.sum(resid**2.0) / self.res1.df_resid)
model_norm_resid = self.res1.resid_pearson
assert_almost_equal(model_norm_resid, norm_resid, DECIMAL_7)
def test_norm_resid_zero_variance(self):
with warnings.catch_warnings(record=True):
y = self.res1.model.endog
res = OLS(y, y).fit()
assert_allclose(res.scale, 0, atol=1e-20)
assert_allclose(res.wresid, res.resid_pearson, atol=5e-11)
class TestRTO(CheckRegressionResults):
@classmethod
def setup_class(cls):
from .results.results_regression import LongleyRTO
data = longley.load(as_pandas=False)
res1 = OLS(data.endog, data.exog).fit()
res2 = LongleyRTO()
res2.wresid = res1.wresid # workaround hack
cls.res1 = res1
cls.res2 = res2
res_qr = OLS(data.endog, data.exog).fit(method="qr")
cls.res_qr = res_qr
class TestFtest(object):
"""
Tests f_test vs. RegressionResults
"""
@classmethod
def setup_class(cls):
data = longley.load(as_pandas=False)
data.exog = add_constant(data.exog, prepend=False)
cls.res1 = OLS(data.endog, data.exog).fit()
R = np.identity(7)[:-1, :]
cls.Ftest = cls.res1.f_test(R)
def test_F(self):
assert_almost_equal(self.Ftest.fvalue, self.res1.fvalue, DECIMAL_4)
def test_p(self):
assert_almost_equal(self.Ftest.pvalue, self.res1.f_pvalue, DECIMAL_4)
def test_Df_denom(self):
assert_equal(self.Ftest.df_denom, self.res1.model.df_resid)
def test_Df_num(self):
assert_equal(self.Ftest.df_num, 6)
class TestFTest2(object):
"""
A joint test that the coefficient on
GNP = the coefficient on UNEMP and that the coefficient on
POP = the coefficient on YEAR for the Longley dataset.
Ftest1 is from statsmodels. Results are from Rpy using R's car library.
"""
@classmethod
def setup_class(cls):
data = longley.load(as_pandas=False)
data.exog = add_constant(data.exog, prepend=False)
res1 = OLS(data.endog, data.exog).fit()
R2 = [[0, 1, -1, 0, 0, 0, 0], [0, 0, 0, 0, 1, -1, 0]]
cls.Ftest1 = res1.f_test(R2)
hyp = 'x2 = x3, x5 = x6'
cls.NewFtest1 = res1.f_test(hyp)
def test_new_ftest(self):
assert_equal(self.NewFtest1.fvalue, self.Ftest1.fvalue)
def test_fvalue(self):
assert_almost_equal(self.Ftest1.fvalue, 9.7404618732968196, DECIMAL_4)
def test_pvalue(self):
assert_almost_equal(self.Ftest1.pvalue, 0.0056052885317493459,
DECIMAL_4)
def test_df_denom(self):
assert_equal(self.Ftest1.df_denom, 9)
def test_df_num(self):
assert_equal(self.Ftest1.df_num, 2)
class TestFtestQ(object):
"""
A joint hypothesis test that Rb = q. Coefficient tests are essentially
made up. Test values taken from Stata.
"""
@classmethod
def setup_class(cls):
data = longley.load(as_pandas=False)
data.exog = add_constant(data.exog, prepend=False)
Loading ...