Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

alkaline-ml / statsmodels   python

Repository URL to install this package:

Version: 0.11.1 

/ regression / tests / test_regression.py

"""
Test functions for models.regression
"""
# TODO: Test for LM
from statsmodels.compat.python import lrange
import warnings
import pandas
import numpy as np
from numpy.testing import (assert_almost_equal, assert_,
                           assert_raises, assert_equal, assert_allclose)
import pytest
from scipy.linalg import toeplitz
from statsmodels.tools.tools import add_constant, categorical
from statsmodels.regression.linear_model import (OLS, WLS, GLS, yule_walker,
                                                 burg)
from statsmodels.datasets import longley
from scipy.stats import t as student_t

DECIMAL_4 = 4
DECIMAL_3 = 3
DECIMAL_2 = 2
DECIMAL_1 = 1
DECIMAL_7 = 7
DECIMAL_0 = 0

try:
    import cvxopt  # noqa:F401
    has_cvxopt = True
except ImportError:
    has_cvxopt = False

class CheckRegressionResults(object):
    """
    res2 contains results from Rmodelwrap or were obtained from a statistical
    packages such as R, Stata, or SAS and were written to model_results
    """

    decimal_params = DECIMAL_4
    def test_params(self):
        assert_almost_equal(self.res1.params, self.res2.params,
                            self.decimal_params)

    decimal_standarderrors = DECIMAL_4
    def test_standarderrors(self):
        assert_almost_equal(self.res1.bse, self.res2.bse,
                            self.decimal_standarderrors)

    decimal_confidenceintervals = DECIMAL_4
    def test_confidenceintervals(self):
        # NOTE: stata rounds residuals (at least) to sig digits so approx_equal
        conf1 = self.res1.conf_int()
        conf2 = self.res2.conf_int()
        for i in range(len(conf1)):
            assert_allclose(conf1[i][0], conf2[i][0],
                            rtol=10**-self.decimal_confidenceintervals)
            assert_allclose(conf1[i][1], conf2[i][1],
                            rtol=10**-self.decimal_confidenceintervals)

    decimal_conf_int_subset = DECIMAL_4
    def test_conf_int_subset(self):
        if len(self.res1.params) > 1:
            ci1 = self.res1.conf_int(cols=(1, 2))
            ci2 = self.res1.conf_int()[1:3]
            assert_almost_equal(ci1, ci2, self.decimal_conf_int_subset)
        else:
            pass

    decimal_scale = DECIMAL_4
    def test_scale(self):
        assert_almost_equal(self.res1.scale, self.res2.scale,
                            self.decimal_scale)

    decimal_rsquared = DECIMAL_4
    def test_rsquared(self):
        assert_almost_equal(self.res1.rsquared, self.res2.rsquared,
                            self.decimal_rsquared)

    decimal_rsquared_adj = DECIMAL_4
    def test_rsquared_adj(self):
        assert_almost_equal(self.res1.rsquared_adj, self.res2.rsquared_adj,
                            self.decimal_rsquared_adj)

    def test_degrees(self):
        assert_equal(self.res1.model.df_model, self.res2.df_model)
        assert_equal(self.res1.model.df_resid, self.res2.df_resid)

    decimal_ess = DECIMAL_4
    def test_ess(self):
        # Explained Sum of Squares
        assert_almost_equal(self.res1.ess, self.res2.ess,
                            self.decimal_ess)

    decimal_ssr = DECIMAL_4
    def test_sumof_squaredresids(self):
        assert_almost_equal(self.res1.ssr, self.res2.ssr, self.decimal_ssr)

    decimal_mse_resid = DECIMAL_4
    def test_mse_resid(self):
        # Mean squared error of residuals
        assert_almost_equal(self.res1.mse_model, self.res2.mse_model,
                            self.decimal_mse_resid)

    decimal_mse_model = DECIMAL_4
    def test_mse_model(self):
        assert_almost_equal(self.res1.mse_resid, self.res2.mse_resid,
                            self.decimal_mse_model)

    decimal_mse_total = DECIMAL_4
    def test_mse_total(self):
        assert_almost_equal(self.res1.mse_total, self.res2.mse_total,
                            self.decimal_mse_total,
                            err_msg="Test class %s" % self)

    decimal_fvalue = DECIMAL_4
    def test_fvalue(self):
        # did not change this, not sure it should complain -inf not equal -inf
        # if not (np.isinf(self.res1.fvalue) and np.isinf(self.res2.fvalue)):
        assert_almost_equal(self.res1.fvalue, self.res2.fvalue,
                            self.decimal_fvalue)

    decimal_loglike = DECIMAL_4
    def test_loglike(self):
        assert_almost_equal(self.res1.llf, self.res2.llf, self.decimal_loglike)

    decimal_aic = DECIMAL_4
    def test_aic(self):
        assert_almost_equal(self.res1.aic, self.res2.aic, self.decimal_aic)

    decimal_bic = DECIMAL_4
    def test_bic(self):
        assert_almost_equal(self.res1.bic, self.res2.bic, self.decimal_bic)

    decimal_pvalues = DECIMAL_4
    def test_pvalues(self):
        assert_almost_equal(self.res1.pvalues, self.res2.pvalues,
                            self.decimal_pvalues)

    decimal_wresid = DECIMAL_4
    def test_wresid(self):
        assert_almost_equal(self.res1.wresid, self.res2.wresid,
                            self.decimal_wresid)

    decimal_resids = DECIMAL_4
    def test_resids(self):
        assert_almost_equal(self.res1.resid, self.res2.resid,
                            self.decimal_resids)

    decimal_norm_resids = DECIMAL_4
    def test_norm_resids(self):
        assert_almost_equal(self.res1.resid_pearson, self.res2.resid_pearson,
                            self.decimal_norm_resids)

# TODO: test fittedvalues and what else?


class TestOLS(CheckRegressionResults):
    @classmethod
    def setup_class(cls):
        from .results.results_regression import Longley
        data = longley.load(as_pandas=False)
        data.exog = add_constant(data.exog, prepend=False)
        res1 = OLS(data.endog, data.exog).fit()
        res2 = Longley()
        res2.wresid = res1.wresid  # workaround hack
        cls.res1 = res1
        cls.res2 = res2

        res_qr = OLS(data.endog, data.exog).fit(method="qr")

        model_qr = OLS(data.endog, data.exog)
        Q, R = np.linalg.qr(data.exog)
        model_qr.exog_Q, model_qr.exog_R = Q, R
        model_qr.normalized_cov_params = np.linalg.inv(np.dot(R.T, R))
        model_qr.rank = np.linalg.matrix_rank(R)
        res_qr2 = model_qr.fit(method="qr")

        cls.res_qr = res_qr
        cls.res_qr_manual = res_qr2

    def test_eigenvalues(self):
        eigenval_perc_diff = (self.res_qr.eigenvals -
                              self.res_qr_manual.eigenvals)
        eigenval_perc_diff /= self.res_qr.eigenvals
        zeros = np.zeros_like(eigenval_perc_diff)
        assert_almost_equal(eigenval_perc_diff, zeros, DECIMAL_7)

    # Robust error tests.  Compare values computed with SAS
    def test_HC0_errors(self):
        # They are split up because the copied results do not have any
        # DECIMAL_4 places for the last place.
        assert_almost_equal(self.res1.HC0_se[:-1],
                            self.res2.HC0_se[:-1], DECIMAL_4)
        assert_allclose(np.round(self.res1.HC0_se[-1]),
                        self.res2.HC0_se[-1])

    def test_HC1_errors(self):
        assert_almost_equal(self.res1.HC1_se[:-1],
                            self.res2.HC1_se[:-1], DECIMAL_4)
        # Note: tolerance is tight; rtol=3e-7 fails while 4e-7 passes
        assert_allclose(self.res1.HC1_se[-1], self.res2.HC1_se[-1],
                        rtol=4e-7)

    def test_HC2_errors(self):
        assert_almost_equal(self.res1.HC2_se[:-1],
                            self.res2.HC2_se[:-1], DECIMAL_4)
        # Note: tolerance is tight; rtol=4e-7 fails while 5e-7 passes
        assert_allclose(self.res1.HC2_se[-1], self.res2.HC2_se[-1],
                        rtol=5e-7)

    def test_HC3_errors(self):
        assert_almost_equal(self.res1.HC3_se[:-1],
                            self.res2.HC3_se[:-1], DECIMAL_4)
        # Note: tolerance is tight; rtol=1e-7 fails while 1.5e-7 passes
        assert_allclose(self.res1.HC3_se[-1], self.res2.HC3_se[-1],
                        rtol=1.5e-7)

    def test_qr_params(self):
        assert_almost_equal(self.res1.params,
                            self.res_qr.params, 6)

    def test_qr_normalized_cov_params(self):
        # todo: need assert_close
        assert_almost_equal(np.ones_like(self.res1.normalized_cov_params),
                            self.res1.normalized_cov_params /
                            self.res_qr.normalized_cov_params, 5)

    def test_missing(self):
        data = longley.load(as_pandas=False)
        data.exog = add_constant(data.exog, prepend=False)
        data.endog[[3, 7, 14]] = np.nan
        mod = OLS(data.endog, data.exog, missing='drop')
        assert_equal(mod.endog.shape[0], 13)
        assert_equal(mod.exog.shape[0], 13)

    def test_rsquared_adj_overfit(self):
        # Test that if df_resid = 0, rsquared_adj = 0.
        # This is a regression test for user issue:
        # https://github.com/statsmodels/statsmodels/issues/868
        with warnings.catch_warnings(record=True):
            x = np.random.randn(5)
            y = np.random.randn(5, 6)
            results = OLS(x, y).fit()
            rsquared_adj = results.rsquared_adj
            assert_equal(rsquared_adj, np.nan)

    def test_qr_alternatives(self):
        assert_allclose(self.res_qr.params, self.res_qr_manual.params,
                        rtol=5e-12)

    def test_norm_resid(self):
        resid = self.res1.wresid
        norm_resid = resid / np.sqrt(np.sum(resid**2.0) / self.res1.df_resid)
        model_norm_resid = self.res1.resid_pearson
        assert_almost_equal(model_norm_resid, norm_resid, DECIMAL_7)

    def test_norm_resid_zero_variance(self):
        with warnings.catch_warnings(record=True):
            y = self.res1.model.endog
            res = OLS(y, y).fit()
            assert_allclose(res.scale, 0, atol=1e-20)
            assert_allclose(res.wresid, res.resid_pearson, atol=5e-11)


class TestRTO(CheckRegressionResults):
    @classmethod
    def setup_class(cls):
        from .results.results_regression import LongleyRTO
        data = longley.load(as_pandas=False)
        res1 = OLS(data.endog, data.exog).fit()
        res2 = LongleyRTO()
        res2.wresid = res1.wresid  # workaround hack
        cls.res1 = res1
        cls.res2 = res2

        res_qr = OLS(data.endog, data.exog).fit(method="qr")
        cls.res_qr = res_qr

class TestFtest(object):
    """
    Tests f_test vs. RegressionResults
    """
    @classmethod
    def setup_class(cls):
        data = longley.load(as_pandas=False)
        data.exog = add_constant(data.exog, prepend=False)
        cls.res1 = OLS(data.endog, data.exog).fit()
        R = np.identity(7)[:-1, :]
        cls.Ftest = cls.res1.f_test(R)

    def test_F(self):
        assert_almost_equal(self.Ftest.fvalue, self.res1.fvalue, DECIMAL_4)

    def test_p(self):
        assert_almost_equal(self.Ftest.pvalue, self.res1.f_pvalue, DECIMAL_4)

    def test_Df_denom(self):
        assert_equal(self.Ftest.df_denom, self.res1.model.df_resid)

    def test_Df_num(self):
        assert_equal(self.Ftest.df_num, 6)

class TestFTest2(object):
    """
    A joint test that the coefficient on
    GNP = the coefficient on UNEMP  and that the coefficient on
    POP = the coefficient on YEAR for the Longley dataset.

    Ftest1 is from statsmodels.  Results are from Rpy using R's car library.
    """
    @classmethod
    def setup_class(cls):
        data = longley.load(as_pandas=False)
        data.exog = add_constant(data.exog, prepend=False)
        res1 = OLS(data.endog, data.exog).fit()
        R2 = [[0, 1, -1, 0, 0, 0, 0], [0, 0, 0, 0, 1, -1, 0]]
        cls.Ftest1 = res1.f_test(R2)
        hyp = 'x2 = x3, x5 = x6'
        cls.NewFtest1 = res1.f_test(hyp)

    def test_new_ftest(self):
        assert_equal(self.NewFtest1.fvalue, self.Ftest1.fvalue)

    def test_fvalue(self):
        assert_almost_equal(self.Ftest1.fvalue, 9.7404618732968196, DECIMAL_4)

    def test_pvalue(self):
        assert_almost_equal(self.Ftest1.pvalue, 0.0056052885317493459,
                            DECIMAL_4)

    def test_df_denom(self):
        assert_equal(self.Ftest1.df_denom, 9)

    def test_df_num(self):
        assert_equal(self.Ftest1.df_num, 2)


class TestFtestQ(object):
    """
    A joint hypothesis test that Rb = q.  Coefficient tests are essentially
    made up.  Test values taken from Stata.
    """
    @classmethod
    def setup_class(cls):
        data = longley.load(as_pandas=False)
        data.exog = add_constant(data.exog, prepend=False)
Loading ...