
# -*- coding: utf-8 -*-
"""Testing OLS robust covariance matrices against STATA

Created on Mon Oct 28 15:25:14 2013

Author: Josef Perktold
"""

import pytest
import numpy as np
from scipy import stats

from numpy.testing import (assert_allclose, assert_equal, assert_warns,
                           assert_raises)


from statsmodels.regression.linear_model import OLS, WLS
import statsmodels.stats.sandwich_covariance as sw
from statsmodels.tools.tools import add_constant
from statsmodels.datasets import macrodata
from statsmodels.tools.sm_exceptions import InvalidTestWarning

from .results import results_macro_ols_robust as res
from .results import results_grunfeld_ols_robust_cluster as res2
# TODO: implement test_hac_simple


class CheckOLSRobust(object):

    def test_basic(self):
        res1 = self.res1
        res2 = self.res2
        rtol = getattr(self, 'rtol', 1e-10)
        assert_allclose(res1.params, res2.params, rtol=rtol)
        assert_allclose(self.bse_robust, res2.bse, rtol=rtol)
        assert_allclose(self.cov_robust, res2.cov, rtol=rtol)

    @pytest.mark.smoke
    def test_t_test_summary(self):
        res1 = self.res1
        mat = np.eye(len(res1.params))
        # TODO: if the t_test call is expensive, possibly make it a fixture?
        tt = res1.t_test(mat, cov_p=self.cov_robust)

        tt.summary()

    @pytest.mark.smoke
    def test_t_test_summary_frame(self):
        res1 = self.res1
        mat = np.eye(len(res1.params))
        tt = res1.t_test(mat, cov_p=self.cov_robust)

        tt.summary_frame()

    @pytest.mark.smoke
    def test_f_test_summary(self):
        res1 = self.res1
        mat = np.eye(len(res1.params))
        ft = res1.f_test(mat[:-1], cov_p=self.cov_robust)

        ft.summary()

    def test_tests(self):  # TODO: break into well-scoped tests
        # Note: "small" uses the t distribution with a ddof correction, while
        # "large" uses the normal distribution.  The F statistic has no ddof
        # correction in the large case, but still uses the F distribution (?)
        res1 = self.res1
        res2 = self.res2
        rtol = getattr(self, 'rtol', 1e-10)
        rtolh = getattr(self, 'rtolh', 1e-12)
        mat = np.eye(len(res1.params))
        tt = res1.t_test(mat, cov_p=self.cov_robust)
        # has 'effect', 'pvalue', 'sd', 'tvalue'
        # TODO confint missing
        assert_allclose(tt.effect, res2.params, rtol=rtol)
        assert_allclose(tt.sd, res2.bse, rtol=rtol)
        assert_allclose(tt.tvalue, res2.tvalues, rtol=rtol)
        if self.small:
            assert_allclose(tt.pvalue, res2.pvalues, rtol=5 * rtol)
        else:
            pval = stats.norm.sf(np.abs(tt.tvalue)) * 2
            assert_allclose(pval, res2.pvalues, rtol=5 * rtol, atol=1e-25)

        ft = res1.f_test(mat[:-1], cov_p=self.cov_robust)
        if self.small:
            #'df_denom', 'df_num', 'fvalue', 'pvalue'
            assert_allclose(ft.fvalue, res2.F, rtol=rtol)
            # f-pvalue is not directly available in Stata results, but is in ivreg2
            if hasattr(res2, 'Fp'):
                assert_allclose(ft.pvalue, res2.Fp, rtol=rtol)
        else:
            if not getattr(self, 'skip_f', False):
                dof_corr = res1.df_resid * 1. / res1.nobs
                assert_allclose(ft.fvalue * dof_corr, res2.F, rtol=rtol)

        if hasattr(res2, 'df_r'):
            assert_equal(ft.df_num, res2.df_m)
            assert_equal(ft.df_denom, res2.df_r)
        else:
            # ivreg2
            assert_equal(ft.df_num, res2.Fdf1)
            assert_equal(ft.df_denom, res2.Fdf2)


class TestOLSRobust1(CheckOLSRobust):
    # compare with regress robust

    def setup(self):
        res_ols = self.res1
        self.bse_robust = res_ols.HC1_se
        self.cov_robust = res_ols.cov_HC1
        self.small = True
        self.res2 = res.results_hc0

    @classmethod
    def setup_class(cls):
        d2 = macrodata.load_pandas().data
        # quarterly growth rates in annualized percent: 400 * diff(log(x))
        g_gdp = 400 * np.diff(np.log(d2['realgdp'].values))
        g_inv = 400 * np.diff(np.log(d2['realinv'].values))
        exogg = add_constant(np.c_[g_gdp, d2['realint'][:-1].values],
                             prepend=False)

        cls.res1 = OLS(g_inv, exogg).fit()


class TestOLSRobust2(TestOLSRobust1):
    # compare with ivreg robust small

    def setup(self):
        res_ols = self.res1
        self.bse_robust = res_ols.HC1_se
        self.cov_robust = res_ols.cov_HC1
        self.small = True

        self.res2 = res.results_ivhc0_small



class TestOLSRobust3(TestOLSRobust1):
    # compare with ivreg robust   (not small)

    def setup(self):
        res_ols = self.res1
        self.bse_robust = res_ols.HC0_se
        self.cov_robust = res_ols.cov_HC0
        self.small = False

        self.res2 = res.results_ivhc0_large


class TestOLSRobustHacSmall(TestOLSRobust1):
    # compare with ivreg robust small

    def setup(self):
        res_ols = self.res1
        cov1 = sw.cov_hac_simple(res_ols, nlags=4, use_correction=True)
        se1 = sw.se_cov(cov1)
        self.bse_robust = se1
        self.cov_robust = cov1
        self.small = True

        self.res2 = res.results_ivhac4_small



class TestOLSRobustHacLarge(TestOLSRobust1):
    # compare with ivreg robust (not small)

    def setup(self):
        res_ols = self.res1
        cov1 = sw.cov_hac_simple(res_ols, nlags=4, use_correction=False)
        se1 = sw.se_cov(cov1)
        self.bse_robust = se1
        self.cov_robust = cov1
        self.small = False

        self.res2 = res.results_ivhac4_large


class CheckOLSRobustNewMixin(object):
    # This uses the robust covariance as default covariance
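    # Minimal sketch (illustrative, not part of the original tests) of the
    # behaviour exercised here: after ``get_robustcov_results`` the robust
    # quantities become the defaults on the returned results instance, e.g.
    #
    #   res = ols_results.get_robustcov_results('HC1', use_t=True)
    #   res.bse            # robust standard errors
    #   res.cov_params()   # robust parameter covariance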

    def test_compare(self):
        rtol = getattr(self, 'rtol', 1e-10)
        assert_allclose(self.cov_robust, self.cov_robust2, rtol=rtol)
        assert_allclose(self.bse_robust, self.bse_robust2, rtol=rtol)

    def test_fvalue(self):
        if not getattr(self, 'skip_f', False):
            rtol = getattr(self, 'rtol', 1e-10)
            assert_allclose(self.res1.fvalue, self.res2.F, rtol=rtol)
            if hasattr(self.res2, 'Fp'):
                # only available with ivreg2
                assert_allclose(self.res1.f_pvalue, self.res2.Fp, rtol=rtol)
        else:
            pytest.skip("TODO: document why this test is skipped")

    def test_confint(self):
        rtol = getattr(self, 'rtol', 1e-10)
        ci1 = self.res1.conf_int()
        ci2 = self.res2.params_table[:, 4:6]
        assert_allclose(ci1, ci2, rtol=rtol)

        # check the critical value implied by the confidence interval,
        # i.e. half the interval width in units of the standard error
        crit1 = np.diff(ci1, 1).ravel() / 2 / self.res1.bse
        crit2 = np.diff(ci2, 1).ravel() / 2 / self.res2.bse
        assert_allclose(crit1, crit2, rtol=5 * rtol)


    def test_ttest(self):
        res1 = self.res1
        res2 = self.res2
        rtol = getattr(self, 'rtol', 1e-10)
        rtolh = getattr(self, 'rtolh', 1e-12)

        mat = np.eye(len(res1.params))
        tt = res1.t_test(mat, cov_p=self.cov_robust)
        # has 'effect', 'pvalue', 'sd', 'tvalue'
        # TODO confint missing
        assert_allclose(tt.effect, res2.params, rtol=rtolh)
        assert_allclose(tt.sd, res2.bse, rtol=rtol)
        assert_allclose(tt.tvalue, res2.tvalues, rtol=rtolh)
        assert_allclose(tt.pvalue, res2.pvalues, rtol=5 * rtol)
        ci1 = tt.conf_int()
        ci2 = self.res2.params_table[:, 4:6]
        assert_allclose(ci1, ci2, rtol=rtol)


    def test_scale(self):
        res1 = self.res1
        res2 = self.res2
        rtol = 1e-5
        # Note: statsmodels always uses df_resid for scale;
        # Stata uses nobs or df_resid for rmse, which is not always available
        # in the saved Stata results
        #assert_allclose(res1.scale, res2.rmse**2 * res2.N / (res2.N - res2.df_m - 1), rtol=rtol)
        skip = False
        if hasattr(res2, 'rss'):
            scale = res2.rss / (res2.N - res2.df_m - 1)
        elif hasattr(res2, 'rmse'):
            scale = res2.rmse**2
        else:
            skip = True

        if isinstance(res1.model, WLS):
            skip = True
            # Stata uses different scaling and using unweighted resid for rmse

        if not skip:
            assert_allclose(res1.scale, scale, rtol=rtol)

        if res2.vcetype != 'Newey-West':
            # Stata's Newey-West (newey) output has no r-squared
            r2 = res2.r2 if hasattr(res2, 'r2') else res2.r2c
            assert_allclose(res1.rsquared, r2, rtol=rtol, err_msg=str(skip))


        # consistency checks, not against Stata
        df_resid = res1.nobs - res1.df_model - 1
        assert_equal(res1.df_resid, df_resid)
        # variance of resid_pearson is 1, with ddof, and loc=0
        psum = (res1.resid_pearson**2).sum()
        assert_allclose(psum, df_resid, rtol=1e-13)
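        # illustrative note: for OLS, resid_pearson is the residual divided by
        # np.sqrt(scale) and scale = rss / df_resid, so the sum of squared
        # Pearson residuals reduces to rss / scale = df_resid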

    @pytest.mark.smoke
    def test_summary(self):
        self.res1.summary()


class TestOLSRobust2SmallNew(TestOLSRobust1, CheckOLSRobustNewMixin):
    # compare with ivreg robust small

    def setup(self):
        res_ols = self.res1.get_robustcov_results('HC1', use_t=True)
        self.res3 = self.res1
        self.res1 = res_ols
        self.bse_robust = res_ols.bse
        self.cov_robust = res_ols.cov_params()
        self.bse_robust2 = res_ols.HC1_se
        self.cov_robust2 = res_ols.cov_HC1
        self.small = True
        self.res2 = res.results_ivhc0_small

    def test_compare(self):
        # check that the nested-model compare methods warn: compare_lr_test and
        # compare_f_test are not valid with a robust covariance, so they issue
        # an InvalidTestWarning but still return the usual test statistics
        res1 = self.res1
        endog = res1.model.endog
        exog = res1.model.exog[:, [0, 2]]  # drop one variable
        res_ols2 = OLS(endog, exog).fit()
        # results from Stata
        r_pval = .0307306938402991
        r_chi2 = 4.667944083588736
        r_df = 1
        assert_warns(InvalidTestWarning, res1.compare_lr_test, res_ols2)
        import warnings
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            chi2, pval, df = res1.compare_lr_test(res_ols2)
        assert_allclose(chi2, r_chi2, rtol=1e-11)
        assert_allclose(pval, r_pval, rtol=1e-11)
        assert_equal(df, r_df)

        assert_warns(InvalidTestWarning, res1.compare_f_test, res_ols2)
        #fva, pval, df = res1.compare_f_test(res_ols2)



class TestOLSRobustHACSmallNew(TestOLSRobust1, CheckOLSRobustNewMixin):
    # compare with ivreg robust small

    def setup(self):
        res_ols = self.res1.get_robustcov_results('HAC', maxlags=4,
                                                  use_correction=True,
                                                  use_t=True)
        self.res3 = self.res1
        self.res1 = res_ols
        self.bse_robust = res_ols.bse
        self.cov_robust = res_ols.cov_params()
        cov1 = sw.cov_hac_simple(res_ols, nlags=4, use_correction=True)
        se1 = sw.se_cov(cov1)
        self.bse_robust2 = se1
        self.cov_robust2 = cov1
        self.small = True
        self.res2 = res.results_ivhac4_small


class TestOLSRobust2LargeNew(TestOLSRobust1, CheckOLSRobustNewMixin):
    # compare with ivreg robust (not small)

    def setup(self):
        res_ols = self.res1.get_robustcov_results('HC0')
        res_ols.use_t = False
        self.res3 = self.res1
        self.res1 = res_ols
        self.bse_robust = res_ols.bse
        self.cov_robust = res_ols.cov_params()
        self.bse_robust2 = res_ols.HC0_se
        self.cov_robust2 = res_ols.cov_HC0
        self.small = False
        self.res2 = res.results_ivhc0_large

    @pytest.mark.skip(reason="not refactored yet for `large`")
    def test_fvalue(self):
        super(TestOLSRobust2LargeNew, self).test_fvalue()

    @pytest.mark.skip(reason="not refactored yet for `large`")
    def test_confint(self):
        super(TestOLSRobust2LargeNew, self).test_confint()