Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

alkaline-ml / statsmodels   python

Repository URL to install this package:

Version: 0.11.1 

/ discrete / tests / test_sandwich_cov.py

# -*- coding: utf-8 -*-
"""

Created on Mon Dec 09 21:29:20 2013

Author: Josef Perktold
"""

import os
import numpy as np
import pandas as pd
import statsmodels.discrete.discrete_model as smd
from statsmodels.genmod.generalized_linear_model import GLM
from statsmodels.genmod import families
from statsmodels.genmod.families import links
from statsmodels.regression.linear_model import OLS
from statsmodels.base.covtype import get_robustcov_results
import statsmodels.stats.sandwich_covariance as sw
from statsmodels.tools.tools import add_constant


from numpy.testing import assert_allclose, assert_equal, assert_
import statsmodels.tools._testing as smt


# get data and results as module global for now, TODO: move to class
from .results import results_count_robust_cluster as results_st

cur_dir = os.path.dirname(os.path.abspath(__file__))

filepath = os.path.join(cur_dir, "results", "ships.csv")
data_raw = pd.read_csv(filepath, index_col=False)
data = data_raw.dropna()

#mod = smd.Poisson.from_formula('accident ~ yr_con + op_75_79', data=dat)
# Do not use formula for tests against Stata because intercept needs to be last
endog = data['accident']
exog_data = data['yr_con op_75_79'.split()]
exog = add_constant(exog_data, prepend=False)
group = np.asarray(data['ship'], int)
exposure = np.asarray(data['service'])


# TODO get the test methods from regression/tests
class CheckCountRobustMixin(object):


    def test_basic(self):
        res1 = self.res1
        res2 = self.res2

        if len(res1.params) == (len(res2.params) - 1):
            # Stata includes lnalpha in table for NegativeBinomial
            mask = np.ones(len(res2.params), np.bool_)
            mask[-2] = False
            res2_params = res2.params[mask]
            res2_bse = res2.bse[mask]
        else:
            res2_params = res2.params
            res2_bse = res2.bse

        assert_allclose(res1._results.params, res2_params, 1e-4)

        assert_allclose(self.bse_rob / self.corr_fact, res2_bse, 6e-5)

    @classmethod
    def get_robust_clu(cls):
        res1 = cls.res1
        cov_clu = sw.cov_cluster(res1, group)
        cls.bse_rob = sw.se_cov(cov_clu)

        cls.corr_fact = cls.get_correction_factor(res1)

    @classmethod
    def get_correction_factor(cls, results, sub_kparams=True):
        mod = results.model
        nobs, k_vars = mod.exog.shape

        if sub_kparams:
            # TODO: document why we adjust by k_params for some classes
            #   but not others.
            k_params = len(results.params)
        else:
            k_params = 0

        corr_fact = (nobs - 1.) / float(nobs - k_params)
        # for bse we need sqrt of correction factor
        return np.sqrt(corr_fact)


    def test_oth(self):
        res1 = self.res1
        res2 = self.res2
        assert_allclose(res1._results.llf, res2.ll, 1e-4)
        assert_allclose(res1._results.llnull, res2.ll_0, 1e-4)


    def test_ttest(self):
        smt.check_ttest_tvalues(self.res1)


    def test_waldtest(self):
        smt.check_ftest_pvalues(self.res1)


class TestPoissonClu(CheckCountRobustMixin):

    @classmethod
    def setup_class(cls):
        cls.res2 = results_st.results_poisson_clu
        mod = smd.Poisson(endog, exog)
        cls.res1 = mod.fit(disp=False)
        cls.get_robust_clu()


class TestPoissonCluGeneric(CheckCountRobustMixin):

    @classmethod
    def setup_class(cls):
        cls.res2 = results_st.results_poisson_clu
        mod = smd.Poisson(endog, exog)
        cls.res1 = res1 = mod.fit(disp=False)

        debug = False
        if debug:
            # for debugging
            cls.bse_nonrobust = cls.res1.bse.copy()
            cls.res1 = res1 = mod.fit(disp=False)
            cls.get_robust_clu()
            cls.res3 = cls.res1
            cls.bse_rob3 = cls.bse_rob.copy()
            cls.res1 = res1 = mod.fit(disp=False)

        from statsmodels.base.covtype import get_robustcov_results

        #res_hc0_ = cls.res1.get_robustcov_results('HC1')
        get_robustcov_results(cls.res1._results, 'cluster',
                                                  groups=group,
                                                  use_correction=True,
                                                  df_correction=True,  #TODO has no effect
                                                  use_t=False, #True,
                                                  use_self=True)
        cls.bse_rob = cls.res1.bse

        cls.corr_fact = cls.get_correction_factor(cls.res1)


class TestPoissonHC1Generic(CheckCountRobustMixin):

    @classmethod
    def setup_class(cls):
        cls.res2 = results_st.results_poisson_hc1
        mod = smd.Poisson(endog, exog)
        cls.res1 = mod.fit(disp=False)

        from statsmodels.base.covtype import get_robustcov_results

        #res_hc0_ = cls.res1.get_robustcov_results('HC1')
        get_robustcov_results(cls.res1._results, 'HC1', use_self=True)
        cls.bse_rob = cls.res1.bse

        cls.corr_fact = cls.get_correction_factor(cls.res1, sub_kparams=False)


# TODO: refactor xxxFit to full testing results
class TestPoissonCluFit(CheckCountRobustMixin):

    @classmethod
    def setup_class(cls):


        cls.res2 = results_st.results_poisson_clu
        mod = smd.Poisson(endog, exog)

        # scaling of cov_params_default to match Stata
        # TODO should the default be changed?
        nobs, k_params = mod.exog.shape
        # TODO: this is similar but not identical to logic in
        #   get_correction_factor; can we de-duplicate?
        sc_fact = (nobs-1.) / float(nobs - k_params)

        cls.res1 = mod.fit(disp=False, cov_type='cluster',
                           cov_kwds=dict(groups=group,
                                         use_correction=True,
                                         scaling_factor=1. / sc_fact,
                                         df_correction=True),  #TODO has no effect
                           use_t=False, #True,
                           )

        # The model results, t_test, ... should also work without
        # normalized_cov_params, see #2209
        # Note: we cannot set on the wrapper res1, we need res1._results
        cls.res1._results.normalized_cov_params = None

        cls.bse_rob = cls.res1.bse

        # backwards compatibility with inherited test methods
        cls.corr_fact = 1


    def test_basic_inference(self):
        res1 = self.res1
        res2 = self.res2
        rtol = 1e-7
        assert_allclose(res1.params, res2.params, rtol=1e-8)
        assert_allclose(res1.bse, res2.bse, rtol=rtol)
        assert_allclose(res1.tvalues, res2.tvalues, rtol=rtol, atol=1e-8)
        assert_allclose(res1.pvalues, res2.pvalues, rtol=rtol, atol=1e-20)
        ci = res2.params_table[:, 4:6]
        assert_allclose(res1.conf_int(), ci, rtol=5e-7, atol=1e-20)


class TestPoissonHC1Fit(CheckCountRobustMixin):

    @classmethod
    def setup_class(cls):
        cls.res2 = results_st.results_poisson_hc1
        mod = smd.Poisson(endog, exog)
        cls.res1 = mod.fit(disp=False, cov_type='HC1')

        cls.bse_rob = cls.res1.bse

        cls.corr_fact = cls.get_correction_factor(cls.res1, sub_kparams=False)


class TestPoissonHC1FitExposure(CheckCountRobustMixin):

    @classmethod
    def setup_class(cls):
        cls.res2 = results_st.results_poisson_exposure_hc1
        mod = smd.Poisson(endog, exog, exposure=exposure)
        cls.res1 = mod.fit(disp=False, cov_type='HC1')

        cls.bse_rob = cls.res1.bse

        cls.corr_fact = cls.get_correction_factor(cls.res1, sub_kparams=False)


class TestPoissonCluExposure(CheckCountRobustMixin):

    @classmethod
    def setup_class(cls):
        cls.res2 = results_st.results_poisson_exposure_clu #nonrobust
        mod = smd.Poisson(endog, exog, exposure=exposure)
        cls.res1 = mod.fit(disp=False)
        cls.get_robust_clu()


class TestPoissonCluExposureGeneric(CheckCountRobustMixin):

    @classmethod
    def setup_class(cls):
        cls.res2 = results_st.results_poisson_exposure_clu #nonrobust
        mod = smd.Poisson(endog, exog, exposure=exposure)
        cls.res1 = res1 = mod.fit(disp=False)

        from statsmodels.base.covtype import get_robustcov_results

        #res_hc0_ = cls.res1.get_robustcov_results('HC1')
        get_robustcov_results(cls.res1._results, 'cluster',
                                                  groups=group,
                                                  use_correction=True,
                                                  df_correction=True,  #TODO has no effect
                                                  use_t=False, #True,
                                                  use_self=True)
        cls.bse_rob = cls.res1.bse #sw.se_cov(cov_clu)

        cls.corr_fact = cls.get_correction_factor(cls.res1)


class TestGLMPoissonClu(CheckCountRobustMixin):

    @classmethod
    def setup_class(cls):
        cls.res2 = results_st.results_poisson_clu
        mod = smd.Poisson(endog, exog)
        mod = GLM(endog, exog, family=families.Poisson())
        cls.res1 = mod.fit()
        cls.get_robust_clu()


class TestGLMPoissonCluGeneric(CheckCountRobustMixin):

    @classmethod
    def setup_class(cls):
        cls.res2 = results_st.results_poisson_clu
        mod = GLM(endog, exog, family=families.Poisson())
        cls.res1 = res1 = mod.fit()

        get_robustcov_results(cls.res1._results, 'cluster',
                                                  groups=group,
                                                  use_correction=True,
                                                  df_correction=True,  #TODO has no effect
                                                  use_t=False, #True,
                                                  use_self=True)
        cls.bse_rob = cls.res1.bse

        cls.corr_fact = cls.get_correction_factor(cls.res1)


class TestGLMPoissonHC1Generic(CheckCountRobustMixin):

    @classmethod
    def setup_class(cls):
        cls.res2 = results_st.results_poisson_hc1
        mod = GLM(endog, exog, family=families.Poisson())
        cls.res1 = mod.fit()

        #res_hc0_ = cls.res1.get_robustcov_results('HC1')
        get_robustcov_results(cls.res1._results, 'HC1', use_self=True)
        cls.bse_rob = cls.res1.bse

        cls.corr_fact = cls.get_correction_factor(cls.res1, sub_kparams=False)


# TODO: refactor xxxFit to full testing results
class TestGLMPoissonCluFit(CheckCountRobustMixin):

    @classmethod
    def setup_class(cls):
        cls.res2 = results_st.results_poisson_clu
        mod = GLM(endog, exog, family=families.Poisson())
        cls.res1 = res1 = mod.fit(cov_type='cluster',
                                  cov_kwds=dict(groups=group,
                                                use_correction=True,
                                                df_correction=True),  #TODO has no effect
                                  use_t=False, #True,
                                  )

        # The model results, t_test, ... should also work without
        # normalized_cov_params, see #2209
        # Note: we cannot set on the wrapper res1, we need res1._results
        cls.res1._results.normalized_cov_params = None

        cls.bse_rob = cls.res1.bse

        cls.corr_fact = cls.get_correction_factor(cls.res1)


class TestGLMPoissonHC1Fit(CheckCountRobustMixin):

    @classmethod
    def setup_class(cls):
        cls.res2 = results_st.results_poisson_hc1
        mod = GLM(endog, exog, family=families.Poisson())
Loading ...