# -*- coding: utf-8 -*-
"""
Created on Mon Dec 09 21:29:20 2013
Author: Josef Perktold
"""
import os
import numpy as np
import pandas as pd
import statsmodels.discrete.discrete_model as smd
from statsmodels.genmod.generalized_linear_model import GLM
from statsmodels.genmod import families
from statsmodels.genmod.families import links
from statsmodels.regression.linear_model import OLS
from statsmodels.base.covtype import get_robustcov_results
import statsmodels.stats.sandwich_covariance as sw
from statsmodels.tools.tools import add_constant
from numpy.testing import assert_allclose, assert_equal, assert_
import statsmodels.tools._testing as smt
# get data and results as module global for now, TODO: move to class
from .results import results_count_robust_cluster as results_st
cur_dir = os.path.dirname(os.path.abspath(__file__))
filepath = os.path.join(cur_dir, "results", "ships.csv")
data_raw = pd.read_csv(filepath, index_col=False)
data = data_raw.dropna()
#mod = smd.Poisson.from_formula('accident ~ yr_con + op_75_79', data=dat)
# Do not use formula for tests against Stata because intercept needs to be last
endog = data['accident']
exog_data = data['yr_con op_75_79'.split()]
exog = add_constant(exog_data, prepend=False)
group = np.asarray(data['ship'], int)
exposure = np.asarray(data['service'])
# TODO get the test methods from regression/tests
class CheckCountRobustMixin(object):
def test_basic(self):
res1 = self.res1
res2 = self.res2
if len(res1.params) == (len(res2.params) - 1):
# Stata includes lnalpha in table for NegativeBinomial
mask = np.ones(len(res2.params), np.bool_)
mask[-2] = False
res2_params = res2.params[mask]
res2_bse = res2.bse[mask]
else:
res2_params = res2.params
res2_bse = res2.bse
assert_allclose(res1._results.params, res2_params, 1e-4)
assert_allclose(self.bse_rob / self.corr_fact, res2_bse, 6e-5)
@classmethod
def get_robust_clu(cls):
res1 = cls.res1
cov_clu = sw.cov_cluster(res1, group)
cls.bse_rob = sw.se_cov(cov_clu)
cls.corr_fact = cls.get_correction_factor(res1)
@classmethod
def get_correction_factor(cls, results, sub_kparams=True):
mod = results.model
nobs, k_vars = mod.exog.shape
if sub_kparams:
# TODO: document why we adjust by k_params for some classes
# but not others.
k_params = len(results.params)
else:
k_params = 0
corr_fact = (nobs - 1.) / float(nobs - k_params)
# for bse we need sqrt of correction factor
return np.sqrt(corr_fact)
def test_oth(self):
res1 = self.res1
res2 = self.res2
assert_allclose(res1._results.llf, res2.ll, 1e-4)
assert_allclose(res1._results.llnull, res2.ll_0, 1e-4)
def test_ttest(self):
smt.check_ttest_tvalues(self.res1)
def test_waldtest(self):
smt.check_ftest_pvalues(self.res1)
class TestPoissonClu(CheckCountRobustMixin):
@classmethod
def setup_class(cls):
cls.res2 = results_st.results_poisson_clu
mod = smd.Poisson(endog, exog)
cls.res1 = mod.fit(disp=False)
cls.get_robust_clu()
class TestPoissonCluGeneric(CheckCountRobustMixin):
@classmethod
def setup_class(cls):
cls.res2 = results_st.results_poisson_clu
mod = smd.Poisson(endog, exog)
cls.res1 = res1 = mod.fit(disp=False)
debug = False
if debug:
# for debugging
cls.bse_nonrobust = cls.res1.bse.copy()
cls.res1 = res1 = mod.fit(disp=False)
cls.get_robust_clu()
cls.res3 = cls.res1
cls.bse_rob3 = cls.bse_rob.copy()
cls.res1 = res1 = mod.fit(disp=False)
from statsmodels.base.covtype import get_robustcov_results
#res_hc0_ = cls.res1.get_robustcov_results('HC1')
get_robustcov_results(cls.res1._results, 'cluster',
groups=group,
use_correction=True,
df_correction=True, #TODO has no effect
use_t=False, #True,
use_self=True)
cls.bse_rob = cls.res1.bse
cls.corr_fact = cls.get_correction_factor(cls.res1)
class TestPoissonHC1Generic(CheckCountRobustMixin):
@classmethod
def setup_class(cls):
cls.res2 = results_st.results_poisson_hc1
mod = smd.Poisson(endog, exog)
cls.res1 = mod.fit(disp=False)
from statsmodels.base.covtype import get_robustcov_results
#res_hc0_ = cls.res1.get_robustcov_results('HC1')
get_robustcov_results(cls.res1._results, 'HC1', use_self=True)
cls.bse_rob = cls.res1.bse
cls.corr_fact = cls.get_correction_factor(cls.res1, sub_kparams=False)
# TODO: refactor xxxFit to full testing results
class TestPoissonCluFit(CheckCountRobustMixin):
@classmethod
def setup_class(cls):
cls.res2 = results_st.results_poisson_clu
mod = smd.Poisson(endog, exog)
# scaling of cov_params_default to match Stata
# TODO should the default be changed?
nobs, k_params = mod.exog.shape
# TODO: this is similar but not identical to logic in
# get_correction_factor; can we de-duplicate?
sc_fact = (nobs-1.) / float(nobs - k_params)
cls.res1 = mod.fit(disp=False, cov_type='cluster',
cov_kwds=dict(groups=group,
use_correction=True,
scaling_factor=1. / sc_fact,
df_correction=True), #TODO has no effect
use_t=False, #True,
)
# The model results, t_test, ... should also work without
# normalized_cov_params, see #2209
# Note: we cannot set on the wrapper res1, we need res1._results
cls.res1._results.normalized_cov_params = None
cls.bse_rob = cls.res1.bse
# backwards compatibility with inherited test methods
cls.corr_fact = 1
def test_basic_inference(self):
res1 = self.res1
res2 = self.res2
rtol = 1e-7
assert_allclose(res1.params, res2.params, rtol=1e-8)
assert_allclose(res1.bse, res2.bse, rtol=rtol)
assert_allclose(res1.tvalues, res2.tvalues, rtol=rtol, atol=1e-8)
assert_allclose(res1.pvalues, res2.pvalues, rtol=rtol, atol=1e-20)
ci = res2.params_table[:, 4:6]
assert_allclose(res1.conf_int(), ci, rtol=5e-7, atol=1e-20)
class TestPoissonHC1Fit(CheckCountRobustMixin):
@classmethod
def setup_class(cls):
cls.res2 = results_st.results_poisson_hc1
mod = smd.Poisson(endog, exog)
cls.res1 = mod.fit(disp=False, cov_type='HC1')
cls.bse_rob = cls.res1.bse
cls.corr_fact = cls.get_correction_factor(cls.res1, sub_kparams=False)
class TestPoissonHC1FitExposure(CheckCountRobustMixin):
@classmethod
def setup_class(cls):
cls.res2 = results_st.results_poisson_exposure_hc1
mod = smd.Poisson(endog, exog, exposure=exposure)
cls.res1 = mod.fit(disp=False, cov_type='HC1')
cls.bse_rob = cls.res1.bse
cls.corr_fact = cls.get_correction_factor(cls.res1, sub_kparams=False)
class TestPoissonCluExposure(CheckCountRobustMixin):
@classmethod
def setup_class(cls):
cls.res2 = results_st.results_poisson_exposure_clu #nonrobust
mod = smd.Poisson(endog, exog, exposure=exposure)
cls.res1 = mod.fit(disp=False)
cls.get_robust_clu()
class TestPoissonCluExposureGeneric(CheckCountRobustMixin):
@classmethod
def setup_class(cls):
cls.res2 = results_st.results_poisson_exposure_clu #nonrobust
mod = smd.Poisson(endog, exog, exposure=exposure)
cls.res1 = res1 = mod.fit(disp=False)
from statsmodels.base.covtype import get_robustcov_results
#res_hc0_ = cls.res1.get_robustcov_results('HC1')
get_robustcov_results(cls.res1._results, 'cluster',
groups=group,
use_correction=True,
df_correction=True, #TODO has no effect
use_t=False, #True,
use_self=True)
cls.bse_rob = cls.res1.bse #sw.se_cov(cov_clu)
cls.corr_fact = cls.get_correction_factor(cls.res1)
class TestGLMPoissonClu(CheckCountRobustMixin):
@classmethod
def setup_class(cls):
cls.res2 = results_st.results_poisson_clu
mod = smd.Poisson(endog, exog)
mod = GLM(endog, exog, family=families.Poisson())
cls.res1 = mod.fit()
cls.get_robust_clu()
class TestGLMPoissonCluGeneric(CheckCountRobustMixin):
@classmethod
def setup_class(cls):
cls.res2 = results_st.results_poisson_clu
mod = GLM(endog, exog, family=families.Poisson())
cls.res1 = res1 = mod.fit()
get_robustcov_results(cls.res1._results, 'cluster',
groups=group,
use_correction=True,
df_correction=True, #TODO has no effect
use_t=False, #True,
use_self=True)
cls.bse_rob = cls.res1.bse
cls.corr_fact = cls.get_correction_factor(cls.res1)
class TestGLMPoissonHC1Generic(CheckCountRobustMixin):
@classmethod
def setup_class(cls):
cls.res2 = results_st.results_poisson_hc1
mod = GLM(endog, exog, family=families.Poisson())
cls.res1 = mod.fit()
#res_hc0_ = cls.res1.get_robustcov_results('HC1')
get_robustcov_results(cls.res1._results, 'HC1', use_self=True)
cls.bse_rob = cls.res1.bse
cls.corr_fact = cls.get_correction_factor(cls.res1, sub_kparams=False)
# TODO: refactor xxxFit to full testing results
class TestGLMPoissonCluFit(CheckCountRobustMixin):
@classmethod
def setup_class(cls):
cls.res2 = results_st.results_poisson_clu
mod = GLM(endog, exog, family=families.Poisson())
cls.res1 = res1 = mod.fit(cov_type='cluster',
cov_kwds=dict(groups=group,
use_correction=True,
df_correction=True), #TODO has no effect
use_t=False, #True,
)
# The model results, t_test, ... should also work without
# normalized_cov_params, see #2209
# Note: we cannot set on the wrapper res1, we need res1._results
cls.res1._results.normalized_cov_params = None
cls.bse_rob = cls.res1.bse
cls.corr_fact = cls.get_correction_factor(cls.res1)
class TestGLMPoissonHC1Fit(CheckCountRobustMixin):
@classmethod
def setup_class(cls):
cls.res2 = results_st.results_poisson_hc1
mod = GLM(endog, exog, family=families.Poisson())
Loading ...