Repository URL to install this package:
|
Version:
0.10.2 ▾
|
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 09 21:29:20 2013
Author: Josef Perktold
"""
import os
import numpy as np
import pandas as pd
import statsmodels.discrete.discrete_model as smd
from statsmodels.genmod.generalized_linear_model import GLM
from statsmodels.genmod import families
from statsmodels.genmod.families import links
from statsmodels.regression.linear_model import OLS
from statsmodels.base.covtype import get_robustcov_results
import statsmodels.stats.sandwich_covariance as sw
from statsmodels.tools.tools import add_constant
from numpy.testing import assert_allclose, assert_equal, assert_
import statsmodels.tools._testing as smt
# get data and results as module global for now, TODO: move to class
from .results import results_count_robust_cluster as results_st
cur_dir = os.path.dirname(os.path.abspath(__file__))
filepath = os.path.join(cur_dir, "results", "ships.csv")
data_raw = pd.read_csv(filepath, index_col=False)
data = data_raw.dropna()
#mod = smd.Poisson.from_formula('accident ~ yr_con + op_75_79', data=dat)
# Don't use formula for tests against Stata because intercept needs to be last
endog = data['accident']
exog_data = data['yr_con op_75_79'.split()]
exog = add_constant(exog_data, prepend=False)
group = np.asarray(data['ship'], int)
exposure = np.asarray(data['service'])
# TODO get the test methods from regression/tests
class CheckCountRobustMixin(object):
def test_basic(self):
res1 = self.res1
res2 = self.res2
if len(res1.params) == (len(res2.params) - 1):
# Stata includes lnalpha in table for NegativeBinomial
mask = np.ones(len(res2.params), np.bool_)
mask[-2] = False
res2_params = res2.params[mask]
res2_bse = res2.bse[mask]
else:
res2_params = res2.params
res2_bse = res2.bse
assert_allclose(res1._results.params, res2_params, 1e-4)
assert_allclose(self.bse_rob / self.corr_fact, res2_bse, 6e-5)
@classmethod
def get_robust_clu(cls):
res1 = cls.res1
cov_clu = sw.cov_cluster(res1, group)
cls.bse_rob = sw.se_cov(cov_clu)
cls.corr_fact = cls.get_correction_factor(res1)
@classmethod
def get_correction_factor(cls, results, sub_kparams=True):
mod = results.model
nobs, k_vars = mod.exog.shape
if sub_kparams:
# TODO: document why we adjust by k_params for some classes
# but not others.
k_params = len(results.params)
else:
k_params = 0
corr_fact = (nobs - 1.) / float(nobs - k_params)
# for bse we need sqrt of correction factor
return np.sqrt(corr_fact)
def test_oth(self):
res1 = self.res1
res2 = self.res2
assert_allclose(res1._results.llf, res2.ll, 1e-4)
assert_allclose(res1._results.llnull, res2.ll_0, 1e-4)
def test_ttest(self):
smt.check_ttest_tvalues(self.res1)
def test_waldtest(self):
smt.check_ftest_pvalues(self.res1)
class TestPoissonClu(CheckCountRobustMixin):
@classmethod
def setup_class(cls):
cls.res2 = results_st.results_poisson_clu
mod = smd.Poisson(endog, exog)
cls.res1 = mod.fit(disp=False)
cls.get_robust_clu()
class TestPoissonCluGeneric(CheckCountRobustMixin):
@classmethod
def setup_class(cls):
cls.res2 = results_st.results_poisson_clu
mod = smd.Poisson(endog, exog)
cls.res1 = res1 = mod.fit(disp=False)
debug = False
if debug:
# for debugging
cls.bse_nonrobust = cls.res1.bse.copy()
cls.res1 = res1 = mod.fit(disp=False)
cls.get_robust_clu()
cls.res3 = cls.res1
cls.bse_rob3 = cls.bse_rob.copy()
cls.res1 = res1 = mod.fit(disp=False)
from statsmodels.base.covtype import get_robustcov_results
#res_hc0_ = cls.res1.get_robustcov_results('HC1')
get_robustcov_results(cls.res1._results, 'cluster',
groups=group,
use_correction=True,
df_correction=True, #TODO has no effect
use_t=False, #True,
use_self=True)
cls.bse_rob = cls.res1.bse
cls.corr_fact = cls.get_correction_factor(cls.res1)
class TestPoissonHC1Generic(CheckCountRobustMixin):
@classmethod
def setup_class(cls):
cls.res2 = results_st.results_poisson_hc1
mod = smd.Poisson(endog, exog)
cls.res1 = mod.fit(disp=False)
from statsmodels.base.covtype import get_robustcov_results
#res_hc0_ = cls.res1.get_robustcov_results('HC1')
get_robustcov_results(cls.res1._results, 'HC1', use_self=True)
cls.bse_rob = cls.res1.bse
cls.corr_fact = cls.get_correction_factor(cls.res1, sub_kparams=False)
# TODO: refactor xxxFit to full testing results
class TestPoissonCluFit(CheckCountRobustMixin):
@classmethod
def setup_class(cls):
cls.res2 = results_st.results_poisson_clu
mod = smd.Poisson(endog, exog)
# scaling of cov_params_default to match Stata
# TODO should the default be changed?
nobs, k_params = mod.exog.shape
# TODO: this is similar but not identical to logic in
# get_correction_factor; can we de-duplicate?
sc_fact = (nobs-1.) / float(nobs - k_params)
cls.res1 = mod.fit(disp=False, cov_type='cluster',
cov_kwds=dict(groups=group,
use_correction=True,
scaling_factor=1. / sc_fact,
df_correction=True), #TODO has no effect
use_t=False, #True,
)
# The model results, t_test, ... should also work without
# normalized_cov_params, see #2209
# Note: we cannot set on the wrapper res1, we need res1._results
cls.res1._results.normalized_cov_params = None
cls.bse_rob = cls.res1.bse
# backwards compatibility with inherited test methods
cls.corr_fact = 1
def test_basic_inference(self):
res1 = self.res1
res2 = self.res2
rtol = 1e-7
assert_allclose(res1.params, res2.params, rtol=1e-8)
assert_allclose(res1.bse, res2.bse, rtol=rtol)
assert_allclose(res1.tvalues, res2.tvalues, rtol=rtol, atol=1e-8)
assert_allclose(res1.pvalues, res2.pvalues, rtol=rtol, atol=1e-20)
ci = res2.params_table[:, 4:6]
assert_allclose(res1.conf_int(), ci, rtol=5e-7, atol=1e-20)
class TestPoissonHC1Fit(CheckCountRobustMixin):
@classmethod
def setup_class(cls):
cls.res2 = results_st.results_poisson_hc1
mod = smd.Poisson(endog, exog)
cls.res1 = mod.fit(disp=False, cov_type='HC1')
cls.bse_rob = cls.res1.bse
cls.corr_fact = cls.get_correction_factor(cls.res1, sub_kparams=False)
class TestPoissonHC1FitExposure(CheckCountRobustMixin):
@classmethod
def setup_class(cls):
cls.res2 = results_st.results_poisson_exposure_hc1
mod = smd.Poisson(endog, exog, exposure=exposure)
cls.res1 = mod.fit(disp=False, cov_type='HC1')
cls.bse_rob = cls.res1.bse
cls.corr_fact = cls.get_correction_factor(cls.res1, sub_kparams=False)
class TestPoissonCluExposure(CheckCountRobustMixin):
@classmethod
def setup_class(cls):
cls.res2 = results_st.results_poisson_exposure_clu #nonrobust
mod = smd.Poisson(endog, exog, exposure=exposure)
cls.res1 = mod.fit(disp=False)
cls.get_robust_clu()
class TestPoissonCluExposureGeneric(CheckCountRobustMixin):
@classmethod
def setup_class(cls):
cls.res2 = results_st.results_poisson_exposure_clu #nonrobust
mod = smd.Poisson(endog, exog, exposure=exposure)
cls.res1 = res1 = mod.fit(disp=False)
from statsmodels.base.covtype import get_robustcov_results
#res_hc0_ = cls.res1.get_robustcov_results('HC1')
get_robustcov_results(cls.res1._results, 'cluster',
groups=group,
use_correction=True,
df_correction=True, #TODO has no effect
use_t=False, #True,
use_self=True)
cls.bse_rob = cls.res1.bse #sw.se_cov(cov_clu)
cls.corr_fact = cls.get_correction_factor(cls.res1)
class TestGLMPoissonClu(CheckCountRobustMixin):
@classmethod
def setup_class(cls):
cls.res2 = results_st.results_poisson_clu
mod = smd.Poisson(endog, exog)
mod = GLM(endog, exog, family=families.Poisson())
cls.res1 = mod.fit()
cls.get_robust_clu()
class TestGLMPoissonCluGeneric(CheckCountRobustMixin):
@classmethod
def setup_class(cls):
cls.res2 = results_st.results_poisson_clu
mod = GLM(endog, exog, family=families.Poisson())
cls.res1 = res1 = mod.fit()
get_robustcov_results(cls.res1._results, 'cluster',
groups=group,
use_correction=True,
df_correction=True, #TODO has no effect
use_t=False, #True,
use_self=True)
cls.bse_rob = cls.res1.bse
cls.corr_fact = cls.get_correction_factor(cls.res1)
class TestGLMPoissonHC1Generic(CheckCountRobustMixin):
@classmethod
def setup_class(cls):
cls.res2 = results_st.results_poisson_hc1
mod = GLM(endog, exog, family=families.Poisson())
cls.res1 = mod.fit()
#res_hc0_ = cls.res1.get_robustcov_results('HC1')
get_robustcov_results(cls.res1._results, 'HC1', use_self=True)
cls.bse_rob = cls.res1.bse
cls.corr_fact = cls.get_correction_factor(cls.res1, sub_kparams=False)
# TODO: refactor xxxFit to full testing results
class TestGLMPoissonCluFit(CheckCountRobustMixin):
@classmethod
def setup_class(cls):
cls.res2 = results_st.results_poisson_clu
mod = GLM(endog, exog, family=families.Poisson())
cls.res1 = res1 = mod.fit(cov_type='cluster',
cov_kwds=dict(groups=group,
use_correction=True,
df_correction=True), #TODO has no effect
use_t=False, #True,
)
# The model results, t_test, ... should also work without
# normalized_cov_params, see #2209
# Note: we cannot set on the wrapper res1, we need res1._results
cls.res1._results.normalized_cov_params = None
cls.bse_rob = cls.res1.bse
cls.corr_fact = cls.get_correction_factor(cls.res1)
class TestGLMPoissonHC1Fit(CheckCountRobustMixin):
@classmethod
def setup_class(cls):
cls.res2 = results_st.results_poisson_hc1
mod = GLM(endog, exog, family=families.Poisson())
cls.res1 = mod.fit(cov_type='HC1')
cls.bse_rob = cls.res1.bse
cls.corr_fact = cls.get_correction_factor(cls.res1, sub_kparams=False)
class TestNegbinClu(CheckCountRobustMixin):
@classmethod
def setup_class(cls):
cls.res2 = results_st.results_negbin_clu
mod = smd.NegativeBinomial(endog, exog)
cls.res1 = mod.fit(disp=False, gtol=1e-7)
cls.get_robust_clu()
class TestNegbinCluExposure(CheckCountRobustMixin):
@classmethod
def setup_class(cls):
cls.res2 = results_st.results_negbin_exposure_clu #nonrobust
mod = smd.NegativeBinomial(endog, exog, exposure=exposure)
cls.res1 = mod.fit(disp=False)
cls.get_robust_clu()
# mod_nbe = smd.NegativeBinomial(endog, exog, exposure=data['service'])
# res_nbe = mod_nbe.fit()
# mod_nb = smd.NegativeBinomial(endog, exog)
# res_nb = mod_nb.fit()
#
# cov_clu_nb = sw.cov_cluster(res_nb, group)
# k_params = k_vars + 1
# print sw.se_cov(cov_clu_nb / ((nobs-1.) / float(nobs - k_params)))
#
# wt = res_nb.wald_test(np.eye(len(res_nb.params))[1:3], cov_p=cov_clu_nb/((nobs-1.) / float(nobs - k_params)))
# print wt
#
# print dir(results_st)
class TestNegbinCluGeneric(CheckCountRobustMixin):
@classmethod
def setup_class(cls):
cls.res2 = results_st.results_negbin_clu
mod = smd.NegativeBinomial(endog, exog)
cls.res1 = res1 = mod.fit(disp=False, gtol=1e-7)
get_robustcov_results(cls.res1._results, 'cluster',
groups=group,
use_correction=True,
df_correction=True, #TODO has no effect
use_t=False, #True,
use_self=True)
cls.bse_rob = cls.res1.bse
cls.corr_fact = cls.get_correction_factor(cls.res1)
class TestNegbinCluFit(CheckCountRobustMixin):
@classmethod
def setup_class(cls):
cls.res2 = results_st.results_negbin_clu
mod = smd.NegativeBinomial(endog, exog)
cls.res1 = res1 = mod.fit(disp=False, cov_type='cluster',
cov_kwds=dict(groups=group,
use_correction=True,
df_correction=True), #TODO has no effect
use_t=False, #True,
gtol=1e-7)
cls.bse_rob = cls.res1.bse
cls.corr_fact = cls.get_correction_factor(cls.res1)
class TestNegbinCluExposureFit(CheckCountRobustMixin):
@classmethod
def setup_class(cls):
cls.res2 = results_st.results_negbin_exposure_clu #nonrobust
mod = smd.NegativeBinomial(endog, exog, exposure=exposure)
cls.res1 = res1 = mod.fit(disp=False, cov_type='cluster',
cov_kwds=dict(groups=group,
use_correction=True,
df_correction=True), #TODO has no effect
use_t=False, #True,
)
cls.bse_rob = cls.res1.bse
cls.corr_fact = cls.get_correction_factor(cls.res1)
class CheckDiscreteGLM(object):
# compare GLM with other models, no verified reference results
def test_basic(self):
res1 = self.res1
res2 = self.res2
assert_equal(res1.cov_type, self.cov_type)
assert_equal(res2.cov_type, self.cov_type)
rtol = getattr(res1, 'rtol', 1e-13)
assert_allclose(res1.params, res2.params, rtol=rtol)
assert_allclose(res1.bse, res2.bse, rtol=1e-10)
class TestGLMLogit(CheckDiscreteGLM):
@classmethod
def setup_class(cls):
endog_bin = (endog > endog.mean()).astype(int)
cls.cov_type = 'cluster'
mod1 = GLM(endog_bin, exog, family=families.Binomial())
cls.res1 = mod1.fit(cov_type='cluster', cov_kwds=dict(groups=group))
mod1 = smd.Logit(endog_bin, exog)
cls.res2 = mod1.fit(cov_type='cluster', cov_kwds=dict(groups=group))
class TestGLMProbit(CheckDiscreteGLM):
@classmethod
def setup_class(cls):
endog_bin = (endog > endog.mean()).astype(int)
cls.cov_type = 'cluster'
mod1 = GLM(endog_bin, exog, family=families.Binomial(link=links.probit()))
cls.res1 = mod1.fit(method='newton',
cov_type='cluster', cov_kwds=dict(groups=group))
mod1 = smd.Probit(endog_bin, exog)
cls.res2 = mod1.fit(cov_type='cluster', cov_kwds=dict(groups=group))
cls.rtol = 1e-6
def test_score_hessian(self):
res1 = self.res1
res2 = self.res2
# Note scale is fixed at 1, so we don't need to fix it explicitly
score1 = res1.model.score(res1.params * 0.98)
score2 = res2.model.score(res1.params * 0.98)
assert_allclose(score1, score2, rtol=1e-13)
hess1 = res1.model.hessian(res1.params)
hess2 = res2.model.hessian(res1.params)
assert_allclose(hess1, hess2, rtol=1e-10)
class TestGLMGaussNonRobust(CheckDiscreteGLM):
@classmethod
def setup_class(cls):
cls.cov_type = 'nonrobust'
mod1 = GLM(endog, exog, family=families.Gaussian())
cls.res1 = mod1.fit()
mod2 = OLS(endog, exog)
cls.res2 = mod2.fit()
class TestGLMGaussClu(CheckDiscreteGLM):
@classmethod
def setup_class(cls):
cls.cov_type = 'cluster'
mod1 = GLM(endog, exog, family=families.Gaussian())
cls.res1 = mod1.fit(cov_type='cluster', cov_kwds=dict(groups=group))
mod2 = OLS(endog, exog)
cls.res2 = mod2.fit(cov_type='cluster', cov_kwds=dict(groups=group))
class TestGLMGaussHC(CheckDiscreteGLM):
@classmethod
def setup_class(cls):
cls.cov_type = 'HC0'
mod1 = GLM(endog, exog, family=families.Gaussian())
cls.res1 = mod1.fit(cov_type='HC0')
mod2 = OLS(endog, exog)
cls.res2 = mod2.fit(cov_type='HC0')
class TestGLMGaussHAC(CheckDiscreteGLM):
@classmethod
def setup_class(cls):
cls.cov_type = 'HAC'
kwds={'maxlags':2}
mod1 = GLM(endog, exog, family=families.Gaussian())
cls.res1 = mod1.fit(cov_type='HAC', cov_kwds=kwds)
mod2 = OLS(endog, exog)
cls.res2 = mod2.fit(cov_type='HAC', cov_kwds=kwds)
class TestGLMGaussHAC2(CheckDiscreteGLM):
@classmethod
def setup_class(cls):
cls.cov_type = 'HAC'
# check kernel specified as string
kwds = {'kernel': 'bartlett', 'maxlags': 2}
mod1 = GLM(endog, exog, family=families.Gaussian())
cls.res1 = mod1.fit(cov_type='HAC', cov_kwds=kwds)
mod2 = OLS(endog, exog)
kwds2 = {'maxlags': 2}
cls.res2 = mod2.fit(cov_type='HAC', cov_kwds=kwds2)
class TestGLMGaussHACUniform(CheckDiscreteGLM):
@classmethod
def setup_class(cls):
cls.cov_type = 'HAC'
kwds={'kernel':sw.weights_uniform, 'maxlags':2}
mod1 = GLM(endog, exog, family=families.Gaussian())
cls.res1 = mod1.fit(cov_type='HAC', cov_kwds=kwds)
mod2 = OLS(endog, exog)
cls.res2 = mod2.fit(cov_type='HAC', cov_kwds=kwds)
#for debugging
cls.res3 = mod2.fit(cov_type='HAC', cov_kwds={'maxlags':2})
def test_cov_options(self):
# check keyword `weights_func
kwdsa = {'weights_func': sw.weights_uniform, 'maxlags': 2}
res1a = self.res1.model.fit(cov_type='HAC', cov_kwds=kwdsa)
res2a = self.res2.model.fit(cov_type='HAC', cov_kwds=kwdsa)
assert_allclose(res1a.bse, self.res1.bse, rtol=1e-12)
assert_allclose(res2a.bse, self.res2.bse, rtol=1e-12)
# regression test for bse values
bse = np.array([ 2.82203924, 4.60199596, 11.01275064])
assert_allclose(res1a.bse, bse, rtol=1e-6)
assert_(res1a.cov_kwds['weights_func'] is sw.weights_uniform)
kwdsb = {'kernel': sw.weights_bartlett, 'maxlags': 2}
res1a = self.res1.model.fit(cov_type='HAC', cov_kwds=kwdsb)
res2a = self.res2.model.fit(cov_type='HAC', cov_kwds=kwdsb)
assert_allclose(res1a.bse, res2a.bse, rtol=1e-12)
# regression test for bse values
bse = np.array([ 2.502264, 3.697807, 9.193303])
assert_allclose(res1a.bse, bse, rtol=1e-6)
class TestGLMGaussHACUniform2(TestGLMGaussHACUniform):
@classmethod
def setup_class(cls):
cls.cov_type = 'HAC'
kwds={'kernel': sw.weights_uniform, 'maxlags': 2}
mod1 = GLM(endog, exog, family=families.Gaussian())
cls.res1 = mod1.fit(cov_type='HAC', cov_kwds=kwds)
# check kernel as string
mod2 = OLS(endog, exog)
kwds2 = {'kernel': 'uniform', 'maxlags': 2}
cls.res2 = mod2.fit(cov_type='HAC', cov_kwds=kwds)
class TestGLMGaussHACPanel(CheckDiscreteGLM):
@classmethod
def setup_class(cls):
cls.cov_type = 'hac-panel'
# time index is just made up to have a test case
time = np.tile(np.arange(7), 5)[:-1]
mod1 = GLM(endog.copy(), exog.copy(), family=families.Gaussian())
kwds = dict(time=time,
maxlags=2,
kernel=sw.weights_uniform,
use_correction='hac',
df_correction=False)
cls.res1 = mod1.fit(cov_type='hac-panel', cov_kwds=kwds)
cls.res1b = mod1.fit(cov_type='nw-panel', cov_kwds=kwds)
mod2 = OLS(endog, exog)
cls.res2 = mod2.fit(cov_type='hac-panel', cov_kwds=kwds)
def test_kwd(self):
# test corrected keyword name
assert_allclose(self.res1b.bse, self.res1.bse, rtol=1e-12)
class TestGLMGaussHACPanelGroups(CheckDiscreteGLM):
@classmethod
def setup_class(cls):
cls.cov_type = 'hac-panel'
# time index is just made up to have a test case
groups = np.repeat(np.arange(5), 7)[:-1]
mod1 = GLM(endog.copy(), exog.copy(), family=families.Gaussian())
kwds = dict(groups=pd.Series(groups), # check for #3606
maxlags=2,
kernel=sw.weights_uniform,
use_correction='hac',
df_correction=False)
cls.res1 = mod1.fit(cov_type='hac-panel', cov_kwds=kwds)
mod2 = OLS(endog, exog)
cls.res2 = mod2.fit(cov_type='hac-panel', cov_kwds=kwds)
class TestGLMGaussHACGroupsum(CheckDiscreteGLM):
@classmethod
def setup_class(cls):
cls.cov_type = 'hac-groupsum'
# time index is just made up to have a test case
time = np.tile(np.arange(7), 5)[:-1]
mod1 = GLM(endog, exog, family=families.Gaussian())
kwds = dict(time=pd.Series(time), # check for #3606
maxlags=2,
use_correction='hac',
df_correction=False)
cls.res1 = mod1.fit(cov_type='hac-groupsum', cov_kwds=kwds)
cls.res1b = mod1.fit(cov_type='nw-groupsum', cov_kwds=kwds)
mod2 = OLS(endog, exog)
cls.res2 = mod2.fit(cov_type='hac-groupsum', cov_kwds=kwds)
def test_kwd(self):
# test corrected keyword name
assert_allclose(self.res1b.bse, self.res1.bse, rtol=1e-12)