# -*- coding: utf-8 -*-
"""
Created on Fri Oct 04 13:19:01 2013
Author: Josef Perktold
"""
from statsmodels.compat.python import lrange, lmap
import os
import copy
import pytest
import numpy as np
from numpy.testing import assert_allclose, assert_equal
import pandas as pd
from statsmodels.tools.tools import add_constant
from statsmodels.regression.linear_model import OLS
import statsmodels.sandbox.regression.gmm as gmm
def get_griliches76_data():
    """Load the Griliches (1976) wage data set shipped next to this module.

    Reads ``griliches76.dta`` from this file's directory, adds one 0/1
    dummy column per distinct ``year`` value and a ``const`` column.

    Returns
    -------
    Y : pandas.Series
        Log wage (``lw``).
    X : pandas.DataFrame
        Exogenous regressors with a constant prepended (R ordering).
    Z : pandas.DataFrame
        Instruments with a constant prepended.
    """
    curdir = os.path.split(__file__)[0]
    path = os.path.join(curdir, 'griliches76.dta')
    griliches76_data = pd.read_stata(path)

    # create year dummies; the vectorized comparison replaces the original
    # per-element ``.loc`` double loop and yields identical float columns
    years = griliches76_data['year'].unique()
    for yr in years:
        griliches76_data['D_%i' % yr] = (
            griliches76_data['year'] == yr).astype(float)

    griliches76_data['const'] = 1

    X = add_constant(griliches76_data[['s', 'iq', 'expr', 'tenure', 'rns',
                                       'smsa', 'D_67', 'D_68', 'D_69',
                                       'D_70', 'D_71', 'D_73']],
                     # prepend=False would match the Stata ordering
                     prepend=True)  # for R comparison

    Z = add_constant(griliches76_data[['expr', 'tenure', 'rns', 'smsa',
                                       'D_67', 'D_68', 'D_69', 'D_70',
                                       'D_71', 'D_73', 'med', 'kww', 'age',
                                       'mrt']])

    Y = griliches76_data['lw']

    return Y, X, Z
# use module global to load only once
yg_df, xg_df, zg_df = get_griliches76_data()

endog = np.asarray(yg_df, dtype=float)  # TODO: why is yg_df float32
exog, instrument = lmap(np.asarray, [xg_df, zg_df])
# sanity check: conversions above must yield double precision
assert exog.dtype == np.float64
assert instrument.dtype == np.float64

# Reference results computed in R (ivreg), constant first
# from R
#-----------------
varnames = np.array(["(Intercept)", "s", "iq", "expr", "tenure", "rns", "smsa", "D_67", "D_68", "D_69", "D_70",
                     "D_71", "D_73"])
params = np.array([ 4.03350989, 0.17242531, -0.00909883, 0.04928949, 0.04221709,
                   -0.10179345, 0.12611095, -0.05961711, 0.04867956, 0.15281763,
                    0.17443605, 0.09166597, 0.09323977])
bse = np.array([ 0.31816162, 0.02091823, 0.00474527, 0.00822543, 0.00891969,
                 0.03447337, 0.03119615, 0.05577582, 0.05246796, 0.05201092,
                 0.06027671, 0.05461436, 0.05767865])
tvalues = np.array([ 12.6775501, 8.2428242, -1.9174531, 5.9923305, 4.7330205,
                     -2.9528144, 4.0425165, -1.0688701, 0.9277959, 2.9381834,
                      2.8939212, 1.6784225, 1.6165385])
pvalues = np.array([ 1.72360000e-33, 7.57025400e-16, 5.55625000e-02,
                     3.21996700e-09, 2.64739100e-06, 3.24794100e-03,
                     5.83809900e-05, 2.85474400e-01, 3.53813900e-01,
                     3.40336100e-03, 3.91575100e-03, 9.36840200e-02,
                     1.06401300e-01])
#-----------------
def test_iv2sls_r():
    """IV2SLS point estimates and standard errors against the R benchmark."""
    model = gmm.IV2SLS(endog, exog, instrument)
    results = model.fit()

    nobs, k_vars = exog.shape

    # parameters agree tightly with the R reference values
    assert_allclose(results.params, params, rtol=1e-7, atol=1e-9)

    # TODO: check df correction for the standard errors
    #assert_allclose(results.bse * np.sqrt((nobs - k_vars) / (nobs - k_vars - 1.)), bse,
    assert_allclose(results.bse, bse, rtol=0, atol=3e-7)

    # GH 3849: fitting must not attach a cached results object to the model
    assert not hasattr(model, '_results')
def test_ivgmm0_r():
    # One-step GMM (maxiter=0) with the 2SLS weight matrix, compared
    # against the R (2SLS) reference values defined at module level.
    n, k = exog.shape
    nobs, k_instr = instrument.shape

    # initial weight matrix: inverse of Z'Z / nobs
    w0inv = np.dot(instrument.T, instrument) / nobs
    w0 = np.linalg.inv(w0inv)

    mod = gmm.IVGMM(endog, exog, instrument)
    res = mod.fit(np.ones(exog.shape[1], float), maxiter=0, inv_weights=w0inv,
                  optim_method='bfgs',
                  optim_args={'gtol':1e-8, 'disp': 0})

    assert_allclose(res.params, params, rtol=1e-4, atol=1e-4)
    # TODO : res.bse and bse are not the same, rtol=0.09 is large in this case
    #res.bse is still robust?, bse is not a sandwich ?
    assert_allclose(res.bse, bse, rtol=0.09, atol=0)

    # gradient of the GMM objective should vanish at the estimate
    score = res.model.score(res.params, w0)
    assert_allclose(score, np.zeros(score.shape), rtol=0, atol=5e-6)  # atol=1e-8) ??
def test_ivgmm1_stata():
    # One GMM iteration (maxiter=1) with Stata reference numbers.
    # NOTE(review): params_stata and bse_stata are defined but never
    # compared against `res` — this test only smoke-tests the fit;
    # confirm whether assertions were intended here.

    # copied constant to the beginning
    params_stata = np.array(
        [ 4.0335099 , 0.17242531, -0.00909883, 0.04928949, 0.04221709,
         -0.10179345, 0.12611095, -0.05961711, 0.04867956, 0.15281763,
          0.17443605, 0.09166597, 0.09323976])

    # robust bse with gmm onestep
    bse_stata = np.array(
        [ 0.33503289, 0.02073947, 0.00488624, 0.0080498 , 0.00946363,
          0.03371053, 0.03081138, 0.05171372, 0.04981322, 0.0479285 ,
          0.06112515, 0.0554618 , 0.06084901])

    n, k = exog.shape
    nobs, k_instr = instrument.shape

    w0inv = np.dot(instrument.T, instrument) / nobs
    w0 = np.linalg.inv(w0inv)
    start = OLS(endog, exog).fit().params

    mod = gmm.IVGMM(endog, exog, instrument)
    res = mod.fit(start, maxiter=1, inv_weights=w0inv, optim_method='bfgs', optim_args={'gtol':1e-6, 'disp': 0})
# move constant to end for Stata
# exog_st is the regressor array reordered so the constant column is last,
# matching Stata's default column ordering; used by the Test* classes below.
idx = lrange(len(params))
idx = idx[1:] + idx[:1]
exog_st = exog[:, idx]
class TestGMMOLS(object):
    # GMM with exog used as its own instrument should replicate OLS.

    @classmethod
    def setup_class(cls):
        exog = exog_st  # with const at end
        res_ols = OLS(endog, exog).fit()

        # use exog as instrument
        nobs, k_instr = exog.shape
        w0inv = np.dot(exog.T, exog) / nobs
        #w0 = np.linalg.inv(w0inv)

        mod = gmm.IVGMM(endog, exog, exog)
        res = mod.fit(np.ones(exog.shape[1], float), maxiter=0, inv_weights=w0inv,
                      optim_method='bfgs', optim_args={'gtol':1e-6, 'disp': 0})

        cls.res1 = res
        cls.res2 = res_ols

    def test_basic(self):
        res1, res2 = self.res1, self.res2
        # test both absolute and relative difference
        assert_allclose(res1.params, res2.params, rtol=5e-4, atol=0)
        assert_allclose(res1.params, res2.params, rtol=0, atol=1e-5)

        n = res1.model.exog.shape[0]
        dffac = 1  #np.sqrt((n - 1.) / n)  # currently different df in cov calculation
        # GMM robust bse compared to OLS HC0 standard errors
        assert_allclose(res1.bse * dffac, res2.HC0_se, rtol=5e-6, atol=0)
        assert_allclose(res1.bse * dffac, res2.HC0_se, rtol=0, atol=1e-7)

    @pytest.mark.xfail(reason="Not asserting anything meaningful",
                       raises=NotImplementedError, strict=True)
    def test_other(self):
        res1, res2 = self.res1, self.res2
        raise NotImplementedError
class CheckGMM(object):
    # Shared checks comparing a GMM results instance (cls.res1) against
    # external reference results (cls.res2).  Subclasses set res1/res2 in
    # setup_class and may override the (rtol, atol) tolerance pairs.
    params_tol = [5e-6, 5e-6]
    bse_tol = [5e-7, 5e-7]

    def test_basic(self):
        res1, res2 = self.res1, self.res2
        # test both absolute and relative difference
        rtol,  atol = self.params_tol
        assert_allclose(res1.params, res2.params, rtol=rtol, atol=0)
        assert_allclose(res1.params, res2.params, rtol=0, atol=atol)

        n = res1.model.exog.shape[0]
        dffac = 1  #np.sqrt((n - 1.) / n)  # currently different df in cov calculation
        rtol,  atol = self.bse_tol
        assert_allclose(res1.bse * dffac, res2.bse, rtol=rtol, atol=0)
        assert_allclose(res1.bse * dffac, res2.bse, rtol=0, atol=atol)

    def test_other(self):
        # TODO: separate Q and J tests
        res1, res2 = self.res1, self.res2
        assert_allclose(res1.q, res2.Q, rtol=5e-6, atol=0)
        assert_allclose(res1.jval, res2.J, rtol=5e-5, atol=0)

    def test_hypothesis(self):
        res1, res2 = self.res1, self.res2
        restriction = np.eye(len(res1.params))
        res_t = res1.t_test(restriction)
        # internal consistency of t_test with tvalues/pvalues
        assert_allclose(res_t.tvalue, res1.tvalues, rtol=1e-12, atol=0)
        assert_allclose(res_t.pvalue, res1.pvalues, rtol=1e-12, atol=0)
        rtol,  atol = self.bse_tol
        assert_allclose(res_t.tvalue, res2.tvalues, rtol=rtol*10, atol=atol)
        assert_allclose(res_t.pvalue, res2.pvalues, rtol=rtol*10, atol=atol)

        res_f = res1.f_test(restriction[:-1])  # without constant
        # comparison with fvalue is not possible, those are not defined
        # assert_allclose(res_f.fvalue, res1.fvalue, rtol=1e-12, atol=0)
        # assert_allclose(res_f.pvalue, res1.f_pvalue, rtol=1e-12, atol=0)
        # assert_allclose(res_f.fvalue, res2.F, rtol=1e-10, atol=0)
        # assert_allclose(res_f.pvalue, res2.Fp, rtol=1e-08, atol=0)

        # Smoke test for Wald
        res_wald = res1.wald_test(restriction[:-1])

    @pytest.mark.smoke
    def test_summary(self):
        res1 = self.res1
        summ = res1.summary()
        # len + 1 is for header line
        assert_equal(len(summ.tables[1]), len(res1.params) + 1)

    def test_use_t(self):
        # Copy to avoid cache
        res1 = copy.deepcopy(self.res1)
        res1.use_t = True
        summ = res1.summary()
        # with use_t the summary must report t- rather than z-statistics
        assert 'P>|t|' in str(summ)
        assert 'P>|z|' not in str(summ)
class TestGMMSt1(CheckGMM):
    # Iterated GMM (maxiter=10) against Stata's iterative GMM results.

    @classmethod
    def setup_class(cls):
        #cls.bse_tol = [5e-7, 5e-7]
        # compare to Stata default options, iterative GMM
        exog = exog_st  # with const at end
        start = OLS(endog, exog).fit().params
        nobs, k_instr = instrument.shape
        w0inv = np.dot(instrument.T, instrument) / nobs
        #w0 = np.linalg.inv(w0inv)

        mod = gmm.IVGMM(endog, exog, instrument)
        res10 = mod.fit(start, maxiter=10, inv_weights=w0inv,
                        optim_method='bfgs', optim_args={'gtol':1e-6, 'disp': 0},
                        wargs={'centered':False})
        cls.res1 = res10

        from .results_gmm_griliches_iter import results
        cls.res2 = results
class TestGMMStTwostep(CheckGMM):
    #compares has_optimal_weights=True with Stata's has_optimal_weights=False

    @classmethod
    def setup_class(cls):
        # compare to Stata default options, twostep GMM
        cls.params_tol = [5e-5, 5e-6]
        cls.bse_tol = [5e-6, 5e-7]
        exog = exog_st  # with const at end
        start = OLS(endog, exog).fit().params
        nobs, k_instr = instrument.shape
        w0inv = np.dot(instrument.T, instrument) / nobs
        #w0 = np.linalg.inv(w0inv)

        mod = gmm.IVGMM(endog, exog, instrument)
        res10 = mod.fit(start, maxiter=2, inv_weights=w0inv,
                        optim_method='bfgs', optim_args={'gtol':1e-6, 'disp': 0},
                        wargs={'centered':False})
        cls.res1 = res10

        from .results_gmm_griliches import results_twostep as results
        cls.res2 = results
class TestGMMStTwostepNO(CheckGMM):
    #with Stata default `has_optimal_weights=False`

    @classmethod
    def setup_class(cls):
        # compare to Stata default options, twostep GMM
        cls.params_tol = [5e-5, 5e-6]
        cls.bse_tol = [1e-6, 5e-5]
        exog = exog_st  # with const at end
        start = OLS(endog, exog).fit().params
        nobs, k_instr = instrument.shape
        w0inv = np.dot(instrument.T, instrument) / nobs
        #w0 = np.linalg.inv(w0inv)

        mod = gmm.IVGMM(endog, exog, instrument)
        res10 = mod.fit(start, maxiter=2, inv_weights=w0inv,
                        optim_method='bfgs', optim_args={'gtol':1e-6, 'disp': 0},
                        wargs={'centered':False}, has_optimal_weights=False)
        cls.res1 = res10

        from .results_gmm_griliches import results_twostep as results
        cls.res2 = results
class TestGMMStOnestep(CheckGMM):
    # One-step GMM (maxiter=0) against Stata's onestep results; tolerances
    # are loose, see the cov/weights TODOs below.

    @classmethod
    def setup_class(cls):
        # compare to Stata default options, onestep GMM
        cls.params_tol = [5e-4, 5e-5]
        cls.bse_tol = [7e-3, 5e-4]
        exog = exog_st  # with const at end
        start = OLS(endog, exog).fit().params
        nobs, k_instr = instrument.shape
        w0inv = np.dot(instrument.T, instrument) / nobs
        #w0 = np.linalg.inv(w0inv)

        mod = gmm.IVGMM(endog, exog, instrument)
        res = mod.fit(start, maxiter=0, inv_weights=w0inv,
                      optim_method='bfgs',
                      optim_args={'gtol':1e-6, 'disp': 0})
        cls.res1 = res

        from .results_gmm_griliches import results_onestep as results
        cls.res2 = results

    def test_bse_other(self):
        # exploratory: alternative bse versions; currently no assertions
        res1, res2 = self.res1, self.res2
        # try other versions for bse,
        # TODO: next two produce the same as before (looks like)
        bse = np.sqrt(np.diag((res1._cov_params(has_optimal_weights=False))))
        #weights=res1.weights))))
        # TODO: does not look different
        #assert_allclose(res1.bse, res2.bse, rtol=5e-06, atol=0)
        #nobs = instrument.shape[0]
        #w0inv = np.dot(instrument.T, instrument) / nobs
        q = self.res1.model.gmmobjective(self.res1.params, np.linalg.inv(self.res1.weights))
        #assert_allclose(q, res2.Q, rtol=5e-6, atol=0)

    @pytest.mark.xfail(reason="q vs Q comparison fails",
                       raises=AssertionError, strict=True)
    def test_other(self):
        super(TestGMMStOnestep, self).test_other()
class TestGMMStOnestepNO(CheckGMM):
    # matches Stats's defaults wargs={'centered':False}, has_optimal_weights=False

    @classmethod
    def setup_class(cls):
        # compare to Stata default options, onestep GMM
        cls.params_tol = [1e-5, 1e-6]
        cls.bse_tol = [5e-6, 5e-7]
        exog = exog_st  # with const at end
        start = OLS(endog, exog).fit().params
        nobs, k_instr = instrument.shape
        w0inv = np.dot(instrument.T, instrument) / nobs
        #w0 = np.linalg.inv(w0inv)

        mod = gmm.IVGMM(endog, exog, instrument)
        res = mod.fit(start, maxiter=0, inv_weights=w0inv,
                      optim_method='bfgs', optim_args={'gtol':1e-6, 'disp': 0},
                      wargs={'centered':False}, has_optimal_weights=False)
        cls.res1 = res

        from .results_gmm_griliches import results_onestep as results
        cls.res2 = results

    @pytest.mark.xfail(reason="q vs Q comparison fails",
                       raises=AssertionError, strict=True)
    def test_other(self):
        super(TestGMMStOnestepNO, self).test_other()
class TestGMMStOneiter(CheckGMM):
    # One weight-matrix update (maxiter=1) against Stata's onestep results.

    @classmethod
    def setup_class(cls):
        # compare to Stata default options, onestep GMM
        # this uses maxiter=1, one iteration in loop
        cls.params_tol = [5e-4, 5e-5]
        cls.bse_tol = [7e-3, 5e-4]
        exog = exog_st  # with const at end
        start = OLS(endog, exog).fit().params
        nobs, k_instr = instrument.shape
        w0inv = np.dot(instrument.T, instrument) / nobs
        #w0 = np.linalg.inv(w0inv)

        mod = gmm.IVGMM(endog, exog, instrument)
        res = mod.fit(start, maxiter=1, inv_weights=w0inv,
                      optim_method='bfgs', optim_args={'gtol':1e-6, 'disp': 0})
        cls.res1 = res

        from .results_gmm_griliches import results_onestep as results
        cls.res2 = results

    @pytest.mark.xfail(reason="q vs Q comparison fails",
                       raises=AssertionError, strict=True)
    def test_other(self):
        super(TestGMMStOneiter, self).test_other()

    def test_bse_other(self):
        # exploratory: alternative bse versions; assertions all commented out
        res1, res2 = self.res1, self.res2

        moms = res1.model.momcond(res1.params)
        w = res1.model.calc_weightmatrix(moms)
        # try other versions for bse,
        # TODO: next two produce the same as before (looks like)
        bse = np.sqrt(np.diag((res1._cov_params(has_optimal_weights=False,
                                                weights=res1.weights))))
        # TODO: does not look different
        #assert_allclose(res1.bse, res2.bse, rtol=5e-06, atol=0)
        bse = np.sqrt(np.diag((res1._cov_params(has_optimal_weights=False))))
        #use_weights=True #weights=w
        #assert_allclose(res1.bse, res2.bse, rtol=5e-06, atol=0)

        #This does not replicate Stata oneway either
        nobs = instrument.shape[0]
        w0inv = np.dot(instrument.T, instrument) / nobs
        q = self.res1.model.gmmobjective(self.res1.params, w)  #self.res1.weights)
        #assert_allclose(q, res2.Q, rtol=5e-6, atol=0)
class TestGMMStOneiterNO(CheckGMM):
    # maxiter=1 with Stata-style options (uncentered, non-optimal weights).

    @classmethod
    def setup_class(cls):
        # compare to Stata default options, onestep GMM
        # this uses maxiter=1, one iteration in loop
        cls.params_tol = [1e-5, 1e-6]
        cls.bse_tol = [5e-6, 5e-7]
        exog = exog_st  # with const at end
        start = OLS(endog, exog).fit().params
        nobs, k_instr = instrument.shape
        w0inv = np.dot(instrument.T, instrument) / nobs
        #w0 = np.linalg.inv(w0inv)

        mod = gmm.IVGMM(endog, exog, instrument)
        res = mod.fit(start, maxiter=1, inv_weights=w0inv,
                      optim_method='bfgs', optim_args={'gtol':1e-6, 'disp': 0},
                      wargs={'centered':False}, has_optimal_weights=False)
        cls.res1 = res

        from .results_gmm_griliches import results_onestep as results
        cls.res2 = results

    @pytest.mark.xfail(reason="q vs Q comparison fails",
                       raises=AssertionError, strict=True)
    def test_other(self):
        super(TestGMMStOneiterNO, self).test_other()
#------------ Crosscheck subclasses
class TestGMMStOneiterNO_Linear(CheckGMM):
    # Crosscheck: LinearIVGMM should reproduce IVGMM very closely
    # (cls.res3 holds the generic IVGMM fit for reference).

    @classmethod
    def setup_class(cls):
        # compare to Stata default options, onestep GMM
        # this uses maxiter=1, one iteration in loop
        cls.params_tol = [5e-9, 1e-9]
        cls.bse_tol = [5e-10, 1e-10]
        exog = exog_st  # with const at end
        start = OLS(endog, exog).fit().params
        nobs, k_instr = instrument.shape
        w0inv = np.dot(instrument.T, instrument) / nobs
        #w0 = np.linalg.inv(w0inv)

        mod = gmm.LinearIVGMM(endog, exog, instrument)
        res = mod.fit(start, maxiter=1, inv_weights=w0inv,
                      optim_method='bfgs', optim_args={'gtol':1e-8, 'disp': 0},
                      wargs={'centered':False}, has_optimal_weights=False)
        cls.res1 = res

        mod = gmm.IVGMM(endog, exog, instrument)
        res = mod.fit(start, maxiter=1, inv_weights=w0inv,
                      optim_method='bfgs', optim_args={'gtol':1e-6, 'disp': 0},
                      wargs={'centered':False}, has_optimal_weights=False)
        cls.res3 = res

        from .results_gmm_griliches import results_onestep as results
        cls.res2 = results

    @pytest.mark.xfail(reason="q vs Q comparison fails",
                       raises=AssertionError, strict=True)
    def test_other(self):
        super(TestGMMStOneiterNO_Linear, self).test_other()
class TestGMMStOneiterNO_Nonlinear(CheckGMM):
    # Crosscheck: NonlinearIVGMM with a linear moment function should
    # reproduce IVGMM (cls.res3 holds the generic IVGMM fit for reference).

    @classmethod
    def setup_class(cls):
        # compare to Stata default options, onestep GMM
        # this uses maxiter=1, one iteration in loop
        cls.params_tol = [5e-5, 5e-6]
        cls.bse_tol = [5e-6, 1e-1]
        exog = exog_st  # with const at end
        start = OLS(endog, exog).fit().params
        nobs, k_instr = instrument.shape
        w0inv = np.dot(instrument.T, instrument) / nobs
        #w0 = np.linalg.inv(w0inv)

        def func(params, exog):
            # linear mean function so results should match the linear model
            return np.dot(exog, params)

        mod = gmm.NonlinearIVGMM(endog, exog, instrument, func)
        res = mod.fit(start, maxiter=1, inv_weights=w0inv,
                      optim_method='bfgs', optim_args={'gtol':1e-8, 'disp': 0},
                      wargs={'centered':False}, has_optimal_weights=False)
        cls.res1 = res

        mod = gmm.IVGMM(endog, exog, instrument)
        res = mod.fit(start, maxiter=1, inv_weights=w0inv,
                      optim_method='bfgs', optim_args={'gtol':1e-6, 'disp': 0},
                      wargs={'centered':False}, has_optimal_weights=False)
        cls.res3 = res

        from .results_gmm_griliches import results_onestep as results
        cls.res2 = results

    @pytest.mark.xfail(reason="q vs Q comparison fails",
                       raises=AssertionError, strict=True)
    def test_other(self):
        super(TestGMMStOneiterNO_Nonlinear, self).test_other()

    def test_score(self):
        # numeric (subclass) vs generic (base class) score away from and at
        # the optimum; at the optimum the score should be (near) zero
        params = self.res1.params * 1.1
        weights = self.res1.weights
        sc1 = self.res1.model.score(params, weights)
        sc2 = super(self.res1.model.__class__, self.res1.model).score(params,
                                                                      weights)
        assert_allclose(sc1, sc2, rtol=1e-6, atol=0)
        assert_allclose(sc1, sc2, rtol=0, atol=1e-7)

        # score at optimum
        sc1 = self.res1.model.score(self.res1.params, weights)
        assert_allclose(sc1, np.zeros(len(params)), rtol=0, atol=1e-8)
class TestGMMStOneiterOLS_Linear(CheckGMM):
    # LinearIVGMM with exog as instrument and iid weights replicates OLS.

    @classmethod
    def setup_class(cls):
        # replicating OLS by GMM - high agreement
        cls.params_tol = [1e-11, 1e-12]
        cls.bse_tol = [1e-12, 1e-12]
        exog = exog_st  # with const at end
        res_ols = OLS(endog, exog).fit()
        #Note: start is irrelevant but required
        start = np.ones(len(res_ols.params))
        nobs, k_instr = instrument.shape
        w0inv = np.dot(exog.T, exog) / nobs
        #w0 = np.linalg.inv(w0inv)

        mod = gmm.LinearIVGMM(endog, exog, exog)
        res = mod.fit(start, maxiter=0, inv_weights=w0inv,
                      #optim_method='bfgs', optim_args={'gtol':1e-6, 'disp': 0},
                      optim_args={'disp': 0},
                      weights_method='iid',
                      wargs={'centered':False, 'ddof':'k_params'},
                      has_optimal_weights=True)

        # fix use of t distribution see #2495 comment
        res.use_t = True
        res.df_resid = res.nobs - len(res.params)
        cls.res1 = res

        #from .results_gmm_griliches import results_onestep as results
        #cls.res2 = results
        cls.res2 = res_ols

    @pytest.mark.xfail(reason="RegressionResults has no `Q` attribute",
                       raises=AttributeError, strict=True)
    def test_other(self):
        super(TestGMMStOneiterOLS_Linear, self).test_other()
# ------------------
class TestGMMSt2(object):
    # this looks like an old version, trying out different comparisons
    # of options with Stats

    @classmethod
    def setup_class(cls):
        # compare to Stata default options, iterative GMM
        exog = exog_st  # with const at end
        start = OLS(endog, exog).fit().params
        nobs, k_instr = instrument.shape
        w0inv = np.dot(instrument.T, instrument) / nobs
        #w0 = np.linalg.inv(w0inv)

        mod = gmm.IVGMM(endog, exog, instrument)
        res = mod.fit(start, maxiter=2, inv_weights=w0inv,
                      wargs={'ddof':0, 'centered':False},
                      optim_method='bfgs', optim_args={'gtol':1e-6, 'disp': 0})
        cls.res1 = res

        from .results_ivreg2_griliches import results_gmm2s_robust as results
        cls.res2 = results

        # TODO: remove after testing, compare bse from 1 iteration
        # see test_basic
        mod = gmm.IVGMM(endog, exog, instrument)
        res = mod.fit(start, maxiter=1, inv_weights=w0inv,
                      wargs={'ddof':0, 'centered':False},
                      optim_method='bfgs', optim_args={'gtol':1e-6, 'disp': 0})
        cls.res3 = res

    def test_basic(self):
        res1, res2 = self.res1, self.res2
        # test both absolute and relative difference
        assert_allclose(res1.params, res2.params, rtol=5e-05, atol=0)
        assert_allclose(res1.params, res2.params, rtol=0, atol=5e-06)

        n = res1.model.exog.shape[0]
        # TODO: check df correction np.sqrt(745./758 )*res1.bse matches better
        dffact = np.sqrt(745. / 758 )
        assert_allclose(res1.bse * dffact, res2.bse, rtol=5e-03, atol=0)
        assert_allclose(res1.bse * dffact, res2.bse, rtol=0, atol=5e-03)

        # try other versions for bse,
        # TODO: next two produce the same as before (looks like)
        bse = np.sqrt(np.diag((res1._cov_params(has_optimal_weights=True,
                                                weights=res1.weights))))
        assert_allclose(res1.bse, res2.bse, rtol=5e-01, atol=0)

        bse = np.sqrt(np.diag((res1._cov_params(has_optimal_weights=True,
                                                weights=res1.weights,
                                                use_weights=True))))
        assert_allclose(res1.bse, res2.bse, rtol=5e-02, atol=0)

        # TODO: resolve this
        # try bse from previous step, is closer to Stata
        # guess: Stata ivreg2 does not calc for bse update after final iteration
        # need better test case, bse difference is close to numerical optimization precision
        assert_allclose(self.res3.bse, res2.bse, rtol=5e-05, atol=0)
        assert_allclose(self.res3.bse, res2.bse, rtol=0, atol=5e-06)

        # TODO; tvalues are not available yet, no inheritance
        #assert_allclose(res1.tvalues, res2.tvalues, rtol=5e-10, atol=0)
class CheckIV2SLS(object):
    # Shared checks comparing an IV2SLS results instance (cls.res1)
    # against external (ivreg2) reference results (cls.res2).

    def test_basic(self):
        res1, res2 = self.res1, self.res2
        # test both absolute and relative difference
        assert_allclose(res1.params, res2.params, rtol=1e-9, atol=0)
        assert_allclose(res1.params, res2.params, rtol=0, atol=1e-10)

        n = res1.model.exog.shape[0]
        assert_allclose(res1.bse, res2.bse, rtol=1e-10, atol=0)
        assert_allclose(res1.bse, res2.bse, rtol=0, atol=1e-11)

        assert_allclose(res1.tvalues, res2.tvalues, rtol=5e-10, atol=0)

    def test_other(self):
        # fit statistics against the reference results
        res1, res2 = self.res1, self.res2
        assert_allclose(res1.rsquared, res2.r2, rtol=1e-7, atol=0)
        assert_allclose(res1.rsquared_adj, res2.r2_a, rtol=1e-7, atol=0)

        # TODO: why is fvalue different, IV2SLS uses inherited linear
        assert_allclose(res1.fvalue, res2.F, rtol=1e-10, atol=0)
        assert_allclose(res1.f_pvalue, res2.Fp, rtol=1e-8, atol=0)
        assert_allclose(np.sqrt(res1.mse_resid), res2.rmse, rtol=1e-10, atol=0)
        assert_allclose(res1.ssr, res2.rss, rtol=1e-10, atol=0)
        assert_allclose(res1.uncentered_tss, res2.yy, rtol=1e-10, atol=0)
        assert_allclose(res1.centered_tss, res2.yyc, rtol=1e-10, atol=0)
        assert_allclose(res1.ess, res2.mss, rtol=1e-9, atol=0)

        assert_equal(res1.df_model, res2.df_m)
        assert_equal(res1.df_resid, res2.df_r)

        # TODO: llf raise NotImplementedError
        #assert_allclose(res1.llf, res2.ll, rtol=1e-10, atol=0)

    def test_hypothesis(self):
        res1, res2 = self.res1, self.res2
        restriction = np.eye(len(res1.params))
        res_t = res1.t_test(restriction)
        assert_allclose(res_t.tvalue, res1.tvalues, rtol=1e-12, atol=0)
        assert_allclose(res_t.pvalue, res1.pvalues, rtol=1e-12, atol=0)

        res_f = res1.f_test(restriction[:-1])  # without constant
        # TODO res1.fvalue problem, see issue #1104
        assert_allclose(res_f.fvalue, res1.fvalue, rtol=1e-12, atol=0)
        assert_allclose(res_f.pvalue, res1.f_pvalue, rtol=1e-10, atol=0)
        assert_allclose(res_f.fvalue, res2.F, rtol=1e-10, atol=0)
        assert_allclose(res_f.pvalue, res2.Fp, rtol=1e-08, atol=0)

    def test_hausman(self):
        res1, res2 = self.res1, self.res2
        hausm = res1.spec_hausman()
        # hausman uses se2 = ssr / nobs, no df correction
        assert_allclose(hausm[0], res2.hausman['DWH'], rtol=1e-11, atol=0)
        assert_allclose(hausm[1], res2.hausman['DWHp'], rtol=1e-10, atol=1e-25)

    @pytest.mark.smoke
    def test_summary(self):
        res1 = self.res1
        summ = res1.summary()
        # one table row per parameter plus a header line
        assert_equal(len(summ.tables[1]), len(res1.params) + 1)
class TestIV2SLSSt1(CheckIV2SLS):
    # IV2SLS against the ivreg2 (small-sample) reference results.

    @classmethod
    def setup_class(cls):
        exog = exog_st  # with const at end
        start = OLS(endog, exog).fit().params
        nobs, k_instr = instrument.shape

        mod = gmm.IV2SLS(endog, exog, instrument)
        res = mod.fit()
        cls.res1 = res

        from .results_ivreg2_griliches import results_small as results
        cls.res2 = results
# See GH #2720
def test_input_dimensions():
    # IV2SLS must accept 1d/2d arrays, Series and DataFrames for each of
    # endog/exog/instrument and give consistent estimates.
    rs = np.random.RandomState(1234)
    x = rs.randn(200, 2)
    z = rs.randn(200)
    x[:, 0] = np.sqrt(0.5) * x[:, 0] + np.sqrt(0.5) * z
    z = np.column_stack((x[:, [1]], z[:, None]))
    e = np.sqrt(0.5) * rs.randn(200) + np.sqrt(0.5) * x[:, 0]

    y_1d = y = x[:, 0] + x[:, 1] + e
    y_2d = y[:, None]
    y_series = pd.Series(y)
    y_df = pd.DataFrame(y_series)

    x_1d = x[:, 0]
    x_2d = x
    x_df = pd.DataFrame(x)
    x_df_single = x_df.iloc[:, [0]]
    x_series = x_df.iloc[:, 0]

    z_2d = z
    z_series = pd.Series(z[:, 1])
    z_1d = z_series.values
    z_df = pd.DataFrame(z)

    ys = (y_df, y_series, y_2d, y_1d)
    xs = (x_2d, x_1d, x_df_single, x_df, x_series)
    zs = (z_1d, z_2d, z_series, z_df)

    # reference fits for each regressor/instrument dimensionality combination
    res2 = gmm.IV2SLS(y_1d, x_2d, z_2d).fit()
    res1 = gmm.IV2SLS(y_1d, x_1d, z_1d).fit()
    res1_2sintr = gmm.IV2SLS(y_1d, x_1d, z_2d).fit()

    for _y in ys:
        for _x in xs:
            for _z in zs:
                # x_1d/z_1d are reused here as flags: "input is 1-dimensional"
                x_1d = np.size(_x) == _x.shape[0]
                z_1d = np.size(_z) == _z.shape[0]
                # 1d instrument with 2d exog is under-identified, skip
                if z_1d and not x_1d:
                    continue
                res = gmm.IV2SLS(_y, _x, _z).fit()

                if z_1d:
                    assert_allclose(res.params, res1.params)
                elif x_1d and not z_1d:
                    assert_allclose(res.params, res1_2sintr.params)
                else:
                    assert_allclose(res.params, res2.params)
def test_noconstant():
    """IV2SLS without a constant: fvalue is NaN and summary still builds."""
    exog_nc = exog_st[:, :-1]  # drop the constant column at the end
    res = gmm.IV2SLS(endog, exog_nc, instrument).fit()

    assert_equal(res.fvalue, np.nan)
    # smoke test: one summary row per parameter plus the header line
    summ = res.summary()
    assert_equal(len(summ.tables[1]), len(res.params) + 1)
def test_gmm_basic():
    # this currently tests mainly the param names, exog_names
    # see #4340
    cd = np.array([1.5, 1.5, 1.7, 2.2, 2.0, 1.8, 1.8, 2.2, 1.9, 1.6, 1.8, 2.2,
                   2.0, 1.5, 1.1, 1.5, 1.4, 1.7, 1.42, 1.9])
    dcd = np.array([0, 0.2 ,0.5, -0.2, -0.2, 0, 0.4, -0.3, -0.3, 0.2, 0.4,
                    -0.2, -0.5, -0.4, 0.4, -0.1, 0.3, -0.28, 0.48, 0.2])
    inst = np.column_stack((np.ones(len(cd)), cd))

    class GMMbase(gmm.GMM):
        # minimal GMM subclass with mean and variance moment conditions
        def momcond(self, params):
            p0, p1, p2, p3 = params
            endog = self.endog[:, None]
            exog = self.exog
            inst = self.instrument

            mom0 = (endog - p0 - p1 * exog) * inst
            mom1 = ((endog - p0 - p1 * exog)**2 -
                    p2 * (exog**(2 * p3)) / 12) * inst
            g = np.column_stack((mom0, mom1))
            return g

    beta0 = np.array([0.1, 0.1, 0.01, 1])
    res = GMMbase(endog=dcd, exog=cd, instrument=inst, k_moms=4,
                  k_params=4).fit(beta0, optim_args={'disp': 0})
    summ = res.summary()
    assert_equal(len(summ.tables[1]), len(res.params) + 1)
    # default names generated by GMM use the 'p%2d' pattern
    pnames = ['p%2d' % i for i in range(len(res.params))]
    assert_equal(res.model.exog_names, pnames)

    # check set_param_names method
    mod = GMMbase(endog=dcd, exog=cd, instrument=inst, k_moms=4,
                  k_params=4)
    # use arbitrary names
    pnames = ['beta', 'gamma', 'psi', 'phi']
    mod.set_param_names(pnames)
    res1 = mod.fit(beta0, optim_args={'disp': 0})
    assert_equal(res1.model.exog_names, pnames)