# -*- coding: utf-8 -*-
"""
unit test for GAM
Author: Josef Perktold
"""
import os
import numpy as np
from numpy.testing import assert_allclose, assert_equal, assert_
import pandas as pd
import patsy
from statsmodels.discrete.discrete_model import Poisson, Logit, Probit
from statsmodels.genmod.generalized_linear_model import GLM
from statsmodels.genmod.families import family
from statsmodels.sandbox.regression.penalized import TheilGLS
from statsmodels.base._penalized import PenalizedMixin
import statsmodels.base._penalties as smpen
from statsmodels.gam.smooth_basis import (BSplines, CyclicCubicSplines)
from statsmodels.gam.generalized_additive_model import (
GLMGam, GLMGamResults, GLMGamResultsWrapper)
from statsmodels.tools.linalg import matrix_sqrt, transf_constraints
from .results import results_pls, results_mpg_bs, results_mpg_bs_poisson
class PoissonPenalized(PenalizedMixin, Poisson):
pass
class LogitPenalized(PenalizedMixin, Logit):
pass
class ProbitPenalized(PenalizedMixin, Probit):
pass
class GLMPenalized(PenalizedMixin, GLM):
pass
cur_dir = os.path.dirname(os.path.abspath(__file__))
file_path = os.path.join(cur_dir, "results", "motorcycle.csv")
data_mcycle = pd.read_csv(file_path)
file_path = os.path.join(cur_dir, "results", "autos.csv")
df_autos_ = pd.read_csv(file_path)
df_autos = df_autos_[['city_mpg', 'fuel', 'drive', 'weight', 'hp']].dropna()
class CheckGAMMixin(object):
@classmethod
def _init(cls):
# TODO: CyclicCubicSplines raises when using pandas
cc_h = CyclicCubicSplines(np.asarray(data_mcycle['times']), df=[6])
constraints = np.atleast_2d(cc_h.basis.mean(0))
transf = transf_constraints(constraints)
exog = cc_h.basis.dot(transf)
penalty_matrix = transf.T.dot(cc_h.penalty_matrices[0]).dot(transf)
restriction = matrix_sqrt(penalty_matrix)
return exog, penalty_matrix, restriction
def test_params(self):
res1 = self.res1
res2 = self.res2
assert_allclose(res1.params, res2.params, rtol=1e-5)
assert_allclose(np.asarray(res1.cov_params()),
res2.Vp * self.covp_corrfact, rtol=1e-6, atol=1e-9)
assert_allclose(res1.scale, res2.scale * self.covp_corrfact,
rtol=1e-8)
assert_allclose(np.asarray(res1.bse),
res2.se * np.sqrt(self.covp_corrfact),
rtol=1e-6, atol=1e-9)
def test_fitted(self):
res1 = self.res1
res2 = self.res2
assert_allclose(res1.fittedvalues, res2.fitted_values,
rtol=self.rtol_fitted)
class TestTheilPLS5(CheckGAMMixin):
cov_type = 'data-prior'
@classmethod
def setup_class(cls):
exog, penalty_matrix, restriction = cls._init()
endog = data_mcycle['accel']
modp = TheilGLS(endog, exog, r_matrix=restriction)
# scaling of penweith in R mgcv
s_scale_r = 0.02630734
# Theil penweight uses preliminary sigma2_e to scale penweight
sigma_e = 1405.7950179165323
cls.pw = pw = 1 / sigma_e / s_scale_r
cls.res1 = modp.fit(pen_weight=pw, cov_type=cls.cov_type)
cls.res2 = results_pls.pls5
cls.rtol_fitted = 1e-7
cls.covp_corrfact = 0.99786932844203202
def test_cov_robust(self):
res1 = self.res1
res2 = self.res2
pw = res1.penalization_factor
res1 = res1.model.fit(pen_weight=pw, cov_type='sandwich')
assert_allclose(np.asarray(res1.cov_params()),
res2.Ve * self.covp_corrfact, rtol=1e-4)
class TestGLMPenalizedPLS5(CheckGAMMixin):
cov_type = 'nonrobust'
@classmethod
def setup_class(cls):
exog, penalty_matrix, restriction = cls._init()
endog = data_mcycle['accel']
pen = smpen.L2ContraintsPenalty(restriction=restriction)
mod = GLMPenalized(endog, exog, family=family.Gaussian(),
penal=pen)
# scaling of penweight in R mgcv
s_scale_r = 0.02630734
# set pen_weight to correspond to R mgcv example
cls.pw = mod.pen_weight = 1 / s_scale_r / 2
cls.res1 = mod.fit(cov_type=cls.cov_type, method='bfgs', maxiter=100,
disp=0, trim=False, scale='x2')
cls.res2 = results_pls.pls5
cls.rtol_fitted = 1e-5
# edf is currently not available with PenalizedMixin
# need correction for difference in scale denominator
cls.covp_corrfact = 1.0025464444310588
def _test_cov_robust(self):
# TODO: HC0 differs from Theil sandwich, difference is large
res1 = self.res1
res2 = self.res2
pw = res1.model.pen_weight
res1 = res1.model.fit(pen_weight=pw, cov_type='HC0')
assert_allclose(np.asarray(res1.cov_params()),
res2.Ve * self.covp_corrfact, rtol=1e-4)
class TestGAM5Pirls(CheckGAMMixin):
cov_type = 'nonrobust'
@classmethod
def setup_class(cls):
s_scale = 0.0263073404164214
x = data_mcycle['times'].values
endog = data_mcycle['accel']
cc = CyclicCubicSplines(x, df=[6], constraints='center')
gam_cc = GLMGam(endog, smoother=cc, alpha=1 / s_scale / 2)
cls.res1 = gam_cc.fit()
cls.res2 = results_pls.pls5
cls.rtol_fitted = 1e-12
# cls.covp_corrfact = 1.0025464444310588 # without edf
# edf is implemented
cls.covp_corrfact = 1
class TestGAM5Bfgs(CheckGAMMixin):
cov_type = 'nonrobust'
@classmethod
def setup_class(cls):
s_scale = 0.0263073404164214
x = data_mcycle['times'].values
endog = data_mcycle['accel']
cc = CyclicCubicSplines(x, df=[6], constraints='center')
gam_cc = GLMGam(endog, smoother=cc, alpha=1 / s_scale / 2)
cls.res1 = gam_cc.fit(method='bfgs')
cls.res2 = results_pls.pls5
cls.rtol_fitted = 1e-5
# cls.covp_corrfact = 1.0025464444310588 # without edf
# edf is implemented
cls.covp_corrfact = 1
def test_predict(self):
res1 = self.res1
res2 = self.res2
predicted = res1.predict(None, res1.model.smoother.x[2:4])
assert_allclose(predicted, res1.fittedvalues[2:4],
rtol=1e-13)
assert_allclose(predicted, res2.fitted_values[2:4],
rtol=self.rtol_fitted)
class TestGAM6Pirls(object):
@classmethod
def setup_class(cls):
s_scale = 0.0263073404164214
cc = CyclicCubicSplines(data_mcycle['times'].values, df=[6])
gam_cc = GLMGam(data_mcycle['accel'], smoother=cc,
alpha=1 / s_scale / 2)
cls.res1 = gam_cc.fit()
def test_fitted(self):
res1 = self.res1
pred = res1.get_prediction()
self.rtol_fitted = 1e-7
pls6_fittedvalues = np.array([
2.45008146537851, 3.14145063965465, 5.24130119353225,
6.63476330674223, 7.99704341866374, 13.9351103077006,
14.5508371638833, 14.785647621276, 15.1176070735895,
14.8053514054347, 13.790412967255, 13.790412967255,
11.2997845518655, 9.51681958051473, 8.4811626302547])
assert_allclose(res1.fittedvalues[:15], pls6_fittedvalues,
rtol=self.rtol_fitted)
assert_allclose(pred.predicted_mean[:15], pls6_fittedvalues,
rtol=self.rtol_fitted)
predicted = res1.predict(None, res1.model.smoother.x[2:4])
assert_allclose(predicted, pls6_fittedvalues[2:4],
rtol=self.rtol_fitted)
class TestGAM6Bfgs(object):
@classmethod
def setup_class(cls):
s_scale = 0.0263073404164214
cc = CyclicCubicSplines(data_mcycle['times'].values, df=[6])
gam_cc = GLMGam(data_mcycle['accel'], smoother=cc,
alpha=1 / s_scale / 2)
cls.res1 = gam_cc.fit(method='bfgs')
def test_fitted(self):
res1 = self.res1
pred = res1.get_prediction()
self.rtol_fitted = 1e-5
pls6_fittedvalues = np.array([
2.45008146537851, 3.14145063965465, 5.24130119353225,
6.63476330674223, 7.99704341866374, 13.9351103077006,
14.5508371638833, 14.785647621276, 15.1176070735895,
14.8053514054347, 13.790412967255, 13.790412967255,
11.2997845518655, 9.51681958051473, 8.4811626302547])
assert_allclose(res1.fittedvalues[:15], pls6_fittedvalues,
rtol=self.rtol_fitted)
assert_allclose(pred.predicted_mean[:15], pls6_fittedvalues,
rtol=self.rtol_fitted)
class TestGAM6Bfgs0(object):
@classmethod
def setup_class(cls):
s_scale = 0.0263073404164214 # noqa: F841
cc = CyclicCubicSplines(data_mcycle['times'].values, df=[6])
gam_cc = GLMGam(data_mcycle['accel'], smoother=cc,
alpha=0)
cls.res1 = gam_cc.fit(method='bfgs')
def test_fitted(self):
res1 = self.res1
pred = res1.get_prediction()
self.rtol_fitted = 1e-5
pls6_fittedvalues = np.array([
2.63203377595747, 3.41285892739456, 5.78168657308338,
7.35344779586831, 8.89178704316853, 15.7035642157176,
16.4510219628328, 16.7474993878412, 17.3397025587698,
17.1062522298643, 16.1786066072489, 16.1786066072489,
13.7402485937614, 11.9531909618517, 10.9073964111009])
assert_allclose(res1.fittedvalues[:15], pls6_fittedvalues,
rtol=self.rtol_fitted)
assert_allclose(pred.predicted_mean[:15], pls6_fittedvalues,
rtol=self.rtol_fitted)
pls6_fittedvalues = np.array([
2.45008146537851, 3.14145063965465, 5.24130119353225,
6.63476330674223, 7.99704341866374, 13.9351103077006,
14.5508371638833, 14.785647621276, 15.1176070735895,
14.8053514054347, 13.790412967255, 13.790412967255,
11.2997845518655, 9.51681958051473, 8.4811626302547])
pls6_exog = np.array([
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -0.334312615555276, -0.302733562622373,
-0.200049479196403, -0.12607681525989, -0.0487229716135211,
0.397628373646056, 0.475396222437879, 0.51311526571058,
0.685638355361239, 0.745083051531164, -0.633518318499726,
-0.634362488928233, -0.635472088268483, -0.634802453890957,
-0.632796625534419, -0.589886140629009, -0.574834708734556,
-0.566315983948608, -0.51289784236512, -0.486061743835595,
-0.353449234316442, -0.348107090921062, -0.328814083307981,
-0.313617048982477, -0.296913301955505, -0.191949693921079,
-0.173001127145111, -0.163813487426548, -0.12229019995063,
-0.108463798212062, -0.33613551740577, -0.327911471033406,
-0.303620832999443, -0.287786799373968, -0.272279566127816,
-0.194325957984873, -0.18175817334823, -0.175688807660186,
-0.147654475500976, -0.137597948224942, -0.406564043706154,
-0.409594429953082, -0.412391645561287, -0.409453786864986,
-0.403086590828732, -0.322579243114146, -0.302545882788086,
-0.29221622484174, -0.239207291311699, -0.218194346676734
]).reshape(10, 6, order='F')
class TestGAM6ExogBfgs(object):
@classmethod
def setup_class(cls):
s_scale = 0.0263073404164214
nobs = data_mcycle['times'].shape[0]
cc = CyclicCubicSplines(data_mcycle['times'].values, df=[6],
constraints='center')
gam_cc = GLMGam(data_mcycle['accel'], np.ones(nobs),
smoother=cc, alpha=1 / s_scale / 2)
cls.res1 = gam_cc.fit(method='bfgs')
def test_fitted(self):
res1 = self.res1
pred = res1.get_prediction()
self.rtol_fitted = 1e-5
assert_allclose(res1.fittedvalues[:15], pls6_fittedvalues,
rtol=self.rtol_fitted)
assert_allclose(pred.predicted_mean[:15], pls6_fittedvalues,
rtol=self.rtol_fitted)
def test_exog(self):
exog = self.res1.model.exog
Loading ...