import warnings
import numpy as np
import pandas as pd
from statsmodels.genmod.generalized_linear_model import GLM
from statsmodels.genmod import families
from statsmodels.genmod.families import links
from statsmodels.genmod.generalized_estimating_equations import GEE
from statsmodels.genmod.cov_struct import Independence
from numpy.testing import assert_allclose
class CheckGEEGLM(object):
def test_basic(self):
res1 = self.result1
res2 = self.result2
assert_allclose(res1.params.values, res2.params.values,
rtol=1e-6, atol=1e-10)
def test_resid(self):
res1 = self.result1
res2 = self.result2
assert_allclose(res1.resid_response, res2.resid_response,
rtol=1e-6, atol=1e-10)
assert_allclose(res1.resid_pearson, res2.resid_pearson,
rtol=1e-6, atol=1e-10)
assert_allclose(res1.resid_deviance, res2.resid_deviance,
rtol=1e-6, atol=1e-10)
with warnings.catch_warnings():
warnings.simplefilter("ignore", category=FutureWarning)
assert_allclose(res1.resid_anscombe, res2.resid_anscombe,
rtol=1e-6, atol=1e-10)
assert_allclose(res1.resid_working, res2.resid_working,
rtol=1e-6, atol=1e-10)
#def test_compare_logit(self):
class TestCompareLogit(CheckGEEGLM):
@classmethod
def setup_class(cls):
vs = Independence()
family = families.Binomial()
np.random.seed(987126)
Y = 1 * (np.random.normal(size=100) < 0)
X1 = np.random.normal(size=100)
X2 = np.random.normal(size=100)
X3 = np.random.normal(size=100)
groups = np.random.randint(0, 4, size=100)
D = pd.DataFrame({"Y": Y, "X1": X1, "X2": X2, "X3": X3})
mod1 = GEE.from_formula("Y ~ X1 + X2 + X3", groups, D,
family=family, cov_struct=vs)
cls.result1 = mod1.fit()
mod2 = GLM.from_formula("Y ~ X1 + X2 + X3", data=D, family=family)
cls.result2 = mod2.fit(disp=False)
class TestComparePoisson(CheckGEEGLM):
@classmethod
def setup_class(cls):
vs = Independence()
family = families.Poisson()
np.random.seed(987126)
Y = np.exp(1 + np.random.normal(size=100))
X1 = np.random.normal(size=100)
X2 = np.random.normal(size=100)
X3 = np.random.normal(size=100)
groups = np.random.randint(0, 4, size=100)
D = pd.DataFrame({"Y": Y, "X1": X1, "X2": X2, "X3": X3})
mod1 = GEE.from_formula("Y ~ X1 + X2 + X3", groups, D,
family=family, cov_struct=vs)
cls.result1 = mod1.fit()
mod2 = GLM.from_formula("Y ~ X1 + X2 + X3", data=D, family=family)
cls.result2 = mod2.fit(disp=False)
class TestCompareGaussian(CheckGEEGLM):
@classmethod
def setup_class(cls):
vs = Independence()
family = families.Gaussian()
np.random.seed(987126)
Y = np.random.normal(size=100)
X1 = np.random.normal(size=100)
X2 = np.random.normal(size=100)
X3 = np.random.normal(size=100)
groups = np.kron(np.arange(20), np.ones(5))
D = pd.DataFrame({"Y": Y, "X1": X1, "X2": X2, "X3": X3})
md = GEE.from_formula("Y ~ X1 + X2 + X3", groups, D,
family=family, cov_struct=vs)
cls.result1 = md.fit()
cls.result2 = GLM.from_formula("Y ~ X1 + X2 + X3", data=D).fit()
class TestCompareGamma(CheckGEEGLM):
@classmethod
def setup_class(cls):
# adjusted for Gamma, not in test_gee.py
vs = Independence()
family = families.Gamma(link=links.log())
np.random.seed(987126)
#Y = np.random.normal(size=100)**2
Y = np.exp(0.1 + np.random.normal(size=100)) # log-normal
X1 = np.random.normal(size=100)
X2 = np.random.normal(size=100)
X3 = np.random.normal(size=100)
groups = np.random.randint(0, 4, size=100)
D = pd.DataFrame({"Y": Y, "X1": X1, "X2": X2, "X3": X3})
mod1 = GEE.from_formula("Y ~ X1 + X2 + X3", groups, D,
family=family, cov_struct=vs)
cls.result1 = mod1.fit()
mod2 = GLM.from_formula("Y ~ X1 + X2 + X3", data=D, family=family)
cls.result2 = mod2.fit(disp=False)