import numpy as np
from statsmodels.genmod.bayes_mixed_glm import (BinomialBayesMixedGLM,
PoissonBayesMixedGLM)
import pandas as pd
from scipy import sparse
from numpy.testing import assert_allclose, assert_equal
from scipy.optimize import approx_fprime
def gen_simple_logit(nc, cs, s):
np.random.seed(3799)
exog_vc = np.kron(np.eye(nc), np.ones((cs, 1)))
exog_fe = np.random.normal(size=(nc * cs, 2))
vc = s * np.random.normal(size=nc)
lp = np.dot(exog_fe, np.r_[1, -1]) + np.dot(exog_vc, vc)
pr = 1 / (1 + np.exp(-lp))
y = 1 * (np.random.uniform(size=nc * cs) < pr)
ident = np.zeros(nc, dtype=np.int)
return y, exog_fe, exog_vc, ident
def gen_simple_poisson(nc, cs, s):
np.random.seed(3799)
exog_vc = np.kron(np.eye(nc), np.ones((cs, 1)))
exog_fe = np.random.normal(size=(nc * cs, 2))
vc = s * np.random.normal(size=nc)
lp = np.dot(exog_fe, np.r_[0.1, -0.1]) + np.dot(exog_vc, vc)
r = np.exp(lp)
y = np.random.poisson(r)
ident = np.zeros(nc, dtype=np.int)
return y, exog_fe, exog_vc, ident
def gen_crossed_logit(nc, cs, s1, s2):
np.random.seed(3799)
a = np.kron(np.eye(nc), np.ones((cs, 1)))
b = np.kron(np.ones((cs, 1)), np.eye(nc))
exog_vc = np.concatenate((a, b), axis=1)
exog_fe = np.random.normal(size=(nc * cs, 1))
vc = s1 * np.random.normal(size=2 * nc)
vc[nc:] *= s2 / s1
lp = np.dot(exog_fe, np.r_[-0.5]) + np.dot(exog_vc, vc)
pr = 1 / (1 + np.exp(-lp))
y = 1 * (np.random.uniform(size=nc * cs) < pr)
ident = np.zeros(2 * nc, dtype=np.int)
ident[nc:] = 1
return y, exog_fe, exog_vc, ident
def gen_crossed_poisson(nc, cs, s1, s2):
np.random.seed(3799)
a = np.kron(np.eye(nc), np.ones((cs, 1)))
b = np.kron(np.ones((cs, 1)), np.eye(nc))
exog_vc = np.concatenate((a, b), axis=1)
exog_fe = np.random.normal(size=(nc * cs, 1))
vc = s1 * np.random.normal(size=2 * nc)
vc[nc:] *= s2 / s1
lp = np.dot(exog_fe, np.r_[-0.5]) + np.dot(exog_vc, vc)
r = np.exp(lp)
y = np.random.poisson(r)
ident = np.zeros(2 * nc, dtype=np.int)
ident[nc:] = 1
return y, exog_fe, exog_vc, ident
def gen_crossed_logit_pandas(nc, cs, s1, s2):
np.random.seed(3799)
a = np.kron(np.arange(nc), np.ones(cs))
b = np.kron(np.ones(cs), np.arange(nc))
fe = np.ones(nc * cs)
vc = np.zeros(nc * cs)
for i in np.unique(a):
ii = np.flatnonzero(a == i)
vc[ii] += s1 * np.random.normal()
for i in np.unique(b):
ii = np.flatnonzero(b == i)
vc[ii] += s2 * np.random.normal()
lp = -0.5 * fe + vc
pr = 1 / (1 + np.exp(-lp))
y = 1 * (np.random.uniform(size=nc * cs) < pr)
ident = np.zeros(2 * nc, dtype=np.int)
ident[nc:] = 1
df = pd.DataFrame({"fe": fe, "a": a, "b": b, "y": y})
return df
def test_simple_logit_map():
y, exog_fe, exog_vc, ident = gen_simple_logit(10, 10, 2)
exog_vc = sparse.csr_matrix(exog_vc)
glmm = BinomialBayesMixedGLM(y, exog_fe, exog_vc, ident, vcp_p=0.5)
rslt = glmm.fit_map()
assert_allclose(
glmm.logposterior_grad(rslt.params),
np.zeros_like(rslt.params),
atol=1e-3)
# Test the predict method
for linear in False, True:
for exog in None, exog_fe:
pr1 = rslt.predict(linear=linear, exog=exog)
pr2 = glmm.predict(rslt.params, linear=linear, exog=exog)
assert_allclose(pr1, pr2)
if not linear:
assert_equal(pr1.min() >= 0, True)
assert_equal(pr1.max() <= 1, True)
def test_simple_poisson_map():
y, exog_fe, exog_vc, ident = gen_simple_poisson(10, 10, 0.2)
exog_vc = sparse.csr_matrix(exog_vc)
glmm1 = PoissonBayesMixedGLM(y, exog_fe, exog_vc, ident, vcp_p=0.5)
rslt1 = glmm1.fit_map()
assert_allclose(
glmm1.logposterior_grad(rslt1.params),
np.zeros_like(rslt1.params),
atol=1e-3)
# This should give the same answer as above
glmm2 = PoissonBayesMixedGLM(y, exog_fe, exog_vc, ident, vcp_p=0.5)
rslt2 = glmm2.fit_map()
assert_allclose(rslt1.params, rslt2.params, atol=1e-4)
# Test the predict method
for linear in False, True:
for exog in None, exog_fe:
pr1 = rslt1.predict(linear=linear, exog=exog)
pr2 = rslt2.predict(linear=linear, exog=exog)
pr3 = glmm1.predict(rslt1.params, linear=linear, exog=exog)
pr4 = glmm2.predict(rslt2.params, linear=linear, exog=exog)
assert_allclose(pr1, pr2, rtol=1e-5)
assert_allclose(pr2, pr3, rtol=1e-5)
assert_allclose(pr3, pr4, rtol=1e-5)
if not linear:
assert_equal(pr1.min() >= 0, True)
assert_equal(pr2.min() >= 0, True)
assert_equal(pr3.min() >= 0, True)
# Check dimensions and PSD status of cov_params
for rslt in rslt1, rslt2:
cp = rslt.cov_params()
p = len(rslt.params)
assert_equal(cp.shape, np.r_[p, p])
np.linalg.cholesky(cp)
def test_crossed_logit_map():
y, exog_fe, exog_vc, ident = gen_crossed_logit(10, 10, 1, 2)
exog_vc = sparse.csr_matrix(exog_vc)
glmm = BinomialBayesMixedGLM(y, exog_fe, exog_vc, ident, vcp_p=0.5)
rslt = glmm.fit_map()
assert_allclose(
glmm.logposterior_grad(rslt.params),
np.zeros_like(rslt.params),
atol=1e-4)
# Check dimensions and PSD status of cov_params
cp = rslt.cov_params()
p = len(rslt.params)
assert_equal(cp.shape, np.r_[p, p])
np.linalg.cholesky(cp)
def test_crossed_poisson_map():
y, exog_fe, exog_vc, ident = gen_crossed_poisson(10, 10, 1, 1)
exog_vc = sparse.csr_matrix(exog_vc)
glmm = PoissonBayesMixedGLM(y, exog_fe, exog_vc, ident, vcp_p=0.5)
rslt = glmm.fit_map()
assert_allclose(
glmm.logposterior_grad(rslt.params),
np.zeros_like(rslt.params),
atol=1e-4)
# Check dimensions and PSD status of cov_params
cp = rslt.cov_params()
p = len(rslt.params)
assert_equal(cp.shape, np.r_[p, p])
np.linalg.cholesky(cp)
def test_logit_map_crossed_formula():
data = gen_crossed_logit_pandas(10, 10, 1, 0.5)
fml = "y ~ fe"
fml_vc = {"a": "0 + C(a)", "b": "0 + C(b)"}
glmm = BinomialBayesMixedGLM.from_formula(fml, fml_vc, data, vcp_p=0.5)
rslt = glmm.fit_map()
assert_allclose(
glmm.logposterior_grad(rslt.params),
np.zeros_like(rslt.params),
atol=1e-4)
rslt.summary()
r = rslt.random_effects("a")
assert_allclose(
r.iloc[0, :].values, np.r_[-0.02004904, 0.094014], atol=1e-4)
# Check dimensions and PSD status of cov_params
cm = rslt.cov_params()
p = rslt.params.shape[0]
assert_equal(list(cm.shape), [p, p])
np.linalg.cholesky(cm)
def test_elbo_grad():
for f in range(2):
for j in range(2):
if f == 0:
if j == 0:
y, exog_fe, exog_vc, ident = gen_simple_logit(10, 10, 2)
else:
y, exog_fe, exog_vc, ident = gen_crossed_logit(
10, 10, 1, 2)
elif f == 1:
if j == 0:
y, exog_fe, exog_vc, ident = gen_simple_poisson(
10, 10, 0.5)
else:
y, exog_fe, exog_vc, ident = gen_crossed_poisson(
10, 10, 1, 0.5)
exog_vc = sparse.csr_matrix(exog_vc)
if f == 0:
glmm1 = BinomialBayesMixedGLM(
y, exog_fe, exog_vc, ident, vcp_p=0.5)
else:
glmm1 = PoissonBayesMixedGLM(
y, exog_fe, exog_vc, ident, vcp_p=0.5)
rslt1 = glmm1.fit_map()
for k in range(3):
if k == 0:
vb_mean = rslt1.params
vb_sd = np.ones_like(vb_mean)
elif k == 1:
vb_mean = np.zeros(len(vb_mean))
vb_sd = np.ones_like(vb_mean)
else:
vb_mean = np.random.normal(size=len(vb_mean))
vb_sd = np.random.uniform(1, 2, size=len(vb_mean))
mean_grad, sd_grad = glmm1.vb_elbo_grad(vb_mean, vb_sd)
def elbo(vec):
n = len(vec) // 2
return glmm1.vb_elbo(vec[:n], vec[n:])
x = np.concatenate((vb_mean, vb_sd))
g1 = approx_fprime(x, elbo, 1e-5)
n = len(x) // 2
mean_grad_n = g1[:n]
sd_grad_n = g1[n:]
assert_allclose(mean_grad, mean_grad_n, atol=1e-2, rtol=1e-2)
assert_allclose(sd_grad, sd_grad_n, atol=1e-2, rtol=1e-2)
def test_simple_logit_vb():
y, exog_fe, exog_vc, ident = gen_simple_logit(10, 10, 0)
exog_vc = sparse.csr_matrix(exog_vc)
glmm1 = BinomialBayesMixedGLM(
y, exog_fe, exog_vc, ident, vcp_p=0.5, fe_p=0.5)
rslt1 = glmm1.fit_map()
glmm2 = BinomialBayesMixedGLM(
y, exog_fe, exog_vc, ident, vcp_p=0.5, fe_p=0.5)
rslt2 = glmm2.fit_vb(rslt1.params)
rslt1.summary()
rslt2.summary()
assert_allclose(
rslt1.params[0:5],
np.r_[0.75330405, -0.71643228, -2.49091288, -0.00959806, 0.00450254],
rtol=1e-4,
atol=1e-4)
assert_allclose(
rslt2.params[0:5],
np.r_[0.79338836, -0.7599833, -0.64149356, -0.24772884, 0.10775366],
rtol=1e-4,
atol=1e-4)
for rslt in rslt1, rslt2:
cp = rslt.cov_params()
p = len(rslt.params)
if rslt is rslt1:
assert_equal(cp.shape, np.r_[p, p])
np.linalg.cholesky(cp)
else:
assert_equal(cp.shape, np.r_[p,])
assert_equal(cp > 0, True*np.ones(p))
def test_simple_poisson_vb():
y, exog_fe, exog_vc, ident = gen_simple_poisson(10, 10, 1)
exog_vc = sparse.csr_matrix(exog_vc)
glmm1 = PoissonBayesMixedGLM(y, exog_fe, exog_vc, ident, vcp_p=0.5)
rslt1 = glmm1.fit_map()
glmm2 = PoissonBayesMixedGLM(y, exog_fe, exog_vc, ident, vcp_p=0.5)
rslt2 = glmm2.fit_vb(rslt1.params)
rslt1.summary()
rslt2.summary()
Loading ...