# Package: statsmodels, version 0.10.2 (web-page navigation chrome removed).
from statsmodels.sandbox.predict_functional import predict_functional
import numpy as np
import pandas as pd
import pytest
import statsmodels.api as sm
# If true, the output is written to a multi-page pdf file.
pdf_output = False

try:
    import matplotlib.pyplot as plt
except ImportError:
    # matplotlib is optional here; tests that actually need it are guarded
    # by the @pytest.mark.matplotlib marker, so a missing install is fine.
    pass
def pctl(q):
    """Return a summary function computing the ``q``-th quantile.

    ``q`` is a fraction in [0, 1]; the returned callable maps an array
    to its ``100 * q`` percentile via :func:`numpy.percentile`.
    """
    def _quantile(arr):
        return np.percentile(arr, 100 * q)
    return _quantile
class TestPredFunc(object):
    """Smoke/visual tests for ``predict_functional``.

    Each test fits a model, computes functional predictions along a single
    focus variable (holding the other covariates at fixed values or summary
    statistics), and plots the estimates and confidence bands.  When the
    module-level ``pdf_output`` flag is true, the figures are collected into
    a multi-page PDF for manual inspection; otherwise the ``close_figures``
    fixture disposes of them.
    """

    @classmethod
    def setup_class(cls):
        # Open the multi-page PDF only when output has been requested.
        if pdf_output:
            from matplotlib.backends.backend_pdf import PdfPages
            cls.pdf = PdfPages("predict_functional.pdf")

    @classmethod
    def teardown_class(cls):
        if pdf_output:
            cls.pdf.close()

    def close_or_save(self, fig):
        # Append the figure to the PDF when output is requested.
        if pdf_output:
            self.pdf.savefig(fig)

    @pytest.mark.matplotlib
    def test_formula(self, close_figures):
        np.random.seed(542)
        n = 500
        x1 = np.random.normal(size=n)
        x2 = np.random.normal(size=n)
        x3 = np.random.normal(size=n)
        x4 = np.random.randint(0, 5, size=n)
        x4 = np.asarray(["ABCDE"[i] for i in x4])
        x5 = np.random.normal(size=n)

        # Data-generating model includes a quadratic, a categorical shift
        # and an interaction, so the formula below is non-trivially fit.
        y = 0.3*x2**2 + (x4 == "B") + 0.1*(x4 == "B")*x2**2 + x5 + np.random.normal(size=n)

        df = pd.DataFrame({"y": y, "x1": x1, "x2": x2, "x3": x3,
                           "x4": x4, "x5": x5})

        fml = "y ~ x1 + bs(x2, df=4) + x3 + x2*x3 + I(x1**2) + C(x4) + C(x4)*bs(x2, df=4) + x5"
        model = sm.OLS.from_formula(fml, data=df)
        result = model.fit()

        # Non-focus variables are held at summary statistics (summaries)
        # or explicit values (values) while x2 varies.
        summaries = {"x1": np.mean, "x3": pctl(0.75), "x5": np.mean}

        values = {"x4": "B"}
        pr1, ci1, fvals1 = predict_functional(result, "x2", summaries, values)

        values = {"x4": "C"}
        pr2, ci2, fvals2 = predict_functional(result, "x2", summaries, values)

        plt.clf()
        fig = plt.figure()
        ax = plt.axes([0.1, 0.1, 0.7, 0.8])
        plt.plot(fvals1, pr1, '-', label='x4=B')
        plt.plot(fvals2, pr2, '-', label='x4=C')
        ha, lb = ax.get_legend_handles_labels()
        plt.figlegend(ha, lb, "center right")
        plt.xlabel("Focus variable", size=15)
        plt.ylabel("Fitted mean", size=15)
        plt.title("Linear model prediction")
        self.close_or_save(fig)

        # Same curves, with pointwise confidence bands.
        plt.clf()
        fig = plt.figure()
        ax = plt.axes([0.1, 0.1, 0.7, 0.8])
        plt.plot(fvals1, pr1, '-', label='x4=B')
        plt.fill_between(fvals1, ci1[:, 0], ci1[:, 1], color='grey')
        plt.plot(fvals2, pr2, '-', label='x4=C')
        plt.fill_between(fvals2, ci2[:, 0], ci2[:, 1], color='grey')
        ha, lb = ax.get_legend_handles_labels()
        plt.figlegend(ha, lb, "center right")
        plt.xlabel("Focus variable", size=15)
        plt.ylabel("Fitted mean", size=15)
        plt.title("Linear model prediction")
        self.close_or_save(fig)

    @pytest.mark.matplotlib
    def test_lm_contrast(self, close_figures):
        np.random.seed(542)
        n = 200
        x1 = np.random.normal(size=n)
        x2 = np.random.normal(size=n)
        x3 = np.random.normal(size=n)
        y = x1 + 2*x2 + x3 - x1*x2 + x2*x3 + np.random.normal(size=n)
        df = pd.DataFrame({"y": y, "x1": x1, "x2": x2, "x3": x3})

        fml = "y ~ x1 + x2 + x3 + x1*x2 + x2*x3"
        model = sm.OLS.from_formula(fml, data=df)
        result = model.fit()

        # Contrast between two covariate configurations, as a function of x1.
        values = {"x2": 1, "x3": 1}   # y = 4
        values2 = {"x2": 0, "x3": 0}  # y = x1
        pr, cb, fvals = predict_functional(result, "x1", values=values,
                                           values2=values2, ci_method='scheffe')

        plt.clf()
        fig = plt.figure()
        ax = plt.axes([0.1, 0.1, 0.67, 0.8])
        plt.plot(fvals, pr, '-', label="Estimate", color='orange', lw=4)
        plt.plot(fvals, 4 - fvals, '-', label="Truth", color='lime', lw=4)
        plt.fill_between(fvals, cb[:, 0], cb[:, 1], color='grey')
        ha, lb = ax.get_legend_handles_labels()
        leg = plt.figlegend(ha, lb, "center right")
        leg.draw_frame(False)
        plt.xlabel("Focus variable", size=15)
        plt.ylabel("Mean contrast", size=15)
        plt.title("Linear model contrast")
        self.close_or_save(fig)

    @pytest.mark.matplotlib
    def test_glm_formula_contrast(self, close_figures):
        np.random.seed(542)
        n = 50
        x1 = np.random.normal(size=n)
        x2 = np.random.normal(size=n)
        x3 = np.random.normal(size=n)
        mn = 5 + 0.1*x1 + 0.1*x2 + 0.1*x3 - 0.1*x1*x2
        y = np.random.poisson(np.exp(mn), size=len(mn))
        df = pd.DataFrame({"y": y, "x1": x1, "x2": x2, "x3": x3})

        fml = "y ~ x1 + x2 + x3 + x1*x2"
        model = sm.GLM.from_formula(fml, data=df, family=sm.families.Poisson())
        result = model.fit()

        # Contrast on the linear-predictor scale, with a simultaneous band.
        values = {"x2": 1, "x3": 1}   # y = 5.2
        values2 = {"x2": 0, "x3": 0}  # y = 5 + 0.1*x1
        pr, cb, fvals = predict_functional(result, "x1", values=values,
                                           values2=values2, ci_method='simultaneous')

        plt.clf()
        fig = plt.figure()
        ax = plt.axes([0.1, 0.1, 0.67, 0.8])
        plt.plot(fvals, pr, '-', label="Estimate", color='orange', lw=4)
        plt.plot(fvals, 0.2 - 0.1*fvals, '-', label="Truth", color='lime', lw=4)
        plt.fill_between(fvals, cb[:, 0], cb[:, 1], color='grey')
        ha, lb = ax.get_legend_handles_labels()
        leg = plt.figlegend(ha, lb, "center right")
        leg.draw_frame(False)
        plt.xlabel("Focus variable", size=15)
        plt.ylabel("Linear predictor contrast", size=15)
        plt.title("Poisson regression contrast")
        self.close_or_save(fig)

    @pytest.mark.matplotlib
    def test_scb(self, close_figures):
        np.random.seed(473)
        n = 100
        x = np.random.normal(size=(n, 4))
        x[:, 0] = 1

        # Compare pointwise vs. simultaneous confidence bands against the
        # known truth for three GLM families with null (noise-only) data.
        for fam_name in "poisson", "binomial", "gaussian":

            if fam_name == "poisson":
                y = np.random.poisson(20, size=n)
                fam = sm.families.Poisson()
                true_mean = 20
                true_lp = np.log(20)
            elif fam_name == "binomial":
                y = 1 * (np.random.uniform(size=n) < 0.5)
                fam = sm.families.Binomial()
                true_mean = 0.5
                true_lp = 0
            elif fam_name == "gaussian":
                y = np.random.normal(size=n)
                fam = sm.families.Gaussian()
                true_mean = 0
                true_lp = 0

            model = sm.GLM(y, x, family=fam)
            result = model.fit()

            # CB is for linear predictor or mean response
            for linear in False, True:

                true = true_lp if linear else true_mean

                values = {'const': 1, "x2": 0}
                summaries = {"x3": np.mean}
                pred1, cb1, fvals1 = predict_functional(result, "x1",
                                                        values=values, summaries=summaries, linear=linear)
                pred2, cb2, fvals2 = predict_functional(result, "x1",
                                                        values=values, summaries=summaries,
                                                        ci_method='simultaneous', linear=linear)

                plt.clf()
                fig = plt.figure()
                ax = plt.axes([0.1, 0.1, 0.58, 0.8])
                plt.plot(fvals1, pred1, '-', color='black', label='Estimate')
                plt.plot(fvals1, true * np.ones(len(pred1)), '-', color='purple',
                         label='Truth')
                plt.plot(fvals1, cb1[:, 0], color='blue', label='Pointwise CB')
                plt.plot(fvals1, cb1[:, 1], color='blue')
                plt.plot(fvals2, cb2[:, 0], color='green', label='Simultaneous CB')
                plt.plot(fvals2, cb2[:, 1], color='green')
                ha, lb = ax.get_legend_handles_labels()
                leg = plt.figlegend(ha, lb, "center right")
                leg.draw_frame(False)
                plt.xlabel("Focus variable", size=15)
                if linear:
                    plt.ylabel("Linear predictor", size=15)
                else:
                    plt.ylabel("Fitted mean", size=15)
                plt.title("%s family prediction" % fam_name.capitalize())

                self.close_or_save(fig)

    @pytest.mark.matplotlib
    def test_glm_formula(self, close_figures):
        np.random.seed(542)
        n = 500
        x1 = np.random.normal(size=n)
        x2 = np.random.normal(size=n)
        x3 = np.random.randint(0, 3, size=n)
        x3 = np.asarray(["ABC"[i] for i in x3])
        lin_pred = -1 + 0.5*x1**2 + (x3 == "B")
        prob = 1 / (1 + np.exp(-lin_pred))
        y = 1 * (np.random.uniform(size=n) < prob)
        df = pd.DataFrame({"y": y, "x1": x1, "x2": x2, "x3": x3})

        fml = "y ~ x1 + I(x1**2) + x2 + C(x3)"
        model = sm.GLM.from_formula(fml, family=sm.families.Binomial(), data=df)
        result = model.fit()

        summaries = {"x2": np.mean}

        # Exercise both the mean-response and linear-predictor scales, and
        # compare the fitted curves to the exact data-generating functions.
        for linear in False, True:

            values = {"x3": "B"}
            pr1, ci1, fvals1 = predict_functional(result, "x1", summaries, values, linear=linear)

            values = {"x3": "C"}
            pr2, ci2, fvals2 = predict_functional(result, "x1", summaries, values, linear=linear)

            exact1 = -1 + 0.5*fvals1**2 + 1
            exact2 = -1 + 0.5*fvals2**2

            if not linear:
                exact1 = 1 / (1 + np.exp(-exact1))
                exact2 = 1 / (1 + np.exp(-exact2))

            plt.clf()
            fig = plt.figure()
            ax = plt.axes([0.1, 0.1, 0.7, 0.8])
            plt.plot(fvals1, pr1, '-', label='x3=B')
            plt.plot(fvals2, pr2, '-', label='x3=C')
            plt.plot(fvals1, exact1, '-', label='x3=B (exact)')
            plt.plot(fvals2, exact2, '-', label='x3=C (exact)')
            ha, lb = ax.get_legend_handles_labels()
            plt.figlegend(ha, lb, "center right")
            plt.xlabel("Focus variable", size=15)
            if linear:
                plt.ylabel("Fitted linear predictor", size=15)
            else:
                plt.ylabel("Fitted probability", size=15)
            plt.title("Binomial GLM prediction")
            self.close_or_save(fig)

            plt.clf()
            fig = plt.figure()
            ax = plt.axes([0.1, 0.1, 0.7, 0.8])
            plt.plot(fvals1, pr1, '-', label='x3=B', color='orange')
            plt.fill_between(fvals1, ci1[:, 0], ci1[:, 1], color='grey')
            plt.plot(fvals2, pr2, '-', label='x3=C', color='lime')
            plt.fill_between(fvals2, ci2[:, 0], ci2[:, 1], color='grey')
            ha, lb = ax.get_legend_handles_labels()
            plt.figlegend(ha, lb, "center right")
            plt.xlabel("Focus variable", size=15)
            if linear:
                plt.ylabel("Fitted linear predictor", size=15)
            else:
                plt.ylabel("Fitted probability", size=15)
            plt.title("Binomial GLM prediction")
            self.close_or_save(fig)

    @pytest.mark.matplotlib
    def test_noformula_prediction(self, close_figures):
        np.random.seed(6434)
        n = 200
        x1 = np.random.normal(size=n)
        x2 = np.random.normal(size=n)
        x3 = np.random.normal(size=n)
        y = x1 - x2 + np.random.normal(size=n)

        exog = np.vstack((x1, x2, x3)).T

        model = sm.OLS(y, exog)
        result = model.fit()

        summaries = {"x3": pctl(0.75)}
        values = {"x2": 1}
        pr1, ci1, fvals1 = predict_functional(result, "x1", summaries, values)

        values = {"x2": -1}
        pr2, ci2, fvals2 = predict_functional(result, "x1", summaries, values)

        plt.clf()
        fig = plt.figure()
        ax = plt.axes([0.1, 0.1, 0.7, 0.8])
        plt.plot(fvals1, pr1, '-', label='x2=1', lw=4, alpha=0.6, color='orange')
        plt.plot(fvals2, pr2, '-', label='x2=-1', lw=4, alpha=0.6, color='lime')
        ha, lb = ax.get_legend_handles_labels()
        leg = plt.figlegend(ha, lb, "center right")
        leg.draw_frame(False)
        plt.xlabel("Focus variable", size=15)
        plt.ylabel("Fitted mean", size=15)
        plt.title("Linear model prediction")
        self.close_or_save(fig)

        plt.clf()
        fig = plt.figure()
        ax = plt.axes([0.1, 0.1, 0.7, 0.8])
        plt.plot(fvals1, pr1, '-', label='x2=1', lw=4, alpha=0.6, color='orange')
        plt.fill_between(fvals1, ci1[:, 0], ci1[:, 1], color='grey')
        # BUGFIX: the second curve previously reused fvals1 and the 'x2=1'
        # label (copy-paste from the line above); it must be plotted against
        # its own x-grid and labeled 'x2=-1', matching the first figure.
        plt.plot(fvals2, pr2, '-', label='x2=-1', lw=4, alpha=0.6, color='lime')
        plt.fill_between(fvals2, ci2[:, 0], ci2[:, 1], color='grey')
        ha, lb = ax.get_legend_handles_labels()
        plt.figlegend(ha, lb, "center right")
        plt.xlabel("Focus variable", size=15)
        plt.ylabel("Fitted mean", size=15)
        plt.title("Linear model prediction")
        self.close_or_save(fig)