Repository URL to install this package:
|
Version:
0.10.2 ▾
|
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
from statsmodels.multivariate.multivariate_ols import _MultivariateOLS
from numpy.testing import assert_array_almost_equal, assert_raises
import patsy
data = pd.DataFrame([['Morphine', 'N', .04, .20, .10, .08],
['Morphine', 'N', .02, .06, .02, .02],
['Morphine', 'N', .07, 1.40, .48, .24],
['Morphine', 'N', .17, .57, .35, .24],
['Morphine', 'Y', .10, .09, .13, .14],
['placebo', 'Y', .07, .07, .06, .07],
['placebo', 'Y', .05, .07, .06, .07],
['placebo', 'N', .03, .62, .31, .22],
['placebo', 'N', .03, 1.05, .73, .60],
['placebo', 'N', .07, .83, 1.07, .80],
['Trimethaphan', 'N', .09, 3.13, 2.06, 1.23],
['Trimethaphan', 'Y', .10, .09, .09, .08],
['Trimethaphan', 'Y', .08, .09, .09, .10],
['Trimethaphan', 'Y', .13, .10, .12, .12],
['Trimethaphan', 'Y', .06, .05, .05, .05]],
columns=['Drug', 'Depleted',
'Histamine0', 'Histamine1',
'Histamine3', 'Histamine5'])
for i in range(2, 6):
data.iloc[:, i] = np.log(data.iloc[:, i])
def compare_r_output_dogs_data(method):
''' Testing within-subject effect interact with 2 between-subject effect
Compares with R car library Anova(, type=3) output
Note: The test statistis Phillai, Wilks, Hotelling-Lawley
and Roy are the same as R output but the approximate F and degree
of freedoms can be different. This is due to the fact that this
implementation is based on SAS formula [1]
.. [*] https://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/viewer.htm#statug_introreg_sect012.htm
'''
# Repeated measures with orthogonal polynomial contrasts coding
mod = _MultivariateOLS.from_formula(
'Histamine0 + Histamine1 + Histamine3 + Histamine5 ~ Drug * Depleted',
data)
r = mod.fit(method=method)
r = r.mv_test()
a = [[2.68607660e-02, 4, 6, 5.43435304e+01, 7.59585610e-05],
[9.73139234e-01, 4, 6, 5.43435304e+01, 7.59585610e-05],
[3.62290202e+01, 4, 6, 5.43435304e+01, 7.59585610e-05],
[3.62290202e+01, 4, 6, 5.43435304e+01, 7.59585610e-05]]
assert_array_almost_equal(r['Intercept']['stat'].values, a, decimal=6)
a = [[8.39646619e-02, 8, 1.20000000e+01, 3.67658068e+00, 2.12614444e-02],
[1.18605382e+00, 8, 1.40000000e+01, 2.55003861e+00, 6.01270701e-02],
[7.69391362e+00, 8, 6.63157895e+00, 5.50814270e+00, 2.07392260e-02],
[7.25036952e+00, 4, 7.00000000e+00, 1.26881467e+01, 2.52669877e-03]]
assert_array_almost_equal(r['Drug']['stat'].values, a, decimal=6)
a = [[0.32048892, 4., 6., 3.18034906, 0.10002373],
[0.67951108, 4., 6., 3.18034906, 0.10002373],
[2.12023271, 4., 6., 3.18034906, 0.10002373],
[2.12023271, 4., 6., 3.18034906, 0.10002373]]
assert_array_almost_equal(r['Depleted']['stat'].values, a, decimal=6)
a = [[0.15234366, 8., 12., 2.34307678, 0.08894239],
[1.13013353, 8., 14., 2.27360606, 0.08553213],
[3.70989596, 8., 6.63157895, 2.65594824, 0.11370285],
[3.1145597, 4., 7., 5.45047947, 0.02582767]]
assert_array_almost_equal(r['Drug:Depleted']['stat'].values, a, decimal=6)
def test_glm_dogs_example():
compare_r_output_dogs_data(method='svd')
compare_r_output_dogs_data(method='pinv')
def test_specify_L_M_by_string():
mod = _MultivariateOLS.from_formula(
'Histamine0 + Histamine1 + Histamine3 + Histamine5 ~ Drug * Depleted',
data)
r = mod.fit()
r1 = r.mv_test(hypotheses=[['Intercept', ['Intercept'], None]])
a = [[2.68607660e-02, 4, 6, 5.43435304e+01, 7.59585610e-05],
[9.73139234e-01, 4, 6, 5.43435304e+01, 7.59585610e-05],
[3.62290202e+01, 4, 6, 5.43435304e+01, 7.59585610e-05],
[3.62290202e+01, 4, 6, 5.43435304e+01, 7.59585610e-05]]
assert_array_almost_equal(r1['Intercept']['stat'].values, a, decimal=6)
L = ['Intercept', 'Drug[T.Trimethaphan]', 'Drug[T.placebo]']
M = ['Histamine1', 'Histamine3', 'Histamine5']
r1 = r.mv_test(hypotheses=[['a', L, M]])
a = [[1, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0]]
assert_array_almost_equal(r1['a']['contrast_L'], a, decimal=10)
a = [[0, 1, 0, 0],
[0, 0, 1, 0],
[0, 0, 0, 1]]
assert_array_almost_equal(r1['a']['transform_M'].T, a, decimal=10)
def test_independent_variable_singular():
data1 = data.copy()
data1['dup'] = data1['Drug']
mod = _MultivariateOLS.from_formula(
'Histamine0 + Histamine1 + Histamine3 + Histamine5 ~ Drug * dup',
data1)
assert_raises(ValueError, mod.fit)
mod = _MultivariateOLS.from_formula(
'Histamine0 + Histamine1 + Histamine3 + Histamine5 ~ Drug * dup',
data1)
assert_raises(ValueError, mod.fit)
def test_from_formula_vs_no_formula():
mod = _MultivariateOLS.from_formula(
'Histamine0 + Histamine1 + Histamine3 + Histamine5 ~ Drug * Depleted',
data)
r = mod.fit(method='svd')
r0 = r.mv_test()
endog, exog = patsy.dmatrices(
'Histamine0 + Histamine1 + Histamine3 + Histamine5 ~ Drug * Depleted',
data, return_type="dataframe")
L = np.array([[1, 0, 0, 0, 0, 0]])
# DataFrame input
r = _MultivariateOLS(endog, exog).fit(method='svd')
r1 = r.mv_test(hypotheses=[['Intercept', L, None]])
assert_array_almost_equal(r1['Intercept']['stat'].values,
r0['Intercept']['stat'].values, decimal=6)
# Numpy array input
r = _MultivariateOLS(endog.values, exog.values).fit(method='svd')
r1 = r.mv_test(hypotheses=[['Intercept', L, None]])
assert_array_almost_equal(r1['Intercept']['stat'].values,
r0['Intercept']['stat'].values, decimal=6)
L = np.array([[0, 1, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0],
])
r1 = r.mv_test(hypotheses=[['Drug', L, None]])
# DataFrame input
r = _MultivariateOLS(endog, exog).fit(method='svd')
r1 = r.mv_test(hypotheses=[['Drug', L, None]])
assert_array_almost_equal(r1['Drug']['stat'].values,
r0['Drug']['stat'].values, decimal=6)
# Numpy array input
r = _MultivariateOLS(endog.values, exog.values).fit(method='svd')
r1 = r.mv_test(hypotheses=[['Drug', L, None]])
assert_array_almost_equal(r1['Drug']['stat'].values,
r0['Drug']['stat'].values, decimal=6)
def test_L_M_matrices_1D_array():
mod = _MultivariateOLS.from_formula(
'Histamine0 + Histamine1 + Histamine3 + Histamine5 ~ Drug * Depleted',
data)
r = mod.fit(method='svd')
L = np.array([1, 0, 0, 0, 0, 0])
assert_raises(ValueError, r.mv_test, hypotheses=[['Drug', L, None]])
L = np.array([[1, 0, 0, 0, 0, 0]])
M = np.array([1, 0, 0, 0, 0, 0])
assert_raises(ValueError, r.mv_test, hypotheses=[['Drug', L, M]])
def test_exog_1D_array():
mod = _MultivariateOLS.from_formula(
'Histamine0 + Histamine1 + Histamine3 + Histamine5 ~ 0 + Depleted',
data)
r = mod.fit(method='svd')
r0 = r.mv_test()
a = [[0.0019, 8.0000, 20.0000, 55.0013, 0.0000],
[1.8112, 8.0000, 22.0000, 26.3796, 0.0000],
[97.8858, 8.0000, 12.1818, 117.1133, 0.0000],
[93.2742, 4.0000, 11.0000, 256.5041, 0.0000]]
assert_array_almost_equal(r0['Depleted']['stat'].values, a, decimal=4)
def test_endog_1D_array():
assert_raises(ValueError, _MultivariateOLS.from_formula,
'Histamine0 ~ 0 + Depleted', data)
def test_affine_hypothesis():
# Testing affine hypothesis, compared with R car linearHypothesis
# Note: The test statistis Phillai, Wilks, Hotelling-Lawley
# and Roy are the same as R output but the approximate F and degree
# of freedoms can be different. This is due to the fact that this
# implementation is based on SAS formula [1]
mod = _MultivariateOLS.from_formula(
'Histamine0 + Histamine1 + Histamine3 + Histamine5 ~ Drug * Depleted',
data)
r = mod.fit(method='svd')
L = np.array([[0, 1.2, 1.1, 1.3, 1.5, 1.4],
[0, 3.2, 2.1, 3.3, 5.5, 4.4]])
M = None
C = np.array([[1, 2, 3, 4],
[5, 6, 7, 8]])
r0 = r.mv_test(hypotheses=[('test1', L, M, C)])
a = [[0.0269, 8.0000, 12.0000, 7.6441, 0.0010],
[1.4277, 8.0000, 14.0000, 4.3657, 0.0080],
[19.2678, 8.0000, 6.6316, 13.7940, 0.0016],
[18.3470, 4.0000, 7.0000, 32.1072, 0.0001]]
assert_array_almost_equal(r0['test1']['stat'].values, a, decimal=4)
r0.summary(show_contrast_L=True, show_transform_M=True,
show_constant_C=True)