from statsmodels.compat.pandas import assert_frame_equal
import pandas as pd
import numpy as np
from statsmodels.stats.anova import AnovaRM
from numpy.testing import (assert_array_almost_equal, assert_raises,
assert_equal)
# Fixture for the repeated measures ANOVA tests: 8 subjects measured in every
# cell of a 2 (A) x 3 (B) x 2 (D) within-subject design, i.e. 96 observations.
# Each group of 8 consecutive values holds subjects 1..8 for one design cell.
DV = [
    7, 3, 6, 6, 5, 8, 6, 7,
    7, 11, 9, 11, 10, 10, 11, 11,
    8, 14, 10, 11, 12, 10, 11, 12,
    16, 7, 11, 9, 10, 11, 8, 8,
    16, 10, 13, 10, 10, 14, 11, 12,
    24, 29, 10, 22, 25, 28, 22, 24,
    1, 3, 5, 8, 3, 5, 6, 8,
    9, 18, 19, 1, 12, 15, 2, 3,
    3, 4, 13, 21, 2, 11, 18, 2,
    12, 7, 12, 3, 19, 1, 4, 13,
    13, 14, 3, 4, 8, 19, 21, 2,
    4, 9, 12, 2, 5, 8, 2, 4,
]

# Subject labels '1'..'8' (as strings), repeated once per design cell.
id = ['%d' % subject for subject in range(1, 9)] * 12

# Factor A alternates every 24 observations: a, b, a, b.
A = (['a'] * 24 + ['b'] * 24) * 2

# Factor B cycles through a, b, c in runs of 8 observations.
B = (['a'] * 8 + ['b'] * 8 + ['c'] * 8) * 4

# Factor D splits the data into two halves of 48 observations.
D = ['a'] * 48 + ['b'] * 48

# Stack the columns as rows and transpose, so each list becomes one column.
data = pd.DataFrame(
    [id, A, B, D, DV],
    index=['id', 'A', 'B', 'D', 'DV'],
).transpose()
# The transposed frame holds generic objects; restore an integer response.
data['DV'] = data['DV'].astype('int')
def test_single_factor_repeated_measures_anova():
    """
    Check a repeated measures ANOVA with the single within factor B.

    The expected values reproduce the output of R's `ezANOVA` function
    from library ez.
    """
    # Only the first 16 observations are used.
    subset = data.iloc[:16, :]
    fitted = AnovaRM(subset, 'DV', 'id', within=['B']).fit()

    expected = [[1, 7, 22.4, 0.002125452]]
    # Reorder the table columns so they line up with `expected`.
    observed = fitted.anova_table.iloc[:, [1, 2, 0, 3]].values
    assert_array_almost_equal(observed, expected, decimal=5)
def test_two_factors_repeated_measures_anova():
    """
    Check a repeated measures ANOVA with the two within factors A and B.

    The expected values reproduce the output of R's `ezANOVA` function
    from library ez.
    """
    # Only the first 48 observations are used.
    subset = data.iloc[:48, :]
    fitted = AnovaRM(subset, 'DV', 'id', within=['A', 'B']).fit()

    expected = [
        [1, 7, 40.14159, 3.905263e-04],
        [2, 14, 29.21739, 1.007549e-05],
        [2, 14, 17.10545, 1.741322e-04],
    ]
    # Reorder the table columns so they line up with `expected`.
    observed = fitted.anova_table.iloc[:, [1, 2, 0, 3]].values
    assert_array_almost_equal(observed, expected, decimal=5)
def test_three_factors_repeated_measures_anova():
    """
    Check a repeated measures ANOVA with the three within factors A, B, D.

    The expected values reproduce the output of R's `ezANOVA` function
    from library ez.
    """
    fitted = AnovaRM(data, 'DV', 'id', within=['A', 'B', 'D']).fit()

    expected = [
        [1, 7, 8.7650709, 0.021087505],
        [2, 14, 8.4985785, 0.003833921],
        [1, 7, 20.5076546, 0.002704428],
        [2, 14, 0.8457797, 0.450021759],
        [1, 7, 21.7593382, 0.002301792],
        [2, 14, 6.2416695, 0.011536846],
        [2, 14, 5.4253359, 0.018010647],
    ]
    # Reorder the table columns so they line up with `expected`.
    observed = fitted.anova_table.iloc[:, [1, 2, 0, 3]].values
    assert_array_almost_equal(observed, expected, decimal=5)
def test_repeated_measures_invalid_factor_name():
    """
    Check that a within factor named 'C' is rejected with a ValueError.

    The name 'C' conflicts with patsy.
    """
    subset = data.iloc[:16, :]
    with assert_raises(ValueError):
        AnovaRM(subset, 'DV', 'id', within=['C'])
def test_repeated_measures_collinearity():
    """Check that two identical within factors raise a ValueError."""
    collinear = data.iloc[:48, :].copy()
    # 'E' is an exact copy of 'A', so the two factors are collinear.
    collinear['E'] = collinear['A']
    with assert_raises(ValueError):
        AnovaRM(collinear, 'DV', 'id', within=['A', 'E'])
def test_repeated_measures_unbalanced_data():
    """Check that data with a missing observation raise a ValueError."""
    # Skipping the first row leaves one subject short of an observation.
    unbalanced = data.iloc[1:48, :]
    with assert_raises(ValueError):
        AnovaRM(unbalanced, 'DV', 'id', within=['A', 'B'])
def test_repeated_measures_aggregation():
    """
    Check that aggregating fully duplicated data reproduces the original fit.

    Every observation appears twice in the second data set, so averaging
    the duplicates with `np.mean` must give the same ANOVA table as the
    original data.
    """
    df1 = AnovaRM(data, 'DV', 'id', within=['A', 'B', 'D']).fit()
    # DataFrame.append was removed in pandas 2.0; pd.concat is the
    # equivalent and, like append, keeps the original row labels.
    doubled = pd.concat([data, data])
    df2 = AnovaRM(doubled, 'DV', 'id', within=['A', 'B', 'D'],
                  aggregate_func=np.mean).fit()
    assert_frame_equal(df1.anova_table, df2.anova_table)
def test_repeated_measures_aggregation_one_subject_duplicated():
    """
    Check aggregation when only one subject's observations are duplicated.

    The rows of subject '1' are added a second time; averaging them with
    `np.mean` must give the same ANOVA table as the original data.
    """
    df1 = AnovaRM(data, 'DV', 'id', within=['A', 'B', 'D']).fit()
    # DataFrame.append was removed in pandas 2.0; pd.concat is the
    # equivalent way to stack the extra rows under the original data.
    extra_rows = data.loc[data['id'] == '1', :]
    augmented = pd.concat([data, extra_rows]).reset_index()
    df2 = AnovaRM(augmented, 'DV', 'id', within=['A', 'B', 'D'],
                  aggregate_func=np.mean).fit()
    assert_frame_equal(df1.anova_table, df2.anova_table)
def test_repeated_measures_aggregate_func():
    """
    Check the handling of the `aggregate_func` argument.

    Duplicated observations without an aggregation function must raise a
    ValueError. With `np.mean` and `np.median` the stored functions
    differ, but the fitted ANOVA tables must be equal.
    """
    # DataFrame.append was removed in pandas 2.0; build the duplicated
    # data once with pd.concat and reuse it for every model.
    doubled = pd.concat([data, data])

    assert_raises(ValueError, AnovaRM, doubled, 'DV', 'id',
                  within=['A', 'B', 'D'])

    m1 = AnovaRM(doubled, 'DV', 'id', within=['A', 'B', 'D'],
                 aggregate_func=np.mean)
    m2 = AnovaRM(doubled, 'DV', 'id', within=['A', 'B', 'D'],
                 aggregate_func=np.median)

    # assert_equal must fail here: the two aggregation functions differ.
    assert_raises(AssertionError, assert_equal,
                  m1.aggregate_func, m2.aggregate_func)
    assert_frame_equal(m1.fit().anova_table, m2.fit().anova_table)
def test_repeated_measures_aggregate_func_mean():
    """
    Check that `aggregate_func='mean'` and `aggregate_func=np.mean` leave
    the same aggregation function on the model.
    """
    # DataFrame.append was removed in pandas 2.0; pd.concat is the
    # equivalent way to duplicate the observations.
    doubled = pd.concat([data, data])
    m1 = AnovaRM(doubled, 'DV', 'id', within=['A', 'B', 'D'],
                 aggregate_func=np.mean)
    m2 = AnovaRM(doubled, 'DV', 'id', within=['A', 'B', 'D'],
                 aggregate_func='mean')
    assert_equal(m1.aggregate_func, m2.aggregate_func)
def test_repeated_measures_aggregate_compare_with_ezANOVA():
    """
    Compare the ANOVA table of aggregated, duplicated data with the
    reference table from R's `ezANOVA` (library ez).
    """
    # Results should reproduce those from R's `ezANOVA` (library ez).
    ez = pd.DataFrame(
        {'F Value': [8.7650709, 8.4985785, 20.5076546, 0.8457797, 21.7593382,
                     6.2416695, 5.4253359],
         'Num DF': [1, 2, 1, 2, 1, 2, 2],
         'Den DF': [7, 14, 7, 14, 7, 14, 14],
         'Pr > F': [0.021087505, 0.003833921, 0.002704428, 0.450021759,
                    0.002301792, 0.011536846, 0.018010647]},
        index=pd.Index(['A', 'B', 'D', 'A:B', 'A:D', 'B:D', 'A:B:D']))
    # Fix the column order explicitly so it matches the fitted table.
    ez = ez[['F Value', 'Num DF', 'Den DF', 'Pr > F']]

    # DataFrame.append was removed in pandas 2.0; pd.concat is the
    # equivalent way to duplicate the observations.
    doubled = pd.concat([data, data])
    df = (AnovaRM(doubled, 'DV', 'id', within=['A', 'B', 'D'],
                  aggregate_func=np.mean)
          .fit()
          .anova_table)
    # The reference DF columns are integers; only the values are compared.
    assert_frame_equal(ez, df, check_dtype=False)