Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

alkaline-ml / statsmodels   python

Repository URL to install this package:

Version: 0.11.1 

/ stats / tests / test_anova_rm.py

from statsmodels.compat.pandas import assert_frame_equal

import pandas as pd
import numpy as np
from statsmodels.stats.anova import AnovaRM
from numpy.testing import (assert_array_almost_equal, assert_raises,
                           assert_equal)


# Dependent-variable scores: twelve groups of eight values.  Within each
# group the values are ordered by subject (see the parallel ``id`` list);
# the trailing comments give the levels of the parallel ``A``/``B``/``D``
# factor lists for that group.
DV = (
    [7, 3, 6, 6, 5, 8, 6, 7]                # A=a, B=a, D=a
    + [7, 11, 9, 11, 10, 10, 11, 11]        # A=a, B=b, D=a
    + [8, 14, 10, 11, 12, 10, 11, 12]       # A=a, B=c, D=a
    + [16, 7, 11, 9, 10, 11, 8, 8]          # A=b, B=a, D=a
    + [16, 10, 13, 10, 10, 14, 11, 12]      # A=b, B=b, D=a
    + [24, 29, 10, 22, 25, 28, 22, 24]      # A=b, B=c, D=a
    + [1, 3, 5, 8, 3, 5, 6, 8]              # A=a, B=a, D=b
    + [9, 18, 19, 1, 12, 15, 2, 3]          # A=a, B=b, D=b
    + [3, 4, 13, 21, 2, 11, 18, 2]          # A=a, B=c, D=b
    + [12, 7, 12, 3, 19, 1, 4, 13]          # A=b, B=a, D=b
    + [13, 14, 3, 4, 8, 19, 21, 2]          # A=b, B=b, D=b
    + [4, 9, 12, 2, 5, 8, 2, 4]             # A=b, B=c, D=b
)

# Subject labels as strings: subjects '1' through '8', repeated once for
# each of the twelve groups of eight observations.
# NOTE: the name shadows the builtin ``id``; it is kept unchanged because
# the rest of the module refers to it.
id = ['%d' % subject for _ in range(12) for subject in range(1, 9)]

# Factor A: 24 observations at level 'a' followed by 24 at level 'b',
# with that 48-observation pattern repeated twice (96 labels in total).
A = (['a'] * 24 + ['b'] * 24) * 2

# Factor B: levels 'a', 'b', 'c' in consecutive runs of eight observations,
# with that 24-observation pattern repeated four times (96 labels in total).
B = (['a'] * 8 + ['b'] * 8 + ['c'] * 8) * 4

# Factor D: the first 48 observations are at level 'a', the last 48 at
# level 'b'.
D = ['a'] * 48 + ['b'] * 48

# Long-format fixture shared by every test: one row per observation with the
# subject label, the three within-subject factors and the response.
# The lists are stacked as rows and then transposed into columns.
data = pd.DataFrame(
    [id, A, B, D, DV],
    index=['id', 'A', 'B', 'D', 'DV'],
).transpose()
# Cast the response column to an integer dtype after the transpose.
data['DV'] = data['DV'].astype('int')


def test_single_factor_repeated_measures_anova():
    """
    Check a repeated measures ANOVA with a single within-subject factor.

    The expected values reproduce the output of R's `ezANOVA` function
    (library ez).
    """
    # Only the first 16 observations are used, with B as the sole factor.
    subset = data.iloc[:16, :]
    fitted = AnovaRM(subset, 'DV', 'id', within=['B']).fit()

    expected = [[1, 7, 22.4, 0.002125452]]
    # Reorder the table columns (positions 1, 2, 0, 3) to match ``expected``.
    observed = fitted.anova_table.iloc[:, [1, 2, 0, 3]].values
    assert_array_almost_equal(observed, expected, decimal=5)


def test_two_factors_repeated_measures_anova():
    """
    Check a repeated measures ANOVA with two within-subject factors.

    The expected values reproduce the output of R's `ezANOVA` function
    (library ez).
    """
    # The first 48 observations are used, with A and B as factors.
    subset = data.iloc[:48, :]
    fitted = AnovaRM(subset, 'DV', 'id', within=['A', 'B']).fit()

    expected = [
        [1, 7, 40.14159, 3.905263e-04],
        [2, 14, 29.21739, 1.007549e-05],
        [2, 14, 17.10545, 1.741322e-04],
    ]
    # Reorder the table columns (positions 1, 2, 0, 3) to match ``expected``.
    observed = fitted.anova_table.iloc[:, [1, 2, 0, 3]].values
    assert_array_almost_equal(observed, expected, decimal=5)


def test_three_factors_repeated_measures_anova():
    """
    Check a repeated measures ANOVA with three within-subject factors.

    The expected values reproduce the output of R's `ezANOVA` function
    (library ez).
    """
    # The full data set is used, with A, B and D as factors.
    fitted = AnovaRM(data, 'DV', 'id', within=['A', 'B', 'D']).fit()

    expected = [
        [1, 7, 8.7650709, 0.021087505],
        [2, 14, 8.4985785, 0.003833921],
        [1, 7, 20.5076546, 0.002704428],
        [2, 14, 0.8457797, 0.450021759],
        [1, 7, 21.7593382, 0.002301792],
        [2, 14, 6.2416695, 0.011536846],
        [2, 14, 5.4253359, 0.018010647],
    ]
    # Reorder the table columns (positions 1, 2, 0, 3) to match ``expected``.
    observed = fitted.anova_table.iloc[:, [1, 2, 0, 3]].values
    assert_array_almost_equal(observed, expected, decimal=5)


def test_repeated_measures_invalid_factor_name():
    """
    A factor named 'C' (which conflicts with patsy) must be rejected.
    """
    subset = data.iloc[:16, :]
    # Constructing the model is expected to raise ValueError.
    with assert_raises(ValueError):
        AnovaRM(subset, 'DV', 'id', within=['C'])


def test_repeated_measures_collinearity():
    """
    Two within-subject factors that are exact copies must raise ValueError.
    """
    subset = data.iloc[:48, :]
    # ``assign`` returns a new frame, so the shared fixture is not modified;
    # column E duplicates column A.
    collinear = subset.assign(E=subset['A'])
    with assert_raises(ValueError):
        AnovaRM(collinear, 'DV', 'id', within=['A', 'E'])


def test_repeated_measures_unbalanced_data():
    """
    Data with a missing observation must raise ValueError.
    """
    # Skipping the first row leaves 47 of the first 48 observations.
    unbalanced = data.iloc[1:48, :]
    with assert_raises(ValueError):
        AnovaRM(unbalanced, 'DV', 'id', within=['A', 'B'])


def test_repeated_measures_aggregation():
    """
    Aggregating fully duplicated data with the mean must give the same
    ANOVA table as fitting the original, non-duplicated data.
    """
    df1 = AnovaRM(data, 'DV', 'id', within=['A', 'B', 'D']).fit()

    # ``DataFrame.append`` was removed in pandas 2.0; ``pd.concat`` stacks
    # the frames the same way and works on old and new pandas alike.
    double_data = pd.concat([data, data], axis=0)
    df2 = AnovaRM(double_data, 'DV', 'id', within=['A', 'B', 'D'],
                  aggregate_func=np.mean).fit()

    assert_frame_equal(df1.anova_table, df2.anova_table)


def test_repeated_measures_aggregation_one_subject_duplicated():
    """
    Duplicating the observations of a single subject and aggregating with
    the mean must give the same ANOVA table as the original data.
    """
    df1 = AnovaRM(data, 'DV', 'id', within=['A', 'B', 'D']).fit()

    # ``DataFrame.append`` was removed in pandas 2.0; ``pd.concat`` stacks
    # the frames the same way and works on old and new pandas alike.
    subject_one = data.loc[data['id'] == '1', :]
    data2 = pd.concat([data, subject_one], axis=0).reset_index()
    df2 = AnovaRM(data2, 'DV', 'id', within=['A', 'B', 'D'],
                  aggregate_func=np.mean).fit()

    assert_frame_equal(df1.anova_table, df2.anova_table)


def test_repeated_measures_aggregate_func():
    """
    Duplicated observations require an ``aggregate_func``; mean and median
    are stored as different functions but agree on fully duplicated data.
    """
    # ``DataFrame.append`` was removed in pandas 2.0; ``pd.concat`` stacks
    # the frames the same way and works on old and new pandas alike.
    double_data = pd.concat([data, data], axis=0)

    # Without an aggregation function the duplicated cells are rejected.
    assert_raises(ValueError, AnovaRM, double_data, 'DV', 'id',
                  within=['A', 'B', 'D'])

    m1 = AnovaRM(double_data, 'DV', 'id', within=['A', 'B', 'D'],
                 aggregate_func=np.mean)
    m2 = AnovaRM(double_data, 'DV', 'id', within=['A', 'B', 'D'],
                 aggregate_func=np.median)

    # The two models must hold different aggregation functions ...
    assert_raises(AssertionError, assert_equal,
                  m1.aggregate_func, m2.aggregate_func)
    # ... yet every cell holds two identical values, so the mean and the
    # median coincide and the fitted tables must match.
    assert_frame_equal(m1.fit().anova_table, m2.fit().anova_table)


def test_repeated_measures_aggregate_func_mean():
    """
    Passing ``aggregate_func='mean'`` must store the same aggregation
    function as passing ``np.mean``.
    """
    # ``DataFrame.append`` was removed in pandas 2.0; ``pd.concat`` stacks
    # the frames the same way and works on old and new pandas alike.
    double_data = pd.concat([data, data], axis=0)

    m1 = AnovaRM(double_data, 'DV', 'id', within=['A', 'B', 'D'],
                 aggregate_func=np.mean)

    m2 = AnovaRM(double_data, 'DV', 'id', within=['A', 'B', 'D'],
                 aggregate_func='mean')

    assert_equal(m1.aggregate_func, m2.aggregate_func)


def test_repeated_measures_aggregate_compare_with_ezANOVA():
    """
    The ANOVA table fitted on duplicated, mean-aggregated data must match
    the reference table from R's `ezANOVA` (library ez).
    """
    # Results should reproduce those from R's `ezANOVA` (library ez).
    ez = pd.DataFrame(
        {'F Value': [8.7650709, 8.4985785, 20.5076546, 0.8457797, 21.7593382,
                     6.2416695, 5.4253359],
         'Num DF': [1, 2, 1, 2, 1, 2, 2],
         'Den DF': [7, 14, 7, 14, 7, 14, 14],
         'Pr > F': [0.021087505, 0.003833921, 0.002704428, 0.450021759,
                    0.002301792, 0.011536846, 0.018010647]},
        index=pd.Index(['A', 'B', 'D', 'A:B', 'A:D', 'B:D', 'A:B:D']))
    # Fix the column order explicitly so it does not depend on dict ordering.
    ez = ez[['F Value', 'Num DF', 'Den DF', 'Pr > F']]

    # ``DataFrame.append`` was removed in pandas 2.0; ``pd.concat`` stacks
    # the frames the same way and works on old and new pandas alike.
    double_data = pd.concat([data, data], axis=0)
    df = (AnovaRM(double_data, 'DV', 'id', within=['A', 'B', 'D'],
                  aggregate_func=np.mean)
          .fit()
          .anova_table)

    # The reference degrees of freedom are ints, so dtypes are not compared.
    assert_frame_equal(ez, df, check_dtype=False)