Gemfury

alkaline-ml / statsmodels python

Repository URL to install this package:
Details
statsmodels / multivariate / tests / test_multivariate_ols.py
# -*- coding: utf-8 -*-

import numpy as np
import pandas as pd
from statsmodels.multivariate.multivariate_ols import _MultivariateOLS
from numpy.testing import assert_array_almost_equal, assert_raises
import patsy

data = pd.DataFrame([['Morphine', 'N', .04, .20, .10, .08],
                     ['Morphine', 'N', .02, .06, .02, .02],
                     ['Morphine', 'N', .07, 1.40, .48, .24],
                     ['Morphine', 'N', .17, .57, .35, .24],
                     ['Morphine', 'Y', .10, .09, .13, .14],
                     ['placebo', 'Y', .07, .07, .06, .07],
                     ['placebo', 'Y', .05, .07, .06, .07],
                     ['placebo', 'N', .03, .62, .31, .22],
                     ['placebo', 'N', .03, 1.05, .73, .60],
                     ['placebo', 'N', .07, .83, 1.07, .80],
                     ['Trimethaphan', 'N', .09, 3.13, 2.06, 1.23],
                     ['Trimethaphan', 'Y', .10, .09, .09, .08],
                     ['Trimethaphan', 'Y', .08, .09, .09, .10],
                     ['Trimethaphan', 'Y', .13, .10, .12, .12],
                     ['Trimethaphan', 'Y', .06, .05, .05, .05]],
                    columns=['Drug', 'Depleted',
                             'Histamine0', 'Histamine1',
                             'Histamine3', 'Histamine5'])

for i in range(2, 6):
    data.iloc[:, i] = np.log(data.iloc[:, i])


def compare_r_output_dogs_data(method):
    ''' Testing within-subject effect interact with 2 between-subject effect
    Compares with R car library Anova(, type=3) output

    Note: The test statistis Phillai, Wilks, Hotelling-Lawley
          and Roy are the same as R output but the approximate F and degree
          of freedoms can be different. This is due to the fact that this
          implementation is based on SAS formula [1]

    .. [*] https://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/viewer.htm#statug_introreg_sect012.htm
    '''


    # Repeated measures with orthogonal polynomial contrasts coding
    mod = _MultivariateOLS.from_formula(
        'Histamine0 + Histamine1 + Histamine3 + Histamine5 ~ Drug * Depleted',
        data)
    r = mod.fit(method=method)
    r = r.mv_test()
    a = [[2.68607660e-02, 4, 6, 5.43435304e+01, 7.59585610e-05],
         [9.73139234e-01, 4, 6, 5.43435304e+01, 7.59585610e-05],
         [3.62290202e+01, 4, 6, 5.43435304e+01, 7.59585610e-05],
         [3.62290202e+01, 4, 6, 5.43435304e+01, 7.59585610e-05]]
    assert_array_almost_equal(r['Intercept']['stat'].values, a, decimal=6)
    a = [[8.39646619e-02, 8, 1.20000000e+01, 3.67658068e+00, 2.12614444e-02],
         [1.18605382e+00, 8, 1.40000000e+01, 2.55003861e+00, 6.01270701e-02],
         [7.69391362e+00, 8, 6.63157895e+00, 5.50814270e+00, 2.07392260e-02],
         [7.25036952e+00, 4, 7.00000000e+00, 1.26881467e+01, 2.52669877e-03]]
    assert_array_almost_equal(r['Drug']['stat'].values, a, decimal=6)
    a = [[0.32048892, 4., 6., 3.18034906, 0.10002373],
         [0.67951108, 4., 6., 3.18034906, 0.10002373],
         [2.12023271, 4., 6., 3.18034906, 0.10002373],
         [2.12023271, 4., 6., 3.18034906, 0.10002373]]
    assert_array_almost_equal(r['Depleted']['stat'].values, a, decimal=6)
    a = [[0.15234366, 8., 12.,        2.34307678, 0.08894239],
         [1.13013353, 8., 14.,        2.27360606, 0.08553213],
         [3.70989596, 8., 6.63157895, 2.65594824, 0.11370285],
         [3.1145597,  4., 7.,         5.45047947, 0.02582767]]
    assert_array_almost_equal(r['Drug:Depleted']['stat'].values, a, decimal=6)


def test_glm_dogs_example():
    compare_r_output_dogs_data(method='svd')
    compare_r_output_dogs_data(method='pinv')


def test_specify_L_M_by_string():
    mod = _MultivariateOLS.from_formula(
        'Histamine0 + Histamine1 + Histamine3 + Histamine5 ~ Drug * Depleted',
        data)
    r = mod.fit()
    r1 = r.mv_test(hypotheses=[['Intercept', ['Intercept'], None]])
    a = [[2.68607660e-02, 4, 6, 5.43435304e+01, 7.59585610e-05],
         [9.73139234e-01, 4, 6, 5.43435304e+01, 7.59585610e-05],
         [3.62290202e+01, 4, 6, 5.43435304e+01, 7.59585610e-05],
         [3.62290202e+01, 4, 6, 5.43435304e+01, 7.59585610e-05]]
    assert_array_almost_equal(r1['Intercept']['stat'].values, a, decimal=6)
    L = ['Intercept', 'Drug[T.Trimethaphan]', 'Drug[T.placebo]']
    M = ['Histamine1', 'Histamine3', 'Histamine5']
    r1 = r.mv_test(hypotheses=[['a', L, M]])
    a = [[1, 0, 0, 0, 0, 0],
         [0, 1, 0, 0, 0, 0],
         [0, 0, 1, 0, 0, 0]]
    assert_array_almost_equal(r1['a']['contrast_L'], a, decimal=10)
    a = [[0, 1, 0, 0],
         [0, 0, 1, 0],
         [0, 0, 0, 1]]
    assert_array_almost_equal(r1['a']['transform_M'].T, a, decimal=10)


def test_independent_variable_singular():
    data1 = data.copy()
    data1['dup'] = data1['Drug']
    mod = _MultivariateOLS.from_formula(
        'Histamine0 + Histamine1 + Histamine3 + Histamine5 ~ Drug * dup',
        data1)
    assert_raises(ValueError, mod.fit)
    mod = _MultivariateOLS.from_formula(
        'Histamine0 + Histamine1 + Histamine3 + Histamine5 ~ Drug * dup',
        data1)
    assert_raises(ValueError,  mod.fit)


def test_from_formula_vs_no_formula():
    mod = _MultivariateOLS.from_formula(
        'Histamine0 + Histamine1 + Histamine3 + Histamine5 ~ Drug * Depleted',
        data)
    r = mod.fit(method='svd')
    r0 = r.mv_test()
    endog, exog = patsy.dmatrices(
        'Histamine0 + Histamine1 + Histamine3 + Histamine5 ~ Drug * Depleted',
        data, return_type="dataframe")
    L = np.array([[1, 0, 0, 0, 0, 0]])
    # DataFrame input
    r = _MultivariateOLS(endog, exog).fit(method='svd')
    r1 = r.mv_test(hypotheses=[['Intercept', L, None]])
    assert_array_almost_equal(r1['Intercept']['stat'].values,
                              r0['Intercept']['stat'].values, decimal=6)
    # Numpy array input
    r = _MultivariateOLS(endog.values, exog.values).fit(method='svd')
    r1 = r.mv_test(hypotheses=[['Intercept', L, None]])
    assert_array_almost_equal(r1['Intercept']['stat'].values,
                              r0['Intercept']['stat'].values, decimal=6)
    L = np.array([[0, 1, 0, 0, 0, 0],
                  [0, 0, 1, 0, 0, 0],
                  ])
    r1 = r.mv_test(hypotheses=[['Drug', L, None]])
    # DataFrame input
    r = _MultivariateOLS(endog, exog).fit(method='svd')
    r1 = r.mv_test(hypotheses=[['Drug', L, None]])
    assert_array_almost_equal(r1['Drug']['stat'].values,
                              r0['Drug']['stat'].values, decimal=6)
    # Numpy array input
    r = _MultivariateOLS(endog.values, exog.values).fit(method='svd')
    r1 = r.mv_test(hypotheses=[['Drug', L, None]])
    assert_array_almost_equal(r1['Drug']['stat'].values,
                              r0['Drug']['stat'].values, decimal=6)

def test_L_M_matrices_1D_array():
    mod = _MultivariateOLS.from_formula(
        'Histamine0 + Histamine1 + Histamine3 + Histamine5 ~ Drug * Depleted',
        data)
    r = mod.fit(method='svd')
    L = np.array([1, 0, 0, 0, 0, 0])
    assert_raises(ValueError, r.mv_test, hypotheses=[['Drug', L, None]])
    L = np.array([[1, 0, 0, 0, 0, 0]])
    M = np.array([1, 0, 0, 0, 0, 0])
    assert_raises(ValueError, r.mv_test, hypotheses=[['Drug', L, M]])


def test_exog_1D_array():
    mod = _MultivariateOLS.from_formula(
        'Histamine0 + Histamine1 + Histamine3 + Histamine5 ~ 0 + Depleted',
        data)
    r = mod.fit(method='svd')
    r0 = r.mv_test()
    a = [[0.0019, 8.0000, 20.0000, 55.0013, 0.0000],
         [1.8112, 8.0000, 22.0000, 26.3796, 0.0000],
         [97.8858, 8.0000, 12.1818, 117.1133, 0.0000],
         [93.2742, 4.0000, 11.0000, 256.5041, 0.0000]]
    assert_array_almost_equal(r0['Depleted']['stat'].values, a, decimal=4)


def test_endog_1D_array():
    assert_raises(ValueError, _MultivariateOLS.from_formula,
        'Histamine0 ~ 0 + Depleted', data)

def test_affine_hypothesis():
    # Testing affine hypothesis, compared with R car linearHypothesis
    # Note: The test statistis Phillai, Wilks, Hotelling-Lawley
    # and Roy are the same as R output but the approximate F and degree
    # of freedoms can be different. This is due to the fact that this
    # implementation is based on SAS formula [1]
    mod = _MultivariateOLS.from_formula(
        'Histamine0 + Histamine1 + Histamine3 + Histamine5 ~ Drug * Depleted',
        data)
    r = mod.fit(method='svd')
    L = np.array([[0, 1.2, 1.1, 1.3, 1.5, 1.4],
                  [0, 3.2, 2.1, 3.3, 5.5, 4.4]])
    M = None
    C = np.array([[1, 2, 3, 4],
                  [5, 6, 7, 8]])
    r0 = r.mv_test(hypotheses=[('test1', L, M, C)])
    a = [[0.0269, 8.0000, 12.0000, 7.6441, 0.0010],
         [1.4277, 8.0000, 14.0000, 4.3657, 0.0080],
         [19.2678, 8.0000, 6.6316, 13.7940, 0.0016],
         [18.3470, 4.0000, 7.0000, 32.1072, 0.0001]]
    assert_array_almost_equal(r0['test1']['stat'].values, a, decimal=4)
    r0.summary(show_contrast_L=True, show_transform_M=True,
               show_constant_C=True)
alkaline-ml / statsmodels python

Products

About

Resources

Contact Gemfury