Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

alkaline-ml / statsmodels   python

Repository URL to install this package:

Version: 0.11.1 

/ multivariate / tests / test_ml_factor.py

import numpy as np
from statsmodels.multivariate.factor import Factor
from numpy.testing import assert_allclose, assert_equal
from scipy.optimize import approx_fprime


# A small model for basic testing
def _toy():
    uniq = np.r_[4, 9, 16]
    load = np.asarray([[3, 1, 2], [2, 5, 8]]).T
    par = np.r_[2, 3, 4, 3, 1, 2, 2, 5, 8]
    corr = np.asarray([[1, .5, .25], [.5, 1, .5], [.25, .5, 1]])
    return uniq, load, corr, par


def test_loglike():

    uniq, load, corr, par = _toy()
    fa = Factor(n_factor=2, corr=corr)

    # Two ways of passing the parameters to loglike
    ll1 = fa.loglike((load, uniq))
    ll2 = fa.loglike(par)

    assert_allclose(ll1, ll2)


def test_score():

    uniq, load, corr, par = _toy()
    fa = Factor(n_factor=2, corr=corr)

    def f(par):
        return fa.loglike(par)

    par2 = np.r_[0.1, 0.2, 0.3, 0.4, 0.3, 0.1, 0.2, -0.2, 0, 0.8, 0.5, 0]

    for pt in (par, par2):
        g1 = approx_fprime(pt, f, 1e-8)
        g2 = fa.score(pt)
        assert_allclose(g1, g2, atol=1e-3)


def test_exact():
    # Test if we can recover exact factor-structured matrices with
    # default starting values.

    np.random.seed(23324)

    # Works for larger k_var but slow for routine testing.
    for k_var in 5, 10, 25:
        for n_factor in 1, 2, 3:
            load = np.random.normal(size=(k_var, n_factor))
            uniq = np.linspace(1, 2, k_var)
            c = np.dot(load, load.T)
            c.flat[::c.shape[0]+1] += uniq
            s = np.sqrt(np.diag(c))
            c /= np.outer(s, s)
            fa = Factor(corr=c, n_factor=n_factor, method='ml')
            rslt = fa.fit()
            assert_allclose(rslt.fitted_cov, c, rtol=1e-4, atol=1e-4)
            rslt.summary()  # smoke test


def test_exact_em():
    # Test if we can recover exact factor-structured matrices with
    # default starting values using the EM algorithm.

    np.random.seed(23324)

    # Works for larger k_var but slow for routine testing.
    for k_var in 5, 10, 25:
        for n_factor in 1, 2, 3:
            load = np.random.normal(size=(k_var, n_factor))
            uniq = np.linspace(1, 2, k_var)
            c = np.dot(load, load.T)
            c.flat[::c.shape[0]+1] += uniq
            s = np.sqrt(np.diag(c))
            c /= np.outer(s, s)
            fa = Factor(corr=c, n_factor=n_factor, method='ml')
            load_e, uniq_e = fa._fit_ml_em(200)
            c_e = np.dot(load_e, load_e.T)
            c_e.flat[::c_e.shape[0]+1] += uniq_e
            assert_allclose(c_e, c, rtol=1e-4, atol=1e-4)


def test_em():

    n_factor = 1
    cor = np.asarray([[1, 0.5, 0.3], [0.5, 1, 0], [0.3, 0, 1]])

    fa = Factor(corr=cor, n_factor=n_factor, method='ml')
    rslt = fa.fit(opt={'gtol': 1e-3})
    load_opt = rslt.loadings
    uniq_opt = rslt.uniqueness

    load_em, uniq_em = fa._fit_ml_em(1000)
    cc = np.dot(load_em, load_em.T)
    cc.flat[::cc.shape[0]+1] += uniq_em

    assert_allclose(cc, rslt.fitted_cov, rtol=1e-2, atol=1e-2)


def test_1factor():
    """
    # R code:
    r = 0.4
    p = 4
    ii = seq(0, p-1)
    ii = outer(ii, ii, "-")
    ii = abs(ii)
    cm = r^ii
    fa = factanal(covmat=cm, factors=1)
    print(fa, digits=10)
    """

    r = 0.4
    p = 4
    ii = np.arange(p)
    cm = r ** np.abs(np.subtract.outer(ii, ii))

    fa = Factor(corr=cm, n_factor=1, method='ml')
    rslt = fa.fit()

    if rslt.loadings[0, 0] < 0:
        rslt.loadings[:, 0] *= -1

    # R solution, but our likelihood is higher
    # uniq = np.r_[0.8392472054, 0.5820958187, 0.5820958187, 0.8392472054]
    # load = np.asarray([[0.4009399224, 0.6464550935, 0.6464550935,
    #                     0.4009399224]]).T
    # l1 = fa.loglike(fa._pack(load, uniq))
    # l2 = fa.loglike(fa._pack(rslt.loadings, rslt.uniqueness))

    # So use a smoke test
    uniq = np.r_[0.85290232,  0.60916033,  0.55382266,  0.82610666]
    load = np.asarray([[0.38353316], [0.62517171], [0.66796508],
                       [0.4170052]])

    assert_allclose(load, rslt.loadings, rtol=1e-3, atol=1e-3)
    assert_allclose(uniq, rslt.uniqueness, rtol=1e-3, atol=1e-3)

    assert_equal(rslt.df, 2)


def test_2factor():
    """
    # R code:
    r = 0.4
    p = 6
    ii = seq(0, p-1)
    ii = outer(ii, ii, "-")
    ii = abs(ii)
    cm = r^ii
    factanal(covmat=cm, factors=2)
    """

    r = 0.4
    p = 6
    ii = np.arange(p)
    cm = r ** np.abs(np.subtract.outer(ii, ii))

    fa = Factor(corr=cm, n_factor=2, nobs=100, method='ml')
    rslt = fa.fit()

    for j in 0, 1:
        if rslt.loadings[0, j] < 0:
            rslt.loadings[:, j] *= -1

    uniq = np.r_[0.782, 0.367, 0.696, 0.696, 0.367, 0.782]
    assert_allclose(uniq, rslt.uniqueness, rtol=1e-3, atol=1e-3)

    loads = [np.r_[0.323, 0.586, 0.519, 0.519, 0.586, 0.323],
             np.r_[0.337, 0.538, 0.187, -0.187, -0.538, -0.337]]
    for k in 0, 1:
        if np.dot(loads[k], rslt.loadings[:, k]) < 0:
            loads[k] *= -1
        assert_allclose(loads[k], rslt.loadings[:, k], rtol=1e-3, atol=1e-3)

    assert_equal(rslt.df, 4)

    # Smoke test for standard errors
    e = np.asarray([0.11056836, 0.05191071, 0.09836349,
                    0.09836349, 0.05191071, 0.11056836])
    assert_allclose(rslt.uniq_stderr, e, atol=1e-4)
    e = np.asarray([[0.08842151, 0.08842151], [0.06058582, 0.06058582],
                    [0.08339874, 0.08339874], [0.08339874, 0.08339874],
                    [0.06058582, 0.06058582], [0.08842151, 0.08842151]])
    assert_allclose(rslt.load_stderr, e, atol=1e-4)