Gemfury

steminc / scikits.statsmodels python

Repository URL to install this package:
Details
scikits.statsmodels / statsmodels / regression / tests / test_regression.py
"""
Test functions for models.regression
"""
import numpy as np
from numpy.testing import *
from scipy.linalg import toeplitz
from scikits.statsmodels.tools.tools import add_constant
from scikits.statsmodels.regression.linear_model import (OLS, GLSAR, WLS, GLS,
        yule_walker)
from scikits.statsmodels.datasets import longley
#from check_for_rpy import skip_rpy
from nose import SkipTest
from scipy.stats import t as student_t

DECIMAL_4 = 4
DECIMAL_3 = 3
DECIMAL_2 = 2
DECIMAL_1 = 1
DECIMAL_7 = 7
DECIMAL_0 = 0
#skipR = skip_rpy()
#if not skipR:
#    from rpy import r, RPyRException
#    from rmodelwrap import RModel


class CheckRegressionResults(object):
    '''
    res2 contains results from Rmodelwrap or were obtained from a statistical
    packages such as R, Stata, or SAS and were written to model_results
    '''

    decimal_params = DECIMAL_4
    def test_params(self):
        assert_almost_equal(self.res1.params, self.res2.params,
                self.decimal_params)

    decimal_standarderrors = DECIMAL_4
    def test_standarderrors(self):
        assert_almost_equal(self.res1.bse,self.res2.bse,
                self.decimal_standarderrors)

    decimal_confidenceintervals = DECIMAL_4
    def test_confidenceintervals(self):
#        if hasattr(self.res2, 'conf_int'):
#            self.check_confidenceintervals(self.res1.conf_int(),
#                self.res2.conf_int)
#        else:
#            raise SkipTest, "Results from Rpy"
#NOTE: stata rounds residuals (at least) to sig digits so approx_equal
        conf1 = self.res1.conf_int()
        conf2 = self.res2.conf_int()
        for i in range(len(conf1)):
            assert_approx_equal(conf1[i][0], conf2[i][0],
                    self.decimal_confidenceintervals)
            assert_approx_equal(conf1[i][1], conf2[i][1],
                    self.decimal_confidenceintervals)

    decimal_conf_int_subset = DECIMAL_4
    def test_conf_int_subset(self):
        if len(self.res1.params) > 1:
            ci1 = self.res1.conf_int(cols=(1,2))
            ci2 = self.res1.conf_int()[1:3]
            assert_almost_equal(ci1, ci2, self.decimal_conf_int_subset)
        else:
            pass

    decimal_scale = DECIMAL_4
    def test_scale(self):
        assert_almost_equal(self.res1.scale, self.res2.scale,
                self.decimal_scale)

    decimal_rsquared = DECIMAL_4
    def test_rsquared(self):
        assert_almost_equal(self.res1.rsquared, self.res2.rsquared,
                self.decimal_rsquared)

    decimal_rsquared_adj = DECIMAL_4
    def test_rsquared_adj(self):
        assert_almost_equal(self.res1.rsquared_adj, self.res2.rsquared_adj,
                    self.decimal_rsquared_adj)

    def test_degrees(self):
        assert_equal(self.res1.model.df_model, self.res2.df_model)
        assert_equal(self.res1.model.df_resid, self.res2.df_resid)

    decimal_ess = DECIMAL_4
    def test_ess(self):
        """
        Explained Sum of Squares
        """
        assert_almost_equal(self.res1.ess, self.res2.ess,
                    self.decimal_ess)

    decimal_ssr = DECIMAL_4
    def test_sumof_squaredresids(self):
        assert_almost_equal(self.res1.ssr, self.res2.ssr, self.decimal_ssr)

    decimal_mse_resid = DECIMAL_4
    def test_mse_resid(self):
        """
        Mean squared error of residuals
        """
        assert_almost_equal(self.res1.mse_model, self.res2.mse_model,
                    self.decimal_mse_resid)

    decimal_mse_model = DECIMAL_4
    def test_mse_model(self):
        assert_almost_equal(self.res1.mse_resid, self.res2.mse_resid,
                    self.decimal_mse_model)

    decimal_fvalue = DECIMAL_4
    def test_fvalue(self):
        #didn't change this, not sure it should complain -inf not equal -inf
        #if not (np.isinf(self.res1.fvalue) and np.isinf(self.res2.fvalue)):
        assert_almost_equal(self.res1.fvalue, self.res2.fvalue,
                self.decimal_fvalue)

    decimal_loglike = DECIMAL_4
    def test_loglike(self):
        assert_almost_equal(self.res1.llf, self.res2.llf, self.decimal_loglike)

    decimal_aic = DECIMAL_4
    def test_aic(self):
        assert_almost_equal(self.res1.aic, self.res2.aic, self.decimal_aic)

    decimal_bic = DECIMAL_4
    def test_bic(self):
        assert_almost_equal(self.res1.bic, self.res2.bic, self.decimal_bic)

    decimal_pvalues = DECIMAL_4
    def test_pvalues(self):
        assert_almost_equal(self.res1.pvalues, self.res2.pvalues,
            self.decimal_pvalues)

    decimal_wresid = DECIMAL_4
    def test_wresid(self):
        assert_almost_equal(self.res1.wresid, self.res2.wresid,
            self.decimal_wresid)

    decimal_resids = DECIMAL_4
    def test_resids(self):
        assert_almost_equal(self.res1.resid, self.res2.resid,
            self.decimal_resids)

#TODO: test fittedvalues and what else?

class TestOLS(CheckRegressionResults):
    @classmethod
    def setupClass(cls):
        from results.results_regression import Longley
        data = longley.load()
        data.exog = add_constant(data.exog)
        res1 = OLS(data.endog, data.exog).fit()
        res2 = Longley()
        res2.wresid = res1.wresid # workaround hack
        cls.res1 = res1
        cls.res2 = res2

        res_qr = OLS(data.endog, data.exog).fit(method="qr")
        cls.res_qr = res_qr


#  Robust error tests.  Compare values computed with SAS
    def test_HC0_errors(self):
        '''
        They are split up because the copied results do not have any DECIMAL_4
        places for the last place.
        '''
        assert_almost_equal(self.res1.HC0_se[:-1],
                self.res2.HC0_se[:-1], DECIMAL_4)
        assert_approx_equal(np.round(self.res1.HC0_se[-1]), self.res2.HC0_se[-1])

    def test_HC1_errors(self):
        assert_almost_equal(self.res1.HC1_se[:-1],
                self.res2.HC1_se[:-1], DECIMAL_4)
        assert_approx_equal(self.res1.HC1_se[-1], self.res2.HC1_se[-1])

    def test_HC2_errors(self):
        assert_almost_equal(self.res1.HC2_se[:-1],
                self.res2.HC2_se[:-1], DECIMAL_4)
        assert_approx_equal(self.res1.HC2_se[-1], self.res2.HC2_se[-1])

    def test_HC3_errors(self):
        assert_almost_equal(self.res1.HC3_se[:-1],
                self.res2.HC3_se[:-1], DECIMAL_4)
        assert_approx_equal(self.res1.HC3_se[-1], self.res2.HC3_se[-1])

    def test_qr_params(self):
        assert_almost_equal(self.res1.params,
                self.res_qr.params, 7)

    def test_qr_normalized_cov_params(self):
        #todo: need assert_close
        assert_almost_equal(np.ones_like(self.res1.normalized_cov_params),
                self.res1.normalized_cov_params /
                self.res_qr.normalized_cov_params, 5)


class TestFtest(object):
    """
    Tests f_test vs. RegressionResults
    """
    @classmethod
    def setupClass(cls):
        data = longley.load()
        data.exog = add_constant(data.exog)
        cls.res1 = OLS(data.endog, data.exog).fit()
        R = np.identity(7)[:-1,:]
        cls.Ftest = cls.res1.f_test(R)

    def test_F(self):
        assert_almost_equal(self.Ftest.fvalue, self.res1.fvalue, DECIMAL_4)

    def test_p(self):
        assert_almost_equal(self.Ftest.pvalue, self.res1.f_pvalue, DECIMAL_4)

    def test_Df_denom(self):
        assert_equal(self.Ftest.df_denom, self.res1.model.df_resid)

    def test_Df_num(self):
        assert_equal(self.Ftest.df_num, 6)

class TestFTest2(object):
    '''
    A joint test that the coefficient on
    GNP = the coefficient on UNEMP  and that the coefficient on
    POP = the coefficient on YEAR for the Longley dataset.

    Ftest1 is from statsmodels.  Results are from Rpy using R's car library.
    '''
    @classmethod
    def setupClass(cls):
        data = longley.load()
        data.exog = add_constant(data.exog)
        res1 = OLS(data.endog, data.exog).fit()
        R2 = [[0,1,-1,0,0,0,0],[0, 0, 0, 0, 1, -1, 0]]
        cls.Ftest1 = res1.f_test(R2)
#        if skipR:
#            raise SkipTest, "Rpy not installed"
#        try:
#            r.library('car')
#        except RPyRException:
#            raise SkipTest, "car library not installed for R"
#        self.R2 = [[0,1,-1,0,0,0,0],[0, 0, 0, 0, 1, -1, 0]]
#        self.Ftest2 = self.res1.f_test(self.R2)
#        self.R_Results = RModel(self.data.endog, self.data.exog, r.lm).robj
#        self.F = r.linear_hypothesis(self.R_Results,
#                r.c('x.2 = x.3', 'x.5 = x.6'))


    def test_fvalue(self):
        assert_almost_equal(self.Ftest1.fvalue, 9.7404618732968196, DECIMAL_4)

    def test_pvalue(self):
        assert_almost_equal(self.Ftest1.pvalue, 0.0056052885317493459,
                DECIMAL_4)

    def test_df_denom(self):
        assert_equal(self.Ftest1.df_denom, 9)

    def test_df_num(self):
        assert_equal(self.Ftest1.df_num, 2)

class TestFtestQ(object):
    """
    A joint hypothesis test that Rb = q.  Coefficient tests are essentially
    made up.  Test values taken from Stata.
    """
    @classmethod
    def setupClass(cls):
        data = longley.load()
        data.exog = add_constant(data.exog)
        res1 = OLS(data.endog, data.exog).fit()
        R = np.array([[0,1,1,0,0,0,0],
              [0,1,0,1,0,0,0],
              [0,1,0,0,0,0,0],
              [0,0,0,0,1,0,0],
              [0,0,0,0,0,1,0]])
        q = np.array([0,0,0,1,0])
        cls.Ftest1 = res1.f_test(R,q)

    def test_fvalue(self):
        assert_almost_equal(self.Ftest1.fvalue, 70.115557, 5)

    def test_pvalue(self):
        assert_almost_equal(self.Ftest1.pvalue, 6.229e-07, 10)

    def test_df_denom(self):
        assert_equal(self.Ftest1.df_denom, 9)

    def test_df_num(self):
        assert_equal(self.Ftest1.df_num, 5)


class TestTtest(object):
    '''
    Test individual t-tests.  Ie., are the coefficients significantly
    different than zero.

        '''
    @classmethod
    def setupClass(cls):
        data = longley.load()
        data.exog = add_constant(data.exog)
        cls.res1 = OLS(data.endog, data.exog).fit()
        R = np.identity(7)
        cls.Ttest = cls.res1.t_test(R)

#    def setup(self):
#        if skipR:
#            raise SkipTest, "Rpy not installed"
#        else:
#        self.R_Results = RModel(data.endog, data.exog, r.lm).robj

    def test_tvalue(self):
        assert_almost_equal(self.Ttest.tvalue, self.res1.tvalues, DECIMAL_4)

    def test_sd(self):
        assert_almost_equal(self.Ttest.sd, self.res1.bse, DECIMAL_4)

    def test_pvalue(self):
        assert_almost_equal(self.Ttest.pvalue,
                student_t.sf(np.abs(self.res1.tvalues),self.res1.model.df_resid),
                    DECIMAL_4)

    def test_df_denom(self):
        assert_equal(self.Ttest.df_denom, self.res1.model.df_resid)

    def test_effect(self):
        assert_almost_equal(self.Ttest.effect, self.res1.params)

class TestTtest2(object):
    '''
    Tests the hypothesis that the coefficients on POP and YEAR
    are equal.

    Results from RPy using 'car' package.
    '''
    @classmethod
    def setupClass(cls):
#        if skipR:
#            raise SkipTest, "Rpy not installed"
#        try:
#            r.library('car')
#        except RPyRException:
#            raise SkipTest, "car library not installed for R"
        R = np.zeros(7)
        R[4:6] = [1,-1]
#        self.R = R
        data = longley.load()
        data.exog = add_constant(data.exog)
        res1 = OLS(data.endog, data.exog).fit()
        cls.Ttest1 = res1.t_test(R)
#        self.R_Results = RModel(self.data.endog, self.data.exog, r.lm).robj
#        self.Ttest2 = r.linear_hypothesis(self.R_Results, 'x.5 = x.6')
#        t = np.sign(np.inner(R, self.res1.params))*\
#            np.sqrt(self.Ttest2['F'][1])
#        self.t = t
#        self.effect = np.sum(R * self.res1.params)

    def test_tvalue(self):
        assert_almost_equal(self.Ttest1.tvalue, -4.0167754636397284,
                DECIMAL_4)

    def test_sd(self):
        assert_almost_equal(self.Ttest1.sd, 455.39079425195314, DECIMAL_4)

    def test_pvalue(self):
        assert_almost_equal(self.Ttest1.pvalue, 0.0015163772380932246,
            DECIMAL_4)

    def test_df_denom(self):
        assert_equal(self.Ttest1.df_denom, 9)

    def test_effect(self):
        assert_almost_equal(self.Ttest1.effect, -1829.2025687186533, DECIMAL_4)

class TestGLS(object):
    '''
    These test results were obtained by replication with R.
    '''
    @classmethod
    def setupClass(cls):
        from results.results_regression import LongleyGls

        data = longley.load()
        exog = add_constant(np.column_stack(\
                (data.exog[:,1],data.exog[:,4])))
        tmp_results = OLS(data.endog, exog).fit()
        rho = np.corrcoef(tmp_results.resid[1:],
                tmp_results.resid[:-1])[0][1] # by assumption
        order = toeplitz(np.arange(16))
        sigma = rho**order
        GLS_results = GLS(data.endog, exog, sigma=sigma).fit()
        cls.res1 = GLS_results
        cls.res2 = LongleyGls()

    def test_aic(self):
        assert_approx_equal(self.res1.aic+2, self.res2.aic, 3)

    def test_bic(self):
        assert_approx_equal(self.res1.bic, self.res2.bic, 2)

    def test_loglike(self):
        assert_almost_equal(self.res1.llf, self.res2.llf, DECIMAL_0)

    def test_params(self):
        assert_almost_equal(self.res1.params, self.res2.params, DECIMAL_1)

    def test_resid(self):
        assert_almost_equal(self.res1.resid, self.res2.resid, DECIMAL_4)

    def test_scale(self):
        assert_almost_equal(self.res1.scale, self.res2.scale, DECIMAL_4)

    def test_tvalues(self):
        assert_almost_equal(self.res1.tvalues, self.res2.tvalues, DECIMAL_4)

    def test_standarderrors(self):
        assert_almost_equal(self.res1.bse, self.res2.bse, DECIMAL_4)

    def test_fittedvalues(self):
        assert_almost_equal(self.res1.fittedvalues, self.res2.fittedvalues,
                DECIMAL_4)

    def test_pvalues(self):
        assert_almost_equal(self.res1.pvalues, self.res2.pvalues, DECIMAL_4)

class TestGLS_nosigma(CheckRegressionResults):
    '''
    Test that GLS with no argument is equivalent to OLS.
    '''
    @classmethod
    def setupClass(cls):
        data = longley.load()
        data.exog = add_constant(data.exog)
        ols_res = OLS(data.endog, data.exog).fit()
        gls_res = GLS(data.endog, data.exog).fit()
        cls.res1 = gls_res
        cls.res2 = ols_res
#        self.res2.conf_int = self.res2.conf_int()

#    def check_confidenceintervals(self, conf1, conf2):
#        assert_almost_equal(conf1, conf2, DECIMAL_4)

#class TestWLS(CheckRegressionResults):
#    '''
#    Test WLS with Greene's credit card data
#    '''
#    def __init__(self):
#        from scikits.statsmodels.datasets.ccard import load
#        self.data = load()
#        self.res1 = WLS(self.data.endog, self.data.exog,
#                weights=1/self.data.exog[:,2]).fit()
#FIXME: triaged results for noconstant
#        self.res1.ess = self.res1.uncentered_tss - self.res1.ssr
#        self.res1.rsquared = self.res1.ess/self.res1.uncentered_tss
#        self.res1.mse_model = self.res1.ess/(self.res1.df_model + 1)
#        self.res1.fvalue = self.res1.mse_model/self.res1.mse_resid
#        self.res1.rsquared_adj = 1 -(self.res1.nobs)/(self.res1.df_resid)*\
#                (1-self.res1.rsquared)

#    def setup(self):
#        if skipR:
#            raise SkipTest, "Rpy not installed"
#        self.res2 = RModel(self.data.endog, self.data.exog, r.lm,
#                        weights=1/self.data.exog[:,2])
#        self.res2.wresid = self.res2.rsum['residuals']
#        self.res2.scale = self.res2.scale**2 # R has sigma not sigma**2

#    def check_confidenceintervals(self, conf1, conf2):
#        assert_almost_equal(conf1, conf2, DECIMAL_4)


class TestWLS_GLS(CheckRegressionResults):
    @classmethod
    def setupClass(cls):
        from scikits.statsmodels.datasets.ccard import load
        data = load()
        cls.res1 = WLS(data.endog, data.exog, weights = 1/data.exog[:,2]).fit()
        cls.res2 = GLS(data.endog, data.exog, sigma = data.exog[:,2]).fit()

    def check_confidenceintervals(self, conf1, conf2):
        assert_almost_equal(conf1, conf2(), DECIMAL_4)

class TestWLS_OLS(CheckRegressionResults):
    @classmethod
    def setupClass(cls):
        data = longley.load()
        data.exog = add_constant(data.exog)
        cls.res1 = OLS(data.endog, data.exog).fit()
        cls.res2 = WLS(data.endog, data.exog).fit()

    def check_confidenceintervals(self, conf1, conf2):
        assert_almost_equal(conf1, conf2(), DECIMAL_4)

class TestGLS_OLS(CheckRegressionResults):
    @classmethod
    def setupClass(cls):
        data = longley.load()
        data.exog = add_constant(data.exog)
        cls.res1 = GLS(data.endog, data.exog).fit()
        cls.res2 = OLS(data.endog, data.exog).fit()

    def check_confidenceintervals(self, conf1, conf2):
        assert_almost_equal(conf1, conf2(), DECIMAL_4)

#TODO: test AR
# why the two-stage in AR?
#class test_ar(object):
#    from scikits.statsmodels.datasets.sunspots import load
#    data = load()
#    model = AR(data.endog, rho=4).fit()
#    R_res = RModel(data.endog, aic="FALSE", order_max=4)

#    def test_params(self):
#        assert_almost_equal(self.model.rho,
#        pass

#    def test_order(self):
# In R this can be defined or chosen by minimizing the AIC if aic=True
#        pass


class TestYuleWalker(object):
    @classmethod
    def setupClass(cls):
        from scikits.statsmodels.datasets.sunspots import load
        data = load()
        cls.rho, cls.sigma = yule_walker(data.endog, order=4,
                method="mle")
        cls.R_params = [1.2831003105694765, -0.45240924374091945,
                -0.20770298557575195, 0.047943648089542337]

#    def setup(self):
#        if skipR:
#            raise SkipTest, "Rpy not installed."
#
#        R_results = r.ar(self.data.endog, aic="FALSE", order_max=4)
#        self.R_params = R_results['ar']

    def test_params(self):
        assert_almost_equal(self.rho, self.R_params, DECIMAL_4)

class TestDataDimensions(CheckRegressionResults):
    @classmethod
    def setupClass(cls):
        np.random.seed(54321)
        cls.endog_n_ = np.random.uniform(0,20,size=30)
        cls.endog_n_one = cls.endog_n_[:,None]
        cls.exog_n_ = np.random.uniform(0,20,size=30)
        cls.exog_n_one = cls.exog_n_[:,None]
        cls.degen_exog = cls.exog_n_one[:-1]
        cls.mod1 = OLS(cls.endog_n_one, cls.exog_n_one)
        cls.mod1.df_model += 1
        #cls.mod1.df_resid -= 1
        cls.res1 = cls.mod1.fit()
        # Note that these are created for every subclass..
        # A little extra overhead probably
        cls.mod2 = OLS(cls.endog_n_one, cls.exog_n_one)
        cls.mod2.df_model += 1
        cls.res2 = cls.mod2.fit()

    def check_confidenceintervals(self, conf1, conf2):
        assert_almost_equal(conf1, conf2(), DECIMAL_4)

class TestNxNx(TestDataDimensions):
    @classmethod
    def setupClass(cls):
        super(TestNxNx, cls).setupClass()
        cls.mod2 = OLS(cls.endog_n_, cls.exog_n_)
        cls.mod2.df_model += 1
        cls.res2 = cls.mod2.fit()

class TestNxOneNx(TestDataDimensions):
    @classmethod
    def setupClass(cls):
        super(TestNxOneNx, cls).setupClass()
        cls.mod2 = OLS(cls.endog_n_one, cls.exog_n_)
        cls.mod2.df_model += 1
        cls.res2 = cls.mod2.fit()

class TestNxNxOne(TestDataDimensions):
    @classmethod
    def setupClass(cls):
        super(TestNxNxOne, cls).setupClass()
        cls.mod2 = OLS(cls.endog_n_, cls.exog_n_one)
        cls.mod2.df_model += 1
        cls.res2 = cls.mod2.fit()

def test_bad_size():
    np.random.seed(54321)
    data = np.random.uniform(0,20,31)
    assert_raises(ValueError, OLS, data, data[1:])

if __name__=="__main__":

    import nose
    run_module_suite()
    #nose.runmodule(argv=[__file__,'-vvs','-x'], exit=False) #, '--pdb'
steminc / scikits.statsmodels python

Products

About

Resources

Contact Gemfury