Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

alkaline-ml / statsmodels   python

Repository URL to install this package:

Version: 0.11.1 

/ robust / tests / test_rlm.py

"""
Test functions for sm.rlm
"""
import numpy as np
from numpy.testing import assert_almost_equal, assert_allclose
import pytest
from scipy import stats
import statsmodels.api as sm
from statsmodels.robust.robust_linear_model import RLM
from statsmodels.robust import norms
from statsmodels.robust.scale import HuberScale

DECIMAL_4 = 4
DECIMAL_3 = 3
DECIMAL_2 = 2
DECIMAL_1 = 1


class CheckRlmResultsMixin(object):
    '''
    res2 contains  results from Rmodelwrap or were obtained from a statistical
    packages such as R, Stata, or SAS and written to results.results_rlm

    Covariance matrices were obtained from SAS and are imported from
    results.results_rlm
    '''
    def test_params(self):
        assert_almost_equal(self.res1.params, self.res2.params, DECIMAL_4)

    decimal_standarderrors = DECIMAL_4

    def test_standarderrors(self):
        assert_almost_equal(self.res1.bse, self.res2.bse,
                            self.decimal_standarderrors)

    # TODO: get other results from SAS, though if it works for one...
    def test_confidenceintervals(self):
        if not hasattr(self.res2, 'conf_int'):
            pytest.skip("Results from R")

        assert_almost_equal(self.res1.conf_int(), self.res2.conf_int(),
                            DECIMAL_4)

    decimal_scale = DECIMAL_4

    def test_scale(self):
        assert_almost_equal(self.res1.scale, self.res2.scale,
                            self.decimal_scale)

    def test_weights(self):
        assert_almost_equal(self.res1.weights, self.res2.weights, DECIMAL_4)

    def test_residuals(self):
        assert_almost_equal(self.res1.resid, self.res2.resid, DECIMAL_4)

    def test_degrees(self):
        assert_almost_equal(self.res1.model.df_model, self.res2.df_model,
                            DECIMAL_4)
        assert_almost_equal(self.res1.model.df_resid, self.res2.df_resid,
                            DECIMAL_4)

    def test_bcov_unscaled(self):
        if not hasattr(self.res2, 'bcov_unscaled'):
            pytest.skip("No unscaled cov matrix from SAS")

        assert_almost_equal(self.res1.bcov_unscaled,
                            self.res2.bcov_unscaled, DECIMAL_4)

    decimal_bcov_scaled = DECIMAL_4

    def test_bcov_scaled(self):
        assert_almost_equal(self.res1.bcov_scaled, self.res2.h1,
                            self.decimal_bcov_scaled)
        assert_almost_equal(self.res1.h2, self.res2.h2,
                            self.decimal_bcov_scaled)
        assert_almost_equal(self.res1.h3, self.res2.h3,
                            self.decimal_bcov_scaled)

    def test_tvalues(self):
        if not hasattr(self.res2, 'tvalues'):
            pytest.skip("No tvalues in benchmark")

        assert_allclose(self.res1.tvalues, self.res2.tvalues, rtol=0.003)

    def test_tpvalues(self):
        # test comparing tvalues and pvalues with normal implementation
        # make sure they use normal distribution (inherited in results class)
        params = self.res1.params
        tvalues = params / self.res1.bse
        pvalues = stats.norm.sf(np.abs(tvalues)) * 2
        half_width = stats.norm.isf(0.025) * self.res1.bse
        conf_int = np.column_stack((params - half_width, params + half_width))

        assert_almost_equal(self.res1.tvalues, tvalues)
        assert_almost_equal(self.res1.pvalues, pvalues)
        assert_almost_equal(self.res1.conf_int(), conf_int)


class TestRlm(CheckRlmResultsMixin):
    @classmethod
    def setup_class(cls):
        from statsmodels.datasets.stackloss import load
        cls.data = load(as_pandas=False)  # class attributes for subclasses
        cls.data.exog = sm.add_constant(cls.data.exog, prepend=False)
        # Test precisions
        cls.decimal_standarderrors = DECIMAL_1
        cls.decimal_scale = DECIMAL_3

        model = RLM(cls.data.endog, cls.data.exog, M=norms.HuberT())
        cls.model = model
        results = model.fit()
        h2 = model.fit(cov="H2").bcov_scaled
        h3 = model.fit(cov="H3").bcov_scaled
        cls.res1 = results
        cls.res1.h2 = h2
        cls.res1.h3 = h3

    def setup(self):
        from .results.results_rlm import Huber
        self.res2 = Huber()

    @pytest.mark.smoke
    def test_summary(self):
        self.res1.summary()

    @pytest.mark.smoke
    def test_summary2(self):
        self.res1.summary2()

    @pytest.mark.smoke
    def test_chisq(self):
        assert isinstance(self.res1.chisq, np.ndarray)

    @pytest.mark.smoke
    def test_predict(self):
        assert isinstance(self.model.predict(self.res1.params), np.ndarray)


class TestHampel(TestRlm):
    @classmethod
    def setup_class(cls):
        super(TestHampel, cls).setup_class()
        # Test precisions
        cls.decimal_standarderrors = DECIMAL_2
        cls.decimal_scale = DECIMAL_3
        cls.decimal_bcov_scaled = DECIMAL_3

        model = RLM(cls.data.endog, cls.data.exog, M=norms.Hampel())
        results = model.fit()
        h2 = model.fit(cov="H2").bcov_scaled
        h3 = model.fit(cov="H3").bcov_scaled
        cls.res1 = results
        cls.res1.h2 = h2
        cls.res1.h3 = h3

    def setup(self):
        from .results.results_rlm import Hampel
        self.res2 = Hampel()


class TestRlmBisquare(TestRlm):
    @classmethod
    def setup_class(cls):
        super(TestRlmBisquare, cls).setup_class()
        # Test precisions
        cls.decimal_standarderrors = DECIMAL_1

        model = RLM(cls.data.endog, cls.data.exog, M=norms.TukeyBiweight())
        results = model.fit()
        h2 = model.fit(cov="H2").bcov_scaled
        h3 = model.fit(cov="H3").bcov_scaled
        cls.res1 = results
        cls.res1.h2 = h2
        cls.res1.h3 = h3

    def setup(self):
        from .results.results_rlm import BiSquare
        self.res2 = BiSquare()


class TestRlmAndrews(TestRlm):
    @classmethod
    def setup_class(cls):
        super(TestRlmAndrews, cls).setup_class()

        model = RLM(cls.data.endog, cls.data.exog, M=norms.AndrewWave())
        results = model.fit()
        h2 = model.fit(cov="H2").bcov_scaled
        h3 = model.fit(cov="H3").bcov_scaled
        cls.res1 = results
        cls.res1.h2 = h2
        cls.res1.h3 = h3

    def setup(self):
        from .results.results_rlm import Andrews
        self.res2 = Andrews()


# --------------------------------------------------------------------
# tests with Huber scaling

class TestRlmHuber(CheckRlmResultsMixin):
    @classmethod
    def setup_class(cls):
        from statsmodels.datasets.stackloss import load
        cls.data = load(as_pandas=False)
        cls.data.exog = sm.add_constant(cls.data.exog, prepend=False)

        model = RLM(cls.data.endog, cls.data.exog, M=norms.HuberT())
        results = model.fit(scale_est=HuberScale())
        h2 = model.fit(cov="H2", scale_est=HuberScale()).bcov_scaled
        h3 = model.fit(cov="H3", scale_est=HuberScale()).bcov_scaled
        cls.res1 = results
        cls.res1.h2 = h2
        cls.res1.h3 = h3

    def setup(self):
        from .results.results_rlm import HuberHuber
        self.res2 = HuberHuber()


class TestHampelHuber(TestRlm):
    @classmethod
    def setup_class(cls):
        super(TestHampelHuber, cls).setup_class()

        model = RLM(cls.data.endog, cls.data.exog, M=norms.Hampel())
        results = model.fit(scale_est=HuberScale())
        h2 = model.fit(cov="H2", scale_est=HuberScale()).bcov_scaled
        h3 = model.fit(cov="H3", scale_est=HuberScale()).bcov_scaled
        cls.res1 = results
        cls.res1.h2 = h2
        cls.res1.h3 = h3

    def setup(self):
        from .results.results_rlm import HampelHuber
        self.res2 = HampelHuber()


class TestRlmBisquareHuber(TestRlm):
    @classmethod
    def setup_class(cls):
        super(TestRlmBisquareHuber, cls).setup_class()

        model = RLM(cls.data.endog, cls.data.exog, M=norms.TukeyBiweight())
        results = model.fit(scale_est=HuberScale())
        h2 = model.fit(cov="H2", scale_est=HuberScale()).bcov_scaled
        h3 = model.fit(cov="H3", scale_est=HuberScale()).bcov_scaled
        cls.res1 = results
        cls.res1.h2 = h2
        cls.res1.h3 = h3

    def setup(self):
        from .results.results_rlm import BisquareHuber
        self.res2 = BisquareHuber()


class TestRlmAndrewsHuber(TestRlm):
    @classmethod
    def setup_class(cls):
        super(TestRlmAndrewsHuber, cls).setup_class()

        model = RLM(cls.data.endog, cls.data.exog, M=norms.AndrewWave())
        results = model.fit(scale_est=HuberScale())
        h2 = model.fit(cov="H2", scale_est=HuberScale()).bcov_scaled
        h3 = model.fit(cov="H3", scale_est=HuberScale()).bcov_scaled
        cls.res1 = results
        cls.res1.h2 = h2
        cls.res1.h3 = h3

    def setup(self):
        from .results.results_rlm import AndrewsHuber
        self.res2 = AndrewsHuber()


class TestRlmSresid(CheckRlmResultsMixin):
    # Check GH#187
    @classmethod
    def setup_class(cls):
        from statsmodels.datasets.stackloss import load
        cls.data = load(as_pandas=False)  # class attributes for subclasses
        cls.data.exog = sm.add_constant(cls.data.exog, prepend=False)
        # Test precisions
        cls.decimal_standarderrors = DECIMAL_1
        cls.decimal_scale = DECIMAL_3

        model = RLM(cls.data.endog, cls.data.exog, M=norms.HuberT())
        results = model.fit(conv='sresid')
        h2 = model.fit(cov="H2").bcov_scaled
        h3 = model.fit(cov="H3").bcov_scaled
        cls.res1 = results
        cls.res1.h2 = h2
        cls.res1.h3 = h3

    def setup(self):
        from .results.results_rlm import Huber
        self.res2 = Huber()


@pytest.mark.smoke
def test_missing():
    # see GH#2083
    import statsmodels.formula.api as smf

    d = {'Foo': [1, 2, 10, 149], 'Bar': [1, 2, 3, np.nan]}
    smf.rlm('Foo ~ Bar', data=d)


def test_rlm_start_values():
    data = sm.datasets.stackloss.load_pandas()
    exog = sm.add_constant(data.exog, prepend=False)
    model = RLM(data.endog, exog, M=norms.HuberT())
    results = model.fit()
    start_params = [0.7156402, 1.29528612, -0.15212252, -39.91967442]
    result_sv = model.fit(start_params=start_params)
    assert_allclose(results.params, result_sv.params)


def test_rlm_start_values_errors():
    data = sm.datasets.stackloss.load_pandas()
    exog = sm.add_constant(data.exog, prepend=False)
    model = RLM(data.endog, exog, M=norms.HuberT())
    start_params = [0.7156402, 1.29528612, -0.15212252]
    with pytest.raises(ValueError):
        model.fit(start_params=start_params)

    start_params = np.array([start_params, start_params]).T
    with pytest.raises(ValueError):
        model.fit(start_params=start_params)


@pytest.fixture(scope='module',
                params=[norms.AndrewWave, norms.LeastSquares, norms.HuberT,
                        norms.TrimmedMean, norms.TukeyBiweight, norms.Hampel,
                        norms.RamsayE])
def norm(request):
    return request.param()


@pytest.fixture(scope='module')
def perfect_fit_data(request):
    from statsmodels.tools.tools import Bunch
    rs = np.random.RandomState(1249328932)
    exog = rs.standard_normal((1000, 1))
    endog = exog + exog ** 2
Loading ...