Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

alkaline-ml / statsmodels   python

Repository URL to install this package:

Version: 0.11.1 

/ discrete / tests / test_count_model.py

from statsmodels.compat.platform import PLATFORM_LINUX32

import numpy as np
from numpy.testing import (assert_,
                           assert_equal, assert_array_equal, assert_allclose)
import pytest

import statsmodels.api as sm
from .results.results_discrete import RandHIE
from .test_discrete import CheckModelMixin


class CheckGeneric(CheckModelMixin):
    def test_params(self):
        assert_allclose(self.res1.params, self.res2.params, atol=1e-5, rtol=1e-5)

    def test_llf(self):
        assert_allclose(self.res1.llf, self.res2.llf, atol=1e-5, rtol=1e-5)

    def test_conf_int(self):
        assert_allclose(self.res1.conf_int(), self.res2.conf_int, atol=1e-3, rtol=1e-5)

    def test_bse(self):
        assert_allclose(self.res1.bse, self.res2.bse, atol=1e-3, rtol=1e-3)

    def test_aic(self):
        assert_allclose(self.res1.aic, self.res2.aic, atol=1e-2, rtol=1e-2)

    def test_bic(self):
        assert_allclose(self.res1.aic, self.res2.aic, atol=1e-1, rtol=1e-1)

    def test_t(self):
        unit_matrix = np.identity(self.res1.params.size)
        t_test = self.res1.t_test(unit_matrix)
        assert_allclose(self.res1.tvalues, t_test.tvalue)

    def test_fit_regularized(self):
        model = self.res1.model

        alpha = np.ones(len(self.res1.params))
        alpha[-2:] = 0
        res_reg = model.fit_regularized(alpha=alpha*0.01, disp=0, maxiter=500)

        assert_allclose(res_reg.params[2:], self.res1.params[2:],
            atol=5e-2, rtol=5e-2)

    def test_init_keys(self):
        init_kwds = self.res1.model._get_init_kwds()
        assert_equal(set(init_kwds.keys()), set(self.init_keys))
        for key, value in self.init_kwds.items():
            assert_equal(init_kwds[key], value)

    def test_null(self):
        # call llnull, so null model is attached, side effect of cached attribute
        self.res1.llnull
        # check model instead of value
        exog_null = self.res1.res_null.model.exog
        exog_infl_null = self.res1.res_null.model.exog_infl
        assert_array_equal(exog_infl_null.shape,
                     (len(self.res1.model.exog), 1))
        assert_equal(np.ptp(exog_null), 0)
        assert_equal(np.ptp(exog_infl_null), 0)

    @pytest.mark.smoke
    def test_summary(self):
        summ = self.res1.summary()
        # GH 4581
        assert 'Covariance Type:' in str(summ)


class TestZeroInflatedModel_logit(CheckGeneric):
    @classmethod
    def setup_class(cls):
        data = sm.datasets.randhie.load(as_pandas=False)
        cls.endog = data.endog
        exog = sm.add_constant(data.exog[:,1:4], prepend=False)
        exog_infl = sm.add_constant(data.exog[:,0], prepend=False)
        cls.res1 = sm.ZeroInflatedPoisson(data.endog, exog,
            exog_infl=exog_infl, inflation='logit').fit(method='newton', maxiter=500,
                                                        disp=0)
        # for llnull test
        cls.res1._results._attach_nullmodel = True
        cls.init_keys = ['exog_infl', 'exposure', 'inflation', 'offset']
        cls.init_kwds = {'inflation': 'logit'}
        res2 = RandHIE.zero_inflated_poisson_logit
        cls.res2 = res2

class TestZeroInflatedModel_probit(CheckGeneric):
    @classmethod
    def setup_class(cls):
        data = sm.datasets.randhie.load(as_pandas=False)
        cls.endog = data.endog
        exog = sm.add_constant(data.exog[:,1:4], prepend=False)
        exog_infl = sm.add_constant(data.exog[:,0], prepend=False)
        cls.res1 = sm.ZeroInflatedPoisson(data.endog, exog,
            exog_infl=exog_infl, inflation='probit').fit(method='newton', maxiter=500,
                                                         disp=0)
        # for llnull test
        cls.res1._results._attach_nullmodel = True
        cls.init_keys = ['exog_infl', 'exposure', 'inflation', 'offset']
        cls.init_kwds = {'inflation': 'probit'}
        res2 = RandHIE.zero_inflated_poisson_probit
        cls.res2 = res2

    @pytest.mark.skipif(PLATFORM_LINUX32, reason="Fails on 32-bit Linux")
    def test_fit_regularized(self):
        super().test_fit_regularized()

class TestZeroInflatedModel_offset(CheckGeneric):
    @classmethod
    def setup_class(cls):
        data = sm.datasets.randhie.load(as_pandas=False)
        cls.endog = data.endog
        exog = sm.add_constant(data.exog[:,1:4], prepend=False)
        exog_infl = sm.add_constant(data.exog[:,0], prepend=False)
        cls.res1 = sm.ZeroInflatedPoisson(data.endog, exog,
            exog_infl=exog_infl, offset=data.exog[:,7]).fit(method='newton',
                                                            maxiter=500,
                                                            disp=0)
        # for llnull test
        cls.res1._results._attach_nullmodel = True
        cls.init_keys = ['exog_infl', 'exposure', 'inflation', 'offset']
        cls.init_kwds = {'inflation': 'logit'}
        res2 = RandHIE.zero_inflated_poisson_offset
        cls.res2 = res2

    def test_exposure(self):
        # This test mostly the equivalence of offset and exposure = exp(offset)
        # use data arrays from class model
        model1 = self.res1.model
        offset = model1.offset
        model3 = sm.ZeroInflatedPoisson(model1.endog, model1.exog,
            exog_infl=model1.exog_infl, exposure=np.exp(offset))
        res3 = model3.fit(start_params=self.res1.params,
                          method='newton', maxiter=500, disp=0)

        assert_allclose(res3.params, self.res1.params, atol=1e-6, rtol=1e-6)
        fitted1 = self.res1.predict()
        fitted3 = self.res1.predict()
        assert_allclose(fitted3, fitted1, atol=1e-6, rtol=1e-6)

        ex = model1.exog
        ex_infl = model1.exog_infl
        offset = model1.offset
        fitted1_0 = self.res1.predict(exog=ex, exog_infl=ex_infl,
                                      offset=offset)
        fitted3_0 = res3.predict(exog=ex, exog_infl=ex_infl,
                                 exposure=np.exp(offset))
        assert_allclose(fitted3_0, fitted1_0, atol=1e-6, rtol=1e-6)


        ex = model1.exog[:10:2]
        ex_infl = model1.exog_infl[:10:2]
        offset = offset[:10:2]
        # # TODO: this raises with shape mismatch,
        # # i.e. uses offset or exposure from model -> fix it or not?
        # GLM.predict to setting offset and exposure to zero
        # fitted1_1 = self.res1.predict(exog=ex, exog_infl=ex_infl)
        # fitted3_1 = res3.predict(exog=ex, exog_infl=ex_infl)
        # assert_allclose(fitted3_1, fitted1_1, atol=1e-6, rtol=1e-6)

        fitted1_2 = self.res1.predict(exog=ex, exog_infl=ex_infl,
                                      offset=offset)
        fitted3_2 = res3.predict(exog=ex, exog_infl=ex_infl,
                                 exposure=np.exp(offset))
        assert_allclose(fitted3_2, fitted1_2, atol=1e-6, rtol=1e-6)
        assert_allclose(fitted1_2, fitted1[:10:2], atol=1e-6, rtol=1e-6)
        assert_allclose(fitted3_2, fitted1[:10:2], atol=1e-6, rtol=1e-6)


class TestZeroInflatedModelPandas(CheckGeneric):
    @classmethod
    def setup_class(cls):
        data = sm.datasets.randhie.load_pandas()
        cls.endog = data.endog
        cls.data = data
        exog = sm.add_constant(data.exog.iloc[:,1:4], prepend=False)
        exog_infl = sm.add_constant(data.exog.iloc[:,0], prepend=False)
        # we do not need to verify convergence here
        start_params = np.asarray([0.10337834587498942, -1.0459825102508549,
                                   -0.08219794475894268, 0.00856917434709146,
                                   -0.026795737379474334, 1.4823632430107334])
        model = sm.ZeroInflatedPoisson(data.endog, exog,
            exog_infl=exog_infl, inflation='logit')
        cls.res1 = model.fit(start_params=start_params, method='newton',
                             maxiter=500, disp=0)
        # for llnull test
        cls.res1._results._attach_nullmodel = True
        cls.init_keys = ['exog_infl', 'exposure', 'inflation', 'offset']
        cls.init_kwds = {'inflation': 'logit'}
        res2 = RandHIE.zero_inflated_poisson_logit
        cls.res2 = res2

    def test_names(self):
        param_names = ['inflate_lncoins', 'inflate_const', 'idp', 'lpi',
                       'fmde', 'const']
        assert_array_equal(self.res1.model.exog_names, param_names)
        assert_array_equal(self.res1.params.index.tolist(), param_names)
        assert_array_equal(self.res1.bse.index.tolist(), param_names)

        exog = sm.add_constant(self.data.exog.iloc[:,1:4], prepend=True)
        exog_infl = sm.add_constant(self.data.exog.iloc[:,0], prepend=True)
        param_names = ['inflate_const', 'inflate_lncoins', 'const', 'idp',
                       'lpi', 'fmde']
        model = sm.ZeroInflatedPoisson(self.data.endog, exog,
            exog_infl=exog_infl, inflation='logit')
        assert_array_equal(model.exog_names, param_names)


class TestZeroInflatedPoisson_predict(object):
    @classmethod
    def setup_class(cls):
        expected_params = [1, 0.5]
        np.random.seed(123)
        nobs = 200
        exog = np.ones((nobs, 2))
        exog[:nobs//2, 1] = 2
        mu_true = exog.dot(expected_params)
        cls.endog = sm.distributions.zipoisson.rvs(mu_true, 0.05,
                                                   size=mu_true.shape)
        model = sm.ZeroInflatedPoisson(cls.endog, exog)
        cls.res = model.fit(method='bfgs', maxiter=5000, maxfun=5000, disp=0)

    def test_mean(self):
        assert_allclose(self.res.predict().mean(), self.endog.mean(),
                        atol=1e-2, rtol=1e-2)

    def test_var(self):
        assert_allclose((self.res.predict().mean() *
                        self.res._dispersion_factor.mean()),
                        self.endog.var(), atol=5e-2, rtol=5e-2)

    def test_predict_prob(self):
        res = self.res
        endog = res.model.endog

        pr = res.predict(which='prob')
        pr2 = sm.distributions.zipoisson.pmf(np.arange(7)[:,None],
            res.predict(), 0.05).T
        assert_allclose(pr, pr2, rtol=0.05, atol=0.05)

@pytest.mark.slow
class TestZeroInflatedGeneralizedPoisson(CheckGeneric):
    @classmethod
    def setup_class(cls):
        data = sm.datasets.randhie.load(as_pandas=False)
        cls.endog = data.endog
        exog = sm.add_constant(data.exog[:,1:4], prepend=False)
        exog_infl = sm.add_constant(data.exog[:,0], prepend=False)
        cls.res1 = sm.ZeroInflatedGeneralizedPoisson(data.endog, exog,
            exog_infl=exog_infl, p=1).fit(method='newton', maxiter=500, disp=0)
        # for llnull test
        cls.res1._results._attach_nullmodel = True
        cls.init_keys = ['exog_infl', 'exposure', 'inflation', 'offset', 'p']
        cls.init_kwds = {'inflation': 'logit', 'p': 1}
        res2 = RandHIE.zero_inflated_generalized_poisson
        cls.res2 = res2

    def test_bse(self):
        pass

    def test_conf_int(self):
        pass

    def test_bic(self):
        pass

    def test_t(self):
        unit_matrix = np.identity(self.res1.params.size)
        t_test = self.res1.t_test(unit_matrix)
        assert_allclose(self.res1.tvalues, t_test.tvalue)

    def test_minimize(self, reset_randomstate):
        # check additional optimizers using the `minimize` option
        model = self.res1.model
        # use the same start_params, but avoid recomputing
        start_params = self.res1.mle_settings['start_params']

        res_ncg = model.fit(start_params=start_params,
                            method='minimize', min_method="trust-ncg",
                            maxiter=500, disp=0)

        assert_allclose(res_ncg.params, self.res2.params,
                        atol=1e-3, rtol=0.04)
        assert_allclose(res_ncg.bse, self.res2.bse,
                        atol=1e-3, rtol=0.6)
        assert_(res_ncg.mle_retvals['converged'] is True)

        res_dog = model.fit(start_params=start_params,
                            method='minimize', min_method="dogleg",
                            maxiter=500, disp=0)

        assert_allclose(res_dog.params, self.res2.params,
                        atol=1e-3, rtol=3e-3)
        assert_allclose(res_dog.bse, self.res2.bse,
                        atol=1e-3, rtol=0.6)
        assert_(res_dog.mle_retvals['converged'] is True)

        # Ser random_state here to improve reproducibility
        random_state = np.random.RandomState(1)
        seed = {'seed': random_state}
        res_bh = model.fit(start_params=start_params,
                           method='basinhopping', niter=500, stepsize=0.1,
                           niter_success=None, disp=0, interval=1, **seed)

        assert_allclose(res_bh.params, self.res2.params,
                        atol=1e-4, rtol=1e-4)
        assert_allclose(res_bh.bse, self.res2.bse,
                        atol=1e-3, rtol=0.6)
        # skip, res_bh reports converged is false but params agree
        #assert_(res_bh.mle_retvals['converged'] is True)

class TestZeroInflatedGeneralizedPoisson_predict(object):
    @classmethod
    def setup_class(cls):
        expected_params = [1, 0.5, 0.5]
        np.random.seed(1234)
        nobs = 200
        exog = np.ones((nobs, 2))
        exog[:nobs//2, 1] = 2
        mu_true = exog.dot(expected_params[:-1])
        cls.endog = sm.distributions.zigenpoisson.rvs(mu_true, expected_params[-1],
                                                      2, 0.5, size=mu_true.shape)
        model = sm.ZeroInflatedGeneralizedPoisson(cls.endog, exog, p=2)
        cls.res = model.fit(method='bfgs', maxiter=5000, maxfun=5000, disp=0)

    def test_mean(self):
        assert_allclose(self.res.predict().mean(), self.endog.mean(),
                        atol=1e-4, rtol=1e-4)

    def test_var(self):
        assert_allclose((self.res.predict().mean() *
                        self.res._dispersion_factor.mean()),
                        self.endog.var(), atol=0.05, rtol=0.1)

    def test_predict_prob(self):
        res = self.res
        endog = res.model.endog

        pr = res.predict(which='prob')
        pr2 = sm.distributions.zinegbin.pmf(np.arange(12)[:,None],
            res.predict(), 0.5, 2, 0.5).T
        assert_allclose(pr, pr2, rtol=0.08, atol=0.05)

class TestZeroInflatedNegativeBinomialP(CheckGeneric):
Loading ...