Gemfury

steminc / scikits.statsmodels python

Repository URL to install this package:
Details
scikits.statsmodels / statsmodels / examples / ex_generic_mle_t.py
# -*- coding: utf-8 -*-
"""
Created on Wed Jul 28 08:28:04 2010

Author: josef-pktd
"""


import numpy as np

from scipy import stats, special
import scikits.statsmodels.api as sm
from scikits.statsmodels.model import GenericLikelihoodModel

#redefine some shortcuts
np_log = np.log
np_pi = np.pi
sps_gamln = special.gammaln


def maxabs(arr1, arr2):
    return np.max(np.abs(arr1 - arr2))

def maxabsrel(arr1, arr2):
    return np.max(np.abs(arr2 / arr1 - 1))



class MyT(GenericLikelihoodModel):
    '''Maximum Likelihood Estimation of Poisson Model

    This is an example for generic MLE which has the same
    statistical model as discretemod.Poisson.

    Except for defining the negative log-likelihood method, all
    methods and results are generic. Gradients and Hessian
    and all resulting statistics are based on numerical
    differentiation.

    '''

    def loglike(self, params):
        return -self.nloglikeobs(params).sum(0)

    # copied from discretemod.Poisson
    def nloglikeobs(self, params):
        """
        Loglikelihood of Poisson model

        Parameters
        ----------
        params : array-like
            The parameters of the model.

        Returns
        -------
        The log likelihood of the model evaluated at `params`

        Notes
        --------
        .. math :: \\ln L=\\sum_{i=1}^{n}\\left[-\\lambda_{i}+y_{i}x_{i}^{\\prime}\\beta-\\ln y_{i}!\\right]
        """
        #print len(params),
        beta = params[:-2]
        df = params[-2]
        scale = params[-1]
        loc = np.dot(self.exog, beta)
        endog = self.endog
        x = (endog - loc)/scale
        #next part is stats.t._logpdf
        lPx = sps_gamln((df+1)/2) - sps_gamln(df/2.)
        lPx -= 0.5*np_log(df*np_pi) + (df+1)/2.*np_log(1+(x**2)/df)
        lPx -= np_log(scale)  # correction for scale
        return -lPx


#Example:
np.random.seed(98765678)
nobs = 1000
rvs = np.random.randn(nobs,5)
data_exog = sm.add_constant(rvs)
xbeta = 0.9 + 0.1*rvs.sum(1)
data_endog = xbeta + 0.1*np.random.standard_t(5, size=nobs)
#print data_endog

modp = MyT(data_endog, data_exog)
modp.start_value = np.ones(data_exog.shape[1]+2)
modp.start_value[-2] = 10
modp.start_params = modp.start_value
resp = modp.fit(start_params = modp.start_value)
print resp.params
print resp.bse

from scikits.statsmodels.sandbox.regression.numdiff import approx_fprime1, approx_hess

hb=-approx_hess(modp.start_value, modp.loglike, epsilon=-1e-4)[0]
tmp = modp.loglike(modp.start_value)
print tmp.shape


'''
>>> tmp = modp.loglike(modp.start_value)
8
>>> tmp.shape
(100,)
>>> tmp.sum(0)
-24220.877108016182
>>> tmp = modp.nloglikeobs(modp.start_value)
8
>>> tmp.shape
(100, 100)
>>> np.dot(modp.exog, beta).shape
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
NameError: name 'beta' is not defined

>>> params = modp.start_value
>>> beta = params[:-2]
>>> beta.shape
(6,)
>>> np.dot(modp.exog, beta).shape
(100,)
>>> modp.endog.shape
(100, 100)
>>> xbeta.shape
(100,)
>>>
'''

'''
C:\Programs\Python25\lib\site-packages\matplotlib-0.99.1-py2.5-win32.egg\matplotlib\rcsetup.py:117: UserWarning: rcParams key "numerix" is obsolete and has no effect;
 please delete it from your matplotlibrc file
  warnings.warn('rcParams key "numerix" is obsolete and has no effect;\n'
repr(start_params) array([ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.])
Optimization terminated successfully.
         Current function value: 91.897859
         Iterations: 108
         Function evaluations: 173
         Gradient evaluations: 173
[  1.58253308e-01   1.73188603e-01   1.77357447e-01   2.06707494e-02
  -1.31174789e-01   8.79915580e-01   6.47663840e+03   6.73457641e+02]
[         NaN          NaN          NaN          NaN          NaN
  28.26906182          NaN          NaN]
()
>>> resp.params
array([  1.58253308e-01,   1.73188603e-01,   1.77357447e-01,
         2.06707494e-02,  -1.31174789e-01,   8.79915580e-01,
         6.47663840e+03,   6.73457641e+02])
>>> resp.bse
array([         NaN,          NaN,          NaN,          NaN,
                NaN,  28.26906182,          NaN,          NaN])
>>> resp.jac
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
AttributeError: 'GenericLikelihoodModelResults' object has no attribute 'jac'

>>> resp.bsejac
array([    45243.35919908,     51997.80776897,     41418.33021984,
           42763.46575168,     50101.91631612,     42804.92083525,
         3005625.35649203,  13826948.68708931])
>>> resp.bsejhj
array([ 1.51643931,  0.80229636,  0.27720185,  0.4711138 ,  0.9028682 ,
        0.31673747,  0.00524426,  0.69729368])
>>> resp.covjac
array([[  2.04696155e+09,   1.46643494e+08,   7.59932781e+06,
         -2.39993397e+08,   5.62644255e+08,   2.34300598e+08,
         -3.07824799e+09,  -1.93425470e+10],
       [  1.46643494e+08,   2.70377201e+09,   1.06005712e+08,
          3.76824011e+08,  -1.21778986e+08,   5.38612723e+08,
         -2.12575784e+10,  -1.69503271e+11],
       [  7.59932781e+06,   1.06005712e+08,   1.71547808e+09,
         -5.94451158e+07,  -1.44586401e+08,  -5.41830441e+06,
          1.25899515e+10,   1.06372065e+11],
       [ -2.39993397e+08,   3.76824011e+08,  -5.94451158e+07,
          1.82871400e+09,  -5.66930891e+08,   3.75061111e+08,
         -6.84681772e+09,  -7.29993789e+10],
       [  5.62644255e+08,  -1.21778986e+08,  -1.44586401e+08,
         -5.66930891e+08,   2.51020202e+09,  -4.67886982e+08,
          1.78890380e+10,   1.75428694e+11],
       [  2.34300598e+08,   5.38612723e+08,  -5.41830441e+06,
          3.75061111e+08,  -4.67886982e+08,   1.83226125e+09,
         -1.27484996e+10,  -1.12550321e+11],
       [ -3.07824799e+09,  -2.12575784e+10,   1.25899515e+10,
         -6.84681772e+09,   1.78890380e+10,  -1.27484996e+10,
          9.03378378e+12,   2.15188047e+13],
       [ -1.93425470e+10,  -1.69503271e+11,   1.06372065e+11,
         -7.29993789e+10,   1.75428694e+11,  -1.12550321e+11,
          2.15188047e+13,   1.91184510e+14]])
>>> hb
array([[  33.68732564,   -2.33209221,  -13.51255321,   -1.60840159,
         -13.03920385,   -9.3506543 ,    4.86239173,   -9.30409101],
       [  -2.33209221,    3.12512611,   -6.08530968,   -6.79232244,
           3.66804898,    1.26497071,    5.10113409,   -2.53482995],
       [ -13.51255321,   -6.08530968,   31.14883498,   -5.01514705,
         -10.48819911,   -2.62533035,    3.82241581,  -12.51046342],
       [  -1.60840159,   -6.79232244,   -5.01514705,   28.40141917,
          -8.72489636,   -8.82449456,    5.47584023,  -18.20500017],
       [ -13.03920385,    3.66804898,  -10.48819911,   -8.72489636,
           9.03650914,    3.65206176,    6.55926726,   -1.8233635 ],
       [  -9.3506543 ,    1.26497071,   -2.62533035,   -8.82449456,
           3.65206176,   21.41825348,   -1.28610793,    4.28101146],
       [   4.86239173,    5.10113409,    3.82241581,    5.47584023,
           6.55926726,   -1.28610793,   46.52354448,  -32.23861427],
       [  -9.30409101,   -2.53482995,  -12.51046342,  -18.20500017,
          -1.8233635 ,    4.28101146,  -32.23861427,  178.61978279]])
>>> np.linalg.eigh(hb)
(array([ -10.50373649,    0.7460258 ,   14.73131793,   29.72453087,
         36.24103832,   41.98042979,   48.99815223,  190.04303734]), array([[-0.40303259,  0.10181305,  0.18164206,  0.48201456,  0.03916688,
         0.00903695,  0.74620692,  0.05853619],
       [-0.3201713 , -0.88444855, -0.19867642,  0.02828812,  0.16733946,
        -0.21440765, -0.02927317,  0.01176904],
       [-0.41847094,  0.00170161,  0.04973298,  0.43276118, -0.55894304,
         0.26454728, -0.49745582,  0.07251685],
       [-0.3508729 , -0.08302723,  0.25004884, -0.73495077, -0.38936448,
         0.20677082,  0.24464779,  0.11448238],
       [-0.62065653,  0.44662675, -0.37388565, -0.19453047,  0.29084735,
        -0.34151809, -0.19088978,  0.00342713],
       [-0.15119802, -0.01099165,  0.84377273,  0.00554863,  0.37332324,
        -0.17917015, -0.30371283, -0.03635211],
       [ 0.15813581,  0.0293601 ,  0.09882271,  0.03515962, -0.48768565,
        -0.81960996,  0.05248464,  0.22533642],
       [-0.06118044, -0.00549223,  0.03205047, -0.01782649, -0.21128588,
        -0.14391393,  0.05973658, -0.96226835]]))
>>> np.linalg.eigh(np.linalg.inv(hb))
(array([-0.09520422,  0.00526197,  0.02040893,  0.02382062,  0.02759303,
        0.03364225,  0.06788259,  1.34043621]), array([[-0.40303259,  0.05853619,  0.74620692, -0.00903695, -0.03916688,
         0.48201456,  0.18164206,  0.10181305],
       [-0.3201713 ,  0.01176904, -0.02927317,  0.21440765, -0.16733946,
         0.02828812, -0.19867642, -0.88444855],
       [-0.41847094,  0.07251685, -0.49745582, -0.26454728,  0.55894304,
         0.43276118,  0.04973298,  0.00170161],
       [-0.3508729 ,  0.11448238,  0.24464779, -0.20677082,  0.38936448,
        -0.73495077,  0.25004884, -0.08302723],
       [-0.62065653,  0.00342713, -0.19088978,  0.34151809, -0.29084735,
        -0.19453047, -0.37388565,  0.44662675],
       [-0.15119802, -0.03635211, -0.30371283,  0.17917015, -0.37332324,
         0.00554863,  0.84377273, -0.01099165],
       [ 0.15813581,  0.22533642,  0.05248464,  0.81960996,  0.48768565,
         0.03515962,  0.09882271,  0.0293601 ],
       [-0.06118044, -0.96226835,  0.05973658,  0.14391393,  0.21128588,
        -0.01782649,  0.03205047, -0.00549223]]))
>>> np.diag(np.linalg.inv(hb))
array([ 0.01991288,  1.0433882 ,  0.00516616,  0.02642799,  0.24732871,
        0.05281555,  0.02236704,  0.00643486])
>>> np.sqrt(np.diag(np.linalg.inv(hb)))
array([ 0.14111302,  1.02146375,  0.07187597,  0.16256686,  0.49732154,
        0.22981633,  0.14955616,  0.08021756])
>>> hess = modp.hessian(resp.params)
>>> np.sqrt(np.diag(np.linalg.inv(hess)))
array([ 231.3823423 ,  117.79508218,   31.46595143,   53.44753106,
        132.4855704 ,           NaN,    5.47881705,   90.75332693])
>>> hb=-approx_hess(resp.params, modp.loglike, epsilon=-1e-4)[0]
>>> np.sqrt(np.diag(np.linalg.inv(hb)))
array([ 31.93524822,  22.0333515 ,          NaN,  29.90198792,
        38.82615785,          NaN,          NaN,          NaN])
>>> hb=-approx_hess(resp.params, modp.loglike, epsilon=-1e-8)[0]
>>> np.sqrt(np.diag(np.linalg.inv(hb)))
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "C:\Programs\Python25\lib\site-packages\numpy\linalg\linalg.py", line 423, in inv
    return wrap(solve(a, identity(a.shape[0], dtype=a.dtype)))
  File "C:\Programs\Python25\lib\site-packages\numpy\linalg\linalg.py", line 306, in solve
    raise LinAlgError, 'Singular matrix'
numpy.linalg.linalg.LinAlgError: Singular matrix
>>> resp.params
array([  1.58253308e-01,   1.73188603e-01,   1.77357447e-01,
         2.06707494e-02,  -1.31174789e-01,   8.79915580e-01,
         6.47663840e+03,   6.73457641e+02])
>>>
'''
steminc / scikits.statsmodels python

Products

About

Resources

Contact Gemfury