Gemfury

steminc / scikits.statsmodels python

Repository URL to install this package:
Details
scikits.statsmodels / statsmodels / examples / ex_generic_mle_tdist.py
# -*- coding: utf-8 -*-
"""
Created on Wed Jul 28 08:28:04 2010

Author: josef-pktd
"""


import numpy as np

from scipy import stats, special, optimize
import scikits.statsmodels.api as sm
from scikits.statsmodels.model import GenericLikelihoodModel

#redefine some shortcuts
np_log = np.log
np_pi = np.pi
sps_gamln = special.gammaln


def maxabs(arr1, arr2):
    return np.max(np.abs(arr1 - arr2))

def maxabsrel(arr1, arr2):
    return np.max(np.abs(arr2 / arr1 - 1))

#global
store_params = []

class MyT(GenericLikelihoodModel):
    '''Maximum Likelihood Estimation of Linear Model with t-distributed errors

    This is an example for generic MLE which has the same
    statistical model as discretemod.Poisson.

    Except for defining the negative log-likelihood method, all
    methods and results are generic. Gradients and Hessian
    and all resulting statistics are based on numerical
    differentiation.

    '''


    def loglike(self, params):
        return -self.nloglikeobs(params).sum(0)

    # copied from discretemod.Poisson
    def nloglikeobs(self, params):
        """
        Loglikelihood of Poisson model

        Parameters
        ----------
        params : array-like
            The parameters of the model.

        Returns
        -------
        The log likelihood of the model evaluated at `params`

        Notes
        --------
        .. math :: \\ln L=\\sum_{i=1}^{n}\\left[-\\lambda_{i}+y_{i}x_{i}^{\\prime}\\beta-\\ln y_{i}!\\right]
        """
        #print len(params),
        store_params.append(params)
        if not self.fixed_params is None:
            #print 'using fixed'
            params = self.expandparams(params)

        beta = params[:-2]
        df = params[-2]
        scale = params[-1]
        loc = np.dot(self.exog, beta)
        endog = self.endog
        x = (endog - loc)/scale
        #next part is stats.t._logpdf
        lPx = sps_gamln((df+1)/2) - sps_gamln(df/2.)
        lPx -= 0.5*np_log(df*np_pi) + (df+1)/2.*np_log(1+(x**2)/df)
        lPx -= np_log(scale)  # correction for scale
        return -lPx


#Example:
np.random.seed(98765678)
nobs = 1000
nvars = 6
df = 5
rvs = np.random.randn(nobs, nvars-1)
data_exog = sm.add_constant(rvs)
xbeta = 0.9 + 0.1*rvs.sum(1)
data_endog = xbeta + 0.1*np.random.standard_t(df, size=nobs)
print data_endog.var()

res_ols = sm.OLS(data_endog, data_exog).fit()
print res_ols.scale
print np.sqrt(res_ols.scale)
print res_ols.params
kurt = stats.kurtosis(res_ols.resid)
df_fromkurt = 6./kurt + 4
print stats.t.stats(df_fromkurt, moments='mvsk')
print stats.t.stats(df, moments='mvsk')

modp = MyT(data_endog, data_exog)
start_value = 0.1*np.ones(data_exog.shape[1]+2)
#start_value = np.zeros(data_exog.shape[1]+2)
#start_value[:nvars] = sm.OLS(data_endog, data_exog).fit().params
start_value[:nvars] = res_ols.params
start_value[-2] = df_fromkurt #10
start_value[-1] = np.sqrt(res_ols.scale) #0.5
modp.start_params = start_value

#adding fixed parameters

fixdf = np.nan * np.zeros(modp.start_params.shape)
fixdf[-2] = 100

fixone = 0
if fixone:
    modp.fixed_params = fixdf
    modp.fixed_paramsmask = np.isnan(fixdf)
    modp.start_params = modp.start_params[modp.fixed_paramsmask]
else:
    modp.fixed_params = None
    modp.fixed_paramsmask = None


resp = modp.fit(start_params = modp.start_params, disp=1, method='nm')#'newton')
#resp = modp.fit(start_params = modp.start_params, disp=1, method='newton')
print '\nestimation results t-dist'
print resp.params
print resp.bse
resp2 = modp.fit(start_params = resp.params, method='Newton')
print 'using Newton'
print resp2.params
print resp2.bse

from scikits.statsmodels.sandbox.regression.numdiff import approx_fprime1, approx_hess

hb=-approx_hess(modp.start_params, modp.loglike, epsilon=-1e-4)[0]
tmp = modp.loglike(modp.start_params)
print tmp.shape
#np.linalg.eigh(np.linalg.inv(hb))[0]

pp=np.array(store_params)
print pp.min(0)
print pp.max(0)




##################### Example: Pareto
# estimating scale doesn't work yet, a bug somewhere ?
# fit_ks works well, but no bse or other result statistics yet





#import for kstest based estimation
#should be replace
import scikits.statsmodels.sandbox.stats.distributions_patch

class MyPareto(GenericLikelihoodModel):
    '''Maximum Likelihood Estimation pareto distribution

    first version: iid case, with constant parameters
    '''

    #copied from stats.distribution
    def pdf(self, x, b):
        return b * x**(-b-1)

    def loglike(self, params):
        return -self.nloglikeobs(params).sum(0)

    def nloglikeobs(self, params):
        #print params.shape
        if not self.fixed_params is None:
            #print 'using fixed'
            params = self.expandparams(params)
        b = params[0]
        loc = params[1]
        scale = params[2]
        #loc = np.dot(self.exog, beta)
        endog = self.endog
        x = (endog - loc)/scale
        logpdf = np_log(b) - (b+1.)*np_log(x)
        logpdf -= np.log(scale)
        #lb = loc + scale
        #logpdf[endog<lb] = -inf
        logpdf[x<1] = -np.inf
        return -logpdf

    def fit_ks(self):
        '''fit Pareto with nested optimization

        originally published on stackoverflow
        this doesn't trim lower values during ks optimization

        '''
        rvs = self.endog
        rvsmin = rvs.min()

        def pareto_ks(loc, rvs):
            #start_scale = rvs.min() - loc # not used yet
            #est = self.fit_fr(rvs, 1., frozen=[np.nan, loc, np.nan])
            self.fixed_params[1] = loc
            #est = self.fit(start_params=self.start_params[self.fixed_paramsmask]).params
            est = self.fit(start_params=self.start_params, method='nm').params
            args = (est[0], loc, est[1])
            return stats.kstest(rvs,'pareto',args)[0]

        locest = optimize.fmin(pareto_ks, rvsmin*0.7, (rvs,))
        est = stats.pareto.fit_fr(rvs, 9., frozen=[np.nan, locest, np.nan])
        args = (est[0], locest[0], est[1])
        return args


    def fit_ks1_trim(self):
        '''fit Pareto with nested optimization

        originally published on stackoverflow

        '''
        rvs = np.sort(self.endog)
        rvsmin = rvs.min()

        def pareto_ks(loc, rvs):
            #start_scale = rvs.min() - loc # not used yet
            est = stats.pareto.fit_fr(rvs, 1., frozen=[np.nan, loc, np.nan])
            args = (est[0], loc, est[1])
            return stats.kstest(rvs,'pareto',args)[0]

        #locest = optimize.fmin(pareto_ks, rvsmin*0.7, (rvs,))
        maxind = min(np.floor(self.nobs*0.95).astype(int), self.nobs-10)
        res = []
        for trimidx in range(self.nobs//2, maxind):
            xmin = loc = rvs[trimidx]
            res.append([trimidx, pareto_ks(loc-1e-10, rvs[trimidx:])])
        res = np.array(res)
        bestidx = res[np.argmin(res[:,1]),0].astype(int)
        print bestidx
        locest = rvs[bestidx]

        est = stats.pareto.fit_fr(rvs[bestidx:], 1., frozen=[np.nan, locest, np.nan])
        args = (est[0], locest, est[1])
        return args

    def fit_ks1(self):
        '''fit Pareto with nested optimization

        originally published on stackoverflow

        '''
        rvs = self.endog
        rvsmin = rvs.min()

        def pareto_ks(loc, rvs):
            #start_scale = rvs.min() - loc # not used yet
            est = stats.pareto.fit_fr(rvs, 1., frozen=[np.nan, loc, np.nan])
            args = (est[0], loc, est[1])
            return stats.kstest(rvs,'pareto',args)[0]

        locest = optimize.fmin(pareto_ks, rvsmin*0.7, (rvs,))
        est = stats.pareto.fit_fr(rvs, 1., frozen=[np.nan, locest, np.nan])
        args = (est[0], locest[0], est[1])
        return args

#y = stats.pareto.rvs(1, loc=10, scale=2, size=nobs)
y = stats.pareto.rvs(1, loc=0, scale=2, size=nobs)


par_start_params = np.array([1., 9., 2.])

mod_par = MyPareto(y)
mod_par.start_params = np.array([1., 10., 2.])
mod_par.start_params = np.array([1., 9., 2.])
mod_par.fixed_params = None

fixdf = np.nan * np.ones(mod_par.start_params.shape)
fixdf[1] = 9.9
#fixdf[2] = 2.
fixone = 0
if fixone:
    mod_par.fixed_params = fixdf
    mod_par.fixed_paramsmask = np.isnan(fixdf)
    mod_par.start_params = mod_par.start_params[mod_par.fixed_paramsmask]
else:
    mod_par.fixed_params = None
    mod_par.fixed_paramsmask = None

res_par = mod_par.fit(start_params=mod_par.start_params, method='nm', maxfun=10000, maxiter=5000)
#res_par2 = mod_par.fit(start_params=res_par.params, method='newton', maxfun=10000, maxiter=5000)

res_parks = mod_par.fit_ks1()

print res_par.params
#print res_par2.params
print res_parks

print res_par.params[1:].sum(), sum(res_parks[1:]), mod_par.endog.min()

mod_par.fixed_params = fixdf
mod_par.fixed_paramsmask = np.isnan(fixdf)

mod_par.start_params = par_start_params[mod_par.fixed_paramsmask]
mod_par.fit(start_params=mod_par.start_params)
res_parks2 = mod_par.fit_ks()

res_parkst = mod_par.fit_ks1_trim()
print res_parkst

'''
C:\Programs\Python25\lib\site-packages\matplotlib-0.99.1-py2.5-win32.egg\matplotlib\rcsetup.py:117: UserWarning: rcParams key "numerix" is obsolete and has no effect;
 please delete it from your matplotlibrc file
  warnings.warn('rcParams key "numerix" is obsolete and has no effect;\n'
0.0686702747648
0.0164150896481
0.128121386381
[ 0.10370428  0.09921315  0.09676723  0.10457413  0.10201618  0.89964496]
(array(0.0), array(1.4552599885729831), array(0.0), array(2.5072143354058238))
(array(0.0), array(1.6666666666666667), array(0.0), array(6.0))
repr(start_params) array([ 0.10370428,  0.09921315,  0.09676723,  0.10457413,  0.10201618,
        0.89964496,  6.39309417,  0.12812139])
Optimization terminated successfully.
         Current function value: -679.951339
         Iterations: 398
         Function evaluations: 609

estimation results t-dist
[ 0.10400826  0.10111893  0.09725133  0.10507788  0.10086163  0.8996041
  4.72131318  0.09825355]
[ 0.00365493  0.00356149  0.00349329  0.00362333  0.003732    0.00362716
  0.7232824   0.00388829]
repr(start_params) array([ 0.10400826,  0.10111893,  0.09725133,  0.10507788,  0.10086163,
        0.8996041 ,  4.72131318,  0.09825355])
Optimization terminated successfully.
         Current function value: -679.950443
         Iterations 3
using Newton
[ 0.10395383  0.10106762  0.09720665  0.10503384  0.10080599  0.89954546
  4.70918964  0.09815885]
[ 0.00365299  0.00355968  0.00349147  0.00362166  0.00373015  0.00362533
  0.72014031  0.00388434]
()
[ 0.09992709  0.09786601  0.09387356  0.10229919  0.09756623  0.85466272
  4.60459182  0.09661986]
[ 0.11308292  0.10828401  0.1028508   0.11268895  0.10934726  0.94462721
  7.15412655  0.13452746]
repr(start_params) array([ 1.,  2.])
Warning: Maximum number of function evaluations has been exceeded.
>>> res_par.params
array([  7.42705803e+152,   2.17339053e+153])
>>> mod_par.loglike(mod_p.start_params)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
NameError: name 'mod_p' is not defined

>>> mod_par.loglike(mod_par.start_params)
-1085.1993430947232
>>> np.log(mod_par.pdf(mod_par.start_params))
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
TypeError: pdf() takes exactly 3 arguments (2 given)

>>> np.log(mod_par.pdf(*mod_par.start_params))
0.69314718055994529
>>> mod_par.loglike(*mod_par.start_params)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
TypeError: loglike() takes exactly 2 arguments (3 given)

>>> mod_par.loglike(mod_par.start_params)
-1085.1993430947232
>>> np.log(stats.pareto.pdf(y[0],*mod_par.start_params))
-4.6414308627431353
>>> mod_par.loglike(mod_par.start_params)
-1085.1993430947232
>>> mod_par.nloglikeobs(mod_par.start_params)[0]
0.29377232943845044
>>> mod_par.start_params
array([ 1.,  2.])
>>> np.log(stats.pareto.pdf(y[0],1,9.5,2))
-1.2806918394368461
>>> mod_par.fixed_params= None
>>> mod_par.nloglikeobs(np.array([1., 10., 2.]))[0]
0.087533156771285828
>>> y[0]
12.182956907488885
>>> mod_para.endog[0]
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
NameError: name 'mod_para' is not defined

>>> mod_par.endog[0]
12.182956907488885
>>> np.log(stats.pareto.pdf(y[0],1,10,2))
-0.86821349410251702
>>> np.log(stats.pareto.pdf(y[0],1.,10.,2.))
-0.86821349410251702
>>> stats.pareto.pdf(y[0],1.,10.,2.)
0.41970067762301644
>>> mod_par.loglikeobs(np.array([1., 10., 2.]))[0]
-0.087533156771285828
>>>
'''

'''
>>> mod_par.nloglikeobs(np.array([1., 10., 2.]))[0]
0.86821349410251691
>>> np.log(stats.pareto.pdf(y,1.,10.,2.)).sum()
-2627.9403758026938
'''


#'''
#C:\Programs\Python25\lib\site-packages\matplotlib-0.99.1-py2.5-win32.egg\matplotlib\rcsetup.py:117: UserWarning: rcParams key "numerix" is obsolete and has no effect;
# please delete it from your matplotlibrc file
#  warnings.warn('rcParams key "numerix" is obsolete and has no effect;\n'
#0.0686702747648
#0.0164150896481
#0.128121386381
#[ 0.10370428  0.09921315  0.09676723  0.10457413  0.10201618  0.89964496]
#(array(0.0), array(1.4552599885729827), array(0.0), array(2.5072143354058203))
#(array(0.0), array(1.6666666666666667), array(0.0), array(6.0))
#repr(start_params) array([ 0.10370428,  0.09921315,  0.09676723,  0.10457413,  0.10201618,
#        0.89964496,  6.39309417,  0.12812139])
#Optimization terminated successfully.
#         Current function value: -679.951339
#         Iterations: 398
#         Function evaluations: 609
#
#estimation results t-dist
#[ 0.10400826  0.10111893  0.09725133  0.10507788  0.10086163  0.8996041
#  4.72131318  0.09825355]
#[ 0.00365493  0.00356149  0.00349329  0.00362333  0.003732    0.00362716
#  0.72325227  0.00388822]
#repr(start_params) array([ 0.10400826,  0.10111893,  0.09725133,  0.10507788,  0.10086163,
#        0.8996041 ,  4.72131318,  0.09825355])
#Optimization terminated successfully.
#         Current function value: -679.950443
#         Iterations 3
#using Newton
#[ 0.10395383  0.10106762  0.09720665  0.10503384  0.10080599  0.89954546
#  4.70918964  0.09815885]
#[ 0.00365299  0.00355968  0.00349147  0.00362166  0.00373015  0.00362533
#  0.72014669  0.00388436]
#()
#[ 0.09992709  0.09786601  0.09387356  0.10229919  0.09756623  0.85466272
#  4.60459182  0.09661986]
#[ 0.11308292  0.10828401  0.1028508   0.11268895  0.10934726  0.94462721
#  7.15412655  0.13452746]
#repr(start_params) array([ 1.,  2.])
#Warning: Maximum number of function evaluations has been exceeded.
#repr(start_params) array([  3.06504406e+302,   3.29325579e+303])
#Traceback (most recent call last):
#  File "C:\Josef\eclipsegworkspace\statsmodels-josef-experimental-gsoc\scikits\statsmodels\examples\ex_generic_mle_tdist.py", line 222, in <module>
#    res_par2 = mod_par.fit(start_params=res_par.params, method='newton', maxfun=10000, maxiter=5000)
#  File "c:\josef\eclipsegworkspace\statsmodels-josef-experimental-gsoc\scikits\statsmodels\model.py", line 547, in fit
#    disp=disp, callback=callback, **kwargs)
#  File "c:\josef\eclipsegworkspace\statsmodels-josef-experimental-gsoc\scikits\statsmodels\model.py", line 262, in fit
#    newparams = oldparams - np.dot(np.linalg.inv(H),
#  File "C:\Programs\Python25\lib\site-packages\numpy\linalg\linalg.py", line 423, in inv
#    return wrap(solve(a, identity(a.shape[0], dtype=a.dtype)))
#  File "C:\Programs\Python25\lib\site-packages\numpy\linalg\linalg.py", line 306, in solve
#    raise LinAlgError, 'Singular matrix'
#numpy.linalg.linalg.LinAlgError: Singular matrix
#
#>>> mod_par.fixed_params
#array([ NaN,  10.,  NaN])
#>>> mod_par.start_params
#array([ 1.,  2.])
#>>> np.source(stats.pareto.fit_fr)
#In file: c:\josef\eclipsegworkspace\statsmodels-josef-experimental-gsoc\scikits\statsmodels\sandbox\stats\distributions_patch.py
#
#def fit_fr(self, data, *args, **kwds):
#    '''estimate distribution parameters by MLE taking some parameters as fixed
#
#    Parameters
#    ----------
#    data : array, 1d
#        data for which the distribution parameters are estimated,
#    args : list ? check
#        starting values for optimization
#    kwds :
#
#      - 'frozen' : array_like
#           values for frozen distribution parameters and, for elements with
#           np.nan, the corresponding parameter will be estimated
#
#    Returns
#    -------
#    argest : array
#        estimated parameters
#
#
#    Examples
#    --------
#    generate random sample
#    >>> np.random.seed(12345)
#    >>> x = stats.gamma.rvs(2.5, loc=0, scale=1.2, size=200)
#
#    estimate all parameters
#    >>> stats.gamma.fit(x)
#    array([ 2.0243194 ,  0.20395655,  1.44411371])
#    >>> stats.gamma.fit_fr(x, frozen=[np.nan, np.nan, np.nan])
#    array([ 2.0243194 ,  0.20395655,  1.44411371])
#
#    keep loc fixed, estimate shape and scale parameters
#    >>> stats.gamma.fit_fr(x, frozen=[np.nan, 0.0, np.nan])
#    array([ 2.45603985,  1.27333105])
#
#    keep loc and scale fixed, estimate shape parameter
#    >>> stats.gamma.fit_fr(x, frozen=[np.nan, 0.0, 1.0])
#    array([ 3.00048828])
#    >>> stats.gamma.fit_fr(x, frozen=[np.nan, 0.0, 1.2])
#    array([ 2.57792969])
#
#    estimate only scale parameter for fixed shape and loc
#    >>> stats.gamma.fit_fr(x, frozen=[2.5, 0.0, np.nan])
#    array([ 1.25087891])
#
#    Notes
#    -----
#    self is an instance of a distribution class. This can be attached to
#    scipy.stats.distributions.rv_continuous
#
#    *Todo*
#
#    * check if docstring is correct
#    * more input checking, args is list ? might also apply to current fit method
#
#    '''
#    loc0, scale0 = map(kwds.get, ['loc', 'scale'],[0.0, 1.0])
#    Narg = len(args)
#
#    if Narg == 0 and hasattr(self, '_fitstart'):
#        x0 = self._fitstart(data)
#    elif Narg > self.numargs:
#        raise ValueError, "Too many input arguments."
#    else:
#        args += (1.0,)*(self.numargs-Narg)
#        # location and scale are at the end
#        x0 = args + (loc0, scale0)
#
#    if 'frozen' in kwds:
#        frmask = np.array(kwds['frozen'])
#        if len(frmask) != self.numargs+2:
#            raise ValueError, "Incorrect number of frozen arguments."
#        else:
#            # keep starting values for not frozen parameters
#            x0  = np.array(x0)[np.isnan(frmask)]
#    else:
#        frmask = None
#
#    #print x0
#    #print frmask
#    return optimize.fmin(self.nnlf_fr, x0,
#                args=(np.ravel(data), frmask), disp=0)
#
#>>> stats.pareto.fit_fr(y, 1., frozen=[np.nan, loc, np.nan])
#Traceback (most recent call last):
#  File "<stdin>", line 1, in <module>
#NameError: name 'loc' is not defined
#
#>>> stats.pareto.fit_fr(y, 1., frozen=[np.nan, 10., np.nan])
#array([ 1.0346268 ,  2.00184808])
#>>> stats.pareto.fit_fr(y, (1.,2), frozen=[np.nan, 10., np.nan])
#Traceback (most recent call last):
#  File "<stdin>", line 1, in <module>
#  File "c:\josef\eclipsegworkspace\statsmodels-josef-experimental-gsoc\scikits\statsmodels\sandbox\stats\distributions_patch.py", line 273, in fit_fr
#    x0  = np.array(x0)[np.isnan(frmask)]
#ValueError: setting an array element with a sequence.
#
#>>> stats.pareto.fit_fr(y, [1.,2], frozen=[np.nan, 10., np.nan])
#Traceback (most recent call last):
#  File "<stdin>", line 1, in <module>
#  File "c:\josef\eclipsegworkspace\statsmodels-josef-experimental-gsoc\scikits\statsmodels\sandbox\stats\distributions_patch.py", line 273, in fit_fr
#    x0  = np.array(x0)[np.isnan(frmask)]
#ValueError: setting an array element with a sequence.
#
#>>> stats.pareto.fit_fr(y, frozen=[np.nan, 10., np.nan])
#array([ 1.03463526,  2.00184809])
#>>> stats.pareto.pdf(y, 1.03463526, 10, 2.00184809).sum()
#173.33947284555239
#>>> mod_par(1.03463526, 10, 2.00184809)
#Traceback (most recent call last):
#  File "<stdin>", line 1, in <module>
#TypeError: 'MyPareto' object is not callable
#
#>>> mod_par.loglike(1.03463526, 10, 2.00184809)
#Traceback (most recent call last):
#  File "<stdin>", line 1, in <module>
#TypeError: loglike() takes exactly 2 arguments (4 given)
#
#>>> mod_par.loglike((1.03463526, 10, 2.00184809))
#-962.21623668859741
#>>> np.log(stats.pareto.pdf(y, 1.03463526, 10, 2.00184809)).sum()
#-inf
#>>> np.log(stats.pareto.pdf(y, 1.03463526, 9, 2.00184809)).sum()
#-3074.5947476137271
#>>> np.log(stats.pareto.pdf(y, 1.03463526, 10., 2.00184809)).sum()
#-inf
#>>> np.log(stats.pareto.pdf(y, 1.03463526, 9.9, 2.00184809)).sum()
#-2677.3867091635661
#>>> y.min()
#12.001848089426717
#>>> np.log(stats.pareto.pdf(y, 1.03463526, loc=9.9, scale=2.00184809)).sum()
#-2677.3867091635661
#>>> np.log(stats.pareto.pdf(y, 1.03463526, loc=10., scale=2.00184809)).sum()
#-inf
#>>> stats.pareto.logpdf(y, 1.03463526, loc=10., scale=2.00184809).sum()
#-inf
#>>> stats.pareto.logpdf(y, 1.03463526, loc=9.99, scale=2.00184809).sum()
#-2631.6120098202355
#>>> mod_par.loglike((1.03463526, 9.99, 2.00184809))
#-963.2513896113644
#>>> maxabs(y, mod_par.endog)
#0.0
#>>> np.source(stats.pareto.logpdf)
#In file: C:\Josef\_progs\Subversion\scipy-trunk_after\trunk\dist\scipy-0.9.0.dev6579.win32\Programs\Python25\Lib\site-packages\scipy\stats\distributions.py
#
#    def logpdf(self, x, *args, **kwds):
#        """
#        Log of the probability density function at x of the given RV.
#
#        This uses more numerically accurate calculation if available.
#
#        Parameters
#        ----------
#        x : array-like
#            quantiles
#        arg1, arg2, arg3,... : array-like
#            The shape parameter(s) for the distribution (see docstring of the
#            instance object for more information)
#        loc : array-like, optional
#            location parameter (default=0)
#        scale : array-like, optional
#            scale parameter (default=1)
#
#        Returns
#        -------
#        logpdf : array-like
#            Log of the probability density function evaluated at x
#
#        """
#        loc,scale=map(kwds.get,['loc','scale'])
#        args, loc, scale = self._fix_loc_scale(args, loc, scale)
#        x,loc,scale = map(arr,(x,loc,scale))
#        args = tuple(map(arr,args))
#        x = arr((x-loc)*1.0/scale)
#        cond0 = self._argcheck(*args) & (scale > 0)
#        cond1 = (scale > 0) & (x >= self.a) & (x <= self.b)
#        cond = cond0 & cond1
#        output = empty(shape(cond),'d')
#        output.fill(NINF)
#        putmask(output,(1-cond0)*array(cond1,bool),self.badvalue)
#        goodargs = argsreduce(cond, *((x,)+args+(scale,)))
#        scale, goodargs = goodargs[-1], goodargs[:-1]
#        place(output,cond,self._logpdf(*goodargs) - log(scale))
#        if output.ndim == 0:
#            return output[()]
#        return output
#
#>>> np.source(stats.pareto._logpdf)
#In file: C:\Josef\_progs\Subversion\scipy-trunk_after\trunk\dist\scipy-0.9.0.dev6579.win32\Programs\Python25\Lib\site-packages\scipy\stats\distributions.py
#
#    def _logpdf(self, x, *args):
#        return log(self._pdf(x, *args))
#
#>>> np.source(stats.pareto._pdf)
#In file: C:\Josef\_progs\Subversion\scipy-trunk_after\trunk\dist\scipy-0.9.0.dev6579.win32\Programs\Python25\Lib\site-packages\scipy\stats\distributions.py
#
#    def _pdf(self, x, b):
#        return b * x**(-b-1)
#
#>>> stats.pareto.a
#1.0
#>>> (1-loc)/scale
#Traceback (most recent call last):
#  File "<stdin>", line 1, in <module>
#NameError: name 'loc' is not defined
#
#>>> b, loc, scale = (1.03463526, 9.99, 2.00184809)
#>>> (1-loc)/scale
#-4.4908502522786327
#>>> (x-loc)/scale == 1
#Traceback (most recent call last):
#  File "<stdin>", line 1, in <module>
#NameError: name 'x' is not defined
#
#>>> (lb-loc)/scale == 1
#Traceback (most recent call last):
#  File "<stdin>", line 1, in <module>
#NameError: name 'lb' is not defined
#
#>>> lb = scale + loc
#>>> lb
#11.991848090000001
#>>> (lb-loc)/scale == 1
#False
#>>> (lb-loc)/scale
#1.0000000000000004
#>>>
#'''

'''
repr(start_params) array([  1.,  10.,   2.])
Optimization terminated successfully.
         Current function value: 2626.436870
         Iterations: 102
         Function evaluations: 210
Optimization terminated successfully.
         Current function value: 0.016555
         Iterations: 16
         Function evaluations: 35
[  1.03482659  10.00737039   1.9944777 ]
(1.0596088578825995, 9.9043376069230007, 2.0975104813987118)
>>> 9.9043376069230007 + 2.0975104813987118
12.001848088321712
>>> y.min()
12.001848089426717

'''

'''
C:\Programs\Python25\lib\site-packages\matplotlib-0.99.1-py2.5-win32.egg\matplotlib\rcsetup.py:117: UserWarning: rcParams key "numerix" is obsolete and has no effect;
 please delete it from your matplotlibrc file
  warnings.warn('rcParams key "numerix" is obsolete and has no effect;\n'
0.0686702747648
0.0164150896481
0.128121386381
[ 0.10370428  0.09921315  0.09676723  0.10457413  0.10201618  0.89964496]
(array(0.0), array(1.4552599885729829), array(0.0), array(2.5072143354058221))
(array(0.0), array(1.6666666666666667), array(0.0), array(6.0))
repr(start_params) array([ 0.10370428,  0.09921315,  0.09676723,  0.10457413,  0.10201618,
        0.89964496,  6.39309417,  0.12812139])
Optimization terminated successfully.
         Current function value: -679.951339
         Iterations: 398
         Function evaluations: 609

estimation results t-dist
[ 0.10400826  0.10111893  0.09725133  0.10507788  0.10086163  0.8996041
  4.72131318  0.09825355]
[ 0.00365493  0.00356149  0.00349329  0.00362333  0.003732    0.00362716
  0.72329352  0.00388832]
repr(start_params) array([ 0.10400826,  0.10111893,  0.09725133,  0.10507788,  0.10086163,
        0.8996041 ,  4.72131318,  0.09825355])
Optimization terminated successfully.
         Current function value: -679.950443
         Iterations 3
using Newton
[ 0.10395383  0.10106762  0.09720665  0.10503384  0.10080599  0.89954546
  4.70918964  0.09815885]
[ 0.00365299  0.00355968  0.00349147  0.00362166  0.00373015  0.00362533
  0.7201488   0.00388437]
()
[ 0.09992709  0.09786601  0.09387356  0.10229919  0.09756623  0.85466272
  4.60459182  0.09661986]
[ 0.11308292  0.10828401  0.1028508   0.11268895  0.10934726  0.94462721
  7.15412655  0.13452746]
repr(start_params) array([ 1.,  9.,  2.])
Optimization terminated successfully.
         Current function value: 2636.129089
         Iterations: 147
         Function evaluations: 279
Optimization terminated successfully.
         Current function value: 0.016555
         Iterations: 16
         Function evaluations: 35
[  0.84856418  10.2197801    1.78206799]
(1.0596088578825995, 9.9043376069230007, 2.0975104813987118)
12.0018480891 12.0018480883 12.0018480894
repr(start_params) array([ 1.,  2.])
Warning: Desired error not necessarily achieveddue to precision loss
         Current function value: 2643.549907
         Iterations: 2
         Function evaluations: 13
         Gradient evaluations: 12
>>> res_parks2 = mod_par.fit_ks()
repr(start_params) array([ 1.,  2.])
Optimization terminated successfully.
         Current function value: 2642.465273
         Iterations: 92
         Function evaluations: 172
repr(start_params) array([ 1.,  2.])
Optimization terminated successfully.
         Current function value: 2636.639863
         Iterations: 73
         Function evaluations: 136
repr(start_params) array([ 1.,  2.])
Optimization terminated successfully.
         Current function value: 2631.568778
         Iterations: 75
         Function evaluations: 133
repr(start_params) array([ 1.,  2.])
Optimization terminated successfully.
         Current function value: 2627.821044
         Iterations: 75
         Function evaluations: 135
repr(start_params) array([ 1.,  2.])
Warning: Maximum number of function evaluations has been exceeded.
repr(start_params) array([ 1.,  2.])
Optimization terminated successfully.
         Current function value: 2631.568778
         Iterations: 75
         Function evaluations: 133
repr(start_params) array([ 1.,  2.])
Optimization terminated successfully.
         Current function value: 2626.431596
         Iterations: 58
         Function evaluations: 109
repr(start_params) array([ 1.,  2.])
Warning: Maximum number of function evaluations has been exceeded.
repr(start_params) array([ 1.,  2.])
Warning: Maximum number of function evaluations has been exceeded.
repr(start_params) array([ 1.,  2.])
Optimization terminated successfully.
         Current function value: 2626.737426
         Iterations: 60
         Function evaluations: 109
repr(start_params) array([ 1.,  2.])
Optimization terminated successfully.
         Current function value: 2627.821044
         Iterations: 75
         Function evaluations: 135
repr(start_params) array([ 1.,  2.])
Optimization terminated successfully.
         Current function value: 2626.471666
         Iterations: 48
         Function evaluations: 94
repr(start_params) array([ 1.,  2.])
Optimization terminated successfully.
         Current function value: 2627.196314
         Iterations: 66
         Function evaluations: 119
repr(start_params) array([ 1.,  2.])
Optimization terminated successfully.
         Current function value: 2626.578538
         Iterations: 56
         Function evaluations: 103
repr(start_params) array([ 1.,  2.])
Optimization terminated successfully.
         Current function value: 2626.471666
         Iterations: 48
         Function evaluations: 94
repr(start_params) array([ 1.,  2.])
Optimization terminated successfully.
         Current function value: 2626.651702
         Iterations: 67
         Function evaluations: 122
repr(start_params) array([ 1.,  2.])
Optimization terminated successfully.
         Current function value: 2626.737426
         Iterations: 60
         Function evaluations: 109
repr(start_params) array([ 1.,  2.])
Optimization terminated successfully.
         Current function value: 2626.613505
         Iterations: 73
         Function evaluations: 141
repr(start_params) array([ 1.,  2.])
Optimization terminated successfully.
         Current function value: 2626.578538
         Iterations: 56
         Function evaluations: 103
repr(start_params) array([ 1.,  2.])
Optimization terminated successfully.
         Current function value: 2626.632218
         Iterations: 64
         Function evaluations: 119
repr(start_params) array([ 1.,  2.])
Optimization terminated successfully.
         Current function value: 2626.651702
         Iterations: 67
         Function evaluations: 122
repr(start_params) array([ 1.,  2.])
Optimization terminated successfully.
         Current function value: 2626.622789
         Iterations: 63
         Function evaluations: 114
repr(start_params) array([ 1.,  2.])
Optimization terminated successfully.
         Current function value: 2626.613505
         Iterations: 73
         Function evaluations: 141
repr(start_params) array([ 1.,  2.])
Optimization terminated successfully.
         Current function value: 2626.627465
         Iterations: 59
         Function evaluations: 109
repr(start_params) array([ 1.,  2.])
Optimization terminated successfully.
         Current function value: 2626.632218
         Iterations: 64
         Function evaluations: 119
repr(start_params) array([ 1.,  2.])
Optimization terminated successfully.
         Current function value: 2626.625104
         Iterations: 59
         Function evaluations: 108
repr(start_params) array([ 1.,  2.])
Optimization terminated successfully.
         Current function value: 2626.629829
         Iterations: 66
         Function evaluations: 118
repr(start_params) array([ 1.,  2.])
Optimization terminated successfully.
         Current function value: 2626.632218
         Iterations: 64
         Function evaluations: 119
repr(start_params) array([ 1.,  2.])
Optimization terminated successfully.
         Current function value: 2626.632218
         Iterations: 64
         Function evaluations: 119
repr(start_params) array([ 1.,  2.])
Optimization terminated successfully.
         Current function value: 2626.628642
         Iterations: 67
         Function evaluations: 122
repr(start_params) array([ 1.,  2.])
Optimization terminated successfully.
         Current function value: 2626.631023
         Iterations: 68
         Function evaluations: 129
repr(start_params) array([ 1.,  2.])
Optimization terminated successfully.
         Current function value: 2626.630430
         Iterations: 57
         Function evaluations: 108
repr(start_params) array([ 1.,  2.])
Optimization terminated successfully.
         Current function value: 2626.629598
         Iterations: 60
         Function evaluations: 112
repr(start_params) array([ 1.,  2.])
Optimization terminated successfully.
         Current function value: 2626.630430
         Iterations: 57
         Function evaluations: 108
repr(start_params) array([ 1.,  2.])
Optimization terminated successfully.
         Current function value: 2626.630130
         Iterations: 65
         Function evaluations: 122
repr(start_params) array([ 1.,  2.])
Optimization terminated successfully.
         Current function value: 2626.629536
         Iterations: 62
         Function evaluations: 111
repr(start_params) array([ 1.,  2.])
Optimization terminated successfully.
         Current function value: 2626.630130
         Iterations: 65
         Function evaluations: 122
repr(start_params) array([ 1.,  2.])
Optimization terminated successfully.
         Current function value: 2626.629984
         Iterations: 67
         Function evaluations: 123
Optimization terminated successfully.
         Current function value: 0.016560
         Iterations: 18
         Function evaluations: 38
>>> res_parks2
(1.0592352626264809, 9.9051580457572399, 2.0966900385041591)
>>> res_parks
(1.0596088578825995, 9.9043376069230007, 2.0975104813987118)
>>> res_par.params
array([  0.84856418,  10.2197801 ,   1.78206799])
>>> np.sqrt(np.diag(mod_par.hessian(res_par.params)))
array([ NaN,  NaN,  NaN])
>>> mod_par.hessian(res_par.params
... )
array([[ NaN,  NaN,  NaN],
       [ NaN,  NaN,  NaN],
       [ NaN,  NaN,  NaN]])
>>> mod_par.hessian(res_parks)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "c:\josef\eclipsegworkspace\statsmodels-josef-experimental-gsoc\scikits\statsmodels\model.py", line 533, in hessian
    return approx_hess(params, self.loglike)[0]  #need options for hess (epsilon)
  File "c:\josef\eclipsegworkspace\statsmodels-josef-experimental-gsoc\scikits\statsmodels\sandbox\regression\numdiff.py", line 118, in approx_hess
    xh = x + h
TypeError: can only concatenate tuple (not "float") to tuple

>>> mod_par.hessian(np.array(res_parks))
array([[ NaN,  NaN,  NaN],
       [ NaN,  NaN,  NaN],
       [ NaN,  NaN,  NaN]])
>>> mod_par.fixed_params
array([        NaN,  9.90510677,         NaN])
>>> mod_par.fixed_params=None
>>> mod_par.hessian(np.array(res_parks))
array([[-890.48553491,           NaN,           NaN],
       [          NaN,           NaN,           NaN],
       [          NaN,           NaN,           NaN]])
>>> mod_par.loglike(np.array(res_parks))
-2626.6322080820569
>>> mod_par.bsejac
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "c:\josef\eclipsegworkspace\statsmodels-josef-experimental-gsoc\scikits\statsmodels\decorators.py", line 85, in __get__
    _cachedval = self.fget(obj)
  File "c:\josef\eclipsegworkspace\statsmodels-josef-experimental-gsoc\scikits\statsmodels\model.py", line 592, in bsejac
    return np.sqrt(np.diag(self.covjac))
  File "c:\josef\eclipsegworkspace\statsmodels-josef-experimental-gsoc\scikits\statsmodels\decorators.py", line 85, in __get__
    _cachedval = self.fget(obj)
  File "c:\josef\eclipsegworkspace\statsmodels-josef-experimental-gsoc\scikits\statsmodels\model.py", line 574, in covjac
    jacv = self.jacv
  File "c:\josef\eclipsegworkspace\statsmodels-josef-experimental-gsoc\scikits\statsmodels\decorators.py", line 85, in __get__
    _cachedval = self.fget(obj)
  File "c:\josef\eclipsegworkspace\statsmodels-josef-experimental-gsoc\scikits\statsmodels\model.py", line 557, in jacv
    return self.jac(self._results.params)
  File "c:\josef\eclipsegworkspace\statsmodels-josef-experimental-gsoc\scikits\statsmodels\model.py", line 530, in jac
    return approx_fprime1(params, self.loglikeobs, **kwds)
  File "c:\josef\eclipsegworkspace\statsmodels-josef-experimental-gsoc\scikits\statsmodels\sandbox\regression\numdiff.py", line 80, in approx_fprime1
    f0 = f(*((xk,)+args))
  File "c:\josef\eclipsegworkspace\statsmodels-josef-experimental-gsoc\scikits\statsmodels\model.py", line 522, in loglikeobs
    return -self.nloglikeobs(params)
  File "C:\Josef\eclipsegworkspace\statsmodels-josef-experimental-gsoc\scikits\statsmodels\examples\ex_generic_mle_tdist.py", line 184, in nloglikeobs
    scale = params[2]
IndexError: index out of bounds
>>> hasattr(self, 'start_params')
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
NameError: name 'self' is not defined

>>> hasattr(mod_par, 'start_params')
True
>>> mod_par.start_params
array([ 1.,  2.])
>>> stats.pareto.stats(1., 9., 2., moments='mvsk')
(array(1.#INF), array(1.#INF), array(1.#QNAN), array(1.#QNAN))
>>> stats.pareto.stats(1., 8., 2., moments='mvsk')
(array(1.#INF), array(1.#INF), array(1.#QNAN), array(1.#QNAN))
>>> stats.pareto.stats(1., 8., 1., moments='mvsk')
(array(1.#INF), array(1.#INF), array(1.#QNAN), array(1.#QNAN))
>>> stats.pareto.stats(1., moments='mvsk')
(array(1.#INF), array(1.#INF), array(1.#QNAN), array(1.#QNAN))
>>> stats.pareto.stats(0.5., moments='mvsk')
  File "<stdin>", line 1
    stats.pareto.stats(0.5., moments='mvsk')
                           ^
SyntaxError: invalid syntax

>>> stats.pareto.stats(0.5, moments='mvsk')
(array(1.#INF), array(1.#INF), array(1.#QNAN), array(1.#QNAN))
>>> stats.pareto.stats(2, moments='mvsk')
(array(2.0), array(1.#INF), array(1.#QNAN), array(1.#QNAN))
>>> stats.pareto.stats(10, moments='mvsk')
(array(1.1111111111111112), array(0.015432098765432098), array(2.8110568859997356), array(14.828571428571429))
>>> stats.pareto.rvs(10, size=10)
array([ 1.07716265,  1.18977526,  1.07093   ,  1.05157081,  1.15991232,
        1.31015589,  1.06675107,  1.08082475,  1.19501243,  1.34967158])
>>> r = stats.pareto.rvs(10, size=1000)
>>> plt
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
NameError: name 'plt' is not defined

>>> import matplotlib.pyplot as plt
>>> plt.hist(r)
(array([962,  32,   3,   2,   0,   0,   0,   0,   0,   1]), array([ 1.00013046,  1.3968991 ,  1.79366773,  2.19043637,  2.587205  ,
        2.98397364,  3.38074227,  3.77751091,  4.17427955,  4.57104818,
        4.96781682]), <a list of 10 Patch objects>)
>>> plt.show()

'''
steminc / scikits.statsmodels python

Products

About

Resources

Contact Gemfury