'''numerical differentiation: gradient, Jacobian, and Hessian

These are simple forward-difference implementations, so that we have them
available without extra dependencies.
* the Jacobian should be faster than numdifftools because it does not loop
  over observations
* numerical precision will vary and depends on the choice of stepsizes

Todo:
* some cleanup
* check numerical accuracy (and bugs) against numdifftools and analytical
  derivatives
  - linear least squares case: (hess - 2*X'X) is 1e-8 or so
  - gradient and Hessian agree with numdifftools when evaluated away from
    the minimum
  - forward gradient and Jacobian evaluated at the minimum are inaccurate,
    centered (+/- epsilon) is ok
* the cross product of the Jacobian, J'J, differs from the Hessian: either a
  wrong example, a bug (unlikely), or a real difference.
  What are the conditions under which J'J and the Hessian are the same?
  (a short derivation follows this docstring)

see also:
BHHH: Greene p. 481, 17.4.6; the MLE Jacobian is d loglike / d beta, where
loglike is a vector with one element per observation;
see also example 17.4, where J'J is very different from the Hessian;
also, does the equality hold only at the minimum, and what is the
relationship to the covariance of the Jacobian matrix?
http://projects.scipy.org/scipy/ticket/1157
http://en.wikipedia.org/wiki/Levenberg%E2%80%93Marquardt_algorithm
  objective: sum((y - f(beta, x))**2); here the Jacobian is d f / d beta and
  not d objective / d beta as in the MLE case in Greene
similar: http://crsouza.blogspot.com/2009/11/neural-network-learning-by-levenberg_18.html#hessian
  in that example: if J = d x*beta / d beta then J'J == X'X

Author : josef-pkt
License : BSD
'''
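# Note on J'J versus the Hessian (a short sketch, expanding the question in
# the docstring above): for the least squares objective
#     Q(beta) = sum_i (y_i - f_i(beta))**2,   J_ij = d f_i / d beta_j,
# the exact Hessian is
#     H = 2*J'J - 2 * sum_i (y_i - f_i(beta)) * d^2 f_i / (d beta d beta'),
# so H equals 2*J'J only when the second term vanishes: exactly for linear
# f_i = x_i'beta (then J = X and H = 2*X'X), and approximately at the minimum
# when the residuals are small (the Gauss-Newton approximation; BHHH is the
# analogous idea for the MLE objective mentioned above).  Away from the
# minimum, or with large residuals, J'J and the Hessian can differ
# substantially, as in Greene's example 17.4.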
import numpy as np
EPS = 2.2204460492503131e-016
# approx_fprime below is adapted from scipy.optimize
def approx_fprime(xk, f, epsilon=1e-8, *args):
    '''Gradient of the scalar function f at xk by forward differences.

    One extra function evaluation per element of xk, in addition to f(xk).
    '''
    f0 = f(*((xk,) + args))
    grad = np.zeros((len(xk),), float)
    ei = np.zeros((len(xk),), float)
    for k in range(len(xk)):
        ei[k] = epsilon
        grad[k] = (f(*((xk + ei,) + args)) - f0) / epsilon
        ei[k] = 0.0
    return grad
def approx_fprime1(xk, f, epsilon=1e-12, args=(), centered=False):
    '''
    Gradient of function, or Jacobian if function f returns 1d array

    Parameters
    ----------
    xk : array
        parameters at which the derivative is evaluated
    f : function
        called as `f(*((xk,)+args))`, returning either one value or a 1d array
    epsilon : float
        stepsize; a negative value gives backward differences
        (TODO: automatic stepsize selection)
    args : tuple
        additional arguments passed to the function f
    centered : bool
        if True, use centered (two-sided) differences with total step
        `epsilon` instead of forward differences

    Returns
    -------
    grad : array
        gradient or Jacobian, evaluated by one-sided forward differencing
        (or centered differencing if `centered` is True)

    Notes
    -----
    todo:
    * scaled stepsize: should a scaled stepsize be an option here, or should
      the scaling be handled by the calling optimization framework?
    '''
f0 = f(*((xk,)+args))
nobs = len(np.atleast_1d(f0)) # it could be a scalar
grad = np.zeros((nobs,len(xk)), float)
ei = np.zeros((len(xk),), float)
if not centered:
for k in range(len(xk)):
ei[k] = epsilon
grad[:,k] = (f(*((xk+ei,)+args)) - f0)/epsilon
ei[k] = 0.0
else:
for k in range(len(xk)):
ei[k] = epsilon/2.
grad[:,k] = (f(*((xk+ei,)+args)) - f(*((xk-ei,)+args)))/epsilon
ei[k] = 0.0
return grad.squeeze()
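# A minimal usage sketch (illustrative, not part of the original module; the
# helper name `_example_fprime1` is hypothetical): for f(x) = sum(x**2) the
# exact gradient at [1., 2.] is [2., 4.], and the centered variant is
# typically the more accurate one, as noted in the module docstring.
def _example_fprime1():
    def f(x):
        return (x**2).sum()
    x0 = np.array([1., 2.])
    g_forward = approx_fprime1(x0, f, epsilon=1e-6)
    g_centered = approx_fprime1(x0, f, epsilon=1e-6, centered=True)
    return g_forward, g_centered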
def approx_hess(xk, f, epsilon=None, args=()):  #, returngrad=True):
    '''
    Calculate Hessian and gradient of f at xk by forward differences.

    Returns the tuple (hess, grad).  If `epsilon` is None, the stepsize is
    chosen automatically from machine precision and the scale of xk.

    todo: cleanup args and options
    '''
    returngrad = True
    if epsilon is None:  #check
        eps = EPS  #np.MachAr().eps
        step = None
    else:
        step = epsilon  #TODO: shouldn't be here but I need to figure out args
    # Compute the stepsize (h)
    if step is None:  #check
        h = eps**(1/3.)*np.maximum(np.abs(xk), 1e-2)
    else:
        h = step
xh = xk + h
h = xh - xk
ee = np.diag(h.ravel())
f0 = f(*((xk,)+args))
# Compute forward step
n = len(xk)
    g = np.zeros(n)
for i in range(n):
g[i] = f(*((xk+ee[i,:],)+args))
hess = np.outer(h,h)
## print hess.shape,
## print 'H=', hess
## print 'h=', hess
# Compute "double" forward step
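    # Each entry is the forward-difference cross derivative
    #   hess[i, j] ~ (f(x + h_i*e_i + h_j*e_j) - f(x + h_i*e_i)
    #                 - f(x + h_j*e_j) + f(x)) / (h_i * h_j),
    # where the denominators h_i*h_j were precomputed above via np.outer.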
for i in range(n):
for j in range(i,n):
hess[i,j] = (f(*((xk+ee[i,:]+ee[j,:],)+args))-g[i]-g[j]+f0)/hess[i,j]
hess[j,i] = hess[i,j]
if returngrad:
grad = (g - f0)/h
return hess, grad
else:
return hess
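# A minimal usage sketch (illustrative; `_example_hess` is a hypothetical
# name): for the quadratic q(x) = x'Ax with symmetric A the exact Hessian is
# 2*A, so the returned difference should be close to zero.
def _example_hess():
    A = np.array([[2., 0.5], [0.5, 1.]])
    def q(x):
        return np.dot(x, np.dot(A, x))
    hess, grad = approx_hess(np.array([1., -1.]), q)
    return hess - 2*A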
def approx_hess3(x0, f, epsilon=None, args=()):
'''calculate Hessian with finite difference derivative approximation
Parameters
----------
x0 : array_like
value at which function derivative is evaluated
f : function
function of one array f(x)
epsilon : float
stepsize, if None, then stepsize is automatically chosen
Returns
-------
hess : ndarray
array of partial second derivatives, Hessian
Notes
-----
based on equation 9 in
    Ridout, M. S. (2009), "Statistical Applications of the Complex-step Method
    of Numerical Differentiation", The American Statistician, 63, 66-74.
    This function uses real finite differences only; the same stepsize is used
    in all directions.
'''
if epsilon is None:
h = EPS**(1/5.)*np.maximum(np.abs(x0),1e-2) # 1/4 from ...
else:
h = epsilon
xh = x0 + h
h = xh - x0
ee = np.diag(h)
hess = np.outer(h,h)
n = dim = np.size(x0) #TODO: What's the assumption on the shape here?
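    # Equation (9) in Ridout (2009) is a four-point central cross difference:
    #   hess[i, j] ~ (f(x + h_i*e_i + h_j*e_j) - f(x + h_i*e_i - h_j*e_j)
    #                 - f(x - h_i*e_i + h_j*e_j) + f(x - h_i*e_i - h_j*e_j))
    #                / (4 * h_i * h_j),
    # with the denominators h_i*h_j taken from the outer product above.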
for i in range(n):
for j in range(i,n):
hess[i,j] = (f(*((x0 + ee[i,:] + ee[j,:],)+args))
- f(*((x0 + ee[i,:] - ee[j,:],)+args))
- (f(*((x0 - ee[i,:] + ee[j,:],)+args))
- f(*((x0 - ee[i,:] - ee[j,:],)+args)))
)/4./hess[i,j]
hess[j,i] = hess[i,j]
return hess
def approx_fhess_p(x0,p,fprime,epsilon,*args):
"""
Approximate the Hessian when the Jacobian is available.
Parameters
----------
x0 : array-like
Point at which to evaluate the Hessian
p : array-like
Point
fprime : func
The Jacobian function
epsilon : float
"""
f2 = fprime(*((x0+epsilon*p,)+args))
f1 = fprime(*((x0,)+args))
return (f2 - f1)/epsilon
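# A minimal usage sketch (illustrative; `_example_fhess_p` is a hypothetical
# name): for q(x) = x'Ax with symmetric A the gradient is 2*A*x, so the
# Hessian-vector product along p should be close to 2*A*p.
def _example_fhess_p():
    A = np.array([[2., 0.5], [0.5, 1.]])
    def grad_q(x):
        return 2*np.dot(A, x)
    x0 = np.array([1., -1.])
    p = np.array([0., 1.])
    return approx_fhess_p(x0, p, grad_q, 1e-6) - 2*np.dot(A, p)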
#copied from try_gradient_complexsteps, which contains notes and some examples for testing
#renamed from complex_step_grad to approx_fprime_cs, change order of arguments
#from Guilherme P. de Freitas, numpy mailing list
def approx_fprime_cs(x0, f, args=(), h=1.0e-20):
    '''calculate gradient or Jacobian with complex-step derivative approximation

    Parameters
    ----------
    x0 : array_like
        value at which the function derivative is evaluated
    f : function
        function of one array, f(x)
    args : tuple
        additional arguments passed to the function f
    h : float
        stepsize for the imaginary perturbation (default 1e-20)

    Returns
    -------
    partials : ndarray
        array of partial derivatives, gradient or Jacobian
    '''
dim = np.size(x0) #TODO: What's the assumption on the shape here?
increments = np.identity(dim) * 1j * h
#TODO: see if this can be vectorized, but usually dim is small
partials = [f(x0+ih, *args).imag / h for ih in increments]
return np.array(partials).T
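# A minimal usage sketch (illustrative; `_example_fprime_cs` is a hypothetical
# name): the complex-step derivative has no subtractive cancellation, so for
# smooth functions it is accurate to about machine precision even with the
# tiny default step.  For f(x) = sum(exp(x)) the exact gradient is exp(x).
def _example_fprime_cs():
    x0 = np.array([0.5, 1.5])
    def f(x):
        return np.exp(x).sum()
    return approx_fprime_cs(x0, f) - np.exp(x0)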
def approx_hess_cs(x0, func, args=(), h=1.0e-20, epsilon=1e-6):
    '''Hessian by centered finite differences of the complex-step gradient,
    with complex step `h` and real (outer) stepsize `epsilon`.
    '''
    def grad(x0):
        return approx_fprime_cs(x0, func, args=args, h=h)
    #Hessian from gradient:
    return (approx_fprime1(x0, grad, epsilon)
            + approx_fprime1(x0, grad, -epsilon))/2.
def approx_hess_cs2(x0, f, epsilon=None, args=()):
'''calculate Hessian with complex step (and fd) derivative approximation
Parameters
----------
x0 : array_like
value at which function derivative is evaluated
f : function
function of one array f(x)
epsilon : float
stepsize, if None, then stepsize is automatically chosen
Returns
-------
hess : ndarray
array of partial second derivatives, Hessian
Notes
-----
based on equation 10 in
    Ridout, M. S. (2009), "Statistical Applications of the Complex-step Method
    of Numerical Differentiation", The American Statistician, 63, 66-74.
The stepsize is the same for the complex and the finite difference part.
'''
if epsilon is None:
h = EPS**(1/5.)*np.maximum(np.abs(x0),1e-2) # 1/4 from ...
else:
h = epsilon
xh = x0 + h
h = xh - x0
ee = np.diag(h)
hess = np.outer(h,h)
n = dim = np.size(x0) #TODO: What's the assumption on the shape here?
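    # Equation (10) in Ridout (2009) combines a complex step of size h_i in
    # direction i with a central real step of size h_j in direction j:
    #   hess[i, j] ~ imag(f(x + 1j*h_i*e_i + h_j*e_j)
    #                     - f(x + 1j*h_i*e_i - h_j*e_j)) / (2 * h_i * h_j),
    # with the denominators h_i*h_j taken from the outer product above.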
for i in range(n):
for j in range(i,n):
hess[i,j] = (f(*((x0 + 1j*ee[i,:] + ee[j,:],)+args))
- f(*((x0 + 1j*ee[i,:] - ee[j,:],)+args))).imag/2./hess[i,j]
hess[j,i] = hess[i,j]
return hess
def fun(beta, x):
    # example: scalar objective, sum of the linear predictor over observations
    return np.dot(x, beta).sum(0)
def fun1(beta, y, x):
    # example: vector of squared residuals, one element per observation
    xb = np.dot(x, beta)
    return (y - xb)**2  #(xb-xb.mean(0))**2
def fun2(beta, y, x):
    # example: sum of squared residuals, the least squares objective
    return fun1(beta, y, x).sum(0)
if __name__ == '__main__':
import scikits.statsmodels.api as sm
from scipy.optimize.optimize import approx_fhess_p
import numpy as np
data = sm.datasets.spector.load()
data.exog = sm.add_constant(data.exog)
mod = sm.Probit(data.endog, data.exog)
res = mod.fit(method="newton")
test_params = [1,0.25,1.4,-7]
llf = mod.loglike
score = mod.score
hess = mod.hessian
# below is Josef's scratch work
nobs = 200
x = np.arange(nobs*3).reshape(nobs,-1)
x = np.random.randn(nobs,3)
xk = np.array([1,2,3])
xk = np.array([1.,1.,1.])
#xk = np.zeros(3)
beta = xk
y = np.dot(x, beta) + 0.1*np.random.randn(nobs)
xk = np.dot(np.linalg.pinv(x),y)
epsilon = 1e-6
args = (y,x)
from scipy import optimize
xfmin = optimize.fmin(fun2, (0,0,0), args)
print approx_fprime((1,2,3),fun,epsilon,x)
jac = approx_fprime1(xk,fun1,epsilon,args)
jacmin = approx_fprime1(xk,fun1,-epsilon,args)
#print jac
print jac.sum(0)
print '\nnp.dot(jac.T, jac)'
print np.dot(jac.T, jac)
print '\n2*np.dot(x.T, x)'
print 2*np.dot(x.T, x)
jac2 = (jac+jacmin)/2.
print np.dot(jac2.T, jac2)
#he = approx_hess(xk,fun2,epsilon,*args)
print approx_hess(xk,fun2,1e-3,args)
he = approx_hess(xk,fun2,None,args)
print 'hessfd'
print he
print 'epsilon =', None
print he[0] - 2*np.dot(x.T, x)
for eps in [1e-3,1e-4,1e-5,1e-6]:
print 'eps =', eps
print approx_hess(xk,fun2,eps,args)[0] - 2*np.dot(x.T, x)
hcs2 = approx_hess_cs2(xk,fun2,args=args)
print 'hcs2'
print hcs2 - 2*np.dot(x.T, x)
hfd3 = approx_hess3(xk,fun2,args=args)
print 'hfd3'
print hfd3 - 2*np.dot(x.T, x)
import numdifftools as nd
hnd = nd.Hessian(lambda a: fun2(a, y, x))
hessnd = hnd(xk)
print 'numdiff'
print hessnd - 2*np.dot(x.T, x)
#assert_almost_equal(hessnd, he[0])
gnd = nd.Gradient(lambda a: fun2(a, y, x))
gradnd = gnd(xk)
'''
>>> hnd = nd.Hessian(lambda a: fun2(a, x))
>>> hnd(xk)
array([[ 216.87702746, -3.41892545, 1.87887281],
[ -3.41892545, 180.76379116, -13.74326021],
[ 1.87887281, -13.74326021, 198.5155617 ]])
>>> he
(array([[ 216.87702746, -3.41892545, 1.87887281],
[ -3.41892545, 180.76379116, -13.74326021],
[ 1.87887281, -13.74326021, 198.5155617 ]]), array([ 2.35204474, 1.92684939, 2.11920745]))
>>> hnd = nd.Gradient(lambda a: fun2(a, x))
>>> hnd(xk)
array([ 0.00000000e+00, 1.40036521e-14, -2.59014117e-14])
>>> hnd((1.2, 1.2, 1.2))
array([ 41.58106741, 34.51076432, 38.95952468])
>>> approx_fprime1(xk,fun1,epsilon,*args).sum(0)
array([ 1.08438310e-04, 9.03821177e-05, 9.92578403e-05])
>>> approx_fprime1((1.2, 1.2, 1.2),fun1,epsilon,*args).sum(0)
array([ 41.58117585, 34.5108547 , 38.95962393])
>>> approx_fprime((1.2, 1.2, 1.2),fun2,epsilon,*args).sum(0)
115.05165448078003
>>> approx_fprime((1.2, 1.2, 1.2),fun2,epsilon,*args)
array([ 41.58117584, 34.5108547 , 38.95962393])
>>> epsilon
9.9999999999999995e-007
>>> approx_hess(np.array([1.2, 1.2, 1.2]),fun2,epsilon,*args)
(3, 3) H= [[ 1.00000000e-12 1.00000000e-12 1.00000000e-12]
[ 1.00000000e-12 1.00000000e-12 1.00000000e-12]
[ 1.00000000e-12 1.00000000e-12 1.00000000e-12]]
h= [[ 1.00000000e-12 1.00000000e-12 1.00000000e-12]
[ 1.00000000e-12 1.00000000e-12 1.00000000e-12]
[ 1.00000000e-12 1.00000000e-12 1.00000000e-12]]
(array([[ 216.87718291, -3.41771056, 1.87938554],
[ -3.41771056, 180.76384836, -13.74189651],
[ 1.87938554, -13.74189651, 198.51498226]]), array([ 41.58117585, 34.51085471, 38.95962394]))
>>> hnd = nd.Hessian(lambda a: fun2(a, x))
>>> hnd((1.2, 1.2, 1.2))
array([[ 216.87702746, -3.41892545, 1.87887281],
[ -3.41892545, 180.76379116, -13.74326021],
[ 1.87887281, -13.74326021, 198.5155617 ]])
>>> hnd(xk)
array([[ 216.87702746, -3.41892545, 1.87887281],
[ -3.41892545, 180.76379116, -13.74326021],
[ 1.87887281, -13.74326021, 198.5155617 ]])
>>> j = approx_fprime1((1.2, 1.2, 1.2),fun1,epsilon,*args)
>>> np.dot(j.T,j)
array([[ 90.37418663, 23.81355855, 33.2936759 ],
[ 23.81355855, 49.01569714, 16.70507137],
[ 33.2936759 , 16.70507137, 82.24708274]])
>>> heb = approx_hess(xk,fun2,-1e-6,*args)
(3, 3) H= [[ 1.00000000e-12 1.00000000e-12 1.00000000e-12]
[ 1.00000000e-12 1.00000000e-12 1.00000000e-12]
[ 1.00000000e-12 1.00000000e-12 1.00000000e-12]]
h= [[ 1.00000000e-12 1.00000000e-12 1.00000000e-12]
[ 1.00000000e-12 1.00000000e-12 1.00000000e-12]
[ 1.00000000e-12 1.00000000e-12 1.00000000e-12]]
>>> hef
(array([[ 216.87707189, -3.41926487, 1.87860838],
[ -3.41926487, 180.76329321, -13.74345082],
[ 1.87860838, -13.74345082, 198.51542631]]), array([ 1.08438369e-04, 9.03821462e-05, 9.92579352e-05]))
>>> heb
(array([[ 216.87729394, -3.41848772, 1.87905247],
[ -3.41848772, 180.76384832, -13.7433398 ],
[ 1.87905247, -13.7433398 , 198.51575938]]), array([ -1.08438258e-04, -9.03819242e-05, -9.92578242e-05]))
>>> (hef[1]+heb[1])/2.
array([ 5.55111512e-11, 1.11022302e-10, 5.55111512e-11])
>>> (hef[0]+heb[0])/2.
array([[ 216.87718291, -3.41887629, 1.87883042],
[ -3.41887629, 180.76357077, -13.74339531],
[ 1.87883042, -13.74339531, 198.51559284]])
>>> hnd = nd.Hessian(lambda a: fun2(a, x))
>>> hnd(xk)
array([[ 216.87702746, -3.41892545, 1.87887281],
[ -3.41892545, 180.76379116, -13.74326021],
[ 1.87887281, -13.74326021, 198.5155617 ]])
>>> j = approx_fprime1((1.2, 1.2, 1.2),fun1,epsilon,*args)
>>> np.dot(j.T,j)
array([[ 90.37418663, 23.81355855, 33.2936759 ],
[ 23.81355855, 49.01569714, 16.70507137],
[ 33.2936759 , 16.70507137, 82.24708274]])
'''