"""
Functions
---------
.. autosummary::
:toctree: generated/
line_search_armijo
line_search_wolfe1
line_search_wolfe2
scalar_search_wolfe1
scalar_search_wolfe2
"""
from __future__ import division, print_function, absolute_import
from warnings import warn
from scipy.optimize import minpack2
import numpy as np
from scipy._lib.six import xrange
__all__ = ['LineSearchWarning', 'line_search_wolfe1', 'line_search_wolfe2',
'scalar_search_wolfe1', 'scalar_search_wolfe2',
'line_search_armijo']
class LineSearchWarning(RuntimeWarning):
pass
#------------------------------------------------------------------------------
# Minpack's Wolfe line and scalar searches
#------------------------------------------------------------------------------
def line_search_wolfe1(f, fprime, xk, pk, gfk=None,
old_fval=None, old_old_fval=None,
args=(), c1=1e-4, c2=0.9, amax=50, amin=1e-8,
xtol=1e-14):
"""
As `scalar_search_wolfe1` but do a line search to direction `pk`
Parameters
----------
f : callable
Function `f(x)`
fprime : callable
Gradient of `f`
xk : array_like
Current point
pk : array_like
Search direction
gfk : array_like, optional
Gradient of `f` at point `xk`
old_fval : float, optional
Value of `f` at point `xk`
old_old_fval : float, optional
Value of `f` at point preceding `xk`
The rest of the parameters are the same as for `scalar_search_wolfe1`.
Returns
-------
stp, f_count, g_count, fval, old_fval
As in `line_search_wolfe1`
gval : array
Gradient of `f` at the final point
"""
if gfk is None:
gfk = fprime(xk)
if isinstance(fprime, tuple):
eps = fprime[1]
fprime = fprime[0]
newargs = (f, eps) + args
gradient = False
else:
newargs = args
gradient = True
gval = [gfk]
gc = [0]
fc = [0]
def phi(s):
fc[0] += 1
return f(xk + s*pk, *args)
def derphi(s):
gval[0] = fprime(xk + s*pk, *newargs)
if gradient:
gc[0] += 1
else:
fc[0] += len(xk) + 1
return np.dot(gval[0], pk)
derphi0 = np.dot(gfk, pk)
stp, fval, old_fval = scalar_search_wolfe1(
phi, derphi, old_fval, old_old_fval, derphi0,
c1=c1, c2=c2, amax=amax, amin=amin, xtol=xtol)
return stp, fc[0], gc[0], fval, old_fval, gval[0]
def scalar_search_wolfe1(phi, derphi, phi0=None, old_phi0=None, derphi0=None,
c1=1e-4, c2=0.9,
amax=50, amin=1e-8, xtol=1e-14):
"""
Scalar function search for alpha that satisfies strong Wolfe conditions
alpha > 0 is assumed to be a descent direction.
Parameters
----------
phi : callable phi(alpha)
Function at point `alpha`
derphi : callable phi'(alpha)
Objective function derivative. Returns a scalar.
phi0 : float, optional
Value of phi at 0
old_phi0 : float, optional
Value of phi at previous point
derphi0 : float, optional
Value derphi at 0
c1 : float, optional
Parameter for Armijo condition rule.
c2 : float, optional
Parameter for curvature condition rule.
amax, amin : float, optional
Maximum and minimum step size
xtol : float, optional
Relative tolerance for an acceptable step.
Returns
-------
alpha : float
Step size, or None if no suitable step was found
phi : float
Value of `phi` at the new point `alpha`
phi0 : float
Value of `phi` at `alpha=0`
Notes
-----
Uses routine DCSRCH from MINPACK.
"""
if phi0 is None:
phi0 = phi(0.)
if derphi0 is None:
derphi0 = derphi(0.)
if old_phi0 is not None and derphi0 != 0:
alpha1 = min(1.0, 1.01*2*(phi0 - old_phi0)/derphi0)
if alpha1 < 0:
alpha1 = 1.0
else:
alpha1 = 1.0
phi1 = phi0
derphi1 = derphi0
isave = np.zeros((2,), np.intc)
dsave = np.zeros((13,), float)
task = b'START'
maxiter = 100
for i in xrange(maxiter):
stp, phi1, derphi1, task = minpack2.dcsrch(alpha1, phi1, derphi1,
c1, c2, xtol, task,
amin, amax, isave, dsave)
if task[:2] == b'FG':
alpha1 = stp
phi1 = phi(stp)
derphi1 = derphi(stp)
else:
break
else:
# maxiter reached, the line search did not converge
stp = None
if task[:5] == b'ERROR' or task[:4] == b'WARN':
stp = None # failed
return stp, phi1, phi0
line_search = line_search_wolfe1
#------------------------------------------------------------------------------
# Pure-Python Wolfe line and scalar searches
#------------------------------------------------------------------------------
def line_search_wolfe2(f, myfprime, xk, pk, gfk=None, old_fval=None,
old_old_fval=None, args=(), c1=1e-4, c2=0.9, amax=None,
extra_condition=None, maxiter=10):
"""Find alpha that satisfies strong Wolfe conditions.
Parameters
----------
f : callable f(x,*args)
Objective function.
myfprime : callable f'(x,*args)
Objective function gradient.
xk : ndarray
Starting point.
pk : ndarray
Search direction.
gfk : ndarray, optional
Gradient value for x=xk (xk being the current parameter
estimate). Will be recomputed if omitted.
old_fval : float, optional
Function value for x=xk. Will be recomputed if omitted.
old_old_fval : float, optional
Function value for the point preceding x=xk
args : tuple, optional
Additional arguments passed to objective function.
c1 : float, optional
Parameter for Armijo condition rule.
c2 : float, optional
Parameter for curvature condition rule.
amax : float, optional
Maximum step size
extra_condition : callable, optional
A callable of the form ``extra_condition(alpha, x, f, g)``
returning a boolean. Arguments are the proposed step ``alpha``
and the corresponding ``x``, ``f`` and ``g`` values. The line search
accepts the value of ``alpha`` only if this
callable returns ``True``. If the callable returns ``False``
for the step length, the algorithm will continue with
new iterates. The callable is only called for iterates
satisfying the strong Wolfe conditions.
maxiter : int, optional
Maximum number of iterations to perform
Returns
-------
alpha : float or None
Alpha for which ``x_new = x0 + alpha * pk``,
or None if the line search algorithm did not converge.
fc : int
Number of function evaluations made.
gc : int
Number of gradient evaluations made.
new_fval : float or None
New function value ``f(x_new)=f(x0+alpha*pk)``,
or None if the line search algorithm did not converge.
old_fval : float
Old function value ``f(x0)``.
new_slope : float or None
The local slope along the search direction at the
new value ``<myfprime(x_new), pk>``,
or None if the line search algorithm did not converge.
Notes
-----
Uses the line search algorithm to enforce strong Wolfe
conditions. See Wright and Nocedal, 'Numerical Optimization',
1999, pg. 59-60.
For the zoom phase it uses an algorithm by [...].
"""
fc = [0]
gc = [0]
gval = [None]
gval_alpha = [None]
def phi(alpha):
fc[0] += 1
return f(xk + alpha * pk, *args)
if isinstance(myfprime, tuple):
def derphi(alpha):
fc[0] += len(xk) + 1
eps = myfprime[1]
fprime = myfprime[0]
newargs = (f, eps) + args
gval[0] = fprime(xk + alpha * pk, *newargs) # store for later use
gval_alpha[0] = alpha
return np.dot(gval[0], pk)
else:
fprime = myfprime
def derphi(alpha):
gc[0] += 1
gval[0] = fprime(xk + alpha * pk, *args) # store for later use
gval_alpha[0] = alpha
return np.dot(gval[0], pk)
if gfk is None:
gfk = fprime(xk, *args)
derphi0 = np.dot(gfk, pk)
if extra_condition is not None:
# Add the current gradient as argument, to avoid needless
# re-evaluation
def extra_condition2(alpha, phi):
if gval_alpha[0] != alpha:
derphi(alpha)
x = xk + alpha * pk
return extra_condition(alpha, x, phi, gval[0])
else:
extra_condition2 = None
alpha_star, phi_star, old_fval, derphi_star = scalar_search_wolfe2(
phi, derphi, old_fval, old_old_fval, derphi0, c1, c2, amax,
extra_condition2, maxiter=maxiter)
if derphi_star is None:
warn('The line search algorithm did not converge', LineSearchWarning)
else:
# derphi_star is a number (derphi) -- so use the most recently
# calculated gradient used in computing it derphi = gfk*pk
# this is the gradient at the next step no need to compute it
# again in the outer loop.
derphi_star = gval[0]
return alpha_star, fc[0], gc[0], phi_star, old_fval, derphi_star
def scalar_search_wolfe2(phi, derphi, phi0=None,
old_phi0=None, derphi0=None,
c1=1e-4, c2=0.9, amax=None,
extra_condition=None, maxiter=10):
"""Find alpha that satisfies strong Wolfe conditions.
alpha > 0 is assumed to be a descent direction.
Parameters
----------
phi : callable phi(alpha)
Objective scalar function.
derphi : callable phi'(alpha)
Objective function derivative. Returns a scalar.
phi0 : float, optional
Value of phi at 0
old_phi0 : float, optional
Value of phi at previous point
derphi0 : float, optional
Value of derphi at 0
c1 : float, optional
Loading ...