"""Generic interface for least-square minimization."""
from __future__ import division, print_function, absolute_import

from warnings import warn

import numpy as np
from numpy.linalg import norm
from scipy.sparse import issparse, csr_matrix
from scipy.sparse.linalg import LinearOperator
from scipy.optimize import _minpack, OptimizeResult
from scipy.optimize._numdiff import approx_derivative, group_columns
from scipy._lib.six import string_types

from .trf import trf
from .dogbox import dogbox
from .common import EPS, in_bounds, make_strictly_feasible


TERMINATION_MESSAGES = {
-1: "Improper input parameters status returned from `leastsq`",
0: "The maximum number of function evaluations is exceeded.",
1: "`gtol` termination condition is satisfied.",
2: "`ftol` termination condition is satisfied.",
3: "`xtol` termination condition is satisfied.",
4: "Both `ftol` and `xtol` termination conditions are satisfied."
}


FROM_MINPACK_TO_COMMON = {
0: -1, # Improper input parameters from MINPACK.
1: 2,
2: 3,
3: 4,
4: 1,
5: 0
    # There are also statuses 6, 7 and 8 for tolerance parameters that are
    # too small, but we guard against them by checking ftol, xtol and gtol
    # beforehand.
}
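# For example, MINPACK status 1 (actual and predicted reductions in the sum
# of squares are at most ftol) maps to our status 2, which selects the
# `ftol` message in TERMINATION_MESSAGES above.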


def call_minpack(fun, x0, jac, ftol, xtol, gtol, max_nfev, x_scale, diff_step):
n = x0.size
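    # MINPACK's epsfcn is the assumed relative error in function values;
    # lmdif uses sqrt(epsfcn) as the relative step of its forward-difference
    # scheme, so a requested relative step `diff_step` maps to diff_step**2.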
if diff_step is None:
epsfcn = EPS
else:
epsfcn = diff_step**2
    # Compute MINPACK's `diag`, which is the inverse of our `x_scale`;
    # ``x_scale='jac'`` corresponds to ``diag=None``.
if isinstance(x_scale, string_types) and x_scale == 'jac':
diag = None
else:
diag = 1 / x_scale
full_output = True
col_deriv = False
factor = 100.0
if jac is None:
if max_nfev is None:
            # n squared to account for Jacobian evaluations: a '2-point'
            # finite-difference Jacobian costs n extra function evaluations.
            max_nfev = 100 * n * (n + 1)
x, info, status = _minpack._lmdif(
fun, x0, (), full_output, ftol, xtol, gtol,
max_nfev, epsfcn, factor, diag)
else:
if max_nfev is None:
max_nfev = 100 * n
x, info, status = _minpack._lmder(
fun, jac, x0, (), full_output, col_deriv,
ftol, xtol, gtol, max_nfev, factor, diag)
f = info['fvec']
if callable(jac):
J = jac(x)
else:
J = np.atleast_2d(approx_derivative(fun, x))
cost = 0.5 * np.dot(f, f)
g = J.T.dot(f)
g_norm = norm(g, ord=np.inf)
nfev = info['nfev']
njev = info.get('njev', None)
status = FROM_MINPACK_TO_COMMON[status]
active_mask = np.zeros_like(x0, dtype=int)
return OptimizeResult(
x=x, cost=cost, fun=f, jac=J, grad=g, optimality=g_norm,
active_mask=active_mask, nfev=nfev, njev=njev, status=status)


def prepare_bounds(bounds, n):
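    """Broadcast lower and upper bounds to shape (n,).

    Examples
    --------
    A scalar bound is expanded to all n variables, while an array-like bound
    is used as given:

    >>> lb, ub = prepare_bounds((0, [1.0, 2.0]), 2)
    >>> lb.tolist(), ub.tolist()
    ([0.0, 0.0], [1.0, 2.0])
    """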
lb, ub = [np.asarray(b, dtype=float) for b in bounds]
if lb.ndim == 0:
lb = np.resize(lb, n)
if ub.ndim == 0:
ub = np.resize(ub, n)
return lb, ub


def check_tolerance(ftol, xtol, gtol):
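    """Replace None tolerances by 0 and warn about sub-EPS values.

    Examples
    --------
    ``None`` disables the corresponding termination condition; at least one
    tolerance must stay at or above machine epsilon:

    >>> check_tolerance(1e-8, None, 1e-8)
    (1e-08, 0, 1e-08)
    """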
def check(tol, name):
if tol is None:
tol = 0
elif tol < EPS:
warn("Setting `{}` below the machine epsilon ({:.2e}) effectively "
"disables the corresponding termination condition."
.format(name, EPS))
return tol
ftol = check(ftol, "ftol")
xtol = check(xtol, "xtol")
gtol = check(gtol, "gtol")
if ftol < EPS and xtol < EPS and gtol < EPS:
raise ValueError("At least one of the tolerances must be higher than "
"machine epsilon ({:.2e}).".format(EPS))
return ftol, xtol, gtol


def check_x_scale(x_scale, x0):
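    """Validate `x_scale` and broadcast it against `x0`.

    Examples
    --------
    A positive scalar is expanded to the shape of `x0`:

    >>> import numpy as np
    >>> check_x_scale(2.0, np.zeros(3)).tolist()
    [2.0, 2.0, 2.0]
    """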
if isinstance(x_scale, string_types) and x_scale == 'jac':
return x_scale
try:
x_scale = np.asarray(x_scale, dtype=float)
valid = np.all(np.isfinite(x_scale)) and np.all(x_scale > 0)
except (ValueError, TypeError):
valid = False
if not valid:
raise ValueError("`x_scale` must be 'jac' or array_like with "
"positive numbers.")
if x_scale.ndim == 0:
x_scale = np.resize(x_scale, x0.shape)
if x_scale.shape != x0.shape:
raise ValueError("Inconsistent shapes between `x_scale` and `x0`.")
return x_scale


def check_jac_sparsity(jac_sparsity, m, n):
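    """Validate `jac_sparsity` and precompute a column grouping for it.

    Examples
    --------
    Structurally orthogonal columns (no nonzeros in the same row) are placed
    in the same group, so one perturbation can estimate several columns of
    the Jacobian at once:

    >>> structure, groups = check_jac_sparsity([[1, 0], [0, 1]], 2, 2)
    >>> groups.tolist()
    [0, 0]
    """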
if jac_sparsity is None:
return None
if not issparse(jac_sparsity):
jac_sparsity = np.atleast_2d(jac_sparsity)
if jac_sparsity.shape != (m, n):
raise ValueError("`jac_sparsity` has wrong shape.")
return jac_sparsity, group_columns(jac_sparsity)


# Loss functions.
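# Each loss below receives z = (f / f_scale)**2 and fills a (3, m) array
# `rho` in place: row 0 is rho(z), row 1 is rho'(z) and row 2 is rho''(z).
# With ``cost_only=True`` only row 0 is filled. As a quick sanity check,
# huber is continuous at z = 1: both branches give rho(1) = 1.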


def huber(z, rho, cost_only):
mask = z <= 1
rho[0, mask] = z[mask]
rho[0, ~mask] = 2 * z[~mask]**0.5 - 1
if cost_only:
return
rho[1, mask] = 1
rho[1, ~mask] = z[~mask]**-0.5
rho[2, mask] = 0
rho[2, ~mask] = -0.5 * z[~mask]**-1.5


def soft_l1(z, rho, cost_only):
t = 1 + z
rho[0] = 2 * (t**0.5 - 1)
if cost_only:
return
rho[1] = t**-0.5
rho[2] = -0.5 * t**-1.5


def cauchy(z, rho, cost_only):
rho[0] = np.log1p(z)
if cost_only:
return
t = 1 + z
rho[1] = 1 / t
rho[2] = -1 / t**2


def arctan(z, rho, cost_only):
rho[0] = np.arctan(z)
if cost_only:
return
t = 1 + z**2
rho[1] = 1 / t
rho[2] = -2 * z / t**2


IMPLEMENTED_LOSSES = dict(linear=None, huber=huber, soft_l1=soft_l1,
cauchy=cauchy, arctan=arctan)


def construct_loss_function(m, loss, f_scale):
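    """Return ``loss_function(f, cost_only=False)`` for the given loss.

    For 'linear' loss, None is returned and callers fall back to the plain
    least-squares cost. Note that for string losses the returned function
    reuses a single (3, m) buffer across calls.

    Examples
    --------
    >>> import numpy as np
    >>> loss_function = construct_loss_function(3, 'soft_l1', 1.0)
    >>> f = np.array([0.0, 1.0, 2.0])
    >>> cost = loss_function(f, cost_only=True)
    >>> rho = loss_function(f)
    >>> rho.shape
    (3, 3)
    >>> bool(np.isclose(cost, 0.5 * np.sum(rho[0])))
    True
    """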
if loss == 'linear':
return None
if not callable(loss):
loss = IMPLEMENTED_LOSSES[loss]
rho = np.empty((3, m))
def loss_function(f, cost_only=False):
z = (f / f_scale) ** 2
loss(z, rho, cost_only=cost_only)
if cost_only:
return 0.5 * f_scale ** 2 * np.sum(rho[0])
rho[0] *= f_scale ** 2
rho[2] /= f_scale ** 2
return rho
else:
def loss_function(f, cost_only=False):
z = (f / f_scale) ** 2
rho = loss(z)
if cost_only:
return 0.5 * f_scale ** 2 * np.sum(rho[0])
rho[0] *= f_scale ** 2
rho[2] /= f_scale ** 2
return rho
return loss_function


def least_squares(
fun, x0, jac='2-point', bounds=(-np.inf, np.inf), method='trf',
ftol=1e-8, xtol=1e-8, gtol=1e-8, x_scale=1.0, loss='linear',
f_scale=1.0, diff_step=None, tr_solver=None, tr_options={},
jac_sparsity=None, max_nfev=None, verbose=0, args=(), kwargs={}):
"""Solve a nonlinear least-squares problem with bounds on the variables.
Given the residuals f(x) (an m-dimensional real function of n real
variables) and the loss function rho(s) (a scalar function), `least_squares`
finds a local minimum of the cost function F(x)::
minimize F(x) = 0.5 * sum(rho(f_i(x)**2), i = 0, ..., m - 1)
subject to lb <= x <= ub
The purpose of the loss function rho(s) is to reduce the influence of
outliers on the solution.
Parameters
----------
fun : callable
Function which computes the vector of residuals, with the signature
``fun(x, *args, **kwargs)``, i.e., the minimization proceeds with
respect to its first argument. The argument ``x`` passed to this
function is an ndarray of shape (n,) (never a scalar, even for n=1).
It must return a 1-d array_like of shape (m,) or a scalar. If the
argument ``x`` is complex or the function ``fun`` returns complex
residuals, it must be wrapped in a real function of real arguments,
as shown at the end of the Examples section.
x0 : array_like with shape (n,) or float
Initial guess on independent variables. If float, it will be treated
as a 1-d array with one element.
jac : {'2-point', '3-point', 'cs', callable}, optional
Method of computing the Jacobian matrix (an m-by-n matrix, where
element (i, j) is the partial derivative of f[i] with respect to
x[j]). The keywords select a finite difference scheme for numerical
estimation. The scheme '3-point' is more accurate, but requires
twice as many operations as '2-point' (default). The scheme 'cs'
uses complex steps, and while potentially the most accurate, it is
applicable only when `fun` correctly handles complex inputs and
can be analytically continued to the complex plane. Method 'lm'
always uses the '2-point' scheme. If callable, it is used as
``jac(x, *args, **kwargs)`` and should return a good approximation
(or the exact value) for the Jacobian as an array_like (np.atleast_2d
is applied), a sparse matrix or a `scipy.sparse.linalg.LinearOperator`.
bounds : 2-tuple of array_like, optional
Lower and upper bounds on independent variables. Defaults to no bounds.
        Each array must match the size of `x0` or be a scalar; in the latter
        case the bound will be the same for all variables. Use ``np.inf`` with
an appropriate sign to disable bounds on all or some variables.
method : {'trf', 'dogbox', 'lm'}, optional
Algorithm to perform minimization.
* 'trf' : Trust Region Reflective algorithm, particularly suitable
for large sparse problems with bounds. Generally robust method.
* 'dogbox' : dogleg algorithm with rectangular trust regions,
typical use case is small problems with bounds. Not recommended
for problems with rank-deficient Jacobian.
* 'lm' : Levenberg-Marquardt algorithm as implemented in MINPACK.
Doesn't handle bounds and sparse Jacobians. Usually the most
efficient method for small unconstrained problems.
Default is 'trf'. See Notes for more information.
ftol : float or None, optional
Tolerance for termination by the change of the cost function. Default
is 1e-8. The optimization process is stopped when ``dF < ftol * F``,
and there was an adequate agreement between a local quadratic model and
the true model in the last step. If None, the termination by this
condition is disabled.
xtol : float or None, optional
Tolerance for termination by the change of the independent variables.
Default is 1e-8. The exact condition depends on the `method` used:
* For 'trf' and 'dogbox' : ``norm(dx) < xtol * (xtol + norm(x))``
* For 'lm' : ``Delta < xtol * norm(xs)``, where ``Delta`` is
a trust-region radius and ``xs`` is the value of ``x``
scaled according to `x_scale` parameter (see below).
If None, the termination by this condition is disabled.
gtol : float or None, optional
Tolerance for termination by the norm of the gradient. Default is 1e-8.
        The exact condition depends on the `method` used:
* For 'trf' : ``norm(g_scaled, ord=np.inf) < gtol``, where
``g_scaled`` is the value of the gradient scaled to account for
the presence of the bounds [STIR]_.
* For 'dogbox' : ``norm(g_free, ord=np.inf) < gtol``, where
``g_free`` is the gradient with respect to the variables which
are not in the optimal state on the boundary.
* For 'lm' : the maximum absolute value of the cosine of angles
between columns of the Jacobian and the residual vector is less
than `gtol`, or the residual vector is zero.
If None, the termination by this condition is disabled.
x_scale : array_like or 'jac', optional
Characteristic scale of each variable. Setting `x_scale` is equivalent
to reformulating the problem in scaled variables ``xs = x / x_scale``.
        An alternative view is that the size of a trust region along the j-th
dimension is proportional to ``x_scale[j]``. Improved convergence may
be achieved by setting `x_scale` such that a step of a given size
along any of the scaled variables has a similar effect on the cost
function. If set to 'jac', the scale is iteratively updated using the
inverse norms of the columns of the Jacobian matrix (as described in
[JJMore]_).
loss : str or callable, optional
Determines the loss function. The following keyword values are allowed:

        * 'linear' (default) : ``rho(z) = z``. Gives a standard
          least-squares problem.
        * 'soft_l1' : ``rho(z) = 2 * ((1 + z)**0.5 - 1)``. The smooth
          approximation of l1 (absolute value) loss. Usually a good
          choice for robust least squares.
        * 'huber' : ``rho(z) = z if z <= 1 else 2*z**0.5 - 1``. Works
          similarly to 'soft_l1'.
        * 'cauchy' : ``rho(z) = ln(1 + z)``. Severely weakens outliers
          influence, but may cause difficulties in optimization process.
        * 'arctan' : ``rho(z) = arctan(z)``. Limits a maximum loss on
          a single residual, has properties similar to 'cauchy'.

        If callable, it must take a 1-d ndarray ``z=f**2`` and return an
        array_like with shape (3, m) where row 0 contains function values,
        row 1 contains first derivatives and row 2 contains second
        derivatives. Method 'lm' supports only 'linear' loss.