# The fatiando package has been deprecated. Please check out the new tools
# on the Fatiando a Terra website: www.fatiando.org
#
# Source code for fatiando.inversion.optimization

"""
Methods to optimize a given objective function.

All solvers are Python iterators. This means that they should be used in a
``for`` loop, like so::

    solver = newton(hess_func, grad_func, value_func, initial)
    for i, p, stats in solver:
        ... do something or 'continue' to step through the iterations ...
        # 'p' is the current estimate for the parameter vector at the 'i'th
        # iteration.
        # 'stats' is a dictionary with some information about the optimization
        # process so far (number of attempted steps, value of objective
        # function per step, total number of iterations so far, etc).
    # At the end, 'p' is the final estimate and 'stats' will contain the
    # statistics for the whole iteration process.

**Gradient descent**

* :func:`~fatiando.inversion.optimization.linear`: Solver for a linear problem
* :func:`~fatiando.inversion.optimization.newton`: Newton's method
* :func:`~fatiando.inversion.optimization.levmarq`: Levenberg-Marquardt
  algorithm
* :func:`~fatiando.inversion.optimization.steepest`: Steepest Descent method

**Heuristic methods**

* :func:`~fatiando.inversion.optimization.acor`: ACO-R: Ant Colony Optimization
  for Continuous Domains (Socha and Dorigo, 2008)

**References**

Socha, K., and M. Dorigo (2008), Ant colony optimization for continuous
domains, European Journal of Operational Research, 185(3), 1155-1173,
doi:10.1016/j.ejor.2006.06.046.


----

"""
from __future__ import division
import copy
import warnings
import numpy
import scipy.sparse

from ..utils import safe_solve, safe_diagonal, safe_dot


def linear(hessian, gradient, precondition=True):
    r"""
    Find the parameter vector that minimizes a linear objective function.

    The parameter vector :math:`\bar{p}` that minimizes this objective
    function :math:`\phi` is the one that solves the linear system

    .. math::

        \bar{\bar{H}} \bar{p} = -\bar{g}

    where :math:`\bar{\bar{H}}` is the Hessian matrix of :math:`\phi` and
    :math:`\bar{g}` is the gradient vector of :math:`\phi`.

    Parameters:

    * hessian : 2d-array
        The Hessian matrix of the objective function.
    * gradient : 1d-array
        The gradient vector of the objective function.
    * precondition : True or False
        If True, will use Jacobi preconditioning.

    Yields:

    * i, estimate, stats:
        * i : int
            The current iteration number
        * estimate : 1d-array
            The current estimated parameter vector
        * stats : dict
            Statistics about the optimization so far

    Linear solvers have only a single step, so ``i`` will be 0 and ``stats``
    will only have the method name.

    """
    if precondition:
        # Jacobi (diagonal) preconditioning: scale each equation by the
        # inverse of the absolute Hessian diagonal, clipping tiny entries
        # to avoid division by (near) zero.
        scale = numpy.abs(safe_diagonal(hessian))
        scale[scale < 10 ** -10] = 10 ** -10
        jacobi = scipy.sparse.diags(1. / scale, 0).tocsr()
        hessian = safe_dot(jacobi, hessian)
        gradient = safe_dot(jacobi, gradient)
    # Single direct solve of H p = -g; yielded once to match the iterator
    # protocol shared by all solvers in this module.
    estimate = safe_solve(hessian, -gradient)
    yield 0, estimate, dict(method="Linear solver")
def newton(hessian, gradient, value, initial, maxit=30, tol=10 ** -5,
           precondition=True):
    r"""
    Minimize an objective function using Newton's method.

    Newton's method searches for the minimum of an objective function
    :math:`\phi(\bar{p})` by successively incrementing the initial estimate.
    The increment is the solution of the linear system

    .. math::

        \bar{\bar{H}}(\bar{p}^k) \bar{\Delta p}^k = -\bar{g}(\bar{p}^k)

    where :math:`\bar{\bar{H}}` is the Hessian matrix of :math:`\phi` and
    :math:`\bar{g}` is the gradient vector of :math:`\phi`. Both are evaluated
    at the previous estimate :math:`\bar{p}^k`.

    Parameters:

    * hessian : function
        A function that returns the Hessian matrix of the objective function
        when given a parameter vector.
    * gradient : function
        A function that returns the gradient vector of the objective function
        when given a parameter vector.
    * value : function
        A function that returns the value of the objective function evaluated
        at a given parameter vector.
    * initial : 1d-array
        The initial estimate for the gradient descent.
    * maxit : int
        The maximum number of iterations allowed.
    * tol : float
        The convergence criterion. The lower it is, the more steps are
        permitted.
    * precondition : True or False
        If True, will use Jacobi preconditioning.

    Yields:

    * i, estimate, stats:
        * i : int
            The current iteration number
        * estimate : 1d-array
            The current estimated parameter vector
        * stats : dict
            Statistics about the optimization so far. Keys:

            * method : str
                The name of the optimization method
            * iterations : int
                The total number of iterations so far
            * objective : list
                Value of the objective function per iteration. First value
                corresponds to the initial estimate

    """
    stats = dict(method="Newton's method",
                 iterations=0,
                 objective=[])
    # numpy.float was removed from numpy; the builtin float is equivalent.
    p = numpy.array(initial, dtype=float)
    misfit = value(p)
    stats['objective'].append(misfit)
    # xrange -> range for Python 3 compatibility.
    for iteration in range(maxit):
        hess = hessian(p)
        grad = gradient(p)
        if precondition:
            # Jacobi preconditioning: divide each row by the absolute
            # Hessian diagonal, clipped away from zero.
            diag = numpy.abs(safe_diagonal(hess))
            diag[diag < 10 ** -10] = 10 ** -10
            precond = scipy.sparse.diags(1. / diag, 0).tocsr()
            hess = safe_dot(precond, hess)
            grad = safe_dot(precond, grad)
        # Newton step: solve H dp = -g and increment the estimate.
        p = p + safe_solve(hess, -grad)
        newmisfit = value(p)
        stats['objective'].append(newmisfit)
        stats['iterations'] += 1
        # deepcopy so callers can keep a snapshot of the stats per iteration.
        yield iteration, p, copy.deepcopy(stats)
        # Stop when the misfit increases or the relative change is below tol.
        if newmisfit > misfit or abs((newmisfit - misfit) / misfit) < tol:
            break
        misfit = newmisfit
        # Only reached when this iteration did NOT converge; warn if the
        # iteration budget is exhausted.
        if iteration == maxit - 1:
            warnings.warn(
                'Exited because maximum iterations reached. '
                + 'Might not have achieved convergence. '
                + 'Try increasing the maximum number of iterations allowed.',
                RuntimeWarning)
def levmarq(hessian, gradient, value, initial, maxit=30, maxsteps=20, lamb=10,
            dlamb=2, tol=10**-5, precondition=True):
    r"""
    Minimize an objective function using the Levenberg-Marquardt algorithm.

    Parameters:

    * hessian : function
        A function that returns the Hessian matrix of the objective function
        when given a parameter vector.
    * gradient : function
        A function that returns the gradient vector of the objective function
        when given a parameter vector.
    * value : function
        A function that returns the value of the objective function evaluated
        at a given parameter vector.
    * initial : 1d-array
        The initial estimate for the gradient descent.
    * maxit : int
        The maximum number of iterations allowed.
    * maxsteps : int
        The maximum number of times to try to take a step before giving up.
    * lamb : float
        Initial amount of step regularization. The larger this is, the more
        the algorithm will resemble Steepest Descent in the initial
        iterations.
    * dlamb : float
        Factor by which *lamb* is divided or multiplied when taking steps.
    * tol : float
        The convergence criterion. The lower it is, the more steps are
        permitted.
    * precondition : True or False
        If True, will use Jacobi preconditioning.

    Yields:

    * i, estimate, stats:
        * i : int
            The current iteration number
        * estimate : 1d-array
            The current estimated parameter vector
        * stats : dict
            Statistics about the optimization so far. Keys:

            * method : str
                The name of the optimization method
            * iterations : int
                The total number of iterations so far
            * objective : list
                Value of the objective function per iteration. First value
                corresponds to the initial estimate
            * step_attempts : list
                Number of attempts at taking a step per iteration. First
                number is zero, reflecting the initial estimate.

    """
    # The historical (misspelled) method name is kept for backward
    # compatibility with code that inspects stats['method'].
    stats = dict(method="Levemberg-Marquardt",
                 iterations=0,
                 objective=[],
                 step_attempts=[],
                 step_size=[])
    # numpy.float was removed from numpy; the builtin float is equivalent.
    p = numpy.array(initial, dtype=float)
    misfit = value(p)
    stats['objective'].append(misfit)
    stats['step_attempts'].append(0)
    stats['step_size'].append(lamb)
    # xrange -> range for Python 3 compatibility.
    for iteration in range(maxit):
        hess = hessian(p)
        minus_gradient = -gradient(p)
        if precondition:
            # Jacobi preconditioning, diagonal clipped away from zero.
            diag = numpy.abs(safe_diagonal(hess))
            diag[diag < 10 ** -10] = 10 ** -10
            precond = scipy.sparse.diags(1. / diag, 0).tocsr()
            hess = safe_dot(precond, hess)
            minus_gradient = safe_dot(precond, minus_gradient)
        stagnation = True
        diag = scipy.sparse.diags(safe_diagonal(hess), 0).tocsr()
        # Try steps with increasing damping until the misfit decreases.
        for step in range(maxsteps):
            newp = p + safe_solve(hess + lamb * diag, minus_gradient)
            newmisfit = value(newp)
            if newmisfit >= misfit:
                # Bad step: increase damping (towards steepest descent),
                # capped to avoid overflow.
                if lamb < 10 ** 15:
                    lamb = lamb * dlamb
            else:
                # Good step: relax damping (towards Newton's method),
                # floored to avoid underflow.
                if lamb > 10 ** -15:
                    lamb = lamb / dlamb
                stagnation = False
                break
        if stagnation:
            stop = True
            warnings.warn(
                "Exited because couldn't take a step without increasing "
                + 'the objective function. '
                + 'Might not have achieved convergence. '
                + 'Try increasing the max number of step attempts allowed.',
                RuntimeWarning)
        else:
            # Check convergence BEFORE updating misfit.
            stop = newmisfit > misfit or abs(
                (newmisfit - misfit) / misfit) < tol
            p = newp
            misfit = newmisfit
            # Getting inside here means that I could take a step, so this is
            # where the yield goes.
            stats['objective'].append(misfit)
            stats['iterations'] += 1
            stats['step_attempts'].append(step + 1)
            stats['step_size'].append(lamb)
            yield iteration, p, copy.deepcopy(stats)
        if stop:
            break
        # Only reached when this iteration did NOT converge; warn if the
        # iteration budget is exhausted.
        if iteration == maxit - 1:
            warnings.warn(
                'Exited because maximum iterations reached. '
                + 'Might not have achieved convergence. '
                + 'Try increasing the maximum number of iterations allowed.',
                RuntimeWarning)
def steepest(gradient, value, initial, maxit=1000, linesearch=True,
             maxsteps=30, beta=0.1, tol=10**-5):
    r"""
    Minimize an objective function using the Steepest Descent method.

    The increment to the initial estimate of the parameter vector
    :math:`\bar{p}` is calculated by (Kelley, 1999)

    .. math::

        \Delta\bar{p} = -\lambda\bar{g}

    where :math:`\lambda` is the step size and :math:`\bar{g}` is the
    gradient vector.

    The step size can be determined through a line search algorithm using the
    Armijo rule (Kelley, 1999). In this case,

    .. math::

        \lambda = \beta^m

    where :math:`1 > \beta > 0` and :math:`m \ge 0` is an integer that
    controls the step size. The line search finds the smallest :math:`m` that
    satisfies the Armijo rule

    .. math::

        \phi(\bar{p} + \Delta\bar{p}) - \phi(\bar{p}) <
        \alpha\beta^m ||\bar{g}(\bar{p})||^2

    where :math:`\phi(\bar{p})` is the objective function evaluated at
    :math:`\bar{p}` and :math:`\alpha = 10^{-4}`.

    Parameters:

    * gradient : function
        A function that returns the gradient vector of the objective function
        when given a parameter vector.
    * value : function
        A function that returns the value of the objective function evaluated
        at a given parameter vector.
    * initial : 1d-array
        The initial estimate for the gradient descent.
    * maxit : int
        The maximum number of iterations allowed.
    * linesearch : True or False
        Whether or not to perform the line search to determine an optimal
        step size.
    * maxsteps : int
        The maximum number of times to try to take a step before giving up.
    * beta : float
        The base factor used to determine the step size in line search
        algorithm. Must be 1 > beta > 0.
    * tol : float
        The convergence criterion. The lower it is, the more steps are
        permitted.

    Yields:

    * i, estimate, stats:
        * i : int
            The current iteration number
        * estimate : 1d-array
            The current estimated parameter vector
        * stats : dict
            Statistics about the optimization so far. Keys:

            * method : str
                The name of the optimization algorithm
            * iterations : int
                The total number of iterations so far
            * objective : list
                Value of the objective function per iteration. First value
                corresponds to the initial estimate
            * step_attempts : list
                Number of attempts at taking a step per iteration. First
                number is zero, reflecting the initial estimate. Will be
                empty if ``linesearch==False``.

    References:

    Kelley, C. T., 1999, Iterative methods for optimization: Raleigh: SIAM.

    """
    assert 1 > beta > 0, \
        "Invalid 'beta' parameter {}. Must be 1 > beta > 0".format(beta)
    stats = dict(method='Steepest Descent',
                 iterations=0,
                 objective=[],
                 step_attempts=[])
    # numpy.float was removed from numpy; the builtin float is equivalent.
    p = numpy.array(initial, dtype=float)
    misfit = value(p)
    stats['objective'].append(misfit)
    if linesearch:
        stats['step_attempts'].append(0)
    # This is a mystic parameter of the Armijo rule
    alpha = 10 ** (-4)
    stagnation = False
    # xrange -> range for Python 3 compatibility.
    for iteration in range(maxit):
        grad = gradient(p)
        if linesearch:
            # Calculate now to avoid computing inside the loop
            gradnorm = numpy.linalg.norm(grad) ** 2
            stagnation = True
            # Determine the best step size: smallest m with beta**m
            # satisfying the Armijo sufficient-decrease condition.
            for i in range(maxsteps):
                stepsize = beta ** i
                newp = p - stepsize * grad
                newmisfit = value(newp)
                if newmisfit - misfit < alpha * stepsize * gradnorm:
                    stagnation = False
                    break
        else:
            # Fixed unit step along the negative gradient.
            newp = p - grad
            newmisfit = value(newp)
        if stagnation:
            stop = True
            warnings.warn(
                "Exited because couldn't take a step without increasing "
                + 'the objective function. '
                + 'Might not have achieved convergence. '
                + 'Try increasing the max number of step attempts allowed.',
                RuntimeWarning)
        else:
            # NOTE(review): relative-change test divides by misfit; an
            # initial misfit of exactly zero would misbehave here.
            stop = abs((newmisfit - misfit) / misfit) < tol
            p = newp
            misfit = newmisfit
            # Getting inside here means that I could take a step, so this is
            # where the yield goes.
            stats['objective'].append(misfit)
            stats['iterations'] += 1
            if linesearch:
                stats['step_attempts'].append(i + 1)
            yield iteration, p, copy.deepcopy(stats)
        if stop:
            break
        # Only reached when this iteration did NOT converge; warn if the
        # iteration budget is exhausted.
        if iteration == maxit - 1:
            warnings.warn(
                'Exited because maximum iterations reached. '
                + 'Might not have achieved convergence. '
                + 'Try increasing the maximum number of iterations allowed.',
                RuntimeWarning)
def acor(value, bounds, nparams, nants=None, archive_size=None, maxit=1000,
         diverse=0.5, evap=0.85, seed=None):
    """
    Minimize the objective function using ACO-R.

    ACO-R stands for Ant Colony Optimization for Continuous Domains (Socha
    and Dorigo, 2008).

    Parameters:

    * value : function
        Returns the value of the objective function at a given parameter
        vector
    * bounds : list
        The bounds of the search space. If only two values are given, will
        interpret as the minimum and maximum, respectively, for all
        parameters. Alternatively, you can given a minimum and maximum for
        each parameter, e.g., for a problem with 3 parameters you could give
        `bounds = [min1, max1, min2, max2, min3, max3]`.
    * nparams : int
        The number of parameters that the objective function takes.
    * nants : int
        The number of ants to use in the search. Defaults to the number of
        parameters.
    * archive_size : int
        The number of solutions to keep in the solution archive. Defaults to
        10 x nants
    * maxit : int
        The number of iterations to run.
    * diverse : float
        Scalar from 0 to 1, non-inclusive, that controls how much better
        solutions are favored when constructing new ones.
    * evap : float
        The pheromone evaporation rate (evap > 0). Controls how spread out
        the search is.
    * seed : None or int
        Seed for the random number generator.

    Yields:

    * i, estimate, stats:
        * i : int
            The current iteration number
        * estimate : 1d-array
            The current best estimated parameter vector
        * stats : dict
            Statistics about the optimization so far. Keys:

            * method : str
                The name of the optimization algorithm
            * iterations : int
                The total number of iterations so far
            * objective : list
                Value of the objective function corresponding to the best
                estimate per iteration.

    """
    stats = dict(method="Ant Colony Optimization for Continuous Domains",
                 iterations=0,
                 objective=[])
    # NOTE(review): seeds numpy's GLOBAL random state, affecting other code
    # that uses numpy.random — kept for backward compatibility.
    numpy.random.seed(seed)
    # Set the defaults for number of ants and archive size
    if nants is None:
        nants = nparams
    if archive_size is None:
        archive_size = 10 * nants
    # Check if giving bounds for each parameter or one for all
    bounds = numpy.array(bounds)
    if bounds.size == 2:
        low, high = bounds
        archive = numpy.random.uniform(low, high, (archive_size, nparams))
    else:
        archive = numpy.empty((archive_size, nparams))
        bounds = bounds.reshape((nparams, 2))
        for i, bound in enumerate(bounds):
            low, high = bound
            archive[:, i] = numpy.random.uniform(low, high, archive_size)
    # Compute the initial pheromone trail based on the objective function
    # value of each random solution (numpy.float -> builtin float).
    trail = numpy.fromiter((value(p) for p in archive), dtype=float)
    # Sort the archive of initial random solutions (best first)
    order = numpy.argsort(trail)
    archive = [archive[i] for i in order]
    trail = trail[order].tolist()
    stats['objective'].append(trail[0])
    # Compute the weights (probabilities) of the solutions in the archive
    amp = 1. / (diverse * archive_size * numpy.sqrt(2 * numpy.pi))
    variance = 2 * diverse ** 2 * archive_size ** 2
    weights = amp * numpy.exp(-numpy.arange(archive_size) ** 2 / variance)
    weights /= numpy.sum(weights)
    # xrange -> range for Python 3 compatibility.
    for iteration in range(maxit):
        for k in range(nants):
            # Sample the probabilities to produce new estimates
            ant = numpy.empty(nparams, dtype=float)
            # 1. Choose a pdf from the archive
            pdf = numpy.searchsorted(
                numpy.cumsum(weights),
                numpy.random.uniform())
            for i in range(nparams):
                # 2. Get the mean and stddev of the chosen pdf.
                mean = archive[pdf][i]
                # BUG FIX: use the builtin sum; numpy.sum does not accept a
                # generator expression (deprecated, then removed in numpy).
                std = (evap / (archive_size - 1)) * sum(
                    abs(p[i] - archive[pdf][i]) for p in archive)
                # 3. Sample the pdf until the samples are in bounds
                for attempt in range(100):
                    ant[i] = numpy.random.normal(mean, std)
                    if bounds.size == 2:
                        low, high = bounds
                    else:
                        low, high = bounds[i]
                    if ant[i] >= low and ant[i] <= high:
                        break
            pheromone = value(ant)
            # Place the new estimate in the archive, keeping it sorted and
            # dropping the current worst solution.
            place = numpy.searchsorted(trail, pheromone)
            if place == archive_size:
                continue
            trail.insert(place, pheromone)
            trail.pop()
            archive.insert(place, ant)
            archive.pop()
        stats['objective'].append(trail[0])
        stats['iterations'] += 1
        yield iteration, archive[0], copy.deepcopy(stats)