Source code for fatiando.inversion.regularization

"""
Ready-made classes for regularization.

Each class represents a regularizing function. They can be used by adding them
to a :class:`~fatiando.inversion.misfit.Misfit` derivative (all inversions in
Fatiando are derived from Misfit).
The regularization parameter is set by multiplying the regularization instance
by a scalar, e.g., ``solver = misfit + 0.1*regularization``.
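
For instance, a damped inversion could be set up like this (a sketch only;
``misfit`` stands for any Misfit-derived objective, and ``fit`` and
``estimate_`` are the standard Misfit interface)::

    damping = Damping(misfit.nparams)
    solver = misfit + 0.05*damping
    solver.fit()
    estimate = solver.estimate_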

See :class:`fatiando.gravmag.eqlayer.EQLGravity` for an example.

**List of classes**

* :class:`~fatiando.inversion.regularization.Damping`: Damping regularization
  (or 0th order Tikhonov regularization)
* :class:`~fatiando.inversion.regularization.Smoothness`: Generic smoothness
  regularization (or 1st order Tikhonov regularization). Requires a finite
  difference matrix to specify the parameter derivatives to minimize.
* :class:`~fatiando.inversion.regularization.Smoothness1D`: Smoothness for 1D
  problems. Automatically builds a finite difference matrix based on the number
  of parameters
* :class:`~fatiando.inversion.regularization.Smoothness2D`: Smoothness for 2D
  grid based problems. Automatically builds a finite difference matrix of
  derivatives in the two spatial dimensions based on the shape of the parameter
  grid
* :class:`~fatiando.inversion.regularization.TotalVariation`: Generic total
  variation regularization (enforces sharpness of the solution). Requires a
  finite difference matrix to specify the parameter derivatives.
* :class:`~fatiando.inversion.regularization.TotalVariation1D`: Total variation
  for 1D problems. Similar to Smoothness1D
* :class:`~fatiando.inversion.regularization.TotalVariation2D`: Total variation
  for 2D grid based problems. Similar to Smoothness2D

----

"""
from __future__ import division
from future.builtins import super
import copy

import numpy
import scipy.sparse

from .base import OperatorMixin, CachedMethod, CachedMethodPermanent
from ..utils import safe_dot

__all__ = ["Damping", "Smoothness", "Smoothness1D", "Smoothness2D",
           "TotalVariation", "TotalVariation1D", "TotalVariation2D"]


class Regularization(OperatorMixin):
    """
    Generic regularizing function.

    When subclassing this, you must implement the ``value`` method.
    """

    def __init__(self, nparams, islinear):
        self.nparams = nparams
        self.islinear = islinear
        if islinear and hasattr(self, 'hessian'):
            self.hessian = CachedMethodPermanent(self, 'hessian')

    def copy(self, deep=False):
        """
        Make a copy of me together with all the cached methods.
        """
        if deep:
            obj = copy.deepcopy(self)
        else:
            obj = copy.copy(self)
        if self.islinear and hasattr(self, 'hessian'):
            obj.hessian = copy.copy(obj.hessian)
            obj.hessian.instance = obj
        return obj
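
# Sketch of a custom regularization (illustrative, not part of the original
# module). Subclasses must provide ``value``; gradient-based solvers also
# need ``gradient`` and ``hessian``. Outputs should be scaled by
# ``self.regul_param`` so that expressions like ``0.1*regul`` work:
#
#     class SquaredMean(Regularization):
#         "Penalizes solutions whose mean is far from zero."
#         def __init__(self, nparams):
#             super().__init__(nparams, islinear=True)
#         def value(self, p):
#             return self.regul_param*numpy.mean(p)**2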


class Damping(Regularization):
    r"""
    Damping (0th order Tikhonov) regularization.

    Imposes the minimum norm of the parameter vector.

    The regularizing function is of the form

    .. math::

        \theta^{NM}(\bar{p}) = \bar{p}^T\bar{p}

    Its gradient and Hessian matrices are, respectively,

    .. math::

        \bar{\nabla}\theta^{NM}(\bar{p}) = 2\bar{\bar{I}}\bar{p}

    .. math::

        \bar{\bar{\nabla}}\theta^{NM}(\bar{p}) = 2\bar{\bar{I}}

    Parameters:

    * nparams : int
        The number of parameters

    Examples:

    >>> import numpy
    >>> damp = Damping(3)
    >>> p = numpy.array([0, 0, 0])
    >>> damp.value(p)
    0.0
    >>> damp.hessian(p).todense()
    matrix([[ 2.,  0.,  0.],
            [ 0.,  2.,  0.],
            [ 0.,  0.,  2.]])
    >>> damp.gradient(p)
    array([ 0.,  0.,  0.])
    >>> p = numpy.array([1, 0, 0])
    >>> damp.value(p)
    1.0
    >>> damp.hessian(p).todense()
    matrix([[ 2.,  0.,  0.],
            [ 0.,  2.,  0.],
            [ 0.,  0.,  2.]])
    >>> damp.gradient(p)
    array([ 2.,  0.,  0.])

    The Hessian matrix is cached so that it is only generated on the first
    call to ``damp.hessian`` (unlike the gradient, which is calculated every
    time).

    >>> damp.hessian(p) is damp.hessian(p)
    True
    >>> damp.gradient(p) is damp.gradient(p)
    False

    """

    def __init__(self, nparams):
        super().__init__(nparams, islinear=True)

    def hessian(self, p):
        """
        Calculate the Hessian matrix.

        Parameters:

        * p : 1d-array
            The parameter vector

        Returns:

        * hessian : 2d-array
            The Hessian

        """
        return self.regul_param*2*scipy.sparse.identity(self.nparams).tocsr()

    def gradient(self, p):
        """
        Calculate the gradient vector.

        Parameters:

        * p : 1d-array or None
            The parameter vector. If None, will return 0.

        Returns:

        * gradient : 1d-array
            The gradient

        """
        if p is None:
            grad = 0
        else:
            grad = 2.*p
        return self.regul_param*grad

    def value(self, p):
        """
        Calculate the value of this function.

        Parameters:

        * p : 1d-array
            The parameter vector

        Returns:

        * value : float
            The value of this function evaluated at *p*

        """
        return self.regul_param*numpy.linalg.norm(p)**2

class Smoothness(Regularization):
    r"""
    Smoothness (1st order Tikhonov) regularization.

    Imposes that adjacent parameters have values close to each other.

    The regularizing function is of the form

    .. math::

        \theta^{SV}(\bar{p}) = \bar{p}^T \bar{\bar{R}}^T \bar{\bar{R}}\bar{p}

    Its gradient and Hessian matrices are, respectively,

    .. math::

        \bar{\nabla}\theta^{SV}(\bar{p}) = 2\bar{\bar{R}}^T \bar{\bar{R}}\bar{p}

    .. math::

        \bar{\bar{\nabla}}\theta^{SV}(\bar{p}) = 2\bar{\bar{R}}^T \bar{\bar{R}}

    in which matrix :math:`\bar{\bar{R}}` is a finite difference matrix. It
    represents the differences between one parameter and another and is what
    indicates what *adjacent* means.

    Parameters:

    * fdmat : 2d-array or sparse matrix
        The finite difference matrix

    Examples:

    >>> import numpy as np
    >>> fd = np.array([[1, -1, 0],
    ...                [0, 1, -1]])
    >>> smooth = Smoothness(fd)
    >>> p = np.array([0, 0, 0])
    >>> smooth.value(p)
    0.0
    >>> smooth.gradient(p)
    array([0, 0, 0])
    >>> smooth.hessian(p)
    array([[ 2, -2,  0],
           [-2,  4, -2],
           [ 0, -2,  2]])
    >>> p = np.array([1, 0, 1])
    >>> smooth.value(p)
    2.0
    >>> smooth.gradient(p)
    array([ 2, -4,  2])
    >>> smooth.hessian(p)
    array([[ 2, -2,  0],
           [-2,  4, -2],
           [ 0, -2,  2]])

    The Hessian matrix is cached so that it is only generated on the first
    call to ``hessian`` (unlike the gradient, which is calculated every
    time).

    >>> smooth.hessian(p) is smooth.hessian(p)
    True
    >>> smooth.gradient(p) is smooth.gradient(p)
    False

    """

    def __init__(self, fdmat):
        super().__init__(fdmat.shape[1], islinear=True)
        self.fdmat = fdmat

    def hessian(self, p):
        """
        Calculate the Hessian matrix.

        Parameters:

        * p : 1d-array
            The parameter vector

        Returns:

        * hessian : 2d-array
            The Hessian

        """
        return self.regul_param*2*safe_dot(self.fdmat.T, self.fdmat)

    def gradient(self, p):
        """
        Calculate the gradient vector.

        Parameters:

        * p : 1d-array or None
            The parameter vector. If None, will return 0.

        Returns:

        * gradient : 1d-array
            The gradient

        """
        if p is None:
            grad = 0
        else:
            # The hessian is already scaled by regul_param, so don't multiply
            # by it again here.
            grad = safe_dot(self.hessian(p), p)
        return grad

    def value(self, p):
        """
        Calculate the value of this function.

        Parameters:

        * p : 1d-array
            The parameter vector

        Returns:

        * value : float
            The value of this function evaluated at *p*

        """
        # Need to divide by 2 because the hessian is 2*R.T*R. It is also
        # already scaled by regul_param, so don't rescale here.
        return safe_dot(p.T, safe_dot(self.hessian(p), p))/2

class Smoothness1D(Smoothness):
    """
    Smoothness regularization for 1D problems.

    Extends the generic
    :class:`~fatiando.inversion.regularization.Smoothness` class by
    automatically building the finite difference matrix.

    Parameters:

    * npoints : int
        The number of parameters

    Examples:

    >>> import numpy as np
    >>> s = Smoothness1D(3)
    >>> p = np.array([0, 0, 0])
    >>> s.value(p)
    0.0
    >>> s.gradient(p)
    array([0, 0, 0])
    >>> s.hessian(p).todense()
    matrix([[ 2, -2,  0],
            [-2,  4, -2],
            [ 0, -2,  2]])
    >>> p = np.array([1, 0, 1])
    >>> s.value(p)
    2.0
    >>> s.gradient(p)
    array([ 2, -4,  2])
    >>> s.hessian(p).todense()
    matrix([[ 2, -2,  0],
            [-2,  4, -2],
            [ 0, -2,  2]])

    """

    def __init__(self, npoints):
        super().__init__(fd1d(npoints))

class Smoothness2D(Smoothness):
    """
    Smoothness regularization for 2D problems.

    Extends the generic
    :class:`~fatiando.inversion.regularization.Smoothness` class by
    automatically building the finite difference matrix.

    Parameters:

    * shape : tuple = (ny, nx)
        The shape of the parameter grid. Number of parameters in the y and x
        (or z and x, time and offset, etc) dimensions.

    Examples:

    >>> import numpy as np
    >>> s = Smoothness2D((2, 2))
    >>> p = np.array([[0, 0],
    ...               [0, 0]]).ravel()
    >>> s.value(p)
    0.0
    >>> s.gradient(p)
    array([0, 0, 0, 0])
    >>> s.hessian(p).todense()
    matrix([[ 4, -2, -2,  0],
            [-2,  4,  0, -2],
            [-2,  0,  4, -2],
            [ 0, -2, -2,  4]])
    >>> p = np.array([[1, 0],
    ...               [2, 3]]).ravel()
    >>> s.value(p)
    12.0
    >>> s.gradient(p)
    array([ 0, -8,  0,  8])
    >>> s.hessian(p).todense()
    matrix([[ 4, -2, -2,  0],
            [-2,  4,  0, -2],
            [-2,  0,  4, -2],
            [ 0, -2, -2,  4]])

    """

    def __init__(self, shape):
        super().__init__(fd2d(shape))

class TotalVariation(Regularization):
    r"""
    Total variation regularization.

    Imposes that the solution has only a few sharp transitions between
    adjacent parameters.

    The regularizing function is of the form

    .. math::

        \theta^{VT}(\bar{p}) = \sum\limits_{k=1}^L |v_k|

    where vector :math:`\bar{v} = \bar{\bar{R}}\bar{p}`. See
    :class:`~fatiando.inversion.regularization.Smoothness` for the definition
    of the :math:`\bar{\bar{R}}` matrix.

    This function is not differentiable at the null vector, so the following
    differentiable approximation is used to calculate the gradient and
    Hessian

    .. math::

        \theta^{VT}(\bar{p}) \approx \theta^{VT}_\beta(\bar{p}) =
        \sum\limits_{k=1}^L \sqrt{v_k^2 + \beta}

    Its gradient and Hessian matrices are, respectively,

    .. math::

        \bar{\nabla}\theta^{VT}_\beta(\bar{p}) =
        \bar{\bar{R}}^T \bar{q}(\bar{p})

    .. math::

        \bar{\bar{\nabla}}\theta^{VT}_\beta(\bar{p}) =
        \bar{\bar{R}}^T \bar{\bar{Q}}(\bar{p})\bar{\bar{R}}

    and

    .. math::

        \bar{q}(\bar{p}) =
        \begin{bmatrix}
            \dfrac{v_1}{\sqrt{v_1^2 + \beta}} \\
            \dfrac{v_2}{\sqrt{v_2^2 + \beta}} \\
            \vdots \\
            \dfrac{v_L}{\sqrt{v_L^2 + \beta}}
        \end{bmatrix}

    .. math::

        \bar{\bar{Q}}(\bar{p}) =
        \begin{bmatrix}
            \dfrac{\beta}{(v_1^2 + \beta)^{\frac{3}{2}}} & 0 & \ldots & 0 \\
            0 & \dfrac{\beta}{(v_2^2 + \beta)^{\frac{3}{2}}} & \ldots & 0 \\
            \vdots & \vdots & \ddots & \vdots \\
            0 & 0 & \ldots & \dfrac{\beta}{(v_L^2 + \beta)^{\frac{3}{2}}}
        \end{bmatrix}

    Parameters:

    * beta : float
        The beta parameter for the differentiable approximation. The larger
        it is, the closer total variation is to
        :class:`~fatiando.inversion.regularization.Smoothness`. Should be a
        small, positive value.
    * fdmat : 2d-array or sparse matrix
        The finite difference matrix

    A short usage sketch is given after the 1D and 2D variants below.

    """

    def __init__(self, beta, fdmat):
        if beta <= 0:
            raise ValueError("Invalid beta=%g. Must be > 0" % (beta))
        super().__init__(nparams=fdmat.shape[1], islinear=False)
        self.beta = beta
        self.fdmat = fdmat

    def value(self, p):
        """
        Calculate the value of this function.

        Parameters:

        * p : 1d-array
            The parameter vector

        Returns:

        * value : float
            The value of this function evaluated at *p*

        """
        return self.regul_param*numpy.linalg.norm(safe_dot(self.fdmat, p), 1)

    def hessian(self, p):
        """
        Calculate the Hessian matrix.

        Parameters:

        * p : 1d-array
            The parameter vector

        Returns:

        * hessian : 2d-array
            The Hessian

        """
        derivs = safe_dot(self.fdmat, p)
        q = self.beta/((derivs**2 + self.beta)**1.5)
        q_matrix = scipy.sparse.diags(q, 0).tocsr()
        return self.regul_param*safe_dot(self.fdmat.T, q_matrix*self.fdmat)

    def gradient(self, p):
        """
        Calculate the gradient vector.

        Parameters:

        * p : 1d-array
            The parameter vector.

        Returns:

        * gradient : 1d-array
            The gradient

        """
        derivs = safe_dot(self.fdmat, p)
        q = derivs/numpy.sqrt(derivs**2 + self.beta)
        grad = safe_dot(self.fdmat.T, q)
        if len(grad.shape) > 1:
            grad = numpy.array(grad.T).ravel()
        return self.regul_param*grad

class TotalVariation1D(TotalVariation):
    """
    Total variation regularization for 1D problems.

    Extends the generic
    :class:`~fatiando.inversion.regularization.TotalVariation` class by
    automatically building the finite difference matrix.

    Parameters:

    * beta : float
        The beta parameter for the differentiable approximation. The larger
        it is, the closer total variation is to
        :class:`~fatiando.inversion.regularization.Smoothness`. Should be a
        small, positive value.
    * npoints : int
        The number of parameters

    """

    def __init__(self, beta, npoints):
        super().__init__(beta, fd1d(npoints))

class TotalVariation2D(TotalVariation):
    """
    Total variation regularization for 2D problems.

    Extends the generic
    :class:`~fatiando.inversion.regularization.TotalVariation` class by
    automatically building the finite difference matrix.

    Parameters:

    * beta : float
        The beta parameter for the differentiable approximation. The larger
        it is, the closer total variation is to
        :class:`~fatiando.inversion.regularization.Smoothness`. Should be a
        small, positive value.
    * shape : tuple = (ny, nx)
        The shape of the parameter grid. Number of parameters in the y and x
        (or z and x, time and offset, etc) dimensions.

    """

    def __init__(self, beta, shape):
        super().__init__(beta, fd2d(shape))

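
# Usage sketch (illustrative, not part of the original module): total
# variation is non-linear, so the combined goal function needs a
# gradient-based solver configured with an initial estimate. Here ``misfit``
# stands for any Misfit-derived objective on a (ny, nx) parameter grid; the
# regularization parameter and beta are arbitrary example values:
#
#     regul = TotalVariation2D(beta=1e-5, shape=(ny, nx))
#     solver = misfit + 0.3*regul
#     solver.config('levmarq', initial=numpy.zeros(ny*nx)).fit()
#     estimate = solver.estimate_
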
def fd1d(size):
    """
    Produce a 1D finite difference matrix.

    Parameters:

    * size : int
        The number of points

    Returns:

    * fd : sparse CSR matrix
        The finite difference matrix

    Examples:

    >>> fd1d(2).todense()
    matrix([[ 1, -1]])
    >>> fd1d(3).todense()
    matrix([[ 1, -1,  0],
            [ 0,  1, -1]])
    >>> fd1d(4).todense()
    matrix([[ 1, -1,  0,  0],
            [ 0,  1, -1,  0],
            [ 0,  0,  1, -1]])

    """
    # Wrap the ranges in lists so that concatenation also works on Python 3,
    # where range() is not a list.
    i = list(range(size - 1)) + list(range(size - 1))
    j = list(range(size - 1)) + list(range(1, size))
    v = [1] * (size - 1) + [-1] * (size - 1)
    return scipy.sparse.coo_matrix((v, (i, j)), (size - 1, size)).tocsr()


def fd2d(shape):
    """
    Produce a 2D finite difference matrix.

    Parameters:

    * shape : tuple = (ny, nx)
        The shape of the parameter grid. Number of parameters in the y and x
        (or z and x, time and offset, etc) dimensions.

    Returns:

    * fd : sparse CSR matrix
        The finite difference matrix

    Examples:

    >>> fd2d((2, 2)).todense()
    matrix([[ 1, -1,  0,  0],
            [ 0,  0,  1, -1],
            [ 1,  0, -1,  0],
            [ 0,  1,  0, -1]])
    >>> fd2d((2, 3)).todense()
    matrix([[ 1, -1,  0,  0,  0,  0],
            [ 0,  1, -1,  0,  0,  0],
            [ 0,  0,  0,  1, -1,  0],
            [ 0,  0,  0,  0,  1, -1],
            [ 1,  0,  0, -1,  0,  0],
            [ 0,  1,  0,  0, -1,  0],
            [ 0,  0,  1,  0,  0, -1]])

    """
    ny, nx = shape
    nderivs = (nx - 1) * ny + (ny - 1) * nx
    I, J, V = [], [], []
    deriv = 0
    param = 0
    # Differences between horizontally adjacent parameters (along x). Use
    # range instead of Python 2's xrange for Python 3 compatibility.
    for i in range(ny):
        for j in range(nx - 1):
            I.extend([deriv, deriv])
            J.extend([param, param + 1])
            V.extend([1, -1])
            deriv += 1
            param += 1
        param += 1
    param = 0
    # Differences between vertically adjacent parameters (along y).
    for i in range(ny - 1):
        for j in range(nx):
            I.extend([deriv, deriv])
            J.extend([param, param + nx])
            V.extend([1, -1])
            deriv += 1
            param += 1
    return scipy.sparse.coo_matrix((V, (I, J)), (nderivs, nx * ny)).tocsr()