"""
Ready-made classes for regularization.
Each class represents a regularizing function. They can be used by adding them
to a :class:`~fatiando.inversion.misfit.Misfit` derivative (all inversions in
Fatiando are derived from Misfit).
The regularization parameter is set by multiplying the regularization instance
by a scalar, e.g., ``solver = misfit + 0.1*regularization``.
See :class:`fatiando.gravmag.eqlayer.EQLGravity` for an example.
**List of classes**
* :class:`~fatiando.inversion.regularization.Damping`: Damping regularization
  (or 0th order Tikhonov regularization)
* :class:`~fatiando.inversion.regularization.Smoothness`: Generic smoothness
  regularization (or 1st order Tikhonov regularization). Requires a finite
  difference matrix to specify the parameter derivatives to minimize.
* :class:`~fatiando.inversion.regularization.Smoothness1D`: Smoothness for 1D
  problems. Automatically builds a finite difference matrix based on the number
  of parameters
* :class:`~fatiando.inversion.regularization.Smoothness2D`: Smoothness for 2D
  grid based problems. Automatically builds a finite difference matrix of
  derivatives in the two spatial dimensions based on the shape of the parameter
  grid
* :class:`~fatiando.inversion.regularization.TotalVariation`: Generic total
  variation regularization (enforces sharpness of the solution). Requires a
  finite difference matrix to specify the parameter derivatives.
* :class:`~fatiando.inversion.regularization.TotalVariation1D`: Total variation
  for 1D problems. Similar to Smoothness1D
* :class:`~fatiando.inversion.regularization.TotalVariation2D`: Total variation
  for 2D grid based problems. Similar to Smoothness2D
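
For example, a rough sketch of setting up a damped inversion, assuming
``misfit`` is an instance of a Misfit-derived class with the usual
``fit``/``estimate_`` interface::

    damp = Damping(misfit.nparams)
    solver = misfit + 0.1*damp
    estimate = solver.fit().estimate_
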
----
"""
from __future__ import division
from future.builtins import super
import copy
import numpy
import scipy.sparse
from .base import OperatorMixin, CachedMethod, CachedMethodPermanent
from ..utils import safe_dot
__all__ = ["Damping", "Smoothness", "Smoothness1D", "Smoothness2D",
           "TotalVariation", "TotalVariation1D", "TotalVariation2D"]
class Regularization(OperatorMixin):
    """
    Generic regularizing function.
    When subclassing this, you must implement the ``value`` method.
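    A minimal, hypothetical sketch (the class and its absolute-value penalty
    are made up for illustration; derivative-based solvers would also need
    ``gradient`` and ``hessian`` methods):
    >>> class AbsoluteValue(Regularization):
    ...     def __init__(self, nparams):
    ...         super().__init__(nparams, islinear=False)
    ...     def value(self, p):
    ...         return self.regul_param*abs(p).sum()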
    """
    def __init__(self, nparams, islinear):
        self.nparams = nparams
        self.islinear = islinear
        if islinear and hasattr(self, 'hessian'):
            self.hessian = CachedMethodPermanent(self, 'hessian')
    def copy(self, deep=False):
        """
        Make a copy of me together with all the cached methods.
        """
        if deep:
            obj = copy.deepcopy(self)
        else:
            obj = copy.copy(self)
        if self.islinear and hasattr(self, 'hessian'):
            obj.hessian = copy.copy(obj.hessian)
            obj.hessian.instance = obj
        return obj
class Damping(Regularization):
    r"""
    Damping (0th order Tikhonov) regularization.
    Imposes the minimum norm of the parameter vector.
    The regularizing function is of the form
    .. math::
        \theta^{NM}(\bar{p}) = \bar{p}^T\bar{p}
    Its gradient and Hessian matrices are, respectively,
    .. math::
        \bar{\nabla}\theta^{NM}(\bar{p}) = 2\bar{\bar{I}}\bar{p}
    .. math::
        \bar{\bar{\nabla}}\theta^{NM}(\bar{p}) = 2\bar{\bar{I}}
    Parameters:
    * nparams : int
        The number of parameters
    Examples:
    >>> import numpy
    >>> damp = Damping(3)
    >>> p = numpy.array([0, 0, 0])
    >>> damp.value(p)
    0.0
    >>> damp.hessian(p).todense()
    matrix([[ 2.,  0.,  0.],
            [ 0.,  2.,  0.],
            [ 0.,  0.,  2.]])
    >>> damp.gradient(p)
    array([ 0.,  0.,  0.])
    >>> p = numpy.array([1, 0, 0])
    >>> damp.value(p)
    1.0
    >>> damp.hessian(p).todense()
    matrix([[ 2.,  0.,  0.],
            [ 0.,  2.,  0.],
            [ 0.,  0.,  2.]])
    >>> damp.gradient(p)
    array([ 2.,  0.,  0.])
    The Hessian matrix is cached so that it is only generated on the first
    call to ``damp.hessian`` (unlike the gradient, which is calculated every
    time).
    >>> damp.hessian(p) is damp.hessian(p)
    True
    >>> damp.gradient(p) is damp.gradient(p)
    False
    """
    def __init__(self, nparams):
        super().__init__(nparams, islinear=True)
    def hessian(self, p):
        """
        Calculate the Hessian matrix.
        Parameters:
        * p : 1d-array
            The parameter vector
        Returns:
        * hessian : 2d-array
            The Hessian
        """
        return self.regul_param*2*scipy.sparse.identity(self.nparams).tocsr() 
    def gradient(self, p):
        """
        Calculate the gradient vector.
        Parameters:
        * p : 1d-array or None
            The parameter vector. If None, will return 0.
        Returns:
        * gradient : 1d-array
            The gradient
        """
        if p is None:
            grad = 0
        else:
            grad = 2.*p
        return self.regul_param*grad 
    def value(self, p):
        """
        Calculate the value of this function.
        Parameters:
        * p : 1d-array
            The parameter vector
        Returns:
        * value : float
            The value of this function evaluated at *p*
        """
        return self.regul_param*numpy.linalg.norm(p)**2  
class Smoothness(Regularization):
    r"""
    Smoothness (1st order Tikhonov) regularization.
    Imposes that adjacent parameters have values close to each other.
    The regularizing function is of the form
    .. math::
        \theta^{SV}(\bar{p}) =
        \bar{p}^T \bar{\bar{R}}^T \bar{\bar{R}}\bar{p}
    Its gradient and Hessian matrices are, respectively,
    .. math::
        \bar{\nabla}\theta^{SV}(\bar{p}) =
        2\bar{\bar{R}}^T \bar{\bar{R}}\bar{p}
    .. math::
        \bar{\bar{\nabla}}\theta^{SV}(\bar{p}) =
        2\bar{\bar{R}}^T \bar{\bar{R}}
    in which :math:`\bar{\bar{R}}` is a finite difference matrix. Each of its
    rows encodes the difference between two parameters, so this matrix is what
    defines which parameters are considered *adjacent*.
    Parameters:
    * fdmat : 2d-array or sparse matrix
        The finite difference matrix
    Examples:
    >>> import numpy as np
    >>> fd = np.array([[1, -1, 0],
    ...                [0, 1, -1]])
    >>> smooth = Smoothness(fd)
    >>> p = np.array([0, 0, 0])
    >>> smooth.value(p)
    0.0
    >>> smooth.gradient(p)
    array([0, 0, 0])
    >>> smooth.hessian(p)
    array([[ 2, -2,  0],
           [-2,  4, -2],
           [ 0, -2,  2]])
    >>> p = np.array([1, 0, 1])
    >>> smooth.value(p)
    2.0
    >>> smooth.gradient(p)
    array([ 2, -4,  2])
    >>> smooth.hessian(p)
    array([[ 2, -2,  0],
           [-2,  4, -2],
           [ 0, -2,  2]])
    The Hessian matrix is cached so that it is only generated on the first
    call to ``hessian`` (unlike the gradient, which is calculated every
    time).
    >>> smooth.hessian(p) is smooth.hessian(p)
    True
    >>> smooth.gradient(p) is smooth.gradient(p)
    False
    """
    def __init__(self, fdmat):
        super().__init__(fdmat.shape[1], islinear=True)
        self.fdmat = fdmat
    def hessian(self, p):
        """
        Calculate the Hessian matrix.
        Parameters:
        * p : 1d-array
            The parameter vector
        Returns:
        * hessian : 2d-array
            The Hessian
        """
        return self.regul_param*2*safe_dot(self.fdmat.T, self.fdmat) 
    def gradient(self, p):
        """
        Calculate the gradient vector.
        Parameters:
        * p : 1d-array or None
            The parameter vector. If None, will return 0.
        Returns:
        * gradient : 1d-array
            The gradient
        """
        if p is None:
            grad = 0
        else:
            # hessian(p) already includes the regularization parameter, so
            # it must not be multiplied in again here
            grad = safe_dot(self.hessian(p), p)
        return grad
    def value(self, p):
        """
        Calculate the value of this function.
        Parameters:
        * p : 1d-array
            The parameter vector
        Returns:
        * value : float
            The value of this function evaluated at *p*
        """
        # The Hessian is regul_param*2*R.T*R, so dividing p.T*H*p by 2 gives
        # regul_param times the value of the regularizing function
        return safe_dot(p.T, safe_dot(self.hessian(p), p))/2
class Smoothness1D(Smoothness):
    """
    Smoothness regularization for 1D problems.
    Extends the generic :class:`~fatiando.inversion.regularization.Smoothness`
    class by automatically building the finite difference matrix.
    Parameters:
    * npoints : int
        The number of parameters
    Examples:
    >>> import numpy as np
    >>> s = Smoothness1D(3)
    >>> p = np.array([0, 0, 0])
    >>> s.value(p)
    0.0
    >>> s.gradient(p)
    array([0, 0, 0])
    >>> s.hessian(p).todense()
    matrix([[ 2, -2,  0],
            [-2,  4, -2],
            [ 0, -2,  2]])
    >>> p = np.array([1, 0, 1])
    >>> s.value(p)
    2.0
    >>> s.gradient(p)
    array([ 2, -4,  2])
    >>> s.hessian(p).todense()
    matrix([[ 2, -2,  0],
            [-2,  4, -2],
            [ 0, -2,  2]])
    """
    def __init__(self, npoints):
        super().__init__(fd1d(npoints)) 
class Smoothness2D(Smoothness):
    """
    Smoothness regularization for 2D problems.
    Extends the generic :class:`~fatiando.inversion.regularization.Smoothness`
    class by automatically building the finite difference matrix.
    Parameters:
    * shape : tuple = (ny, nx)
        The shape of the parameter grid. Number of parameters in the y and x
        (or z and x, time and offset, etc) dimensions.
    Examples:
    >>> import numpy as np
    >>> s = Smoothness2D((2, 2))
    >>> p = np.array([[0, 0],
    ...               [0, 0]]).ravel()
    >>> s.value(p)
    0.0
    >>> s.gradient(p)
    array([0, 0, 0, 0])
    >>> s.hessian(p).todense()
    matrix([[ 4, -2, -2,  0],
            [-2,  4,  0, -2],
            [-2,  0,  4, -2],
            [ 0, -2, -2,  4]])
    >>> p = np.array([[1, 0],
    ...               [2, 3]]).ravel()
    >>> s.value(p)
    12.0
    >>> s.gradient(p)
    array([ 0, -8,  0,  8])
    >>> s.hessian(p).todense()
    matrix([[ 4, -2, -2,  0],
            [-2,  4,  0, -2],
            [-2,  0,  4, -2],
            [ 0, -2, -2,  4]])
    """
    def __init__(self, shape):
        super().__init__(fd2d(shape)) 
class TotalVariation(Regularization):
    r"""
    Total variation regularization.
    Imposes sparsity on the differences between adjacent parameters, allowing
    a few sharp transitions in the solution.
    The regularizing function is of the form
    .. math::
        \theta^{VT}(\bar{p}) = \sum\limits_{k=1}^L |v_k|
    where vector :math:`\bar{v} = \bar{\bar{R}}\bar{p}`. See
    :class:`~fatiando.inversion.regularization.Smoothness` for the definition
    of the :math:`\bar{\bar{R}}` matrix.
    This function is not differentiable where any :math:`v_k` is zero, so the
    following differentiable approximation is used to calculate the gradient
    and Hessian
    .. math::
        \theta^{VT}(\bar{p}) \approx \theta^{VT}_\beta(\bar{p}) =
        \sum\limits_{k=1}^L \sqrt{v_k^2 + \beta}
    Its gradient and Hessian matrices are, respectively,
    .. math::
        \bar{\nabla}\theta^{VT}_\beta(\bar{p}) =
        \bar{\bar{R}}^T \bar{q}(\bar{p})
    .. math::
        \bar{\bar{\nabla}}\theta^{VT}_\beta(\bar{p}) =
        \bar{\bar{R}}^T \bar{\bar{Q}}(\bar{p})\bar{\bar{R}}
    and
    .. math::
        \bar{q}(\bar{p}) =
        \begin{bmatrix}
            \dfrac{v_1}{\sqrt{v_1^2 + \beta}} \\
            \dfrac{v_2}{\sqrt{v_2^2 + \beta}} \\
            \vdots \\ \dfrac{v_L}{\sqrt{v_L^2 + \beta}}
        \end{bmatrix}
    .. math::
        \bar{\bar{Q}}(\bar{p}) =
        \begin{bmatrix}
            \dfrac{\beta}{(v_1^2 + \beta)^{\frac{3}{2}}} & 0 & \ldots & 0 \\
            0 & \dfrac{\beta}{(v_2^2 + \beta)^{\frac{3}{2}}} & \ldots & 0 \\
            \vdots & \vdots & \ddots & \vdots \\
            0 & 0 & \ldots & \dfrac{\beta}{(v_L^2 + \beta)^{\frac{3}{2}}}
        \end{bmatrix}
    Parameters:
    * beta : float
        The beta parameter for the differentiable approximation. The larger it
        is, the closer total variation is to
        :class:`~fatiando.inversion.regularization.Smoothness`. Should be a
        small, positive value.
    * fdmat : 2d-array or sparse matrix
        The finite difference matrix
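    Examples:
    A small sketch using a hand-made finite difference matrix (outputs assume
    the default regularization parameter of 1):
    >>> import numpy as np
    >>> fd = np.array([[1, -1, 0],
    ...                [0, 1, -1]])
    >>> tv = TotalVariation(1e-5, fd)
    >>> p = np.array([0., 1., 2.])
    >>> tv.value(p)
    2.0
    >>> np.round(tv.gradient(p), 3)
    array([-1.,  0.,  1.])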
    """
    def __init__(self, beta, fdmat):
        if beta <= 0:
            raise ValueError("Invalid beta=%g. Must be > 0" % (beta))
        super().__init__(
            nparams=fdmat.shape[1], islinear=False)
        self.beta = beta
        self.fdmat = fdmat
    def value(self, p):
        """
        Calculate the value of this function.
        Parameters:
        * p : 1d-array
            The parameter vector
        Returns:
        * value : float
            The value of this function evaluated at *p*
        """
        return self.regul_param*numpy.linalg.norm(safe_dot(self.fdmat, p), 1) 
    def hessian(self, p):
        """
        Calculate the Hessian matrix.
        Parameters:
        * p : 1d-array
            The parameter vector
        Returns:
        * hessian : 2d-array
            The Hessian
        """
        derivs = safe_dot(self.fdmat, p)
        q = self.beta/((derivs**2 + self.beta)**1.5)
        q_matrix = scipy.sparse.diags(q, 0).tocsr()
        return self.regul_param*safe_dot(self.fdmat.T, q_matrix*self.fdmat) 
    def gradient(self, p):
        """
        Calculate the gradient vector.
        Parameters:
        * p : 1d-array
            The parameter vector.
        Returns:
        * gradient : 1d-array
            The gradient
        """
        derivs = safe_dot(self.fdmat, p)
        q = derivs/numpy.sqrt(derivs**2 + self.beta)
        grad = safe_dot(self.fdmat.T, q)
        if len(grad.shape) > 1:
            grad = numpy.array(grad.T).ravel()
        return self.regul_param*grad  
class TotalVariation1D(TotalVariation):
    """
    Total variation regularization for 1D problems.
    Extends the generic
    :class:`~fatiando.inversion.regularization.TotalVariation`
    class by automatically building the finite difference matrix.
    Parameters:
    * beta : float
        The beta parameter for the differentiable approximation. The larger it
        is, the closer total variation is to
        :class:`~fatiando.inversion.regularization.Smoothness`. Should be a
        small, positive value.
    * npoints : int
        The number of parameters
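    Examples:
    A quick sketch, mirroring Smoothness1D (output assumes the default
    regularization parameter of 1):
    >>> import numpy as np
    >>> tv = TotalVariation1D(1e-5, 3)
    >>> p = np.array([0., 1., 2.])
    >>> tv.value(p)
    2.0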
    """
    def __init__(self, beta, npoints):
        super().__init__(beta, fd1d(npoints)) 
class TotalVariation2D(TotalVariation):
    """
    Total variation regularization for 2D problems.
    Extends the generic
    :class:`~fatiando.inversion.regularization.TotalVariation`
    class by automatically building the finite difference matrix.
    Parameters:
    * beta : float
        The beta parameter for the differentiable approximation. The larger it
        is, the closer total variation is to
        :class:`~fatiando.inversion.regularization.Smoothness`. Should be a
        small, positive value.
    * shape : tuple = (ny, nx)
        The shape of the parameter grid. Number of parameters in the y and x
        (or z and x, time and offset, etc) dimensions.
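    Examples:
    A quick sketch, mirroring Smoothness2D (output assumes the default
    regularization parameter of 1):
    >>> import numpy as np
    >>> tv = TotalVariation2D(1e-5, (2, 2))
    >>> p = np.array([[1, 0],
    ...               [2, 3]]).ravel()
    >>> tv.value(p)
    6.0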
    """
    def __init__(self, beta, shape):
        super().__init__(beta, fd2d(shape)) 
def fd1d(size):
    """
    Produce a 1D finite difference matrix.
    Parameters:
    * size : int
        The number of points
    Returns:
    * fd : sparse CSR matrix
        The finite difference matrix
    Examples:
    >>> fd1d(2).todense()
    matrix([[ 1, -1]])
    >>> fd1d(3).todense()
    matrix([[ 1, -1,  0],
            [ 0,  1, -1]])
    >>> fd1d(4).todense()
    matrix([[ 1, -1,  0,  0],
            [ 0,  1, -1,  0],
            [ 0,  0,  1, -1]])
    """
    # Wrap the ranges in list() so that the concatenation also works on
    # Python 3, where range objects can't be added together
    i = list(range(size - 1)) + list(range(size - 1))
    j = list(range(size - 1)) + list(range(1, size))
    v = [1] * (size - 1) + [-1] * (size - 1)
    return scipy.sparse.coo_matrix((v, (i, j)), (size - 1, size)).tocsr()
def fd2d(shape):
    """
    Produce a 2D finite difference matrix.
    Parameters:
    * shape : tuple = (ny, nx)
        The shape of the parameter grid. Number of parameters in the y and x
        (or z and x, time and offset, etc) dimensions.
    Returns:
    * fd : sparse CSR matrix
        The finite difference matrix
    Examples:
    >>> fd2d((2, 2)).todense()
    matrix([[ 1, -1,  0,  0],
            [ 0,  0,  1, -1],
            [ 1,  0, -1,  0],
            [ 0,  1,  0, -1]])
    >>> fd2d((2, 3)).todense()
    matrix([[ 1, -1,  0,  0,  0,  0],
            [ 0,  1, -1,  0,  0,  0],
            [ 0,  0,  0,  1, -1,  0],
            [ 0,  0,  0,  0,  1, -1],
            [ 1,  0,  0, -1,  0,  0],
            [ 0,  1,  0,  0, -1,  0],
            [ 0,  0,  1,  0,  0, -1]])
    """
    ny, nx = shape
    nderivs = (nx - 1) * ny + (ny - 1) * nx
    I, J, V = [], [], []
    deriv = 0
    param = 0
    # Derivatives in the x direction: difference each parameter with the one
    # to its right, skipping the last parameter of each row
    for i in range(ny):
        for j in range(nx - 1):
            I.extend([deriv, deriv])
            J.extend([param, param + 1])
            V.extend([1, -1])
            deriv += 1
            param += 1
        param += 1
    param = 0
    # Derivatives in the y direction: difference each parameter with the one
    # directly below it, which is nx positions ahead in the raveled grid
    for i in range(ny - 1):
        for j in range(nx):
            I.extend([deriv, deriv])
            J.extend([param, param + nx])
            V.extend([1, -1])
            deriv += 1
            param += 1
    return scipy.sparse.coo_matrix((V, (I, J)), (nderivs, nx * ny)).tocsr()