Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

alkaline-ml / statsmodels   python

Repository URL to install this package:

Version: 0.11.1 

/ stats / regularized_covariance.py

from statsmodels.regression.linear_model import OLS
import numpy as np


def _calc_nodewise_row(exog, idx, alpha):
    """calculates the nodewise_row values for the idxth variable, used to
    estimate approx_inv_cov.

    Parameters
    ----------
    exog : array_like
        The weighted design matrix for the current partition.
    idx : scalar
        Index of the current variable.
    alpha : scalar or array_like
        The penalty weight.  If a scalar, the same penalty weight
        applies to all variables in the model.  If a vector, it
        must have the same length as `params`, and contains a
        penalty weight for each coefficient.

    Returns
    -------
    An array-like object of length p-1

    Notes
    -----

    nodewise_row_i = arg min 1/(2n) ||exog_i - exog_-i gamma||_2^2
                             + alpha ||gamma||_1
    """

    p = exog.shape[1]
    ind = list(range(p))
    ind.pop(idx)

    # handle array alphas
    if not np.isscalar(alpha):
        alpha = alpha[ind]

    tmod = OLS(exog[:, idx], exog[:, ind])

    nodewise_row = tmod.fit_regularized(alpha=alpha).params

    return nodewise_row


def _calc_nodewise_weight(exog, nodewise_row, idx, alpha):
    """calculates the nodewise_weightvalue for the idxth variable, used to
    estimate approx_inv_cov.

    Parameters
    ----------
    exog : array_like
        The weighted design matrix for the current partition.
    nodewise_row : array_like
        The nodewise_row values for the current variable.
    idx : scalar
        Index of the current variable
    alpha : scalar or array_like
        The penalty weight.  If a scalar, the same penalty weight
        applies to all variables in the model.  If a vector, it
        must have the same length as `params`, and contains a
        penalty weight for each coefficient.

    Returns
    -------
    A scalar

    Notes
    -----

    nodewise_weight_i = sqrt(1/n ||exog,i - exog_-i nodewise_row||_2^2
                             + alpha ||nodewise_row||_1)
    """

    n, p = exog.shape
    ind = list(range(p))
    ind.pop(idx)

    # handle array alphas
    if not np.isscalar(alpha):
        alpha = alpha[ind]

    d = np.linalg.norm(exog[:, idx] - exog[:, ind].dot(nodewise_row))**2
    d = np.sqrt(d / n + alpha * np.linalg.norm(nodewise_row, 1))
    return d


def _calc_approx_inv_cov(nodewise_row_l, nodewise_weight_l):
    """calculates the approximate inverse covariance matrix

    Parameters
    ----------
    nodewise_row_l : list
        A list of array-like object where each object corresponds to
        the nodewise_row values for the corresponding variable, should
        be length p.
    nodewise_weight_l : list
        A list of scalars where each scalar corresponds to the nodewise_weight
        value for the corresponding variable, should be length p.

    Returns
    ------
    An array-like object, p x p matrix

    Notes
    -----

    nwr = nodewise_row
    nww = nodewise_weight

    approx_inv_cov_j = - 1 / nww_j [nwr_j,1,...,1,...nwr_j,p]
    """

    p = len(nodewise_weight_l)

    approx_inv_cov = -np.eye(p)
    for idx in range(p):
        ind = list(range(p))
        ind.pop(idx)
        approx_inv_cov[idx, ind] = nodewise_row_l[idx]
    approx_inv_cov *= -1 / nodewise_weight_l[:, None]**2

    return approx_inv_cov


class RegularizedInvCovariance(object):
    """
    Class for estimating regularized inverse covariance with
    nodewise regression

    Parameters
    ----------
    exog : array_like
        A weighted design matrix for covariance

    Attributes
    ----------
    exog : array_like
        A weighted design matrix for covariance
    alpha : scalar
        Regularizing constant
    """

    def __init__(self, exog):

        self.exog = exog

    def fit(self, alpha=0):
        """estimates the regularized inverse covariance using nodewise
        regression

        Parameters
        ----------
        alpha : scalar
            Regularizing constant
        """

        n, p = self.exog.shape

        nodewise_row_l = []
        nodewise_weight_l = []

        for idx in range(p):
            nodewise_row = _calc_nodewise_row(self.exog, idx, alpha)
            nodewise_row_l.append(nodewise_row)

            nodewise_weight = _calc_nodewise_weight(self.exog, nodewise_row,
                                                    idx, alpha)
            nodewise_weight_l.append(nodewise_weight)

        nodewise_row_l = np.array(nodewise_row_l)
        nodewise_weight_l = np.array(nodewise_weight_l)

        approx_inv_cov = _calc_approx_inv_cov(nodewise_row_l,
                                              nodewise_weight_l)

        self._approx_inv_cov = approx_inv_cov

    def approx_inv_cov(self):
        return self._approx_inv_cov