# -*- coding: utf-8 -*-
"""Correlation and Covariance Structures

Created on Sat Dec 17 20:46:05 2011

Author: Josef Perktold
License: BSD-3


References
----------
Based on a quick reading of a section on mixed effects models in S-PLUS and
of an outline for GEE.

"""

import numpy as np

from statsmodels.regression.linear_model import yule_walker
from statsmodels.stats.moment_helpers import cov2corr


def corr_equi(k_vars, rho):
    '''create equicorrelated correlation matrix with rho on the off-diagonal

    Parameters
    ----------
    k_vars : int
        number of variables, correlation matrix will be (k_vars, k_vars)
    rho : float
        correlation between any two random variables

    Returns
    -------
    corr : ndarray (k_vars, k_vars)
        correlation matrix
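
    Examples
    --------
    A small sketch with arbitrarily chosen values:

    >>> corr_equi(3, 0.5)
    array([[1. , 0.5, 0.5],
           [0.5, 1. , 0.5],
           [0.5, 0.5, 1. ]])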

    '''
    corr = np.empty((k_vars, k_vars))
    corr.fill(rho)
    corr[np.diag_indices_from(corr)] = 1
    return corr


def corr_ar(k_vars, ar):
    '''create autoregressive correlation matrix

    Note: if used for a residual process, this might describe an MA, not an
    AR, process - check.

    Parameters
    ----------
    k_vars : int
        number of variables, correlation matrix will be (k_vars, k_vars)
    ar : array_like, 1d
        AR lag-polynomial including 1 for lag 0; if shorter than k_vars, it
        is padded with zeros

    Returns
    -------
    corr : ndarray (k_vars, k_vars)
        Toeplitz matrix built from the (padded) lag-polynomial
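
    Examples
    --------
    A small sketch with arbitrarily chosen values:

    >>> corr_ar(4, [1., 0.5])
    array([[1. , 0.5, 0. , 0. ],
           [0.5, 1. , 0.5, 0. ],
           [0. , 0.5, 1. , 0.5],
           [0. , 0. , 0.5, 1. ]])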


    '''
    from scipy.linalg import toeplitz
    if len(ar) < k_vars:
        ar_ = np.zeros(k_vars)
        ar_[:len(ar)] = ar
        ar = ar_

    return toeplitz(ar)


def corr_arma(k_vars, ar, ma):
    '''create ARMA correlation matrix

    converts the ARMA process to an autoregressive lag-polynomial with k_vars
    lags

    ar and ma might need to be switched when generating a residual process

    Parameters
    ----------
    k_vars : int
        number of variables, correlation matrix will be (k_vars, k_vars)
    ar : array_like, 1d
        AR lag-polynomial including 1 for lag 0
    ma : array_like, 1d
        MA lag-polynomial

    Returns
    -------
    corr : ndarray (k_vars, k_vars)
        Toeplitz matrix built from the truncated AR representation
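
    Examples
    --------
    A small sketch with arbitrarily chosen lag-polynomials:

    >>> R = corr_arma(5, [1, -0.5], [1, 0.4])
    >>> R.shape
    (5, 5)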

    '''
    from scipy.linalg import toeplitz
    from statsmodels.tsa.arima_process import arma2ar

    # TODO: flesh out the comment below about a bug in arma2ar
    ar = arma2ar(ar, ma, lags=k_vars)[:k_vars]  # bug in arma2ar

    return toeplitz(ar)


def corr2cov(corr, std):
    '''convert correlation matrix to covariance matrix

    Parameters
    ----------
    corr : ndarray, (k_vars, k_vars)
        correlation matrix
    std : ndarray, (k_vars,) or scalar
        standard deviation for the vector of random variables. If scalar, then
        it is assumed that all variables have the same scale given by std.

    Returns
    -------
    cov : ndarray (k_vars, k_vars)
        covariance matrix
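
    Examples
    --------
    A small sketch with arbitrary values, scaling an equicorrelated matrix:

    >>> corr2cov(corr_equi(3, 0.5), 2.)
    array([[4., 2., 2.],
           [2., 4., 2.],
           [2., 2., 4.]])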

    '''
    if np.size(std) == 1:
        std = std*np.ones(corr.shape[0])
    cov = corr * std[:, None] * std[None, :]  # same as outer product
    return cov


def whiten_ar(x, ar_coefs, order):
    """
    Whiten a series of columns according to an AR(p) covariance structure.

    This drops the initial conditions (Cochrane-Orcutt ?).
    Uses a Python loop, so it is intended for short AR polynomials only; use
    lfilter otherwise.

    This needs improvement: an option on method, full in addition to
    conditional.

    Parameters
    ----------
    x : array_like, (nobs,) or (nobs, k_vars)
        The data to be whitened along axis 0
    ar_coefs : ndarray
        coefficients of the AR lag-polynomial,   TODO: ar or ar_coefs?
    order : int
        order of the AR process; the first `order` coefficients of ar_coefs
        are used

    Returns
    -------
    x_new : ndarray
        transformed array
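
    Examples
    --------
    A minimal sketch, whitening with an AR(1) coefficient of 0.5 (values are
    arbitrary):

    >>> import numpy as np
    >>> whiten_ar(np.arange(1., 6.), np.array([0.5]), order=1)
    array([1.5, 2. , 2.5, 3. ])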
    """

    rho = ar_coefs

    x = np.array(x, np.float64)
    _x = x.copy()
    # TODO: dimension handling is not DRY
    # I think previous code worked for 2d because of single index rows in np
    if x.ndim == 2:
        rho = rho[:, None]
    for i in range(order):
        _x[(i+1):] = _x[(i+1):] - rho[i] * x[0:-(i+1)]

    return _x[order:]


def yule_walker_acov(acov, order=1, method="unbiased", df=None, inv=False):
    """
    Estimate AR(p) parameters from acovf using Yule-Walker equation.


    Parameters
    ----------
    acov : array_like, 1d
        auto-covariance
    order : int, optional
        The order of the autoregressive process.  Default is 1.
    method : str, optional
        Estimation method; passed through to yule_walker.
    df : int, optional
        Degrees of freedom; passed through to yule_walker.
    inv : bool
        If inv is True the inverse of R is also returned.  Default is False.

    Returns
    -------
    rho : ndarray
        The estimated autoregressive coefficients
    sigma : float
        estimate of the residual standard deviation
    Rinv : ndarray
        inverse of the Toeplitz matrix, only returned if inv is True
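
    Examples
    --------
    Illustrative call only; the autocovariance values here are arbitrary:

    >>> import numpy as np
    >>> rho, sigma = yule_walker_acov(0.5 ** np.arange(5), order=1)
    >>> rho.shape
    (1,)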
    """
    return yule_walker(acov, order=order, method=method, df=df, inv=inv,
                       demean=False)


class ARCovariance(object):
    '''
    experimental class for the covariance structure of an AR process

    TODO: classmethod? staticmethods?
    '''

    def __init__(self, ar=None, ar_coefs=None, sigma=1.):
        self.sigma = sigma   # scale (standard deviation), used by cov
        if ar is not None:
            self.ar = ar
            self.ar_coefs = -ar[1:]
            self.k_lags = len(ar)
        elif ar_coefs is not None:
            self.ar_coefs = ar_coefs
            self.ar = np.hstack(([1], -ar_coefs))
            self.k_lags = len(self.ar)

    @classmethod
    def fit(cls, cov, order, **kwds):
        rho, sigma = yule_walker_acov(cov, order=order, **kwds)
        return cls(ar_coefs=rho, sigma=sigma)

    def whiten(self, x):
        return whiten_ar(x, self.ar_coefs, order=len(self.ar_coefs))

    def corr(self, k_vars=None):
        if k_vars is None:
            k_vars = len(self.ar)   # TODO: this could move into corr_ar
        return corr_ar(k_vars, self.ar)

    def cov(self, k_vars=None):
        return corr2cov(self.corr(k_vars=k_vars), self.sigma)
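

if __name__ == '__main__':
    # Minimal usage sketch, illustrative only; the lag-polynomial [1, -0.5]
    # (an AR(1)-type structure) and the sample size are chosen arbitrarily.
    ar_poly = np.array([1., -0.5])
    arcov = ARCovariance(ar=ar_poly)
    print(arcov.corr(k_vars=4))          # Toeplitz correlation matrix
    x = np.random.standard_normal(10)
    print(arcov.whiten(x).shape)          # (9,), the first observation is dropped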