Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

alkaline-ml / statsmodels   python

Repository URL to install this package:

Version: 0.11.1 

/ sandbox / panel / panel_short.py

# -*- coding: utf-8 -*-
"""Panel data analysis for short T and large N

Created on Sat Dec 17 19:32:00 2011

Author: Josef Perktold
License: BSD-3


starting from scratch before looking at references again
just a stub to get the basic structure for group handling
target outsource as much as possible for reuse

Notes
-----

this is the basic version using a loop over individuals which will be more
widely applicable. Depending on the special cases, there will be faster
implementations possible (sparse, kroneker, ...)

the only two group specific methods or get_within_cov and whiten

"""

import numpy as np
from statsmodels.regression.linear_model import OLS, GLS
from statsmodels.tools.grouputils import GroupSorted


def sum_outer_product_loop(x, group_iter):
    '''sum outerproduct dot(x_i, x_i.T) over individuals

    loop version

    '''

    mom = 0
    for g in group_iter():
        x_g = x[g]
        #print 'x_g.shape', x_g.shape
        mom += np.outer(x_g, x_g)

    return mom

def sum_outer_product_balanced(x, n_groups):
    '''sum outerproduct dot(x_i, x_i.T) over individuals

    where x_i is (nobs_i, 1), and result is (nobs_i, nobs_i)

    reshape-dot version, for x.ndim=1 only

    '''
    xrs = x.reshape(-1, n_groups, order='F')
    return np.dot(xrs, xrs.T)  #should be (nobs_i, nobs_i)

    #x.reshape(n_groups, nobs_i,  k_vars) #, order='F')
    #... ? this is getting 3-dimensional  dot, tensordot?
    #needs (n_groups, k_vars, k_vars) array with sum over groups
    #NOT
    #I only need this for x is 1d, i.e. residual


def whiten_individuals_loop(x, transform, group_iter):
    '''apply linear transform for each individual

    loop version
    '''

    #Note: figure out dimension of transformed variable
    #so we can pre-allocate
    x_new = []
    for g in group_iter():
        x_g = x[g]
        x_new.append(np.dot(transform, x_g))

    return np.concatenate(x_new) #np.vstack(x_new)  #or np.array(x_new) #check shape



class ShortPanelGLS2(object):
    '''Short Panel with general intertemporal within correlation

    assumes data is stacked by individuals, panel is balanced and
    within correlation structure is identical across individuals.

    It looks like this can just inherit GLS and overwrite whiten
    '''

    def __init__(self, endog, exog, group):
        self.endog = endog
        self.exog = exog
        self.group = GroupSorted(group)
        self.n_groups = self.group.n_groups
        #self.nobs_group =   #list for unbalanced?

    def fit_ols(self):
        self.res_pooled = OLS(self.endog, self.exog).fit()
        return self.res_pooled  #return or not

    def get_within_cov(self, resid):
        #central moment or not?
        mom = sum_outer_product_loop(resid, self.group.group_iter)
        return mom / self.n_groups   #df correction ?

    def whiten_groups(self, x, cholsigmainv_i):
        #from scipy import sparse #use sparse
        wx = whiten_individuals_loop(x, cholsigmainv_i, self.group.group_iter)
        return wx

    def fit(self):
        res_pooled = self.fit_ols() #get starting estimate
        sigma_i = self.get_within_cov(res_pooled.resid)
        self.cholsigmainv_i = np.linalg.cholesky(np.linalg.pinv(sigma_i)).T
        wendog = self.whiten_groups(self.endog, self.cholsigmainv_i)
        wexog = self.whiten_groups(self.exog, self.cholsigmainv_i)
        #print wendog.shape, wexog.shape
        self.res1 = OLS(wendog, wexog).fit()
        return self.res1

class ShortPanelGLS(GLS):
    '''Short Panel with general intertemporal within correlation

    assumes data is stacked by individuals, panel is balanced and
    within correlation structure is identical across individuals.

    It looks like this can just inherit GLS and overwrite whiten
    '''

    def __init__(self, endog, exog, group, sigma_i=None):
        self.group = GroupSorted(group)
        self.n_groups = self.group.n_groups
        #self.nobs_group =   #list for unbalanced?
        nobs_i = len(endog) / self.n_groups #endog might later not be an ndarray
        #balanced only for now,
        #which is a requirement anyway in this case (full cov)
        #needs to change for parametrized sigma_i

        #
        if sigma_i is None:
            sigma_i = np.eye(int(nobs_i))
        self.cholsigmainv_i = np.linalg.cholesky(np.linalg.pinv(sigma_i)).T

        #super is taking care of endog, exog and sigma
        super(self.__class__, self).__init__(endog, exog, sigma=None)

    def get_within_cov(self, resid):
        #central moment or not?
        mom = sum_outer_product_loop(resid, self.group.group_iter)
        return mom / self.n_groups   #df correction ?

    def whiten_groups(self, x, cholsigmainv_i):
        #from scipy import sparse #use sparse
        wx = whiten_individuals_loop(x, cholsigmainv_i, self.group.group_iter)
        return wx

    def _fit_ols(self):
        #used as starting estimate in old explicity version
        self.res_pooled = OLS(self.endog, self.exog).fit()
        return self.res_pooled  #return or not

    def _fit_old(self):
        #old explicit version
        res_pooled = self._fit_ols() #get starting estimate
        sigma_i = self.get_within_cov(res_pooled.resid)
        self.cholsigmainv_i = np.linalg.cholesky(np.linalg.pinv(sigma_i)).T
        wendog = self.whiten_groups(self.endog, self.cholsigmainv_i)
        wexog = self.whiten_groups(self.exog, self.cholsigmainv_i)
        self.res1 = OLS(wendog, wexog).fit()
        return self.res1

    def whiten(self, x):
        #whiten x by groups, will be applied to endog and exog
        wx = whiten_individuals_loop(x, self.cholsigmainv_i, self.group.group_iter)
        return wx

    #copied from GLSHet and adjusted (boiler plate?)
    def fit_iterative(self, maxiter=3):
        """
        Perform an iterative two-step procedure to estimate the GLS model.

        Parameters
        ----------
        maxiter : int, optional
            the number of iterations

        Notes
        -----
        maxiter=1: returns the estimated based on given weights
        maxiter=2: performs a second estimation with the updated weights,
                   this is 2-step estimation
        maxiter>2: iteratively estimate and update the weights

        TODO: possible extension stop iteration if change in parameter
            estimates is smaller than x_tol

        Repeated calls to fit_iterative, will do one redundant pinv_wexog
        calculation. Calling fit_iterative(maxiter) once does not do any
        redundant recalculations (whitening or calculating pinv_wexog).
        """
        #Note: in contrast to GLSHet, we do not have an auxiliary regression here
        #      might be needed if there is more structure in cov_i

        #because we only have the loop we are not attaching the ols_pooled
        #initial estimate anymore compared to original version

        if maxiter < 1:
            raise ValueError('maxiter needs to be at least 1')

        import collections
        self.history = collections.defaultdict(list) #not really necessary

        for i in range(maxiter):
            #pinv_wexog is cached, delete it to force recalculation
            if hasattr(self, 'pinv_wexog'):
                del self.pinv_wexog

            #fit with current cov, GLS, i.e. OLS on whitened endog, exog
            results = self.fit()
            self.history['self_params'].append(results.params)

            if not i == maxiter-1:  #skip for last iteration, could break instead
                #print 'ols',
                self.results_old = results #store previous results for debugging

                #get cov from residuals of previous regression
                sigma_i = self.get_within_cov(results.resid)
                self.cholsigmainv_i = np.linalg.cholesky(np.linalg.pinv(sigma_i)).T

                #calculate new whitened endog and exog
                self.initialize()

        #note results is the wrapper, results._results is the results instance
        #results._results.results_residual_regression = res_resid
        return results