Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

alkaline-ml / statsmodels   python

Repository URL to install this package:

Version: 0.11.1 

/ regression / _tools.py

import numpy as np
from statsmodels.tools.tools import Bunch


class _MinimalWLS(object):
    """
    Minimal implementation of WLS optimized for performance.

    Parameters
    ----------
    endog : array_like
        1-d endogenous response variable. The dependent variable.
    exog : array_like
        A nobs x k array where `nobs` is the number of observations and `k`
        is the number of regressors. An intercept is not included by default
        and should be added by the user. See
        :func:`statsmodels.tools.add_constant`.
    weights : array_like, optional
        1d array of weights.  If you supply 1/W then the variables are pre-
        multiplied by 1/sqrt(W).  If no weights are supplied the default value
        is 1 and WLS reults are the same as OLS.
    check_endog : bool, optional
        Flag indicating whether to check for inf/nan in endog.
        If True and any are found, ValueError is raised.
    check_weights : bool, optional
        Flag indicating whether to check for inf/nan in weights.
        If True and any are found, ValueError is raised.

    Notes
    -----
    Provides only resid, scale, fittedvalues, model.weights which are used by
    methods that iteratively apply WLS.

    Does not perform any checks on the input data for type or shape
    compatibility
    """

    msg = 'NaN, inf or invalid value detected in {0}, estimation infeasible.'

    def __init__(self, endog, exog, weights=1.0, check_endog=False,
                 check_weights=False):
        self.endog = endog
        self.exog = exog
        self.weights = weights
        w_half = np.sqrt(weights)
        if check_weights:
            if not np.all(np.isfinite(w_half)):
                raise ValueError(self.msg.format('weights'))

        if check_endog:
            if not np.all(np.isfinite(endog)):
                raise ValueError(self.msg.format('endog'))

        self.wendog = w_half * endog
        if np.isscalar(weights):
            self.wexog = w_half * exog
        else:
            self.wexog = w_half[:, None] * exog

    def fit(self, method='pinv'):
        """
        Minimal implementation of WLS optimized for performance.

        Parameters
        ----------
        method : str, optional
            Method to use to estimate parameters.  "pinv", "qr" or "lstsq"

              * "pinv" uses the Moore-Penrose pseudoinverse
                 to solve the least squares problem.
              * "qr" uses the QR factorization.
              * "lstsq" uses the least squares implementation in numpy.linalg

        Returns
        -------
        results : namedtuple
            Named tuple containing the fewest terms needed to implement
            iterative estimation in models. Currently

              * params : Estimated parameters
              * fittedvalues : Fit values using original data
              * resid : Residuals using original data
              * model : namedtuple with one field, weights
              * scale : scale computed using weighted residuals

        Notes
        -----
        Does not perform and checks on the input data

        See Also
        --------
        statsmodels.regression.linear_model.WLS
        """
        if method == 'pinv':
            pinv_wexog = np.linalg.pinv(self.wexog)
            params = pinv_wexog.dot(self.wendog)
        elif method == 'qr':
            Q, R = np.linalg.qr(self.wexog)
            params = np.linalg.solve(R, np.dot(Q.T, self.wendog))
        else:
            params, _, _, _ = np.linalg.lstsq(self.wexog, self.wendog,
                                              rcond=-1)
        return self.results(params)

    def results(self, params):
        """
        Construct results

        params : ndarray
            Model parameters

        Notes
        -----
        Allows results to be constructed from either existing parameters or
        when estimated using using ``fit``
        """
        fitted_values = self.exog.dot(params)
        resid = self.endog - fitted_values
        wresid = self.wendog - self.wexog.dot(params)
        df_resid = self.wexog.shape[0] - self.wexog.shape[1]
        scale = np.dot(wresid, wresid) / df_resid

        return Bunch(params=params, fittedvalues=fitted_values, resid=resid,
                     model=self, scale=scale)