import numpy as np
from statsmodels.tools.tools import Bunch
class _MinimalWLS(object):
    """
    Minimal implementation of WLS optimized for performance.

    Parameters
    ----------
    endog : array_like
        1-d endogenous response variable. The dependent variable.
    exog : array_like
        A nobs x k array where `nobs` is the number of observations and `k`
        is the number of regressors. An intercept is not included by default
        and should be added by the user. See
        :func:`statsmodels.tools.add_constant`.
    weights : array_like, optional
        Scalar or 1d array of weights. If you supply 1/W then the variables
        are pre-multiplied by 1/sqrt(W). If no weights are supplied the
        default value is 1 and WLS results are the same as OLS.
    check_endog : bool, optional
        Flag indicating whether to check for inf/nan in endog.
        If True and any are found, ValueError is raised.
    check_weights : bool, optional
        Flag indicating whether to check for inf/nan in weights.
        If True and any are found, ValueError is raised.

    Notes
    -----
    Provides only resid, scale, fittedvalues, model.weights which are used by
    methods that iteratively apply WLS.

    Does not perform any checks on the input data for type or shape
    compatibility.
    """

    msg = 'NaN, inf or invalid value detected in {0}, estimation infeasible.'

    def __init__(self, endog, exog, weights=1.0, check_endog=False,
                 check_weights=False):
        self.endog = endog
        self.exog = exog
        self.weights = weights

        w_half = np.sqrt(weights)
        if check_weights:
            # The check runs on sqrt(weights), so negative weights (whose
            # square root is NaN) are rejected together with NaN/inf.
            if not np.all(np.isfinite(w_half)):
                raise ValueError(self.msg.format('weights'))

        if check_endog:
            if not np.all(np.isfinite(endog)):
                raise ValueError(self.msg.format('endog'))

        self.wendog = w_half * endog
        # Decide on the dimensionality of the square-rooted weights rather
        # than np.isscalar(weights): 0-d arrays are not "scalars" for
        # np.isscalar but cannot be indexed with [:, None] either.
        if np.ndim(w_half) == 0:
            self.wexog = w_half * exog
        else:
            # np.asarray makes the column-broadcast indexing work for 1-d
            # containers that do not support ndarray-style [:, None].
            self.wexog = np.asarray(w_half)[:, None] * exog

    def fit(self, method='pinv'):
        """
        Estimate the parameters and return a minimal results container.

        Parameters
        ----------
        method : str, optional
            Method to use to estimate parameters. "pinv", "qr" or "lstsq"

            * "pinv" uses the Moore-Penrose pseudoinverse
              to solve the least squares problem.
            * "qr" uses the QR factorization.
            * "lstsq" uses the least squares implementation in numpy.linalg

            Any value other than "pinv" or "qr" is treated as "lstsq".

        Returns
        -------
        results : Bunch
            Container with the fewest terms needed to implement
            iterative estimation in models. Currently

            * params : Estimated parameters
            * fittedvalues : Fit values using original data
            * resid : Residuals using original data
            * model : This model instance (provides ``weights``)
            * scale : scale computed using weighted residuals

        Notes
        -----
        Does not perform any checks on the input data.

        See Also
        --------
        statsmodels.regression.linear_model.WLS
        """
        if method == 'pinv':
            pinv_wexog = np.linalg.pinv(self.wexog)
            params = pinv_wexog.dot(self.wendog)
        elif method == 'qr':
            Q, R = np.linalg.qr(self.wexog)
            params = np.linalg.solve(R, np.dot(Q.T, self.wendog))
        else:
            params, _, _, _ = np.linalg.lstsq(self.wexog, self.wendog,
                                              rcond=-1)
        return self.results(params)

    def results(self, params):
        """
        Construct results

        Parameters
        ----------
        params : ndarray
            Model parameters

        Returns
        -------
        results : Bunch
            Container with fields params, fittedvalues, resid, model and
            scale. fittedvalues and resid are computed from the unweighted
            data; scale is the weighted residual sum of squares divided by
            (number of rows - number of columns) of the weighted exog.

        Notes
        -----
        Allows results to be constructed from either existing parameters or
        when estimated using ``fit``
        """
        fitted_values = self.exog.dot(params)
        resid = self.endog - fitted_values
        wresid = self.wendog - self.wexog.dot(params)
        df_resid = self.wexog.shape[0] - self.wexog.shape[1]
        scale = np.dot(wresid, wresid) / df_resid
        return Bunch(params=params, fittedvalues=fitted_values, resid=resid,
                     model=self, scale=scale)