"""
Recursive least squares model
Author: Chad Fulton
License: Simplified-BSD
"""
import numpy as np
import pandas as pd
from statsmodels.compat.pandas import Appender
from statsmodels.tools.data import _is_using_pandas
from statsmodels.tsa.statespace.mlemodel import (
MLEModel, MLEResults, MLEResultsWrapper, PredictionResults,
PredictionResultsWrapper)
from statsmodels.tsa.statespace.tools import concat
from statsmodels.tools.tools import Bunch
from statsmodels.tools.decorators import cache_readonly
import statsmodels.base.wrapper as wrap
# Columns are alpha = 0.1, 0.05, 0.025, 0.01, 0.005
_cusum_squares_scalars = np.array([
[1.0729830, 1.2238734, 1.3581015, 1.5174271, 1.6276236],
[-0.6698868, -0.6700069, -0.6701218, -0.6702672, -0.6703724],
[-0.5816458, -0.7351697, -0.8858694, -1.0847745, -1.2365861]
])
class RecursiveLS(MLEModel):
r"""
Recursive least squares
Parameters
----------
endog : array_like
The observed time-series process :math:`y`
exog : array_like
Array of exogenous regressors, shaped nobs x k.
constraints : array_like, str, or tuple
- array : An r x k array where r is the number of restrictions to
test and k is the number of regressors. It is assumed that the
linear combination is equal to zero.
- str : The full hypotheses to test can be given as a string.
See the examples.
- tuple : A tuple of arrays in the form (R, q), ``q`` can be
either a scalar or a length p row vector.
Notes
-----
Recursive least squares (RLS) corresponds to expanding window ordinary
least squares (OLS).
This model applies the Kalman filter to compute recursive estimates of the
coefficients and recursive residuals.
References
----------
.. [*] Durbin, James, and Siem Jan Koopman. 2012.
Time Series Analysis by State Space Methods: Second Edition.
Oxford University Press.
"""
def __init__(self, endog, exog, constraints=None, **kwargs):
# Standardize data
endog_using_pandas = _is_using_pandas(endog, None)
if not endog_using_pandas:
endog = np.asanyarray(endog)
exog_is_using_pandas = _is_using_pandas(exog, None)
if not exog_is_using_pandas:
exog = np.asarray(exog)
# Make sure we have 2-dimensional array
if exog.ndim == 1:
if not exog_is_using_pandas:
exog = exog[:, None]
else:
exog = pd.DataFrame(exog)
self.k_exog = exog.shape[1]
# Handle constraints
self.k_constraints = 0
self._r_matrix = self._q_matrix = None
if constraints is not None:
from patsy import DesignInfo
from statsmodels.base.data import handle_data
data = handle_data(endog, exog, **kwargs)
names = data.param_names
LC = DesignInfo(names).linear_constraint(constraints)
self._r_matrix, self._q_matrix = LC.coefs, LC.constants
self.k_constraints = self._r_matrix.shape[0]
constraint_endog = np.zeros((len(endog), len(self._r_matrix)))
if endog_using_pandas:
constraint_endog = pd.DataFrame(constraint_endog,
index=endog.index)
endog = concat([endog, constraint_endog], axis=1)
endog.values[:, 1:] = self._q_matrix[:, 0]
else:
endog[:, 1:] = self._q_matrix[:, 0]
# Handle coefficient initialization
kwargs.setdefault('initialization', 'diffuse')
# Initialize the state space representation
super(RecursiveLS, self).__init__(
endog, k_states=self.k_exog, exog=exog, **kwargs)
# Use univariate filtering by default
self.ssm.filter_univariate = True
# Concentrate the scale out of the likelihood function
self.ssm.filter_concentrated = True
# Setup the state space representation
self['design'] = np.zeros((self.k_endog, self.k_states, self.nobs))
self['design', 0] = self.exog[:, :, None].T
if self._r_matrix is not None:
self['design', 1:, :] = self._r_matrix[:, :, None]
self['transition'] = np.eye(self.k_states)
# Notice that the filter output does not depend on the measurement
# variance, so we set it here to 1
self['obs_cov', 0, 0] = 1.
self['transition'] = np.eye(self.k_states)
# Linear constraints are technically imposed by adding "fake" endog
# variables that are used during filtering, but for all model- and
# results-based purposes we want k_endog = 1.
if self._r_matrix is not None:
self.k_endog = 1
@classmethod
def from_formula(cls, formula, data, subset=None, constraints=None):
return super(MLEModel, cls).from_formula(formula, data, subset,
constraints=constraints)
def _validate_can_fix_params(self, param_names):
raise ValueError('Linear constraints on coefficients should be given'
' using the `constraints` argument in constructing.'
' the model. Other parameter constraints are not'
' available in the resursive least squares model.')
def fit(self):
"""
Fits the model by application of the Kalman filter
Returns
-------
RecursiveLSResults
"""
smoother_results = self.smooth(return_ssm=True)
with self.ssm.fixed_scale(smoother_results.scale):
res = self.smooth()
return res
def filter(self, return_ssm=False, **kwargs):
# Get the state space output
result = super(RecursiveLS, self).filter([], transformed=True,
cov_type='none',
return_ssm=True, **kwargs)
# Wrap in a results object
if not return_ssm:
params = result.filtered_state[:, -1]
cov_kwds = {
'custom_cov_type': 'nonrobust',
'custom_cov_params': result.filtered_state_cov[:, :, -1],
'custom_description': ('Parameters and covariance matrix'
' estimates are RLS estimates'
' conditional on the entire sample.')
}
result = RecursiveLSResultsWrapper(
RecursiveLSResults(self, params, result, cov_type='custom',
cov_kwds=cov_kwds)
)
return result
def smooth(self, return_ssm=False, **kwargs):
# Get the state space output
result = super(RecursiveLS, self).smooth([], transformed=True,
cov_type='none',
return_ssm=True, **kwargs)
# Wrap in a results object
if not return_ssm:
params = result.filtered_state[:, -1]
cov_kwds = {
'custom_cov_type': 'nonrobust',
'custom_cov_params': result.filtered_state_cov[:, :, -1],
'custom_description': ('Parameters and covariance matrix'
' estimates are RLS estimates'
' conditional on the entire sample.')
}
result = RecursiveLSResultsWrapper(
RecursiveLSResults(self, params, result, cov_type='custom',
cov_kwds=cov_kwds)
)
return result
@property
def endog_names(self):
endog_names = super(RecursiveLS, self).endog_names
return endog_names[0] if isinstance(endog_names, list) else endog_names
@property
def param_names(self):
return self.exog_names
@property
def start_params(self):
# Only parameter is the measurement disturbance standard deviation
return np.zeros(0)
def update(self, params, **kwargs):
"""
Update the parameters of the model
Updates the representation matrices to fill in the new parameter
values.
Parameters
----------
params : array_like
Array of new parameters.
transformed : bool, optional
Whether or not `params` is already transformed. If set to False,
`transform_params` is called. Default is True..
Returns
-------
params : array_like
Array of parameters.
"""
pass
class RecursiveLSResults(MLEResults):
"""
Class to hold results from fitting a recursive least squares model.
Parameters
----------
model : RecursiveLS instance
The fitted model instance
Attributes
----------
specification : dictionary
Dictionary including all attributes from the recursive least squares
model instance.
See Also
--------
statsmodels.tsa.statespace.kalman_filter.FilterResults
statsmodels.tsa.statespace.mlemodel.MLEResults
"""
def __init__(self, model, params, filter_results, cov_type='opg',
**kwargs):
super(RecursiveLSResults, self).__init__(
model, params, filter_results, cov_type, **kwargs)
# Since we are overriding params with things that are not MLE params,
# need to adjust df's
q = max(self.loglikelihood_burn, self.k_diffuse_states)
self.df_model = q - self.model.k_constraints
self.df_resid = self.nobs_effective - self.df_model
# Save _init_kwds
self._init_kwds = self.model._get_init_kwds()
# Save the model specification
self.specification = Bunch(**{
'k_exog': self.model.k_exog,
'k_constraints': self.model.k_constraints})
# Adjust results to remove "faux" endog from the constraints
if self.model._r_matrix is not None:
for name in ['forecasts', 'forecasts_error',
'forecasts_error_cov', 'standardized_forecasts_error',
'forecasts_error_diffuse_cov']:
setattr(self, name, getattr(self, name)[0:1])
@property
def recursive_coefficients(self):
"""
Estimates of regression coefficients, recursively estimated
Returns
-------
out: Bunch
Has the following attributes:
- `filtered`: a time series array with the filtered estimate of
the component
- `filtered_cov`: a time series array with the filtered estimate of
the variance/covariance of the component
- `smoothed`: a time series array with the smoothed estimate of
the component
- `smoothed_cov`: a time series array with the smoothed estimate of
the variance/covariance of the component
- `offset`: an integer giving the offset in the state vector where
this component begins
"""
out = None
spec = self.specification
start = offset = 0
end = offset + spec.k_exog
out = Bunch(
filtered=self.filtered_state[start:end],
filtered_cov=self.filtered_state_cov[start:end, start:end],
smoothed=None, smoothed_cov=None,
offset=offset
)
if self.smoothed_state is not None:
out.smoothed = self.smoothed_state[start:end]
if self.smoothed_state_cov is not None:
out.smoothed_cov = (
self.smoothed_state_cov[start:end, start:end])
return out
@cache_readonly
def resid_recursive(self):
r"""
Recursive residuals
Returns
-------
resid_recursive : array_like
An array of length `nobs` holding the recursive
residuals.
Notes
-----
These quantities are defined in, for example, Harvey (1989)
section 5.4. In fact, there he defines the standardized innovations in
equation 5.4.1, but in his version they have non-unit variance, whereas
the standardized forecast errors computed by the Kalman filter here
assume unit variance. To convert to Harvey's definition, we need to
Loading ...