# Note: The information criteria add 1 to the number of parameters
# whenever the model has an AR or MA term since, in principle,
# the variance could be treated as a free parameter and restricted
# This code does not allow this, but it adds consistency with other
# packages such as gretl and X12-ARIMA
import copy
from datetime import datetime
import numpy as np
import pandas as pd
from numpy import dot, log, zeros, pi
from numpy.linalg import inv
from scipy import optimize
from scipy.signal import lfilter
from scipy.stats import norm
from statsmodels.compat.pandas import Appender
import statsmodels.base.wrapper as wrap
from statsmodels.regression.linear_model import yule_walker, OLS
from statsmodels.tools.decorators import cache_readonly
from statsmodels.tools.numdiff import approx_hess_cs, approx_fprime_cs
from statsmodels.tools.sm_exceptions import SpecificationWarning
from statsmodels.tools.validation import array_like, string_like
from statsmodels.tsa.ar_model import AutoReg, ar_select_order
from statsmodels.tsa.arima_process import arma2ma
from statsmodels.tsa.base import tsa_model
from statsmodels.tsa.kalmanf import KalmanFilter
from statsmodels.tsa.tsatools import (lagmat, add_trend,
_ar_transparams, _ar_invtransparams,
_ma_transparams, _ma_invtransparams,
unintegrate, unintegrate_levels)
from statsmodels.tsa.vector_ar import util
REPEATED_FIT_ERROR = """
Model has been fit using trend={0} and method={1}. These cannot be changed
in subsequent calls to `fit`. Instead, use a new instance of {mod}.
"""
_armax_notes = r"""
Notes
-----
If exogenous variables are given, then the model that is fit is
.. math::
\phi(L)(y_t - X_t\beta) = \theta(L)\epsilon_t
where :math:`\phi` and :math:`\theta` are polynomials in the lag
operator, :math:`L`. This is the regression model with ARMA errors,
or ARMAX model. This specification is used, whether or not the model
is fit using conditional sum of square or maximum-likelihood, using
the `method` argument in
:meth:`statsmodels.tsa.arima_model.%(Model)s.fit`. Therefore, for
now, `css` and `mle` refer to estimation methods only. This may
change for the case of the `css` model in future versions.
"""
_arma_params = """endog : array_like
The endogenous variable.
order : iterable
The (p,q) order of the model for the number of AR parameters,
and MA parameters to use.
exog : array_like, optional
An optional array of exogenous variables. This should *not* include a
constant or trend. You can specify this in the `fit` method."""
_arma_model = "Autoregressive Moving Average ARMA(p,q) Model"
_arima_model = "Autoregressive Integrated Moving Average ARIMA(p,d,q) Model"
_arima_params = """endog : array_like
The endogenous variable.
order : iterable
The (p,d,q) order of the model for the number of AR parameters,
differences, and MA parameters to use.
exog : array_like, optional
An optional array of exogenous variables. This should *not* include a
constant or trend. You can specify this in the `fit` method."""
_predict_notes = """
Notes
-----
Use the results predict method instead.
"""
_results_notes = """
Notes
-----
It is recommended to use dates with the time-series models, as the
below will probably make clear. However, if ARIMA is used without
dates and/or `start` and `end` are given as indices, then these
indices are in terms of the *original*, undifferenced series. Ie.,
given some undifferenced observations::
1970Q1, 1
1970Q2, 1.5
1970Q3, 1.25
1970Q4, 2.25
1971Q1, 1.2
1971Q2, 4.1
1970Q1 is observation 0 in the original series. However, if we fit an
ARIMA(p,1,q) model then we lose this first observation through
differencing. Therefore, the first observation we can forecast (if
using exact MLE) is index 1. In the differenced series this is index
0, but we refer to it as 1 from the original series.
"""
_predict = """
%(Model)s model in-sample and out-of-sample prediction
Parameters
----------
%(params)s
start : int, str, or datetime
Zero-indexed observation number at which to start forecasting, ie.,
the first forecast is start. Can also be a date string to
parse or a datetime type.
end : int, str, or datetime
Zero-indexed observation number at which to end forecasting, ie.,
the first forecast is start. Can also be a date string to
parse or a datetime type. However, if the dates index does not
have a fixed frequency, end must be an integer index if you
want out of sample prediction.
exog : array_like, optional
If the model is an ARMAX and out-of-sample forecasting is
requested, exog must be given. exog must be aligned so that exog[0]
is used to produce the first out-of-sample forecast. The number of
observation in exog should match the number of out-of-sample
forecasts produced. If the length of exog does not match the number
of forecasts, a SpecificationWarning is produced.
dynamic : bool, optional
The `dynamic` keyword affects in-sample prediction. If dynamic
is False, then the in-sample lagged values are used for
prediction. If `dynamic` is True, then in-sample forecasts are
used in place of lagged dependent variables. The first forecast
value is `start`.
%(extra_params)s
Returns
-------
%(returns)s
%(extra_section)s
"""
_predict_returns = """predict : ndarray
The predicted values.
"""
_arma_predict = _predict % {"Model": "ARMA",
"params": """params : array_like
The fitted parameters of the model.""",
"extra_params": "",
"returns": _predict_returns,
"extra_section": _predict_notes}
_arma_results_predict = _predict % {"Model": "ARMA", "params": "",
"extra_params": "",
"returns": _predict_returns,
"extra_section": _results_notes}
_arima_extras = """typ : str {'linear', 'levels'}
- 'linear' : Linear prediction in terms of the differenced
endogenous variables.
- 'levels' : Predict the levels of the original endogenous
variables.\n"""
_arima_predict = _predict % {"Model": "ARIMA",
"params": """params : array_like
The fitted parameters of the model.""",
"extra_params": _arima_extras,
"returns": _predict_returns,
"extra_section": _predict_notes}
_arima_results_predict = _predict % {"Model": "ARIMA",
"params": "",
"extra_params": _arima_extras,
"returns": _predict_returns,
"extra_section": _results_notes}
_arima_plot_predict_example = """ Examples
--------
>>> import statsmodels.api as sm
>>> import matplotlib.pyplot as plt
>>> import pandas as pd
>>>
>>> dta = sm.datasets.sunspots.load_pandas().data[['SUNACTIVITY']]
>>> dta.index = pd.date_range(start='1700', end='2009', freq='A')
>>> res = sm.tsa.ARMA(dta, (3, 0)).fit()
>>> fig, ax = plt.subplots()
>>> ax = dta.loc['1950':].plot(ax=ax)
>>> fig = res.plot_predict('1990', '2012', dynamic=True, ax=ax,
... plot_insample=False)
>>> plt.show()
.. plot:: plots/arma_predict_plot.py
"""
_plot_extras = """alpha : float, optional
The confidence intervals for the forecasts are (1 - alpha)%
plot_insample : bool, optional
Whether to plot the in-sample series. Default is True.
ax : matplotlib.Axes, optional
Existing axes to plot with."""
_plot_predict = ("""
Plot forecasts
""" + '\n'.join(_predict.split('\n')[2:])) % {
"params": "",
"extra_params": _plot_extras,
"returns": """fig : Figure
The plotted Figure instance""",
"extra_section": ('\n' + _arima_plot_predict_example +
'\n' + _results_notes)
}
_arima_plot_predict = ("""
Plot forecasts
""" + '\n'.join(_predict.split('\n')[2:])) % {
"params": "",
"extra_params": _plot_extras,
"returns": """fig : Figure
The plotted Figure instance""",
"extra_section": ('\n' + _arima_plot_predict_example + '\n' +
'\n'.join(_results_notes.split('\n')[:3])
+ ("""
This is hard-coded to only allow plotting of the forecasts in levels.
""") + '\n'.join(_results_notes.split('\n')[3:]))
}
def cumsum_n(x, n):
for _ in range(n):
x = np.cumsum(x)
return x
def _prediction_adjust_exog(exog, row_labels, dynamic, end):
"""
Adjust exog if exog has dates that align with endog
Parameters
----------
exog : {array_like, None}
The exog values
row_labels : {pd.DatetimeIndex, None}
Row labels from endog
dynamic : bool
Flag indicating whether dynamic forecasts are expected
end : int
Index of final in-sample observation
"""
if exog is None:
return None
exog_start = 0
exog_index = getattr(exog, 'index', None)
exog_dates = isinstance(exog_index, pd.DatetimeIndex)
endog_dates = isinstance(row_labels, pd.DatetimeIndex)
date_adj = endog_dates and exog_dates and not dynamic
if date_adj and row_labels.isin(exog_index).all():
end_label = row_labels[end]
exog_start = exog.index.get_loc(end_label) + 1
exog = array_like(exog, 'exog', ndim=2)
return exog[exog_start:]
def _check_arima_start(start, k_ar, k_diff, method, dynamic):
if start < 0:
raise ValueError("The start index %d of the original series "
"has been differenced away" % start)
elif (dynamic or 'mle' not in method) and start < k_ar:
raise ValueError("Start must be >= k_ar for conditional MLE "
"or dynamic forecast. Got %d" % start)
def _get_predict_out_of_sample(endog, p, q, k_trend, k_exog, start, errors,
trendparam, exparams, arparams, maparams, steps,
method, exog=None):
"""
Returns endog, resid, mu of appropriate length for out of sample
prediction.
"""
if q:
resid = np.zeros(q)
if start and 'mle' in method or (start == p and not start == 0):
resid[:q] = errors[start - q:start]
elif start:
resid[:q] = errors[start - q - p:start - p]
else:
resid[:q] = errors[-q:]
else:
resid = None
y = endog
if k_trend == 1:
# use expectation not constant
if k_exog > 0:
# TODO: technically should only hold for MLE not
# conditional model. See #274.
# ensure 2-d for conformability
if np.ndim(exog) == 1 and k_exog == 1:
# have a 1d series of observations -> 2d
exog = exog[:, None]
elif np.ndim(exog) == 1:
# should have a 1d row of exog -> 2d
if len(exog) != k_exog:
raise ValueError("1d exog given and len(exog) != k_exog")
exog = exog[None, :]
X = lagmat(np.dot(exog, exparams), p, original='in', trim='both')
mu = trendparam * (1 - arparams.sum())
# arparams were reversed in unpack for ease later
mu = mu + (np.r_[1, -arparams[::-1]] * X).sum(1)[:, None]
else:
mu = trendparam * (1 - arparams.sum())
mu = np.array([mu] * steps)
elif k_exog > 0:
X = np.dot(exog, exparams)
X = lagmat(X, p, original='in', trim='both')
mu = (np.r_[1, -arparams[::-1]] * X).sum(1)[:, None]
else:
mu = np.zeros(steps)
endog = np.zeros(p + steps - 1)
if p and start:
endog[:p] = y[start - p:start]
elif p:
endog[:p] = y[-p:]
return endog, resid, mu
def _arma_predict_out_of_sample(params, steps, errors, p, q, k_trend, k_exog,
endog, exog=None, start=0, method='mle'):
(trendparam, exparams,
arparams, maparams) = _unpack_params(params, (p, q), k_trend,
k_exog, reverse=True)
endog, resid, mu = _get_predict_out_of_sample(endog, p, q, k_trend, k_exog,
start, errors, trendparam,
exparams, arparams,
Loading ...