Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

alkaline-ml / statsmodels   python

Repository URL to install this package:

Version: 0.11.1 

/ tsa / arima_model.py

# Note: The information criteria add 1 to the number of parameters
#       whenever the model has an AR or MA term since, in principle,
#       the variance could be treated as a free parameter and restricted
#       This code does not allow this, but it adds consistency with other
#       packages such as gretl and X12-ARIMA

import copy
from datetime import datetime

import numpy as np
import pandas as pd
from numpy import dot, log, zeros, pi
from numpy.linalg import inv
from scipy import optimize
from scipy.signal import lfilter
from scipy.stats import norm

from statsmodels.compat.pandas import Appender
import statsmodels.base.wrapper as wrap
from statsmodels.regression.linear_model import yule_walker, OLS
from statsmodels.tools.decorators import cache_readonly
from statsmodels.tools.numdiff import approx_hess_cs, approx_fprime_cs
from statsmodels.tools.sm_exceptions import SpecificationWarning
from statsmodels.tools.validation import array_like, string_like
from statsmodels.tsa.ar_model import AutoReg, ar_select_order
from statsmodels.tsa.arima_process import arma2ma
from statsmodels.tsa.base import tsa_model
from statsmodels.tsa.kalmanf import KalmanFilter
from statsmodels.tsa.tsatools import (lagmat, add_trend,
                                      _ar_transparams, _ar_invtransparams,
                                      _ma_transparams, _ma_invtransparams,
                                      unintegrate, unintegrate_levels)
from statsmodels.tsa.vector_ar import util

REPEATED_FIT_ERROR = """
Model has been fit using trend={0} and method={1}. These cannot be changed
in subsequent calls to `fit`. Instead, use a new instance of {mod}.
"""

_armax_notes = r"""
    Notes
    -----
    If exogenous variables are given, then the model that is fit is

    .. math::

       \phi(L)(y_t - X_t\beta) = \theta(L)\epsilon_t

    where :math:`\phi` and :math:`\theta` are polynomials in the lag
    operator, :math:`L`. This is the regression model with ARMA errors,
    or ARMAX model. This specification is used, whether or not the model
    is fit using conditional sum of square or maximum-likelihood, using
    the `method` argument in
    :meth:`statsmodels.tsa.arima_model.%(Model)s.fit`. Therefore, for
    now, `css` and `mle` refer to estimation methods only. This may
    change for the case of the `css` model in future versions.
"""

_arma_params = """endog : array_like
        The endogenous variable.
    order : iterable
        The (p,q) order of the model for the number of AR parameters,
        and MA parameters to use.
    exog : array_like, optional
        An optional array of exogenous variables. This should *not* include a
        constant or trend. You can specify this in the `fit` method."""

_arma_model = "Autoregressive Moving Average ARMA(p,q) Model"

_arima_model = "Autoregressive Integrated Moving Average ARIMA(p,d,q) Model"

_arima_params = """endog : array_like
        The endogenous variable.
    order : iterable
        The (p,d,q) order of the model for the number of AR parameters,
        differences, and MA parameters to use.
    exog : array_like, optional
        An optional array of exogenous variables. This should *not* include a
        constant or trend. You can specify this in the `fit` method."""

_predict_notes = """
        Notes
        -----
        Use the results predict method instead.
"""

_results_notes = """
        Notes
        -----
        It is recommended to use dates with the time-series models, as the
        below will probably make clear. However, if ARIMA is used without
        dates and/or `start` and `end` are given as indices, then these
        indices are in terms of the *original*, undifferenced series. Ie.,
        given some undifferenced observations::

         1970Q1, 1
         1970Q2, 1.5
         1970Q3, 1.25
         1970Q4, 2.25
         1971Q1, 1.2
         1971Q2, 4.1

        1970Q1 is observation 0 in the original series. However, if we fit an
        ARIMA(p,1,q) model then we lose this first observation through
        differencing. Therefore, the first observation we can forecast (if
        using exact MLE) is index 1. In the differenced series this is index
        0, but we refer to it as 1 from the original series.
"""

_predict = """
        %(Model)s model in-sample and out-of-sample prediction

        Parameters
        ----------
        %(params)s
        start : int, str, or datetime
            Zero-indexed observation number at which to start forecasting, ie.,
            the first forecast is start. Can also be a date string to
            parse or a datetime type.
        end : int, str, or datetime
            Zero-indexed observation number at which to end forecasting, ie.,
            the first forecast is start. Can also be a date string to
            parse or a datetime type. However, if the dates index does not
            have a fixed frequency, end must be an integer index if you
            want out of sample prediction.
        exog : array_like, optional
            If the model is an ARMAX and out-of-sample forecasting is
            requested, exog must be given. exog must be aligned so that exog[0]
            is used to produce the first out-of-sample forecast. The number of
            observation in exog should match the number of out-of-sample
            forecasts produced. If the length of exog does not match the number
            of forecasts, a SpecificationWarning is produced.
        dynamic : bool, optional
            The `dynamic` keyword affects in-sample prediction. If dynamic
            is False, then the in-sample lagged values are used for
            prediction. If `dynamic` is True, then in-sample forecasts are
            used in place of lagged dependent variables. The first forecast
            value is `start`.
        %(extra_params)s

        Returns
        -------
        %(returns)s
        %(extra_section)s
"""

_predict_returns = """predict : ndarray
            The predicted values.

"""

_arma_predict = _predict % {"Model": "ARMA",
                            "params": """params : array_like
            The fitted parameters of the model.""",
                            "extra_params": "",
                            "returns": _predict_returns,
                            "extra_section": _predict_notes}

_arma_results_predict = _predict % {"Model": "ARMA", "params": "",
                                    "extra_params": "",
                                    "returns": _predict_returns,
                                    "extra_section": _results_notes}
_arima_extras = """typ : str {'linear', 'levels'}

            - 'linear' : Linear prediction in terms of the differenced
              endogenous variables.
            - 'levels' : Predict the levels of the original endogenous
              variables.\n"""

_arima_predict = _predict % {"Model": "ARIMA",
                             "params": """params : array_like
            The fitted parameters of the model.""",
                             "extra_params": _arima_extras,
                             "returns": _predict_returns,
                             "extra_section": _predict_notes}

_arima_results_predict = _predict % {"Model": "ARIMA",
                                     "params": "",
                                     "extra_params": _arima_extras,
                                     "returns": _predict_returns,
                                     "extra_section": _results_notes}

_arima_plot_predict_example = """        Examples
        --------
        >>> import statsmodels.api as sm
        >>> import matplotlib.pyplot as plt
        >>> import pandas as pd
        >>>
        >>> dta = sm.datasets.sunspots.load_pandas().data[['SUNACTIVITY']]
        >>> dta.index = pd.date_range(start='1700', end='2009', freq='A')
        >>> res = sm.tsa.ARMA(dta, (3, 0)).fit()
        >>> fig, ax = plt.subplots()
        >>> ax = dta.loc['1950':].plot(ax=ax)
        >>> fig = res.plot_predict('1990', '2012', dynamic=True, ax=ax,
        ...                        plot_insample=False)
        >>> plt.show()

        .. plot:: plots/arma_predict_plot.py
"""

_plot_extras = """alpha : float, optional
            The confidence intervals for the forecasts are (1 - alpha)%
        plot_insample : bool, optional
            Whether to plot the in-sample series. Default is True.
        ax : matplotlib.Axes, optional
            Existing axes to plot with."""

_plot_predict = ("""
        Plot forecasts
                      """ + '\n'.join(_predict.split('\n')[2:])) % {
    "params": "",
    "extra_params": _plot_extras,
    "returns": """fig : Figure
            The plotted Figure instance""",
    "extra_section": ('\n' + _arima_plot_predict_example +
                      '\n' + _results_notes)
}

_arima_plot_predict = ("""
        Plot forecasts
                      """ + '\n'.join(_predict.split('\n')[2:])) % {
    "params": "",
    "extra_params": _plot_extras,
    "returns": """fig : Figure
            The plotted Figure instance""",
    "extra_section": ('\n' + _arima_plot_predict_example + '\n' +
                      '\n'.join(_results_notes.split('\n')[:3])
                      + ("""
        This is hard-coded to only allow plotting of the forecasts in levels.
""") + '\n'.join(_results_notes.split('\n')[3:]))
}


def cumsum_n(x, n):
    for _ in range(n):
        x = np.cumsum(x)

    return x


def _prediction_adjust_exog(exog, row_labels, dynamic, end):
    """
    Adjust exog if exog has dates that align with endog

    Parameters
    ----------
    exog : {array_like, None}
        The exog values
    row_labels : {pd.DatetimeIndex, None}
        Row labels from endog
    dynamic : bool
        Flag indicating whether dynamic forecasts are expected
    end : int
        Index of final in-sample observation
    """
    if exog is None:
        return None

    exog_start = 0
    exog_index = getattr(exog, 'index', None)
    exog_dates = isinstance(exog_index, pd.DatetimeIndex)
    endog_dates = isinstance(row_labels, pd.DatetimeIndex)
    date_adj = endog_dates and exog_dates and not dynamic
    if date_adj and row_labels.isin(exog_index).all():
        end_label = row_labels[end]
        exog_start = exog.index.get_loc(end_label) + 1

    exog = array_like(exog, 'exog', ndim=2)
    return exog[exog_start:]


def _check_arima_start(start, k_ar, k_diff, method, dynamic):
    if start < 0:
        raise ValueError("The start index %d of the original series "
                         "has been differenced away" % start)
    elif (dynamic or 'mle' not in method) and start < k_ar:
        raise ValueError("Start must be >= k_ar for conditional MLE "
                         "or dynamic forecast. Got %d" % start)


def _get_predict_out_of_sample(endog, p, q, k_trend, k_exog, start, errors,
                               trendparam, exparams, arparams, maparams, steps,
                               method, exog=None):
    """
    Returns endog, resid, mu of appropriate length for out of sample
    prediction.
    """
    if q:
        resid = np.zeros(q)
        if start and 'mle' in method or (start == p and not start == 0):
            resid[:q] = errors[start - q:start]
        elif start:
            resid[:q] = errors[start - q - p:start - p]
        else:
            resid[:q] = errors[-q:]
    else:
        resid = None

    y = endog
    if k_trend == 1:
        # use expectation not constant
        if k_exog > 0:
            # TODO: technically should only hold for MLE not
            #  conditional model. See #274.
            #  ensure 2-d for conformability
            if np.ndim(exog) == 1 and k_exog == 1:
                # have a 1d series of observations -> 2d
                exog = exog[:, None]
            elif np.ndim(exog) == 1:
                # should have a 1d row of exog -> 2d
                if len(exog) != k_exog:
                    raise ValueError("1d exog given and len(exog) != k_exog")
                exog = exog[None, :]
            X = lagmat(np.dot(exog, exparams), p, original='in', trim='both')
            mu = trendparam * (1 - arparams.sum())
            # arparams were reversed in unpack for ease later
            mu = mu + (np.r_[1, -arparams[::-1]] * X).sum(1)[:, None]
        else:
            mu = trendparam * (1 - arparams.sum())
            mu = np.array([mu] * steps)
    elif k_exog > 0:
        X = np.dot(exog, exparams)
        X = lagmat(X, p, original='in', trim='both')
        mu = (np.r_[1, -arparams[::-1]] * X).sum(1)[:, None]
    else:
        mu = np.zeros(steps)

    endog = np.zeros(p + steps - 1)

    if p and start:
        endog[:p] = y[start - p:start]
    elif p:
        endog[:p] = y[-p:]

    return endog, resid, mu


def _arma_predict_out_of_sample(params, steps, errors, p, q, k_trend, k_exog,
                                endog, exog=None, start=0, method='mle'):
    (trendparam, exparams,
     arparams, maparams) = _unpack_params(params, (p, q), k_trend,
                                          k_exog, reverse=True)
    endog, resid, mu = _get_predict_out_of_sample(endog, p, q, k_trend, k_exog,
                                                  start, errors, trendparam,
                                                  exparams, arparams,
Loading ...