tsa/base/tests/test_tsa_indexes.py · alkaline-ml/statsmodels

alkaline-ml / statsmodels python

Repository URL to install this package:
Version: 0.11.1

/ tsa / base / tests / test_tsa_indexes.py

"""
Test index support in time series models

1. Test support for passing / constructing the underlying index in __init__
2. Test wrapping of output using the underlying index
3. Test wrapping of prediction / forecasting using the underlying index or
   extensions of it.

Author: Chad Fulton
License: BSD-3
"""

import pytest
import warnings
import numpy as np
import pandas as pd

from numpy.testing import assert_equal, assert_raises

from statsmodels.tsa.base import tsa_model
from statsmodels.tools.sm_exceptions import ValueWarning

# Number of observations shared by every fixture in this module
nobs = 5
base_dta = np.arange(nobs)

# The same data in each container type used as `endog` by the tests below:
# first the non-pandas containers (list, numpy array) ...
dta = [base_dta.tolist(), base_dta]
# ... then the pandas containers (Series, DataFrame)
dta += [pd.Series(base_dta), pd.DataFrame(base_dta)]

# DatetimeIndex fixtures, one per frequency alias, all starting 1950-01-01
base_date_indexes = [
    pd.date_range(start='1950-01-01', periods=nobs, freq=alias)
    for alias in (
        # (usual candidates)
        'D', 'W', 'M', 'Q', 'A',
        # (some more complicated frequencies)
        '2Q', '2QS', '5s', '1D10min')]

# Note: we separate datetime indexes and period indexes because the
# date coercion does not handle string versions of PeriodIndex objects
# most of the time.
base_period_indexes = [
    pd.period_range(start='1950-01-01', periods=nobs, freq=alias)
    for alias in ('D', 'W', 'M', 'Q', 'A')]
try:
    # Only later versions of pandas support these; if any of them fails with
    # AttributeError, none of the three is added.
    base_period_indexes += [
        pd.period_range(start='1950-01-01', periods=nobs, freq=alias)
        for alias in ('2Q', '5s', '1D10min')]
except AttributeError:
    pass

# Every fixture list below holds `(index, freq)` pairs. `freq` is None when
# the index object itself carries its frequency.
date_indexes = [(index, None) for index in base_date_indexes]
period_indexes = [(index, None) for index in base_period_indexes]

# Date strings: mapped index -> plain list -> pandas Series
numpy_datestr_indexes = [
    (index.map(str), index.freq) for index in base_date_indexes]
list_datestr_indexes = [
    (values.tolist(), freq) for values, freq in numpy_datestr_indexes]
series_datestr_indexes = [
    (pd.Series(values), freq) for values, freq in list_datestr_indexes]

# Python datetime objects: array -> plain list -> object-dtype Series
numpy_datetime_indexes = [
    (pd.to_datetime(index).to_pydatetime(), index.freq)
    for index in base_date_indexes]
list_datetime_indexes = [
    (values.tolist(), freq) for values, freq in numpy_datetime_indexes]
series_datetime_indexes = [
    (pd.Series(values, dtype=object), freq)
    for values, freq in list_datetime_indexes]

# Series built directly from each DatetimeIndex
series_timestamp_indexes = [
    (pd.Series(index), index.freq) for index in base_date_indexes]

# Supported increment indexes: one Int64Index followed by three RangeIndex
# variants given as (start, stop, step)
supported_increment_indexes = [(pd.Int64Index(np.arange(nobs)), None)]
supported_increment_indexes += [
    (pd.RangeIndex(start=first, stop=last, step=stride), None)
    for first, last, stride in (
        (0, nobs, 1),
        (-5, nobs - 5, 1),
        (0, nobs * 6, 6))]

# Supported date indexes
# Only the Int64Index and the `date_indexes` are valid without
# frequency information
supported_date_indexes = [
    pair
    for group in (
        numpy_datestr_indexes,
        list_datestr_indexes, series_datestr_indexes,
        numpy_datetime_indexes, list_datetime_indexes,
        series_datetime_indexes, series_timestamp_indexes)
    for pair in group]

# Unsupported (but still valid) indexes. The order matters: tests below pick
# entries by position.
unsupported_indexes = [
    # Non-incrementing-from-zero indexes
    (np.arange(1, nobs + 1), None),
    (np.arange(nobs)[::-1], None),
    # Float indexes, even if they increment from zero
    (np.arange(nobs) * 1.0, None),
    # Non-date-string indexes
    (list('abcde'), None),
    # Non-date-object indexes
    ([str, 1, 'a', -30.1, {}], None),
]

# Unsupported date indexes (i.e. those without inferrable frequency)
unsupported_date_indexes = [
    # Years in no regular order
    (['1950', '1952', '1941', '1954', '1991'], None),
    # Daily dates with a gap (1950-01-05 is missing)
    (['1950-01-01', '1950-01-02', '1950-01-03', '1950-01-04',
      '1950-01-06'], None),
]


def _expected_freqstr(ix, freq):
    """Return the frequency string expected on a model built from (ix, freq).

    Falls back to ``ix.freq`` when `freq` is None, and converts any
    non-string frequency to its string form through ``freqstr``.
    """
    if freq is None:
        freq = ix.freq
    if not isinstance(freq, str):
        freq = freq.freqstr
    return freq


def _assert_date_index(mod, ix, freq):
    """Assert that `mod` uses a given (not generated) date index with freq."""
    expected_freq = _expected_freqstr(ix, freq)
    assert_equal(
        isinstance(mod._index, (pd.DatetimeIndex, pd.PeriodIndex)), True)
    assert_equal(mod._index_none, False)
    assert_equal(mod._index_dates, True)
    assert_equal(mod._index_generated, False)
    assert_equal(mod._index.freq, mod._index_freq)
    assert_equal(mod.data.dates.equals(mod._index), True)
    assert_equal(mod.data.freq, expected_freq)


def _assert_increment_index(mod, index_type):
    """Assert that `mod` kept a given non-date index of exactly `index_type`.

    The comparison is on the exact type (not ``isinstance``) so that related
    index classes are told apart.
    """
    assert_equal(type(mod._index) == index_type, True)
    assert_equal(mod._index_none, False)
    assert_equal(mod._index_dates, False)
    assert_equal(mod._index_generated, False)
    assert_equal(mod._index_freq, None)
    assert_equal(mod.data.dates, None)
    assert_equal(mod.data.freq, None)


def _assert_generated_index(mod):
    """Assert that `mod` replaced a given index with a generated integer one."""
    assert_equal(isinstance(mod._index,
                 (pd.Int64Index, pd.RangeIndex)), True)
    assert_equal(mod._index_none, False)
    assert_equal(mod._index_dates, False)
    assert_equal(mod._index_generated, True)
    assert_equal(mod._index_freq, None)
    assert_equal(mod.data.dates, None)
    assert_equal(mod.data.freq, None)


def test_instantiation_valid():
    """Check index handling when instantiating `TimeSeriesModel`.

    Valid combinations of endog / dates / freq must construct without
    warnings, valid-but-unsupported indexes must warn, and a few invalid
    combinations must raise `ValueError`.
    """
    tsa_model.__warningregistry__ = {}

    # The primary goal of this test function is to make sure the
    # combinations that are supposed to be valid are actually valid, and
    # that valid but unsupported options give the appropriate warning
    # Secondarily, it also has some tests that invalid combinations raise
    # exceptions, although it's not intended to be comprehensive.
    #
    # Each of `endog`, `exog` can be in the following categories:
    # 0. None (only for exog)
    # 1. list
    # 2. numpy array
    # 3. pandas series
    # 4. pandas dataframe
    #
    # Each pandas index (of `endog`, `exog`, or passed to `dates`) can be:
    # 0. None
    # 1. RangeIndex (if applicable; i.e. if Pandas >= 0.18)
    # 2. Int64Index with values exactly equal to 0, 1, ..., nobs-1
    # 3. DatetimeIndex with frequency
    # 4. PeriodIndex with frequency
    # 5. Anything that does not fall into the above categories also should
    #    only raise an exception if it was passed to dates, and may trigger
    #    a warning otherwise.
    #
    # `date` can be one of the following:
    # 0. None
    # 2. Pandas index #2
    # 3. Pandas index #3
    # 4. List of date strings (requires freq)
    # 5. List of datetime objects (requires freq)
    # 6. Array of date strings (requires freq)
    # 7. Array of datetime objects (requires freq)
    # 8. Series of date strings (requires freq)
    # 9. Series of datetime objects (requires freq)
    # 10. Series of pandas timestamps (requires freq)
    # 11. Anything that does not fall into the above categories should raise
    #     an exception.
    #
    # `freq` can be:
    # 0. None
    # 1. Something that can be passed to `pd.to_offset`
    # 2. Anything that cannot should raise an Exception
    #
    # Each test will be denoted by:
    # endog.index:exog.index/date/freq where the corresponding
    # location is the integer from above; e.g. 1.0:0.0/9/1 corresponds to
    # - List endog (with no index)
    # - No exog
    # - Series of datetime objects
    # - Something valid for `pd.to_offset` (e.g. 'D', if that works with
    #   dates)
    #
    # Notice that the endog.index:exog.index really collapses to a single
    # element, which is the evaluated `row_label`. This is first the exog
    # index, if exists, then the endog index, if it exists, or None
    # otherwise. **Thus, we will not test `exog` here.**
    #
    # Example valid combinations of row_label/date/freq include:
    # - */0/0 (i.e. anything is valid if date and freq are not passed)
    # - */%/% where %/% denotes a valid date/freq combination (i.e. any
    #   row_label is valid if a valid date/freq combination is given)
    #
    # Example invalid combinations include:
    # - [1-2],[3-4].4/0/[1-2] (i.e. if have freq, then must have, or
    #   coerce, a date index)
    # - */[4-10]/0 (i.e. for some types of dates, freq must be passed)

    # Baseline: list, numpy endog with no dates, no freq
    for endog in dta[:2]:
        # No indexes, should not raise warnings
        with warnings.catch_warnings():
            warnings.simplefilter('error')

            mod = tsa_model.TimeSeriesModel(endog)
            assert_equal(isinstance(mod._index,
                                    (pd.Int64Index, pd.RangeIndex)), True)
            assert_equal(mod._index_none, True)
            assert_equal(mod._index_dates, False)
            assert_equal(mod._index_generated, True)
            assert_equal(mod.data.dates, None)
            assert_equal(mod.data.freq, None)

    # Test list, numpy endog, pandas w/o index; with dates / freq argument
    for endog in dta:
        # Supported date indexes, should not raise warnings, do not need freq
        with warnings.catch_warnings():
            warnings.simplefilter('error')

            for ix, freq in date_indexes + period_indexes:
                mod = tsa_model.TimeSeriesModel(endog, dates=ix)
                _assert_date_index(mod, ix, freq)

        # Supported date indexes, should not raise warnings, can use valid freq
        with warnings.catch_warnings():
            warnings.simplefilter('error')

            for ix, freq in date_indexes + period_indexes:
                mod = tsa_model.TimeSeriesModel(endog, dates=ix, freq=freq)
                _assert_date_index(mod, ix, freq)

        # Other supported indexes, with valid freq, should not raise warnings
        with warnings.catch_warnings():
            warnings.simplefilter('error')

            for ix, freq in supported_date_indexes:
                mod = tsa_model.TimeSeriesModel(endog, dates=ix, freq=freq)
                _assert_date_index(mod, ix, freq)

        # Since only supported indexes are valid `dates` arguments, everything
        # else is invalid here
        for ix, freq in supported_increment_indexes + unsupported_indexes:
            assert_raises(ValueError, tsa_model.TimeSeriesModel, endog,
                          dates=ix)

    # Test pandas (Series, DataFrame); with index (no dates/freq argument)
    for base_endog in dta[2:4]:
        # DatetimeIndex and PeriodIndex, should not raise warnings
        with warnings.catch_warnings():
            warnings.simplefilter('error')

            for ix, freq in date_indexes + period_indexes:
                endog = base_endog.copy()
                endog.index = ix

                mod = tsa_model.TimeSeriesModel(endog)
                _assert_date_index(mod, ix, freq)

        # Increment index (this is a "supported" index in the sense that it
        # does not raise a warning, but obviously not a date index)
        endog = base_endog.copy()
        endog.index = supported_increment_indexes[0][0]

        mod = tsa_model.TimeSeriesModel(endog)
        _assert_increment_index(mod, pd.Int64Index)

        # RangeIndex (start=0, end=nobs, so equivalent to increment index)
        endog = base_endog.copy()
        endog.index = supported_increment_indexes[1][0]

        mod = tsa_model.TimeSeriesModel(endog)
        _assert_increment_index(mod, pd.RangeIndex)

        # Supported indexes *when a freq is given*, should not raise a warning
        with warnings.catch_warnings():
            warnings.simplefilter('error')

            for ix, freq in supported_date_indexes:
                endog = base_endog.copy()
                endog.index = ix

                mod = tsa_model.TimeSeriesModel(endog, freq=freq)
                _assert_date_index(mod, ix, freq)

        # Unsupported (or any) indexes to the given series, *when a supported
        # date and freq is given*, should not raise a warning
        with warnings.catch_warnings():
            warnings.simplefilter('error')

            for ix, freq in supported_date_indexes:
                endog = base_endog.copy()
                endog.index = unsupported_indexes[0][0]

                mod = tsa_model.TimeSeriesModel(endog, dates=ix, freq=freq)
                _assert_date_index(mod, ix, freq)

        # Date indexes with inferrable freq, but no given freq, should all give
        # warnings
        message = ('No frequency information was provided,'
                   ' so inferred frequency %s will be used.')
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always')

            for ix, freq in supported_date_indexes:
                endog = base_endog.copy()
                endog.index = ix
                mod = tsa_model.TimeSeriesModel(endog)
                freq = _expected_freqstr(ix, freq)
                assert_equal(type(mod._index) == pd.DatetimeIndex, True)
                assert_equal(mod._index_none, False)
                assert_equal(mod._index_dates, True)
                assert_equal(mod._index_generated, False)
                assert_equal(mod._index.freq, mod._index_freq)
                assert_equal(mod.data.dates.equals(mod._index), True)

                # Note: here, we need to hedge the test a little bit because
                # inferred frequencies are not always the same as the original
                # frequency. From the examples above, when the actual freq is
                # 2QS-OCT, the inferred freq is 2QS-JAN. This is an issue with
                # inferred frequencies, but since we are warning the user, it's
                # not a failure of the code. Thus we only test the "major" part
                # of the freq, and just test that the right message is given
                # (even though it will not have the actual freq of the data in
                # it).
                assert_equal(mod.data.freq.split('-')[0], freq.split('-')[0])
                assert_equal(str(w[-1].message), message % mod.data.freq)

        # Unsupported (but valid) indexes, should all give warnings
        message = ('An unsupported index was provided and will be'
                   ' ignored when e.g. forecasting.')
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always')

            for ix, freq in unsupported_indexes:
                endog = base_endog.copy()
                endog.index = ix
                mod = tsa_model.TimeSeriesModel(endog)
                _assert_generated_index(mod)

                # NOTE(review): `w` accumulates over the whole loop, so
                # `w[0]` is always the first warning recorded in this block
                # rather than the one from the current iteration -- confirm
                # whether `w[-1]` was intended.
                assert_equal(str(w[0].message), message)

        # Date indexes without inferrable freq, and with no given freq, should
        # all give warnings
        message = ('A date index has been provided, but it has no'
                   ' associated frequency information and so will be'
                   ' ignored when e.g. forecasting.')
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always')

            for ix, freq in unsupported_date_indexes:
                endog = base_endog.copy()
                endog.index = ix
                mod = tsa_model.TimeSeriesModel(endog)
                _assert_generated_index(mod)

                # NOTE(review): as above, `w[0]` is the first warning
                # recorded in this block, not the latest one.
                assert_equal(str(w[0].message), message)

    # Test (invalid) freq with no index
    endog = dta[0]
    assert_raises(ValueError, tsa_model.TimeSeriesModel, endog,
                  freq=date_indexes[1][0].freq)

    # Test conflicting index, freq specifications
    endog = dta[2].copy()
    endog.index = date_indexes[0][0]
    assert_raises(ValueError, tsa_model.TimeSeriesModel, endog,
                  freq=date_indexes[1][0].freq)

    # Test unsupported index, but a freq specification
    endog = dta[2].copy()
    endog.index = unsupported_indexes[0][0]
    assert_raises(ValueError, tsa_model.TimeSeriesModel, endog,
                  freq=date_indexes[1][0].freq)

    # Test index that can coerce to date time but incorrect freq
    endog = dta[2].copy()
    endog.index = numpy_datestr_indexes[0][0]
    assert_raises(ValueError, tsa_model.TimeSeriesModel, endog,
                  freq=date_indexes[1][0].freq)


def test_prediction_increment_unsupported():
    """Prediction indexes for a series carrying an unsupported index."""
    # a. Generated from unsupported index
    series = dta[2].copy()
    series.index = unsupported_indexes[-2][0]
    # Construction warnings are not under test here, so silence them
    with warnings.catch_warnings(record=True):
        warnings.simplefilter('ignore')
        mod = tsa_model.TimeSeriesModel(series)

    # Tests three common use cases: basic prediction, negative indexes, and
    # out-of-sample indexes.

    # Basic prediction, keys (0, None): in-sample, so the prediction index
    # is the original (unsupported) row labels
    start, end, out_of_sample, prediction_index = (
        mod._get_prediction_index(0, None))
    assert_equal((start, end, out_of_sample), (0, nobs - 1, 0))
    assert prediction_index.equals(mod.data.row_labels)

    # Negative keys (-2, -1): still in-sample, so the prediction index is
    # the tail of the original row labels
    start, end, out_of_sample, prediction_index = (
        mod._get_prediction_index(-2, -1))
    assert_equal((start, end, out_of_sample), (3, 4, 0))
    assert prediction_index.equals(mod.data.row_labels[3:])

    # Forecasting, keys (1, nobs): one step out of sample; with an
    # unsupported index a warning is issued
    expected_warning = ('No supported index is available.'
                        ' Prediction results will be given with'
                        ' an integer index beginning at `start`.')
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')

        start, end, out_of_sample, prediction_index = (
            mod._get_prediction_index(1, nobs))

        assert_equal(str(caught[0].message), expected_warning)

    assert_equal((start, end, out_of_sample), (1, 4, 1))
    assert prediction_index.equals(pd.Index(np.arange(1, 6)))

    # Integer location present in the (internal) index
    first_three = pd.RangeIndex(start=0, stop=3, step=1)
    loc, index, index_was_expanded = mod._get_index_loc(2)
    assert_equal(loc, 2)
    assert index.equals(first_three)
    assert_equal(index_was_expanded, False)

    # Same integer location, through the function that alternatively falls
    # back to the row labels
    loc, index, index_was_expanded = mod._get_index_label_loc(2)
    assert_equal(loc, 2)
    assert index.equals(first_three)
    assert_equal(index_was_expanded, False)

    # A label found in the given (unsupported) index; the returned index is
    # now a slice of the row labels
    loc, index, index_was_expanded = mod._get_index_label_loc('c')
    assert_equal(loc, 2)
    assert index.equals(mod.data.row_labels[:3])
    assert_equal(index_was_expanded, False)


def test_prediction_increment_nonpandas():
    """Prediction indexes for a plain-list endog with no index at all."""
    mod = tsa_model.TimeSeriesModel(dta[0])

    # Tests three common use cases: basic prediction, negative indexes, and
    # out-of-sample indexes. In every case there is no index and the data is
    # not Pandas, so the returned prediction_index is None.

    # Basic prediction, keys (0, None)
    start, end, out_of_sample, prediction_index = (
        mod._get_prediction_index(0, None))
    assert_equal((start, end, out_of_sample), (0, nobs - 1, 0))
    assert prediction_index is None

    # Negative keys (-2, -1)
    start, end, out_of_sample, prediction_index = (
        mod._get_prediction_index(-2, -1))
    assert_equal((start, end, out_of_sample), (3, 4, 0))
    assert prediction_index is None

    # Forecasting, keys (1, nobs): one step out of sample
    start, end, out_of_sample, prediction_index = (
        mod._get_prediction_index(1, nobs))
    assert_equal((start, end, out_of_sample), (1, 4, 1))
    assert prediction_index is None

    # Integer location present in the (internal) index
    first_three = pd.RangeIndex(start=0, stop=3, step=1)
    loc, index, index_was_expanded = mod._get_index_loc(2)
    assert_equal(loc, 2)
    assert index.equals(first_three)
    assert_equal(index_was_expanded, False)

    # Same integer location, through the function that alternatively falls
    # back to the row labels
    loc, index, index_was_expanded = mod._get_index_label_loc(2)
    assert_equal(loc, 2)
    assert index.equals(first_three)
    assert_equal(index_was_expanded, False)


def test_prediction_increment_pandas_noindex():
    """Prediction indexes for a pandas series left with its default index."""
    mod = tsa_model.TimeSeriesModel(dta[2].copy())

    # Tests three common use cases: basic prediction, negative indexes, and
    # out-of-sample indexes. The data is Pandas but no index was set, so each
    # result is checked against the model's incrementing index.

    # Basic prediction, keys (0, None): the whole index
    start, end, out_of_sample, prediction_index = (
        mod._get_prediction_index(0, None))
    assert_equal((start, end, out_of_sample), (0, nobs - 1, 0))
    assert prediction_index.equals(mod._index)

    # Negative keys (-2, -1): the tail of the index
    start, end, out_of_sample, prediction_index = (
        mod._get_prediction_index(-2, -1))
    assert_equal((start, end, out_of_sample), (3, 4, 0))
    assert prediction_index.equals(mod._index[3:])

    # Forecasting, keys (1, nobs): one step out of sample, integers 1..5
    start, end, out_of_sample, prediction_index = (
        mod._get_prediction_index(1, nobs))
    assert_equal((start, end, out_of_sample), (1, 4, 1))
    assert prediction_index.equals(pd.Index(np.arange(1, 6)))


def test_prediction_increment_pandas_dates_daily():
    """Prediction indexes for a pandas series with a daily date index."""
    # Date-based index
    series = dta[2].copy()
    series.index = date_indexes[0][0]  # Daily, 1950-01-01, 1950-01-02, ...
    mod = tsa_model.TimeSeriesModel(series)
    index_type = type(series.index)

    # Tests three common use cases: basic prediction, negative indexes, and
    # out-of-sample indexes.

    # Basic prediction, keys (0, None): the index is the date index
    start, end, out_of_sample, prediction_index = (
        mod._get_prediction_index(0, None))
    assert_equal((start, end, out_of_sample), (0, nobs - 1, 0))
    assert type(prediction_index) is index_type
    assert prediction_index.equals(mod._index)

    # In-sample prediction, keys (0, 3): a leading subset of the date index
    start, end, out_of_sample, prediction_index = (
        mod._get_prediction_index(0, 3))
    assert_equal((start, end, out_of_sample), (0, 3, 0))
    assert type(prediction_index) is index_type
    assert prediction_index.equals(mod._index[:4])

    # Negative keys (-2, -1): the tail of the date index
    start, end, out_of_sample, prediction_index = (
        mod._get_prediction_index(-2, -1))
    assert_equal((start, end, out_of_sample), (3, 4, 0))
    assert type(prediction_index) is index_type
    assert prediction_index.equals(mod._index[3:])

    # Forecasting, keys (1, nobs): the date index extended by one day
    start, end, out_of_sample, prediction_index = (
        mod._get_prediction_index(1, nobs))
    assert_equal((start, end, out_of_sample), (1, 4, 1))
    expected = pd.date_range(start='1950-01-02', periods=5, freq='D')
    assert prediction_index.equals(expected)

    # Date-based keys

    # In-sample prediction (equivalent to [1, 3])
    start, end, out_of_sample, prediction_index = (
        mod._get_prediction_index('1950-01-02', '1950-01-04'))
    assert_equal((start, end, out_of_sample), (1, 3, 0))
    assert type(prediction_index) is index_type
    assert prediction_index.equals(mod._index[1:4])

    # Out-of-sample forecasting: the end key lies three days past the sample
    start, end, out_of_sample, prediction_index = (
        mod._get_prediction_index('1950-01-01', '1950-01-08'))
    assert_equal((start, end, out_of_sample), (0, 4, 3))
    expected = pd.date_range(start='1950-01-01', periods=8, freq='D')
    assert prediction_index.equals(expected)

    # Integer location present in the (internal) index
    first_three = pd.date_range(start='1950-01-01', periods=3, freq='D')
    loc, index, index_was_expanded = mod._get_index_loc(2)
    assert_equal(loc, 2)
    assert index.equals(first_three)
    assert_equal(index_was_expanded, False)

    # Same integer location, through the function that alternatively falls
    # back to the row labels
    loc, index, index_was_expanded = mod._get_index_label_loc(2)
    assert_equal(loc, 2)
    assert index.equals(first_three)
    assert_equal(index_was_expanded, False)

    # A date-string label; the result is compared with the leading slice of
    # the row labels
    loc, index, index_was_expanded = mod._get_index_label_loc('1950-01-03')
    assert_equal(loc, 2)
    assert index.equals(mod.data.row_labels[:3])
    assert_equal(index_was_expanded, False)


def test_prediction_increment_pandas_dates_monthly():
    """Check prediction indexes built from a monthly date index.

    Integer keys and date-string keys are passed to
    ``_get_prediction_index`` (in-sample, negative and out-of-sample), and
    afterwards ``_get_index_loc`` / ``_get_index_label_loc`` are probed.
    """
    # Date-based index: monthly, 1950-01, 1950-02, ...
    endog = dta[2].copy()
    endog.index = date_indexes[2][0]
    mod = tsa_model.TimeSeriesModel(endog)

    # ---- Integer keys ----

    # Basic prediction: [0, end]; the index is the date index
    start, end, out_of_sample, prediction_index = (
        mod._get_prediction_index(0, None))
    assert_equal((start, end, out_of_sample), (0, nobs - 1, 0))
    assert type(prediction_index) is type(endog.index)  # noqa: E721
    assert prediction_index.equals(mod._index)

    # In-sample prediction: [0, 3]; the index is a subset of the date index
    start, end, out_of_sample, prediction_index = (
        mod._get_prediction_index(0, 3))
    assert_equal((start, end, out_of_sample), (0, 3, 0))
    assert type(prediction_index) is type(endog.index)  # noqa: E721
    assert prediction_index.equals(mod._index[:4])

    # Negative index: [-2, end]
    start, end, out_of_sample, prediction_index = (
        mod._get_prediction_index(-2, -1))
    assert_equal((start, end, out_of_sample), (3, 4, 0))
    assert type(prediction_index) is type(endog.index)  # noqa: E721
    assert prediction_index.equals(mod._index[3:])

    # Forecasting: [1, 5]; the index is an extended version of the date index
    start, end, out_of_sample, prediction_index = (
        mod._get_prediction_index(1, nobs))
    assert_equal((start, end, out_of_sample), (1, 4, 1))
    assert prediction_index.equals(
        pd.date_range(start='1950-02', periods=5, freq='M'))

    # ---- Date-based keys ----

    # In-sample prediction (equivalent to [1, 3])
    start, end, out_of_sample, prediction_index = (
        mod._get_prediction_index('1950-02', '1950-04'))
    assert_equal((start, end, out_of_sample), (1, 3, 0))
    assert type(prediction_index) is type(endog.index)  # noqa: E721
    assert prediction_index.equals(mod._index[1:4])

    # Out-of-sample forecasting (equivalent to [0, 5])
    start, end, out_of_sample, prediction_index = (
        mod._get_prediction_index('1950-01', '1950-08'))
    assert_equal((start, end, out_of_sample), (0, 4, 3))
    assert prediction_index.equals(
        pd.date_range(start='1950-01', periods=8, freq='M'))

    # ---- Index locations ----

    # A location that exists in the (internal) index
    loc, index, index_was_expanded = mod._get_index_loc(2)
    assert_equal((loc, index_was_expanded), (2, False))
    assert index.equals(pd.date_range(start='1950-01', periods=3, freq='M'))

    # Same location, via the function that alternatively falls back to the
    # row labels
    loc, index, index_was_expanded = mod._get_index_label_loc(2)
    assert_equal((loc, index_was_expanded), (2, False))
    assert index.equals(pd.date_range(start='1950-01', periods=3, freq='M'))

    # A location that exists in the given (unsupported) index; note that the
    # returned index is now like the row labels
    loc, index, index_was_expanded = mod._get_index_label_loc('1950-03')
    assert_equal((loc, index_was_expanded), (slice(2, 3, None), False))
    assert index.equals(mod.data.row_labels[:3])


def test_prediction_increment_pandas_dates_nanosecond():
    """Check prediction indexes built from a nanosecond-frequency index.

    Covers basic prediction, negative keys, integer forecasting and
    forecasting with ``pd.Timestamp`` keys.
    """
    # Date-based index
    endog = dta[2].copy()
    endog.index = pd.date_range(
        start='1970-01-01', periods=len(endog), freq='N')
    mod = tsa_model.TimeSeriesModel(endog)

    # Basic prediction: [0, end]; the index is the date index
    start, end, out_of_sample, prediction_index = (
        mod._get_prediction_index(0, None))
    assert_equal((start, end, out_of_sample), (0, nobs - 1, 0))
    assert type(prediction_index) is type(endog.index)  # noqa: E721
    assert prediction_index.equals(mod._index)

    # Negative index: [-2, end]
    start, end, out_of_sample, prediction_index = (
        mod._get_prediction_index(-2, -1))
    assert_equal((start, end, out_of_sample), (3, 4, 0))
    assert type(prediction_index) is type(endog.index)  # noqa: E721
    assert prediction_index.equals(mod._index[3:])

    # Forecasting: [1, 5]; the index is an extended version of the date index
    start, end, out_of_sample, prediction_index = (
        mod._get_prediction_index(1, nobs))
    assert_equal((start, end, out_of_sample), (1, 4, 1))
    assert prediction_index.equals(
        pd.date_range(start='1970-01-01', periods=6, freq='N')[1:])

    # Date-based keys: the end key is the start key's integer value plus 7
    first_stamp = pd.Timestamp('1970-01-01')
    last_stamp = pd.Timestamp(first_stamp.value + 7)
    start, end, out_of_sample, prediction_index = (
        mod._get_prediction_index(first_stamp, last_stamp))
    assert_equal((start, end, out_of_sample), (0, 4, 3))
    assert prediction_index.equals(
        pd.date_range(start='1970-01-01', periods=8, freq='N'))


def test_range_index():
    """Constructing a model on a default ``RangeIndex`` emits no warning."""
    # Start from an empty warning registry on the module under test
    # (presumably so warnings recorded by earlier tests cannot interfere).
    tsa_model.__warningregistry__ = {}

    endog = pd.Series(np.random.normal(size=5))
    assert isinstance(endog.index, pd.RangeIndex)

    # Warning should not be given
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter('always')
        # Only the construction side effect (warnings) matters here, so the
        # model instance is deliberately not kept.
        tsa_model.TimeSeriesModel(endog)
        assert_equal(len(w), 0)


def test_prediction_rangeindex():
    """Check prediction indexes built from a unit-step ``RangeIndex``.

    Three common use cases are covered: basic prediction, negative keys and
    out-of-sample keys.
    """
    # NOTE(review): the expectations below imply this fixture is
    # RangeIndex(-5, 0); confirm against `supported_increment_indexes`.
    endog = pd.Series(dta[0], index=supported_increment_indexes[2][0])
    mod = tsa_model.TimeSeriesModel(endog)

    cases = [
        # (start_key, end_key, (start, end, out_of_sample), desired index)
        # Basic prediction: [0, end]
        (0, None, (0, nobs - 1, 0), pd.RangeIndex(-5, 0, 1)),
        # Negative index: [-2, end]
        (-2, -1, (3, 4, 0), pd.RangeIndex(-2, 0, 1)),
        # Forecasting: [1, 5]
        (1, nobs, (1, 4, 1), pd.RangeIndex(-4, 1, 1)),
    ]
    for start_key, end_key, expected, desired_index in cases:
        start, end, out_of_sample, prediction_index = (
            mod._get_prediction_index(start_key, end_key))
        assert_equal((start, end, out_of_sample), expected)
        assert prediction_index.equals(desired_index)


def test_prediction_rangeindex_withstep():
    """Check prediction indexes built from a ``RangeIndex`` with step 6.

    Three common use cases are covered (basic prediction, negative keys and
    out-of-sample keys), followed by a lookup of an existing location.
    """
    # NOTE(review): the expectations below imply this fixture is
    # RangeIndex(0, nobs * 6, 6); confirm against
    # `supported_increment_indexes`.
    step = 6
    endog = pd.Series(dta[0], index=supported_increment_indexes[3][0])
    mod = tsa_model.TimeSeriesModel(endog)

    cases = [
        # (start_key, end_key, (start, end, out_of_sample), desired index)
        # Basic prediction: [0, end]
        (0, None, (0, nobs - 1, 0), pd.RangeIndex(0, nobs * step, step)),
        # Negative index: [-2, end]
        (-2, -1, (3, 4, 0), pd.RangeIndex(3 * step, nobs * step, step)),
        # Forecasting: [1, 5]
        (1, nobs, (1, 4, 1),
         pd.RangeIndex(1 * step, (nobs + 1) * step, step)),
    ]
    for start_key, end_key, expected, desired_index in cases:
        start, end, out_of_sample, prediction_index = (
            mod._get_prediction_index(start_key, end_key))
        assert_equal((start, end, out_of_sample), expected)
        assert prediction_index.equals(desired_index)

    # Test getting a location that exists in the index
    loc, index, index_was_expanded = mod._get_index_loc(2)
    assert_equal((loc, index_was_expanded), (2, False))
    assert index.equals(pd.RangeIndex(0, 3 * step, step))


def test_custom_index():
    """Check behaviour when the data carries an unsupported (string) index.

    The model is expected to warn at construction, use the row labels (or a
    user-supplied ``index``) for in-sample prediction output, resolve
    labels only through ``_get_index_label_loc``, and warn again when
    asked for an out-of-sample prediction index without a custom ``index``.
    """
    tsa_model.__warningregistry__ = {}

    endog = pd.Series(np.random.normal(size=5),
                      index=['a', 'b', 'c', 'd', 'e'])
    message = ('An unsupported index was provided and will be ignored when'
               ' e.g. forecasting.')
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter('always')

        mod = tsa_model.TimeSeriesModel(endog)
        assert_equal(str(w[0].message), message)
    start_key = -2
    end_key = -1
    start, end, out_of_sample, prediction_index = (
        mod._get_prediction_index(start_key, end_key))

    # Test the default output index
    assert_equal(prediction_index.equals(pd.Index(['d', 'e'])), True)

    # Test custom output index
    start, end, out_of_sample, prediction_index = (
        mod._get_prediction_index(start_key, end_key, index=['f', 'g']))
    assert_equal(prediction_index.equals(pd.Index(['f', 'g'])), True)

    # Test getting a location in the index w/o fallback to row labels
    loc, index, index_was_expanded = mod._get_index_loc(2)
    assert_equal(loc, 2)
    assert_equal(index.equals(pd.RangeIndex(0, 3)), True)
    assert_equal(index_was_expanded, False)

    # Test getting an invalid location in the index w/o fallback to row
    # labels: a row label is not a valid key for `_get_index_loc`
    with pytest.raises(KeyError):
        mod._get_index_loc('c')

    # Test getting a location in the index w/ fallback to row labels
    loc, index, index_was_expanded = mod._get_index_label_loc('c')
    assert_equal(loc, 2)
    assert_equal(index.equals(pd.Index(['a', 'b', 'c'])), True)
    assert_equal(index_was_expanded, False)

    # Test getting an invalid location in the index w/ fallback to row labels
    with pytest.raises(KeyError):
        mod._get_index_label_loc('aa')

    # Test out-of-sample
    start_key = 4
    end_key = 5
    message = ('No supported index is available.'
               ' Prediction results will be given with'
               ' an integer index beginning at `start`.')
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter('always')

        start, end, out_of_sample, prediction_index = (
            mod._get_prediction_index(start_key, end_key))
        assert_equal(prediction_index.equals(pd.Index([4, 5])), True)
        assert_equal(str(w[0].message), message)

    # Test out-of-sample custom index
    start, end, out_of_sample, prediction_index = (
        mod._get_prediction_index(start_key, end_key, index=['f', 'g']))
    assert_equal(prediction_index.equals(pd.Index(['f', 'g'])), True)

    # Test invalid custom index (three labels for the two-step range [4, 5])
    with pytest.raises(ValueError):
        mod._get_prediction_index(start_key, end_key, index=['f', 'g', 'h'])


def test_nonmonotonic_periodindex():
    """Warn when the provided period index is not monotonic."""
    # Two back-to-back copies of the annual range 2000-2002 give an index
    # that is not monotonic
    years = pd.period_range(start=2000, end=2002, freq='A')
    index = years.tolist() * 2
    endog = pd.Series(np.zeros(len(index)), index=index)

    expected_warning = (
        'A date index has been provided, but it is not monotonic and so'
        ' will be ignored when e.g. forecasting.')
    with pytest.warns(ValueWarning, match=expected_warning):
        tsa_model.TimeSeriesModel(endog)


@pytest.mark.xfail(
    reason='Pandas PeriodIndex.is_full does not yet work for all'
           ' frequencies (e.g. frequencies with a multiplier, like "2Q").')
def test_nonfull_periodindex():
    """Warn when the provided period index has gaps (expected to fail)."""
    # Monthly periods with 2000-02 missing, i.e. the index is not "full"
    gapped = pd.PeriodIndex(['2000-01', '2000-03'], freq='M')
    endog = pd.Series(np.zeros(len(gapped)), index=gapped)

    expected_warning = (
        'A Period index has been provided, but it is not full and so'
        ' will be ignored when e.g. forecasting.')
    with pytest.warns(ValueWarning, match=expected_warning):
        tsa_model.TimeSeriesModel(endog)


def test_get_index_loc_quarterly():
    """Quarter-string keys past the sample end resolve correctly (GH#6339)."""
    n_quarters = 8
    quarter_starts = pd.date_range('2000Q1', periods=n_quarters, freq='QS')
    endog = pd.Series(np.zeros(n_quarters), index=quarter_starts)
    mod = tsa_model.TimeSeriesModel(endog)

    # '2003Q2' lies beyond the eight quarters of data starting at 2000Q1
    key = '2003Q2'
    loc, index, _ = mod._get_index_loc(key)
    assert_equal(index[loc], pd.Timestamp(key))
alkaline-ml / statsmodels python

Version: 0.11.1

/ tsa / base / tests / test_tsa_indexes.py

Products

About

Resources

Contact Gemfury