Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

agriconnect / pandas   python

Repository URL to install this package:

Version: 0.24.2 

/ tests / indexes / interval / test_interval_range.py

from __future__ import division

from datetime import timedelta

import numpy as np
import pytest

from pandas.core.dtypes.common import is_integer

from pandas import (
    DateOffset, Interval, IntervalIndex, Timedelta, Timestamp, date_range,
    interval_range, timedelta_range)
import pandas.util.testing as tm

from pandas.tseries.offsets import Day


@pytest.fixture(scope='class', params=[None, 'foo'])
def name(request):
    return request.param


class TestIntervalRange(object):

    @pytest.mark.parametrize('freq, periods', [
        (1, 100), (2.5, 40), (5, 20), (25, 4)])
    def test_constructor_numeric(self, closed, name, freq, periods):
        start, end = 0, 100
        breaks = np.arange(101, step=freq)
        expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)

        # defined from start/end/freq
        result = interval_range(
            start=start, end=end, freq=freq, name=name, closed=closed)
        tm.assert_index_equal(result, expected)

        # defined from start/periods/freq
        result = interval_range(
            start=start, periods=periods, freq=freq, name=name, closed=closed)
        tm.assert_index_equal(result, expected)

        # defined from end/periods/freq
        result = interval_range(
            end=end, periods=periods, freq=freq, name=name, closed=closed)
        tm.assert_index_equal(result, expected)

        # GH 20976: linspace behavior defined from start/end/periods
        result = interval_range(
            start=start, end=end, periods=periods, name=name, closed=closed)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize('tz', [None, 'US/Eastern'])
    @pytest.mark.parametrize('freq, periods', [
        ('D', 364), ('2D', 182), ('22D18H', 16), ('M', 11)])
    def test_constructor_timestamp(self, closed, name, freq, periods, tz):
        start, end = Timestamp('20180101', tz=tz), Timestamp('20181231', tz=tz)
        breaks = date_range(start=start, end=end, freq=freq)
        expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)

        # defined from start/end/freq
        result = interval_range(
            start=start, end=end, freq=freq, name=name, closed=closed)
        tm.assert_index_equal(result, expected)

        # defined from start/periods/freq
        result = interval_range(
            start=start, periods=periods, freq=freq, name=name, closed=closed)
        tm.assert_index_equal(result, expected)

        # defined from end/periods/freq
        result = interval_range(
            end=end, periods=periods, freq=freq, name=name, closed=closed)
        tm.assert_index_equal(result, expected)

        # GH 20976: linspace behavior defined from start/end/periods
        if not breaks.freq.isAnchored() and tz is None:
            # matches expected only for non-anchored offsets and tz naive
            # (anchored/DST transitions cause unequal spacing in expected)
            result = interval_range(start=start, end=end, periods=periods,
                                    name=name, closed=closed)
            tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize('freq, periods', [
        ('D', 100), ('2D12H', 40), ('5D', 20), ('25D', 4)])
    def test_constructor_timedelta(self, closed, name, freq, periods):
        start, end = Timedelta('0 days'), Timedelta('100 days')
        breaks = timedelta_range(start=start, end=end, freq=freq)
        expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)

        # defined from start/end/freq
        result = interval_range(
            start=start, end=end, freq=freq, name=name, closed=closed)
        tm.assert_index_equal(result, expected)

        # defined from start/periods/freq
        result = interval_range(
            start=start, periods=periods, freq=freq, name=name, closed=closed)
        tm.assert_index_equal(result, expected)

        # defined from end/periods/freq
        result = interval_range(
            end=end, periods=periods, freq=freq, name=name, closed=closed)
        tm.assert_index_equal(result, expected)

        # GH 20976: linspace behavior defined from start/end/periods
        result = interval_range(
            start=start, end=end, periods=periods, name=name, closed=closed)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize('start, end, freq, expected_endpoint', [
        (0, 10, 3, 9),
        (0, 10, 1.5, 9),
        (0.5, 10, 3, 9.5),
        (Timedelta('0D'), Timedelta('10D'), '2D4H', Timedelta('8D16H')),
        (Timestamp('2018-01-01'),
         Timestamp('2018-02-09'),
         'MS',
         Timestamp('2018-02-01')),
        (Timestamp('2018-01-01', tz='US/Eastern'),
         Timestamp('2018-01-20', tz='US/Eastern'),
         '5D12H',
         Timestamp('2018-01-17 12:00:00', tz='US/Eastern'))])
    def test_early_truncation(self, start, end, freq, expected_endpoint):
        # index truncates early if freq causes end to be skipped
        result = interval_range(start=start, end=end, freq=freq)
        result_endpoint = result.right[-1]
        assert result_endpoint == expected_endpoint

    @pytest.mark.parametrize('start, end, freq', [
        (0.5, None, None),
        (None, 4.5, None),
        (0.5, None, 1.5),
        (None, 6.5, 1.5)])
    def test_no_invalid_float_truncation(self, start, end, freq):
        # GH 21161
        if freq is None:
            breaks = [0.5, 1.5, 2.5, 3.5, 4.5]
        else:
            breaks = [0.5, 2.0, 3.5, 5.0, 6.5]
        expected = IntervalIndex.from_breaks(breaks)

        result = interval_range(start=start, end=end, periods=4, freq=freq)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize('start, mid, end', [
        (Timestamp('2018-03-10', tz='US/Eastern'),
         Timestamp('2018-03-10 23:30:00', tz='US/Eastern'),
         Timestamp('2018-03-12', tz='US/Eastern')),
        (Timestamp('2018-11-03', tz='US/Eastern'),
         Timestamp('2018-11-04 00:30:00', tz='US/Eastern'),
         Timestamp('2018-11-05', tz='US/Eastern'))])
    def test_linspace_dst_transition(self, start, mid, end):
        # GH 20976: linspace behavior defined from start/end/periods
        # accounts for the hour gained/lost during DST transition
        result = interval_range(start=start, end=end, periods=2)
        expected = IntervalIndex.from_breaks([start, mid, end])
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize('freq', [2, 2.0])
    @pytest.mark.parametrize('end', [10, 10.0])
    @pytest.mark.parametrize('start', [0, 0.0])
    def test_float_subtype(self, start, end, freq):
        # Has float subtype if any of start/end/freq are float, even if all
        # resulting endpoints can safely be upcast to integers

        # defined from start/end/freq
        index = interval_range(start=start, end=end, freq=freq)
        result = index.dtype.subtype
        expected = 'int64' if is_integer(start + end + freq) else 'float64'
        assert result == expected

        # defined from start/periods/freq
        index = interval_range(start=start, periods=5, freq=freq)
        result = index.dtype.subtype
        expected = 'int64' if is_integer(start + freq) else 'float64'
        assert result == expected

        # defined from end/periods/freq
        index = interval_range(end=end, periods=5, freq=freq)
        result = index.dtype.subtype
        expected = 'int64' if is_integer(end + freq) else 'float64'
        assert result == expected

        # GH 20976: linspace behavior defined from start/end/periods
        index = interval_range(start=start, end=end, periods=5)
        result = index.dtype.subtype
        expected = 'int64' if is_integer(start + end) else 'float64'
        assert result == expected

    def test_constructor_coverage(self):
        # float value for periods
        expected = interval_range(start=0, periods=10)
        result = interval_range(start=0, periods=10.5)
        tm.assert_index_equal(result, expected)

        # equivalent timestamp-like start/end
        start, end = Timestamp('2017-01-01'), Timestamp('2017-01-15')
        expected = interval_range(start=start, end=end)

        result = interval_range(start=start.to_pydatetime(),
                                end=end.to_pydatetime())
        tm.assert_index_equal(result, expected)

        result = interval_range(start=start.asm8, end=end.asm8)
        tm.assert_index_equal(result, expected)

        # equivalent freq with timestamp
        equiv_freq = ['D', Day(), Timedelta(days=1), timedelta(days=1),
                      DateOffset(days=1)]
        for freq in equiv_freq:
            result = interval_range(start=start, end=end, freq=freq)
            tm.assert_index_equal(result, expected)

        # equivalent timedelta-like start/end
        start, end = Timedelta(days=1), Timedelta(days=10)
        expected = interval_range(start=start, end=end)

        result = interval_range(start=start.to_pytimedelta(),
                                end=end.to_pytimedelta())
        tm.assert_index_equal(result, expected)

        result = interval_range(start=start.asm8, end=end.asm8)
        tm.assert_index_equal(result, expected)

        # equivalent freq with timedelta
        equiv_freq = ['D', Day(), Timedelta(days=1), timedelta(days=1)]
        for freq in equiv_freq:
            result = interval_range(start=start, end=end, freq=freq)
            tm.assert_index_equal(result, expected)

    def test_errors(self):
        # not enough params
        msg = ('Of the four parameters: start, end, periods, and freq, '
               'exactly three must be specified')

        with pytest.raises(ValueError, match=msg):
            interval_range(start=0)

        with pytest.raises(ValueError, match=msg):
            interval_range(end=5)

        with pytest.raises(ValueError, match=msg):
            interval_range(periods=2)

        with pytest.raises(ValueError, match=msg):
            interval_range()

        # too many params
        with pytest.raises(ValueError, match=msg):
            interval_range(start=0, end=5, periods=6, freq=1.5)

        # mixed units
        msg = 'start, end, freq need to be type compatible'
        with pytest.raises(TypeError, match=msg):
            interval_range(start=0, end=Timestamp('20130101'), freq=2)

        with pytest.raises(TypeError, match=msg):
            interval_range(start=0, end=Timedelta('1 day'), freq=2)

        with pytest.raises(TypeError, match=msg):
            interval_range(start=0, end=10, freq='D')

        with pytest.raises(TypeError, match=msg):
            interval_range(start=Timestamp('20130101'), end=10, freq='D')

        with pytest.raises(TypeError, match=msg):
            interval_range(start=Timestamp('20130101'),
                           end=Timedelta('1 day'), freq='D')

        with pytest.raises(TypeError, match=msg):
            interval_range(start=Timestamp('20130101'),
                           end=Timestamp('20130110'), freq=2)

        with pytest.raises(TypeError, match=msg):
            interval_range(start=Timedelta('1 day'), end=10, freq='D')

        with pytest.raises(TypeError, match=msg):
            interval_range(start=Timedelta('1 day'),
                           end=Timestamp('20130110'), freq='D')

        with pytest.raises(TypeError, match=msg):
            interval_range(start=Timedelta('1 day'),
                           end=Timedelta('10 days'), freq=2)

        # invalid periods
        msg = 'periods must be a number, got foo'
        with pytest.raises(TypeError, match=msg):
            interval_range(start=0, periods='foo')

        # invalid start
        msg = 'start must be numeric or datetime-like, got foo'
        with pytest.raises(ValueError, match=msg):
            interval_range(start='foo', periods=10)

        # invalid end
        msg = r'end must be numeric or datetime-like, got \(0, 1\]'
        with pytest.raises(ValueError, match=msg):
            interval_range(end=Interval(0, 1), periods=10)

        # invalid freq for datetime-like
        msg = 'freq must be numeric or convertible to DateOffset, got foo'
        with pytest.raises(ValueError, match=msg):
            interval_range(start=0, end=10, freq='foo')

        with pytest.raises(ValueError, match=msg):
            interval_range(start=Timestamp('20130101'), periods=10, freq='foo')

        with pytest.raises(ValueError, match=msg):
            interval_range(end=Timedelta('1 day'), periods=10, freq='foo')

        # mixed tz
        start = Timestamp('2017-01-01', tz='US/Eastern')
        end = Timestamp('2017-01-07', tz='US/Pacific')
        msg = 'Start and end cannot both be tz-aware with different timezones'
        with pytest.raises(TypeError, match=msg):
            interval_range(start=start, end=end)