# Why Gemfury? Push, build, and install: RubyGems, npm packages, Python packages, Maven artifacts, PHP packages, Go Modules, Bower components, Debian packages, RPM packages, NuGet packages
#
# agriconnect / pandas   python
#
# Repository URL to install this package:
#
# Version: 0.24.2
#
# / tests / indexes / period / test_setops.py

import numpy as np
import pytest

import pandas as pd
from pandas import Index, PeriodIndex, date_range, period_range
import pandas.core.indexes.period as period
import pandas.util.testing as tm


def _permute(obj):
    return obj.take(np.random.permutation(len(obj)))


class TestPeriodIndex(object):
    """Join and set-operation tests for ``PeriodIndex``.

    Covers ``join``, ``union``, ``intersection`` and ``difference``.
    Methods taking ``sort`` are parametrized over ``None`` and ``False``;
    ``join_type`` is not defined in this file and is presumably supplied
    by a pytest fixture.
    """

    def test_joins(self, join_type):
        """Joining with a shorter slice keeps the type and the freq."""
        index = period_range('1/1/2000', '1/20/2000', freq='D')

        joined = index.join(index[:-5], how=join_type)

        assert isinstance(joined, PeriodIndex)
        assert joined.freq == index.freq

    def test_join_self(self, join_type):
        """Joining an index with itself returns the very same object."""
        index = period_range('1/1/2000', '1/20/2000', freq='D')

        res = index.join(index, how=join_type)
        assert index is res

    def test_join_does_not_recur(self):
        """Outer-join a 'dt' row index with a 'p' column index.

        The expected result is an object-dtype ``Index`` holding the two
        row labels followed by the two column labels.
        """
        df = tm.makeCustomDataframe(
            3, 2, data_gen_f=lambda *args: np.random.randint(2),
            c_idx_type='p', r_idx_type='dt')
        s = df.iloc[:2, 0]

        res = s.index.join(df.columns, how='outer')
        expected = Index([s.index[0], s.index[1],
                          df.columns[0], df.columns[1]], object)
        tm.assert_index_equal(res, expected)

    @pytest.mark.parametrize("sort", [None, False])
    def test_union(self, sort):
        """Check ``union`` over several frequencies and input orderings.

        Each ``(rng, other, expected)`` triple is checked in turn; when
        ``sort`` is ``None`` the expected index is sorted before comparing.
        """
        # union
        other1 = pd.period_range('1/1/2000', freq='D', periods=5)
        rng1 = pd.period_range('1/6/2000', freq='D', periods=5)
        expected1 = pd.period_range('1/1/2000', freq='D', periods=10)

        rng2 = pd.period_range('1/1/2000', freq='D', periods=5)
        other2 = pd.period_range('1/4/2000', freq='D', periods=5)
        expected2 = pd.period_range('1/1/2000', freq='D', periods=8)

        rng3 = pd.period_range('1/1/2000', freq='D', periods=5)
        other3 = pd.PeriodIndex([], freq='D')
        expected3 = pd.period_range('1/1/2000', freq='D', periods=5)

        rng4 = pd.period_range('2000-01-01 09:00', freq='H', periods=5)
        other4 = pd.period_range('2000-01-02 09:00', freq='H', periods=5)
        expected4 = pd.PeriodIndex(['2000-01-01 09:00', '2000-01-01 10:00',
                                    '2000-01-01 11:00', '2000-01-01 12:00',
                                    '2000-01-01 13:00', '2000-01-02 09:00',
                                    '2000-01-02 10:00', '2000-01-02 11:00',
                                    '2000-01-02 12:00', '2000-01-02 13:00'],
                                   freq='H')

        rng5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:03',
                               '2000-01-01 09:05'], freq='T')
        # Every timestamp must be its own list element: without a comma
        # between them, adjacent string literals are concatenated into a
        # single (malformed) string.
        other5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:05',
                                 '2000-01-01 09:08'],
                                freq='T')
        expected5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:03',
                                    '2000-01-01 09:05', '2000-01-01 09:08'],
                                   freq='T')

        rng6 = pd.period_range('2000-01-01', freq='M', periods=7)
        other6 = pd.period_range('2000-04-01', freq='M', periods=7)
        expected6 = pd.period_range('2000-01-01', freq='M', periods=10)

        rng7 = pd.period_range('2003-01-01', freq='A', periods=5)
        other7 = pd.period_range('1998-01-01', freq='A', periods=8)
        expected7 = pd.period_range('1998-01-01', freq='A', periods=10)

        rng8 = pd.PeriodIndex(['1/3/2000', '1/2/2000', '1/1/2000',
                               '1/5/2000', '1/4/2000'], freq='D')
        other8 = pd.period_range('1/6/2000', freq='D', periods=5)
        expected8 = pd.PeriodIndex(['1/3/2000', '1/2/2000', '1/1/2000',
                                    '1/5/2000', '1/4/2000', '1/6/2000',
                                    '1/7/2000', '1/8/2000', '1/9/2000',
                                    '1/10/2000'], freq='D')

        for rng, other, expected in [(rng1, other1, expected1),
                                     (rng2, other2, expected2),
                                     (rng3, other3, expected3),
                                     (rng4, other4, expected4),
                                     (rng5, other5, expected5),
                                     (rng6, other6, expected6),
                                     (rng7, other7, expected7),
                                     (rng8, other8, expected8)]:

            result_union = rng.union(other, sort=sort)
            if sort is None:
                expected = expected.sort_values()
            tm.assert_index_equal(result_union, expected)

    @pytest.mark.parametrize("sort", [None, False])
    def test_union_misc(self, sort):
        """Union of overlapping slices, plus incompatible-input errors.

        Also checks that ``union`` and ``join`` raise for mismatched
        frequencies and that ``join`` rejects a non-period index.
        """
        index = period_range('1/1/2000', '1/20/2000', freq='D')

        result = index[:-5].union(index[10:], sort=sort)
        tm.assert_index_equal(result, index)

        # not in order
        result = _permute(index[:-5]).union(_permute(index[10:]), sort=sort)
        if sort is None:
            # only the sorted result has a predictable element order
            tm.assert_index_equal(result, index)
        assert tm.equalContents(result, index)

        # raise if different frequencies
        index = period_range('1/1/2000', '1/20/2000', freq='D')
        index2 = period_range('1/1/2000', '1/20/2000', freq='W-WED')
        with pytest.raises(period.IncompatibleFrequency):
            index.union(index2, sort=sort)

        msg = 'can only call with other PeriodIndex-ed objects'
        with pytest.raises(ValueError, match=msg):
            index.join(index.to_timestamp())

        index3 = period_range('1/1/2000', '1/20/2000', freq='2D')
        with pytest.raises(period.IncompatibleFrequency):
            index.join(index3)

    def test_union_dataframe_index(self):
        """A DataFrame built from two monthly Series spans both ranges."""
        rng1 = pd.period_range('1/1/1999', '1/1/2012', freq='M')
        s1 = pd.Series(np.random.randn(len(rng1)), rng1)

        rng2 = pd.period_range('1/1/1980', '12/1/2001', freq='M')
        s2 = pd.Series(np.random.randn(len(rng2)), rng2)
        df = pd.DataFrame({'s1': s1, 's2': s2})

        exp = pd.period_range('1/1/1980', '1/1/2012', freq='M')
        tm.assert_index_equal(df.index, exp)

    @pytest.mark.parametrize("sort", [None, False])
    def test_intersection(self, sort):
        """Intersection of overlapping slices, plus frequency errors."""
        index = period_range('1/1/2000', '1/20/2000', freq='D')

        result = index[:-5].intersection(index[10:], sort=sort)
        tm.assert_index_equal(result, index[10:-5])

        # not in order
        left = _permute(index[:-5])
        right = _permute(index[10:])
        result = left.intersection(right, sort=sort)
        if sort is None:
            # only the sorted result has a predictable element order
            tm.assert_index_equal(result, index[10:-5])
        assert tm.equalContents(result, index[10:-5])

        # raise if different frequencies
        index = period_range('1/1/2000', '1/20/2000', freq='D')
        index2 = period_range('1/1/2000', '1/20/2000', freq='W-WED')
        with pytest.raises(period.IncompatibleFrequency):
            index.intersection(index2, sort=sort)

        index3 = period_range('1/1/2000', '1/20/2000', freq='2D')
        with pytest.raises(period.IncompatibleFrequency):
            index.intersection(index3, sort=sort)

    @pytest.mark.parametrize("sort", [None, False])
    def test_intersection_cases(self, sort):
        """Check name and freq handling of ``intersection``.

        Runs the same three cases against a monotonic base and then a
        non-monotonic base, followed by intersections with an empty
        slice.
        """
        base = period_range('6/1/2000', '6/30/2000', freq='D', name='idx')

        # if target has the same name, it is preserved
        rng2 = period_range('5/15/2000', '6/20/2000', freq='D', name='idx')
        expected2 = period_range('6/1/2000', '6/20/2000', freq='D',
                                 name='idx')

        # if target name is different, it will be reset
        rng3 = period_range('5/15/2000', '6/20/2000', freq='D', name='other')
        expected3 = period_range('6/1/2000', '6/20/2000', freq='D',
                                 name=None)

        # no overlap with base: the result is empty
        rng4 = period_range('7/1/2000', '7/31/2000', freq='D', name='idx')
        expected4 = PeriodIndex([], name='idx', freq='D')

        for (rng, expected) in [(rng2, expected2), (rng3, expected3),
                                (rng4, expected4)]:
            result = base.intersection(rng, sort=sort)
            tm.assert_index_equal(result, expected)
            assert result.name == expected.name
            assert result.freq == expected.freq

        # non-monotonic
        base = PeriodIndex(['2011-01-05', '2011-01-04', '2011-01-02',
                            '2011-01-03'], freq='D', name='idx')

        rng2 = PeriodIndex(['2011-01-04', '2011-01-02',
                            '2011-02-02', '2011-02-03'],
                           freq='D', name='idx')
        expected2 = PeriodIndex(['2011-01-04', '2011-01-02'], freq='D',
                                name='idx')

        rng3 = PeriodIndex(['2011-01-04', '2011-01-02', '2011-02-02',
                            '2011-02-03'],
                           freq='D', name='other')
        expected3 = PeriodIndex(['2011-01-04', '2011-01-02'], freq='D',
                                name=None)

        rng4 = period_range('7/1/2000', '7/31/2000', freq='D', name='idx')
        expected4 = PeriodIndex([], freq='D', name='idx')

        for (rng, expected) in [(rng2, expected2), (rng3, expected3),
                                (rng4, expected4)]:
            result = base.intersection(rng, sort=sort)
            if sort is None:
                expected = expected.sort_values()
            tm.assert_index_equal(result, expected)
            assert result.name == expected.name
            assert result.freq == 'D'

        # empty same freq
        # NOTE(review): this section builds its index with date_range, not
        # period_range, and does not pass ``sort`` -- confirm that is
        # intended in a PeriodIndex test.
        rng = date_range('6/1/2000', '6/15/2000', freq='T')
        result = rng[0:0].intersection(rng)
        assert len(result) == 0

        result = rng.intersection(rng[0:0])
        assert len(result) == 0

    @pytest.mark.parametrize("sort", [None, False])
    def test_difference(self, sort):
        """Check ``difference`` over several frequencies.

        The left-hand indexes are built from unsorted labels; when
        ``sort`` is ``None`` the expected index is sorted before
        comparing.
        """
        # diff
        period_rng = ['1/3/2000', '1/2/2000', '1/1/2000', '1/5/2000',
                      '1/4/2000']
        rng1 = pd.PeriodIndex(period_rng, freq='D')
        other1 = pd.period_range('1/6/2000', freq='D', periods=5)
        expected1 = rng1

        rng2 = pd.PeriodIndex(period_rng, freq='D')
        other2 = pd.period_range('1/4/2000', freq='D', periods=5)
        expected2 = pd.PeriodIndex(['1/3/2000', '1/2/2000', '1/1/2000'],
                                   freq='D')

        rng3 = pd.PeriodIndex(period_rng, freq='D')
        other3 = pd.PeriodIndex([], freq='D')
        expected3 = rng3

        period_rng = ['2000-01-01 10:00', '2000-01-01 09:00',
                      '2000-01-01 12:00', '2000-01-01 11:00',
                      '2000-01-01 13:00']
        rng4 = pd.PeriodIndex(period_rng, freq='H')
        other4 = pd.period_range('2000-01-02 09:00', freq='H', periods=5)
        expected4 = rng4

        rng5 = pd.PeriodIndex(['2000-01-01 09:03', '2000-01-01 09:01',
                               '2000-01-01 09:05'], freq='T')
        other5 = pd.PeriodIndex(
            ['2000-01-01 09:01', '2000-01-01 09:05'], freq='T')
        expected5 = pd.PeriodIndex(['2000-01-01 09:03'], freq='T')

        period_rng = ['2000-02-01', '2000-01-01', '2000-06-01',
                      '2000-07-01', '2000-05-01', '2000-03-01',
                      '2000-04-01']
        rng6 = pd.PeriodIndex(period_rng, freq='M')
        other6 = pd.period_range('2000-04-01', freq='M', periods=7)
        expected6 = pd.PeriodIndex(['2000-02-01', '2000-01-01', '2000-03-01'],
                                   freq='M')

        period_rng = ['2003', '2007', '2006', '2005', '2004']
        rng7 = pd.PeriodIndex(period_rng, freq='A')
        other7 = pd.period_range('1998-01-01', freq='A', periods=8)
        expected7 = pd.PeriodIndex(['2007', '2006'], freq='A')

        for rng, other, expected in [(rng1, other1, expected1),
                                     (rng2, other2, expected2),
                                     (rng3, other3, expected3),
                                     (rng4, other4, expected4),
                                     (rng5, other5, expected5),
                                     (rng6, other6, expected6),
                                     (rng7, other7, expected7), ]:
            result_difference = rng.difference(other, sort=sort)
            if sort is None:
                expected = expected.sort_values()
            tm.assert_index_equal(result_difference, expected)