Why Gemfury? Push, build, and install: RubyGems, npm packages, Python packages, Maven artifacts, PHP packages, Go Modules, Bower components, Debian packages, RPM packages, NuGet packages

aaronreidsmith / pandas   python

Repository URL to install this package:

Version: 0.25.3 

/ tests / indexes / period / test_setops.py

import numpy as np
import pytest

import pandas as pd
from pandas import Index, PeriodIndex, date_range, period_range
import pandas.core.indexes.period as period
import pandas.util.testing as tm


def _permute(obj):
    """Return ``obj`` reordered by a random permutation of its positions."""
    shuffled_positions = np.random.permutation(len(obj))
    return obj.take(shuffled_positions)


class TestPeriodIndex:
    def test_joins(self, join_type):
        """Joining a daily range with a truncated copy keeps type and freq."""
        full_range = period_range("1/1/2000", "1/20/2000", freq="D")
        truncated = full_range[:-5]

        joined = full_range.join(truncated, how=join_type)

        # whatever the join flavour, the result stays a PeriodIndex on the
        # same frequency as the left operand
        assert isinstance(joined, PeriodIndex)
        assert joined.freq == full_range.freq

    def test_join_self(self, join_type):
        """Joining an index with itself hands back the very same object."""
        daily = period_range("1/1/2000", "1/20/2000", freq="D")

        joined = daily.join(daily, how=join_type)

        # identity, not mere equality, is what is being checked
        assert daily is joined

    def test_join_does_not_recur(self):
        """Outer join of a frame's row index with its columns gives objects."""
        frame = tm.makeCustomDataframe(
            3,
            2,
            data_gen_f=lambda *args: np.random.randint(2),
            c_idx_type="p",
            r_idx_type="dt",
        )
        first_col = frame.iloc[:2, 0]
        row_labels = first_col.index
        col_labels = frame.columns

        joined = row_labels.join(col_labels, how="outer")

        # expected: an object-dtype Index holding the two row labels followed
        # by the two column labels
        wanted_labels = [row_labels[0], row_labels[1], col_labels[0], col_labels[1]]
        wanted = Index(wanted_labels, object)
        tm.assert_index_equal(joined, wanted)

    @pytest.mark.parametrize("sort", [None, False])
    def test_union(self, sort):
        """Check ``PeriodIndex.union`` against a table of hand-built cases.

        Every case is a ``(left, right, wanted)`` triple.  ``wanted`` is
        written in the order used for ``sort=False``; when ``sort`` is None it
        is sorted before being compared with the result.
        """
        scenarios = [
            # daily, left range lies entirely after the right one
            (
                pd.period_range("1/6/2000", freq="D", periods=5),
                pd.period_range("1/1/2000", freq="D", periods=5),
                pd.PeriodIndex(
                    [
                        "2000-01-06",
                        "2000-01-07",
                        "2000-01-08",
                        "2000-01-09",
                        "2000-01-10",
                        "2000-01-01",
                        "2000-01-02",
                        "2000-01-03",
                        "2000-01-04",
                        "2000-01-05",
                    ],
                    freq="D",
                ),
            ),
            # daily, overlapping ranges
            (
                pd.period_range("1/1/2000", freq="D", periods=5),
                pd.period_range("1/4/2000", freq="D", periods=5),
                pd.period_range("1/1/2000", freq="D", periods=8),
            ),
            # daily, right side empty
            (
                pd.period_range("1/1/2000", freq="D", periods=5),
                pd.PeriodIndex([], freq="D"),
                pd.period_range("1/1/2000", freq="D", periods=5),
            ),
            # hourly, disjoint ranges on consecutive days
            (
                pd.period_range("2000-01-01 09:00", freq="H", periods=5),
                pd.period_range("2000-01-02 09:00", freq="H", periods=5),
                pd.PeriodIndex(
                    [
                        "2000-01-01 09:00",
                        "2000-01-01 10:00",
                        "2000-01-01 11:00",
                        "2000-01-01 12:00",
                        "2000-01-01 13:00",
                        "2000-01-02 09:00",
                        "2000-01-02 10:00",
                        "2000-01-02 11:00",
                        "2000-01-02 12:00",
                        "2000-01-02 13:00",
                    ],
                    freq="H",
                ),
            ),
            # minutely, irregular values with partial overlap
            (
                pd.PeriodIndex(
                    ["2000-01-01 09:01", "2000-01-01 09:03", "2000-01-01 09:05"],
                    freq="T",
                ),
                pd.PeriodIndex(
                    ["2000-01-01 09:01", "2000-01-01 09:05", "2000-01-01 09:08"],
                    freq="T",
                ),
                pd.PeriodIndex(
                    [
                        "2000-01-01 09:01",
                        "2000-01-01 09:03",
                        "2000-01-01 09:05",
                        "2000-01-01 09:08",
                    ],
                    freq="T",
                ),
            ),
            # monthly, overlapping ranges
            (
                pd.period_range("2000-01-01", freq="M", periods=7),
                pd.period_range("2000-04-01", freq="M", periods=7),
                pd.period_range("2000-01-01", freq="M", periods=10),
            ),
            # annual, right range starts earlier and overlaps the left one
            (
                pd.period_range("2003-01-01", freq="A", periods=5),
                pd.period_range("1998-01-01", freq="A", periods=8),
                pd.PeriodIndex(
                    [
                        "2003",
                        "2004",
                        "2005",
                        "2006",
                        "2007",
                        "1998",
                        "1999",
                        "2000",
                        "2001",
                        "2002",
                    ],
                    freq="A",
                ),
            ),
            # daily, left side is not in order
            (
                pd.PeriodIndex(
                    ["1/3/2000", "1/2/2000", "1/1/2000", "1/5/2000", "1/4/2000"],
                    freq="D",
                ),
                pd.period_range("1/6/2000", freq="D", periods=5),
                pd.PeriodIndex(
                    [
                        "1/3/2000",
                        "1/2/2000",
                        "1/1/2000",
                        "1/5/2000",
                        "1/4/2000",
                        "1/6/2000",
                        "1/7/2000",
                        "1/8/2000",
                        "1/9/2000",
                        "1/10/2000",
                    ],
                    freq="D",
                ),
            ),
        ]

        for left, right, wanted in scenarios:
            combined = left.union(right, sort=sort)
            if sort is None:
                wanted = wanted.sort_values()
            tm.assert_index_equal(combined, wanted)

    @pytest.mark.parametrize("sort", [None, False])
    def test_union_misc(self, sort):
        """Union of overlapping slices, shuffled inputs and frequency clashes.

        Checks that two overlapping slices of a daily range union back to the
        full range, that shuffled inputs give the same contents (and the same
        order when ``sort`` is None), and that combining with a PeriodIndex of
        a different frequency raises ``IncompatibleFrequency``.
        """
        index = period_range("1/1/2000", "1/20/2000", freq="D")

        result = index[:-5].union(index[10:], sort=sort)
        tm.assert_index_equal(result, index)

        # not in order
        result = _permute(index[:-5]).union(_permute(index[10:]), sort=sort)
        if sort is None:
            # ordering is only asserted for the sorting mode
            tm.assert_index_equal(result, index)
        assert tm.equalContents(result, index)

        # raise if different frequencies
        index = period_range("1/1/2000", "1/20/2000", freq="D")
        index2 = period_range("1/1/2000", "1/20/2000", freq="W-WED")
        with pytest.raises(period.IncompatibleFrequency):
            index.union(index2, sort=sort)

        # a multiple of the same base frequency must be rejected by union as
        # well, mirroring the equivalent check in test_intersection
        index3 = period_range("1/1/2000", "1/20/2000", freq="2D")
        with pytest.raises(period.IncompatibleFrequency):
            index.union(index3, sort=sort)

        # join rejects the same mismatch
        with pytest.raises(period.IncompatibleFrequency):
            index.join(index3)

    def test_union_dataframe_index(self):
        """A frame built from two monthly Series spans both of their ranges."""
        later_idx = pd.period_range("1/1/1999", "1/1/2012", freq="M")
        earlier_idx = pd.period_range("1/1/1980", "12/1/2001", freq="M")

        later = pd.Series(np.random.randn(len(later_idx)), later_idx)
        earlier = pd.Series(np.random.randn(len(earlier_idx)), earlier_idx)
        frame = pd.DataFrame({"s1": later, "s2": earlier})

        # the frame's index should run from the earliest to the latest month
        wanted = pd.period_range("1/1/1980", "1/1/2012", freq="M")
        tm.assert_index_equal(frame.index, wanted)

    @pytest.mark.parametrize("sort", [None, False])
    def test_intersection(self, sort):
        """Intersect overlapping daily slices, in order and shuffled.

        Also checks that intersecting with a PeriodIndex of a different
        frequency raises ``IncompatibleFrequency``.
        """
        daily = period_range("1/1/2000", "1/20/2000", freq="D")
        head = daily[:-5]
        tail = daily[10:]
        overlap = daily[10:-5]

        common = head.intersection(tail, sort=sort)
        tm.assert_index_equal(common, overlap)

        # not in order
        shuffled_head = _permute(head)
        shuffled_tail = _permute(tail)
        common = shuffled_head.intersection(shuffled_tail, sort=sort)
        if sort is None:
            tm.assert_index_equal(common, overlap)
        assert tm.equalContents(common, overlap)

        # raise if different frequencies
        for clashing_freq in ("W-WED", "2D"):
            other = period_range("1/1/2000", "1/20/2000", freq=clashing_freq)
            with pytest.raises(period.IncompatibleFrequency):
                daily.intersection(other, sort=sort)

    @pytest.mark.parametrize("sort", [None, False])
    def test_intersection_cases(self, sort):
        """Check intersection values, name and freq for several inputs.

        Runs the same three targets (same name, different name, disjoint)
        against a monotonic base and a non-monotonic base, then checks that an
        intersection with an empty index of the same frequency is empty.
        """
        base = period_range("6/1/2000", "6/30/2000", freq="D", name="idx")

        # if target has the same name, it is preserved
        rng2 = period_range("5/15/2000", "6/20/2000", freq="D", name="idx")
        expected2 = period_range("6/1/2000", "6/20/2000", freq="D", name="idx")

        # if target name is different, it will be reset
        rng3 = period_range("5/15/2000", "6/20/2000", freq="D", name="other")
        expected3 = period_range("6/1/2000", "6/20/2000", freq="D", name=None)

        # no overlap with the base at all
        rng4 = period_range("7/1/2000", "7/31/2000", freq="D", name="idx")
        expected4 = PeriodIndex([], name="idx", freq="D")

        for rng, expected in [
            (rng2, expected2),
            (rng3, expected3),
            (rng4, expected4),
        ]:
            result = base.intersection(rng, sort=sort)
            tm.assert_index_equal(result, expected)
            assert result.name == expected.name
            assert result.freq == expected.freq

        # non-monotonic
        base = PeriodIndex(
            ["2011-01-05", "2011-01-04", "2011-01-02", "2011-01-03"],
            freq="D",
            name="idx",
        )

        rng2 = PeriodIndex(
            ["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"],
            freq="D",
            name="idx",
        )
        expected2 = PeriodIndex(["2011-01-04", "2011-01-02"], freq="D", name="idx")

        rng3 = PeriodIndex(
            ["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"],
            freq="D",
            name="other",
        )
        expected3 = PeriodIndex(["2011-01-04", "2011-01-02"], freq="D", name=None)

        rng4 = period_range("7/1/2000", "7/31/2000", freq="D", name="idx")
        expected4 = PeriodIndex([], freq="D", name="idx")

        for rng, expected in [
            (rng2, expected2),
            (rng3, expected3),
            (rng4, expected4),
        ]:
            result = base.intersection(rng, sort=sort)
            if sort is None:
                # expectations above are written in sort=False order
                expected = expected.sort_values()
            tm.assert_index_equal(result, expected)
            assert result.name == expected.name
            assert result.freq == "D"

        # empty same freq
        # The PeriodIndex range is what this module is about; the
        # DatetimeIndex range is the check this test originally made and is
        # kept so that no existing assertion is dropped.
        for rng in [
            period_range("6/1/2000", "6/15/2000", freq="T"),
            date_range("6/1/2000", "6/15/2000", freq="T"),
        ]:
            result = rng[0:0].intersection(rng, sort=sort)
            assert len(result) == 0

            result = rng.intersection(rng[0:0], sort=sort)
            assert len(result) == 0

    @pytest.mark.parametrize("sort", [None, False])
    def test_difference(self, sort):
        """Check ``PeriodIndex.difference`` against a table of cases.

        Every case is a ``(left, right, wanted)`` triple with an unordered
        left side.  ``wanted`` is written in the order used for ``sort=False``
        and is sorted before the comparison when ``sort`` is None.
        """
        daily_labels = ["1/3/2000", "1/2/2000", "1/1/2000", "1/5/2000", "1/4/2000"]
        hourly_labels = [
            "2000-01-01 10:00",
            "2000-01-01 09:00",
            "2000-01-01 12:00",
            "2000-01-01 11:00",
            "2000-01-01 13:00",
        ]
        monthly_labels = [
            "2000-02-01",
            "2000-01-01",
            "2000-06-01",
            "2000-07-01",
            "2000-05-01",
            "2000-03-01",
            "2000-04-01",
        ]
        annual_labels = ["2003", "2007", "2006", "2005", "2004"]

        # left sides that are also reused as the expected result
        daily_disjoint = pd.PeriodIndex(daily_labels, freq="D")
        daily_vs_empty = pd.PeriodIndex(daily_labels, freq="D")
        hourly_disjoint = pd.PeriodIndex(hourly_labels, freq="H")

        scenarios = [
            # daily, nothing in common: left comes back unchanged
            (
                daily_disjoint,
                pd.period_range("1/6/2000", freq="D", periods=5),
                daily_disjoint,
            ),
            # daily, partial overlap
            (
                pd.PeriodIndex(daily_labels, freq="D"),
                pd.period_range("1/4/2000", freq="D", periods=5),
                pd.PeriodIndex(["1/3/2000", "1/2/2000", "1/1/2000"], freq="D"),
            ),
            # daily, right side empty
            (daily_vs_empty, pd.PeriodIndex([], freq="D"), daily_vs_empty),
            # hourly, nothing in common
            (
                hourly_disjoint,
                pd.period_range("2000-01-02 09:00", freq="H", periods=5),
                hourly_disjoint,
            ),
            # minutely, partial overlap
            (
                pd.PeriodIndex(
                    ["2000-01-01 09:03", "2000-01-01 09:01", "2000-01-01 09:05"],
                    freq="T",
                ),
                pd.PeriodIndex(["2000-01-01 09:01", "2000-01-01 09:05"], freq="T"),
                pd.PeriodIndex(["2000-01-01 09:03"], freq="T"),
            ),
            # monthly, partial overlap
            (
                pd.PeriodIndex(monthly_labels, freq="M"),
                pd.period_range("2000-04-01", freq="M", periods=7),
                pd.PeriodIndex(["2000-02-01", "2000-01-01", "2000-03-01"], freq="M"),
            ),
            # annual, partial overlap
            (
                pd.PeriodIndex(annual_labels, freq="A"),
                pd.period_range("1998-01-01", freq="A", periods=8),
                pd.PeriodIndex(["2007", "2006"], freq="A"),
            ),
        ]

        for left, right, wanted in scenarios:
            remaining = left.difference(right, sort=sort)
            if sort is None:
                wanted = wanted.sort_values()
            tm.assert_index_equal(remaining, wanted)