Why Gemfury? Push, build, and install: RubyGems, npm packages, Python packages, Maven artifacts, PHP packages, Go Modules, Bower components, Debian packages, RPM packages, NuGet packages

alkaline-ml / pandas   python

Repository URL to install this package:

Version: 1.1.1 

/ tests / reshape / merge / test_merge_index_as_string.py

import numpy as np
import pytest

from pandas import DataFrame
import pandas._testing as tm


@pytest.fixture
def df1():
    """
    Base left-hand DataFrame for the merge/join tests.

    Has 11 rows with integer key columns ``outer`` and ``inner`` and a
    float value column ``v1`` evenly spaced over [0, 1].
    """
    columns = {
        "outer": [1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 4],
        "inner": [1, 2, 3, 1, 2, 3, 4, 1, 2, 1, 2],
        "v1": np.linspace(0, 1, 11),
    }
    return DataFrame(columns)


@pytest.fixture
def df2():
    """
    Base right-hand DataFrame for the merge/join tests.

    Has 12 rows with integer key columns ``outer`` and ``inner`` and a
    float value column ``v2`` evenly spaced over [10, 11].
    """
    columns = {
        "outer": [1, 1, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3],
        "inner": [1, 2, 2, 3, 3, 4, 2, 3, 1, 1, 2, 3],
        "v2": np.linspace(10, 11, 12),
    }
    return DataFrame(columns)


@pytest.fixture(params=[[], ["outer"], ["outer", "inner"]])
def left_df(request, df1):
    """
    Left test DataFrame with the parametrized columns moved into the index.

    The fixture parameter is a list of column names (none, ``outer``, or
    ``outer`` and ``inner``). An empty list leaves ``df1`` unchanged.
    """
    index_levels = request.param
    return df1.set_index(index_levels) if index_levels else df1


@pytest.fixture(params=[[], ["outer"], ["outer", "inner"]])
def right_df(request, df2):
    """
    Right test DataFrame with the parametrized columns moved into the index.

    The fixture parameter is a list of column names (none, ``outer``, or
    ``outer`` and ``inner``). An empty list leaves ``df2`` unchanged.
    """
    index_levels = request.param

    # Nothing to index on: hand back the plain frame
    if not index_levels:
        return df2

    return df2.set_index(index_levels)


def compute_expected(df_left, df_right, on=None, left_on=None, right_on=None, how=None):
    """
    Build the reference result for a merge on columns and/or index levels.

    Both inputs are first flattened: named index levels that are not merge
    keys are discarded, and named index levels that are merge keys are
    turned into ordinary columns. The flattened frames are merged on
    columns only, and the keys that were index levels on *both* inputs are
    then put back into the index of the result.

    Parameters
    ----------
    df_left : DataFrame
        The left DataFrame (may have zero or more named index levels)
    df_right : DataFrame
        The right DataFrame (may have zero or more named index levels)
    on : list of str
        The on parameter to the merge operation; when given it is used
        for both sides in place of left_on and right_on
    left_on : list of str
        The left_on parameter to the merge operation
    right_on : list of str
        The right_on parameter to the merge operation
    how : str
        The how parameter to the merge operation

    Returns
    -------
    DataFrame
        The expected merge result
    """

    def _named_levels(frame):
        # Names of the index levels that actually carry a name
        return [name for name in frame.index.names if name is not None]

    def _flatten(frame, keys):
        # Discard non-key index levels, then move key levels into columns
        levels = _named_levels(frame)

        unused = [name for name in levels if name not in keys]
        if unused:
            frame = frame.reset_index(unused, drop=True)

        used = [name for name in levels if name in keys]
        if used:
            frame = frame.reset_index(level=used)

        return frame

    # A single `on` applies to both sides
    if on is not None:
        left_on = right_on = on

    # Keys that are index levels on both inputs end up in the output index
    left_levels = _named_levels(df_left)
    right_levels = _named_levels(df_right)
    restored = [
        key for key in left_on if key in right_levels and key in left_levels
    ]

    # Column-only merge of the flattened inputs
    flat_left = _flatten(df_left, left_on)
    flat_right = _flatten(df_right, right_on)
    merged = flat_left.merge(
        flat_right, left_on=left_on, right_on=right_on, how=how
    )

    if not restored:
        return merged

    return merged.set_index(restored)


@pytest.mark.parametrize(
    "on,how",
    [
        (["outer"], "inner"),
        (["inner"], "left"),
        (["outer", "inner"], "right"),
        (["inner", "outer"], "outer"),
    ],
)
def test_merge_indexes_and_columns_on(left_df, right_df, on, how):
    """
    Merging with ``on`` naming a mix of columns and index levels must match
    the reference result built by ``compute_expected``.
    """
    # Merge under test: keys may be columns or index levels on either side
    actual = left_df.merge(right_df, on=on, how=how)

    # Reference result from an explicit column-only merge
    reference = compute_expected(left_df, right_df, on=on, how=how)

    tm.assert_frame_equal(actual, reference, check_like=True)


@pytest.mark.parametrize(
    "left_on,right_on,how",
    [
        (["outer"], ["outer"], "inner"),
        (["inner"], ["inner"], "right"),
        (["outer", "inner"], ["outer", "inner"], "left"),
        (["inner", "outer"], ["inner", "outer"], "outer"),
    ],
)
def test_merge_indexes_and_columns_lefton_righton(
    left_df, right_df, left_on, right_on, how
):
    """
    Merging with ``left_on``/``right_on`` naming a mix of columns and index
    levels must match the reference result built by ``compute_expected``.
    """
    merge_kwargs = {"left_on": left_on, "right_on": right_on, "how": how}

    # Merge under test
    actual = left_df.merge(right_df, **merge_kwargs)

    # Reference result from an explicit column-only merge
    reference = compute_expected(left_df, right_df, **merge_kwargs)

    tm.assert_frame_equal(actual, reference, check_like=True)


@pytest.mark.parametrize("left_index", ["inner", ["inner", "outer"]])
def test_join_indexes_and_columns_on(df1, df2, left_index, join_type):
    """
    Joining on keys that are index levels of the left frame must give the
    same result as joining on those keys as columns and re-indexing.
    """
    # Left frame is indexed by the parametrized level(s); the right frame
    # is always indexed by both keys
    left_df = df1.set_index(left_index)
    right_df = df2.set_index(["outer", "inner"])

    join_kwargs = {
        "on": ["outer", "inner"],
        "how": join_type,
        "lsuffix": "_x",
        "rsuffix": "_y",
    }

    # Reference: join with all keys as plain columns, then restore the index
    flat_left = left_df.reset_index()
    expected = flat_left.join(right_df, **join_kwargs).set_index(left_index)

    # Join under test: some keys live in the left frame's index
    result = left_df.join(right_df, **join_kwargs)

    tm.assert_frame_equal(result, expected, check_like=True)