Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

alkaline-ml / pandas   python

Repository URL to install this package:

Version: 1.1.1 

/ tests / groupby / test_allowlist.py

"""
Test methods relating to generic function evaluation:
the so-called white/black lists.
"""

from string import ascii_lowercase

import numpy as np
import pytest

from pandas import DataFrame, Index, MultiIndex, Series, date_range
import pandas._testing as tm
from pandas.core.groupby.base import (
    groupby_other_methods,
    reduction_kernels,
    transformation_kernels,
)


# Names compared against ``gb._apply_allowlist`` for the DataFrame case in
# test_groupby_allowlist.
# NOTE(review): the entries of this list and its closing bracket are missing
# from this copy of the file — restore them from the upstream source.
df_allowlist = [

def df_allowlist_fixture(request):
    """Return the parameter value carried by ``request``."""
    # NOTE(review): test_groupby_frame_allowlist takes this name as an
    # argument, so it is presumably a parametrized pytest fixture; no
    # decorator is visible here — confirm.
    current = request.param
    return current

# Names compared against ``gb._apply_allowlist`` for the Series case in
# test_groupby_allowlist.
# NOTE(review): the entries of this list and its closing bracket are missing
# from this copy of the file — restore them from the upstream source.
s_allowlist = [

def s_allowlist_fixture(request):
    """Return the parameter value carried by ``request``."""
    # NOTE(review): test_groupby_series_allowlist takes this name as an
    # argument, so it is presumably a parametrized pytest fixture; no
    # decorator is visible here — confirm.
    current = request.param
    return current

def mframe():
    """
    Build a 10x3 frame of random normals on a two-level MultiIndex.

    Returns
    -------
    DataFrame
        Columns "A", "B", "C"; index levels named "first" and "second".
    """
    # NOTE(review): tests in this file take ``mframe`` as an argument, so it
    # is presumably a pytest fixture; no decorator is visible here — confirm.
    index = MultiIndex(
        levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],
        codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
        names=["first", "second"],
    )
    return DataFrame(np.random.randn(10, 3), index=index, columns=["A", "B", "C"])

def df():
    """
    Build an 8-row frame with two string key columns and two random columns.

    Returns
    -------
    DataFrame
        Columns "A" and "B" hold fixed string labels; "C" and "D" hold
        values drawn with ``np.random.randn``.
    """
    # NOTE(review): test_groupby_selection_with_methods takes ``df`` as an
    # argument, so it is presumably a pytest fixture; no decorator is visible
    # here — confirm.
    return DataFrame(
        {
            "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
            "B": ["one", "one", "two", "three", "two", "two", "one", "three"],
            "C": np.random.randn(8),
            "D": np.random.randn(8),
        }
    )

def df_letters():
    """
    Build a 10-row frame of random floats and random lowercase letters.

    Returns
    -------
    DataFrame
        Column "floats" holds values from ``np.random.random``; column
        "letters" holds letters sampled with replacement from
        ``ascii_lowercase``.
    """
    # NOTE(review): several tests take ``df_letters`` as an argument, so it is
    # presumably a pytest fixture; no decorator is visible here — confirm.
    letters = np.array(list(ascii_lowercase))
    N = 10
    random_letters = letters.take(np.random.randint(0, 26, N))
    # ``N / 10`` is 1.0 for N == 10, so the scaling leaves the values unchanged.
    frame = DataFrame(
        {
            "floats": N / 10 * Series(np.random.random(N)),
            "letters": Series(random_letters),
        }
    )
    return frame

@pytest.mark.parametrize("allowlist", [df_allowlist, s_allowlist])
def test_groupby_allowlist(df_letters, allowlist):
    """
    Check that a groupby's ``_apply_allowlist`` equals the expected list.

    The frame itself is grouped when ``allowlist`` is ``df_allowlist``;
    otherwise its "floats" column is grouped. Both are grouped by the
    "letters" column.
    """
    df = df_letters
    if allowlist == df_allowlist:
        # dataframe
        obj = df_letters
    else:
        # series; without this branch ``obj`` was overwritten in the frame
        # case and left unbound in the series case
        obj = df_letters["floats"]

    gb = obj.groupby(df.letters)

    assert set(allowlist) == set(gb._apply_allowlist)

def check_allowlist(obj, df, m):
    """
    Check that attribute ``m`` of the groupby class is named ``m``.

    ``obj`` is grouped by ``df.letters`` and ``m`` is looked up on the type of
    the resulting groupby object (not on the instance).

    Parameters
    ----------
    obj : object with a ``groupby`` method
    df : object with a ``letters`` attribute used as the grouping key
    m : str
        Attribute name to check.

    Raises
    ------
    AssertionError
        If ``__name__`` differs from ``m`` or ``__qualname__`` does not end
        with ``m``.
    """
    # check the obj for a particular allowlist m

    gb = obj.groupby(df.letters)

    f = getattr(type(gb), m)

    # name
    try:
        n = f.__name__
    except AttributeError:
        # the attribute carries no name, so there is nothing to compare
        return
    assert n == m

    # qualname
    try:
        n = f.__qualname__
    except AttributeError:
        return
    assert n.endswith(m)

def test_groupby_series_allowlist(df_letters, s_allowlist_fixture):
    """Run ``check_allowlist`` for one name on the "letters" column groupby."""
    frame = df_letters
    check_allowlist(frame.letters, frame, s_allowlist_fixture)

def test_groupby_frame_allowlist(df_letters, df_allowlist_fixture):
    """Run ``check_allowlist`` for one name on the whole-frame groupby."""
    frame = df_letters
    check_allowlist(frame, frame, df_allowlist_fixture)

def raw_frame():
    """
    Build a 10x3 random frame on a two-level MultiIndex with four NaN cells.

    Returns
    -------
    DataFrame
        Columns "A", "B", "C" on a column Index named "exp"; row 1 has NaN in
        columns "B" and "C", row 7 has NaN in columns "A" and "B".
    """
    # NOTE(review): test_regression_allowlist_methods takes ``raw_frame`` as
    # an argument, so it is presumably a pytest fixture; no decorator is
    # visible here — confirm.
    index = MultiIndex(
        levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],
        codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
        names=["first", "second"],
    )
    # local name differs from the function name to avoid shadowing it
    frame = DataFrame(
        np.random.randn(10, 3), index=index, columns=Index(["A", "B", "C"], name="exp")
    )
    frame.iloc[1, [1, 2]] = np.nan
    frame.iloc[7, [0, 1]] = np.nan
    return frame

@pytest.mark.parametrize("op", AGG_FUNCTIONS)
@pytest.mark.parametrize("level", [0, 1])
@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize("skipna", [True, False])
@pytest.mark.parametrize("sort", [True, False])
def test_regression_allowlist_methods(raw_frame, op, level, axis, skipna, sort):
    """
    Check that a level-based groupby reduction matches the frame's own method.

    ``frame.groupby(level=..., axis=..., sort=...)`` is reduced with the
    method named ``op`` and compared, via ``tm.assert_frame_equal``, with
    ``getattr(frame, op)(level=..., axis=...)``. The expected frame is sorted
    along the grouped level when ``sort`` is true.
    """
    # GH6944
    # GH 17537
    # explicitly test the allowlist methods

    # NOTE(review): both assignments below sit inside the ``axis == 0``
    # branch, so ``frame`` always ends up as ``raw_frame.T`` and nothing runs
    # for ``axis == 1``. An ``else:`` before the transpose appears to be
    # missing from this copy — confirm against the upstream file.
    if axis == 0:
        frame = raw_frame
        frame = raw_frame.T

        # NOTE(review): the two comparisons below differ only in whether
        # ``skipna`` is forwarded to ``op``. They look like the two arms of a
        # conditional whose ``if``/``else`` lines are not visible here —
        # TODO confirm which ops take the ``skipna`` arm.
        grouped = frame.groupby(level=level, axis=axis, sort=sort)
        result = getattr(grouped, op)(skipna=skipna)
        expected = getattr(frame, op)(level=level, axis=axis, skipna=skipna)
        if sort:
            expected = expected.sort_index(axis=axis, level=level)
        tm.assert_frame_equal(result, expected)
        grouped = frame.groupby(level=level, axis=axis, sort=sort)
        result = getattr(grouped, op)()
        expected = getattr(frame, op)(level=level, axis=axis)
        if sort:
            expected = expected.sort_index(axis=axis, level=level)
        tm.assert_frame_equal(result, expected)

def test_groupby_blocklist(df_letters):
    """
    Check that blocked attribute names raise ``AttributeError`` on a groupby.

    Each name in ``blocklist`` is looked up with ``getattr`` on groupbys of
    both the frame and its ``floats`` column. The error message must match one
    of the two patterns built inside the loop.
    """
    df = df_letters
    s = df_letters.floats

    # NOTE(review): the entries of ``blocklist`` and its closing bracket are
    # missing from this copy, as are the closing parentheses of the two
    # message patterns further down. ``to_methods`` is computed but never used
    # in the visible code — presumably it was appended to ``blocklist``.
    # Restore these pieces from the upstream file.
    blocklist = [
    to_methods = [method for method in dir(df) if method.startswith("to_")]


    for bl in blocklist:
        for obj in (df, s):
            gb = obj.groupby(df.letters)

            # e.g., to_csv
            defined_but_not_allowed = (
                f"(?:^Cannot.+{repr(bl)}.+'{type(gb).__name__}'.+try "
                f"using the 'apply' method$)"

            # e.g., query, eval
            not_defined = (
                f"(?:^'{type(gb).__name__}' object has no attribute {repr(bl)}$)"

            # either message is accepted
            msg = f"{defined_but_not_allowed}|{not_defined}"

            with pytest.raises(AttributeError, match=msg):
                getattr(gb, bl)

def test_tab_completion(mframe):
    """Compare the public names listed by ``dir()`` on a groupby with a fixed set."""
    grp = mframe.groupby(level="second")
    # only names without a leading underscore are considered
    results = {v for v in dir(grp) if not v.startswith("_")}
    # NOTE(review): the members of ``expected`` and its closing brace are
    # missing from this copy — restore them from the upstream file.
    expected = {
    assert results == expected

def test_groupby_function_rename(mframe):
    """Check that each listed groupby method reports its own name as ``__name__``."""
    grouped = mframe.groupby(level="second")
    for expected_name in ("sum", "prod", "min", "max", "first", "last"):
        method = getattr(grouped, expected_name)
        assert method.__name__ == expected_name

@pytest.mark.filterwarnings("ignore:tshift is deprecated:FutureWarning")
def test_groupby_selection_with_methods(df):
    """
    Check that selecting after grouping matches grouping after selecting.

    ``df.groupby(["A"])[["C"]]`` and ``df[["C"]].groupby(df["A"])`` are run
    through the same methods and the resulting frames are compared with
    ``tm.assert_frame_equal``.
    """
    # some methods which require DatetimeIndex
    rng = date_range("2014", periods=len(df))
    df.index = rng

    g = df.groupby(["A"])[["C"]]
    g_exp = df[["C"]].groupby(df["A"])
    # TODO check groupby with > 1 col ?

    # methods which are called as .foo()
    # NOTE(review): the entries of ``methods`` and its closing bracket are
    # missing from this copy — restore them from the upstream file.
    methods = [

    for m in methods:
        res = getattr(g, m)()
        exp = getattr(g_exp, m)()

        # should always be frames!
        tm.assert_frame_equal(res, exp)

    # methods which aren't just .foo()
    tm.assert_frame_equal(g.fillna(0), g_exp.fillna(0))
    tm.assert_frame_equal(g.dtypes, g_exp.dtypes)
    tm.assert_frame_equal(g.apply(lambda x: x.sum()), g_exp.apply(lambda x: x.sum()))

    tm.assert_frame_equal(g.resample("D").mean(), g_exp.resample("D").mean())
    tm.assert_frame_equal(g.resample("D").ohlc(), g_exp.resample("D").ohlc())

    # NOTE(review): the line below is indented like the argument list of a
    # call whose opening and closing lines are not visible — presumably
    # ``tm.assert_frame_equal(`` ... ``)`` as in the lines above; confirm.
        g.filter(lambda x: len(x) == 3), g_exp.filter(lambda x: len(x) == 3)

def test_all_methods_categorized(mframe):
    """
    Check that every public groupby attribute is categorized exactly once.

    The public names from ``dir()`` on a groupby, minus the frame's column
    labels, must equal the union of ``reduction_kernels``,
    ``transformation_kernels`` and ``groupby_other_methods``. The three
    collections must also be pairwise disjoint.

    Raises
    ------
    AssertionError
        If two collections overlap, if a public name is in none of them, or
        if a categorized name is no longer on the groupby object.
    """
    grp = mframe.groupby(mframe.iloc[:, 0])
    public = {attr for attr in dir(grp) if not attr.startswith("_")}
    names = public - set(mframe.columns)
    new_names = set(names)
    new_names -= reduction_kernels
    new_names -= transformation_kernels
    new_names -= groupby_other_methods

    assert not (reduction_kernels & transformation_kernels)
    assert not (reduction_kernels & groupby_other_methods)
    assert not (transformation_kernels & groupby_other_methods)

    # new public method?
    if new_names:
        # report the offending names; the message was previously an f-string
        # with nothing interpolated
        msg = f"""
There are uncatgeorized methods defined on the Grouper class:
{new_names}.

Was a new method recently added?

Every public method On Grouper must appear in exactly one the
following three lists defined in pandas.core.groupby.base:
- `reduction_kernels`
- `transformation_kernels`
- `groupby_other_methods`
see the comments in pandas/core/groupby/base.py for guidance on
how to fix this test.
"""
        raise AssertionError(msg)

    # removed a public method?
    all_categorized = reduction_kernels | transformation_kernels | groupby_other_methods
    if names != all_categorized:
        msg = f"""
Some methods which are supposed to be on the Grouper class
are missing:
{all_categorized - names}.

They're still defined in one of the lists that live in pandas/core/groupby/base.py.
If you removed a method, you should update them
"""
        raise AssertionError(msg)