Why Gemfury? Push, build, and install RubyGems, npm packages, Python packages, Maven artifacts, PHP packages, Go Modules, Bower components, Debian packages, RPM packages, and NuGet packages.

aaronreidsmith / pandas   python

Repository URL to install this package:

Version: 0.25.3 

/ tests / sparse / test_groupby.py

import numpy as np
import pytest

import pandas as pd
import pandas.util.testing as tm


@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
@pytest.mark.filterwarnings("ignore:DataFrame.to_sparse:FutureWarning")
class TestSparseGroupBy:
    def setup_method(self, method):
        self.dense = pd.DataFrame(
            {
                "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
                "B": ["one", "one", "two", "three", "two", "two", "one", "three"],
                "C": np.random.randn(8),
                "D": np.random.randn(8),
                "E": [np.nan, np.nan, 1, 2, np.nan, 1, np.nan, np.nan],
            }
        )
        self.sparse = self.dense.to_sparse()

    def test_first_last_nth(self):
        # tests for first / last / nth
        sparse_grouped = self.sparse.groupby("A")
        dense_grouped = self.dense.groupby("A")

        sparse_grouped_first = sparse_grouped.first()
        sparse_grouped_last = sparse_grouped.last()
        sparse_grouped_nth = sparse_grouped.nth(1)

        dense_grouped_first = pd.DataFrame(dense_grouped.first().to_sparse())
        dense_grouped_last = pd.DataFrame(dense_grouped.last().to_sparse())
        dense_grouped_nth = pd.DataFrame(dense_grouped.nth(1).to_sparse())

        tm.assert_frame_equal(sparse_grouped_first, dense_grouped_first)
        tm.assert_frame_equal(sparse_grouped_last, dense_grouped_last)
        tm.assert_frame_equal(sparse_grouped_nth, dense_grouped_nth)

    def test_aggfuncs(self):
        sparse_grouped = self.sparse.groupby("A")
        dense_grouped = self.dense.groupby("A")

        result = sparse_grouped.mean().to_sparse()
        expected = dense_grouped.mean().to_sparse()

        tm.assert_frame_equal(result, expected)

        # ToDo: sparse sum includes str column
        # tm.assert_frame_equal(sparse_grouped.sum(),
        #                       dense_grouped.sum())

        result = sparse_grouped.count().to_sparse()
        expected = dense_grouped.count().to_sparse()

        tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("fill_value", [0, np.nan])
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
@pytest.mark.filterwarnings("ignore:DataFrame.to_sparse:FutureWarning")
def test_groupby_includes_fill_value(fill_value):
    # https://github.com/pandas-dev/pandas/issues/5078
    df = pd.DataFrame(
        {
            "a": [fill_value, 1, fill_value, fill_value],
            "b": [fill_value, 1, fill_value, fill_value],
        }
    )
    sdf = df.to_sparse(fill_value=fill_value)
    result = sdf.groupby("a").sum()
    expected = pd.DataFrame(df.groupby("a").sum().to_sparse(fill_value=fill_value))
    tm.assert_frame_equal(result, expected, check_index_type=False)