Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

aaronreidsmith / pandas   python

Repository URL to install this package:

Version: 0.25.3 

/ tests / generic / test_frame.py

from copy import deepcopy
from distutils.version import LooseVersion
from operator import methodcaller

import numpy as np
import pytest

import pandas.util._test_decorators as td

import pandas as pd
from pandas import DataFrame, MultiIndex, Series, date_range
import pandas.util.testing as tm
from pandas.util.testing import (
    assert_almost_equal,
    assert_frame_equal,
    assert_series_equal,
)

from .test_generic import Generic

# xarray is optional: record whether the import succeeded in
# ``_XARRAY_INSTALLED``. The flag (and the imported module's version) is read
# by the ``skipif`` marker on ``TestDataFrame.test_to_xarray_index_types``.
try:
    import xarray

    _XARRAY_INSTALLED = True
except ImportError:
    _XARRAY_INSTALLED = False


class TestDataFrame(Generic):
    """DataFrame-specific tests built on the shared ``Generic`` test base.

    ``_typ`` and ``_comparator`` are the class-level hooks defined here;
    ``self._compare`` and ``self.check_metadata`` used by the tests below are
    not defined in this class and are inherited from the base.
    """

    _typ = DataFrame
    _comparator = lambda self, x, y: assert_frame_equal(x, y)

    def test_rename_mi(self):
        """Rename a MultiIndex-ed frame with a callable mapper."""
        df = DataFrame(
            [11, 21, 31],
            index=MultiIndex.from_tuples([("A", x) for x in ["a", "B", "c"]]),
        )
        # There is no assertion: the test only checks that the call completes.
        df.rename(str.lower)

    def test_set_axis_name(self):
        """Naming an axis returns a renamed copy and leaves the original alone.

        Both ``_set_axis_name`` and ``rename_axis`` are exercised, for the
        index (default axis) and for the columns (``axis=1``).
        """
        df = pd.DataFrame([[1, 2], [3, 4]])
        funcs = ["_set_axis_name", "rename_axis"]
        for func in funcs:
            result = methodcaller(func, "foo")(df)
            assert df.index.name is None
            assert result.index.name == "foo"

            result = methodcaller(func, "cols", axis=1)(df)
            assert df.columns.name is None
            assert result.columns.name == "cols"

    def test_set_axis_name_mi(self):
        """Level names can be set on a MultiIndex axis, one axis at a time.

        Setting names on one axis must leave the other axis' level names
        as ``[None, None]``.
        """
        df = DataFrame(
            np.empty((3, 3)),
            index=MultiIndex.from_tuples([("A", x) for x in "aBc"]),
            columns=MultiIndex.from_tuples([("C", x) for x in "xyz"]),
        )

        level_names = ["L1", "L2"]
        funcs = ["_set_axis_name", "rename_axis"]
        for func in funcs:
            result = methodcaller(func, level_names)(df)
            assert result.index.names == level_names
            assert result.columns.names == [None, None]

            result = methodcaller(func, level_names, axis=1)(df)
            assert result.columns.names == ["L1", "L2"]
            assert result.index.names == [None, None]

    def test_nonzero_single_element(self):
        """``.bool()`` works on a 1x1 frame; a two-element frame raises.

        For the two-element frame both ``df.bool()`` and ``bool(df)`` are
        expected to raise ``ValueError``.
        """

        # allow single item via bool method
        df = DataFrame([[True]])
        assert df.bool()

        df = DataFrame([[False]])
        assert not df.bool()

        df = DataFrame([[False, False]])
        with pytest.raises(ValueError):
            df.bool()
        with pytest.raises(ValueError):
            bool(df)

    def test_get_numeric_data_preserve_dtype(self):
        """``_get_numeric_data`` on a mixed object column yields no columns.

        The expected result keeps the original index and has object dtype.
        """

        # get the numeric data
        o = DataFrame({"A": [1, "2", 3.0]})
        result = o._get_numeric_data()
        expected = DataFrame(index=[0, 1, 2], dtype=object)
        self._compare(result, expected)

    def test_metadata_propagation_indiv(self):
        """Check metadata handling for groupby, resample, merge and concat.

        The merge and concat parts temporarily replace ``DataFrame._metadata``
        and ``DataFrame.__finalize__`` on the class itself. The originals are
        restored in a ``finally`` block so that a failing assertion cannot
        leave the patched class behind for other tests.
        """

        # groupby
        df = DataFrame(
            {
                "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
                "B": ["one", "one", "two", "three", "two", "two", "one", "three"],
                "C": np.random.randn(8),
                "D": np.random.randn(8),
            }
        )
        result = df.groupby("A").sum()
        self.check_metadata(df, result)

        # resample
        df = DataFrame(
            np.random.randn(1000, 2),
            index=date_range("20130101", periods=1000, freq="s"),
        )
        result = df.resample("1T")
        self.check_metadata(df, result)

        # merging with override
        # GH 6923
        _metadata = DataFrame._metadata
        _finalize = DataFrame.__finalize__

        try:
            np.random.seed(10)
            df1 = DataFrame(np.random.randint(0, 4, (3, 2)), columns=["a", "b"])
            df2 = DataFrame(np.random.randint(0, 4, (3, 2)), columns=["c", "d"])
            DataFrame._metadata = ["filename"]
            df1.filename = "fname1.csv"
            df2.filename = "fname2.csv"

            def finalize_merge(self, other, method=None, **kwargs):
                # On merge, join the left and right values with "|";
                # otherwise copy the attribute from ``other``.
                for name in self._metadata:
                    if method == "merge":
                        left, right = other.left, other.right
                        value = (
                            getattr(left, name, "") + "|" + getattr(right, name, "")
                        )
                        object.__setattr__(self, name, value)
                    else:
                        object.__setattr__(self, name, getattr(other, name, ""))

                return self

            DataFrame.__finalize__ = finalize_merge
            result = df1.merge(df2, left_on=["a"], right_on=["c"], how="inner")
            assert result.filename == "fname1.csv|fname2.csv"

            # concat
            # GH 6927
            DataFrame._metadata = ["filename"]
            df1 = DataFrame(np.random.randint(0, 4, (3, 2)), columns=list("ab"))
            df1.filename = "foo"

            def finalize_concat(self, other, method=None, **kwargs):
                # On concat, join the truthy values of all inputs with "+";
                # otherwise copy the attribute from ``other``.
                for name in self._metadata:
                    if method == "concat":
                        value = "+".join(
                            [
                                getattr(o, name)
                                for o in other.objs
                                if getattr(o, name, None)
                            ]
                        )
                        object.__setattr__(self, name, value)
                    else:
                        object.__setattr__(self, name, getattr(other, name, None))

                return self

            DataFrame.__finalize__ = finalize_concat

            result = pd.concat([df1, df1])
            assert result.filename == "foo+foo"
        finally:
            # reset, even when an assertion above failed
            DataFrame._metadata = _metadata
            DataFrame.__finalize__ = _finalize

    def test_set_attribute(self):
        """Attribute and column of the same name stay independent (Issue #8994).

        After an attribute ``y`` is set, a column ``"y"`` is added and the
        attribute is set again: ``df.y`` must give the attribute value while
        ``df["y"]`` still gives the column.
        """
        df = DataFrame({"x": [1, 2, 3]})

        df.y = 2
        df["y"] = [2, 4, 6]
        df.y = 5

        assert df.y == 5
        assert_series_equal(df["y"], Series([2, 4, 6], name="y"))

    # ``or`` short-circuits, so ``xarray.__version__`` is only evaluated once
    # the import is known to have succeeded.
    @pytest.mark.skipif(
        not _XARRAY_INSTALLED
        or LooseVersion(xarray.__version__) < LooseVersion("0.10.0"),
        reason="xarray >= 0.10.0 required",
    )
    @pytest.mark.parametrize(
        "index",
        [
            "FloatIndex",
            "IntIndex",
            "StringIndex",
            "UnicodeIndex",
            "DateIndex",
            "PeriodIndex",
            "CategoricalIndex",
            "TimedeltaIndex",
        ],
    )
    def test_to_xarray_index_types(self, index):
        """Round-trip a mixed-dtype frame through xarray for several index types.

        Parameters
        ----------
        index : str
            Suffix of a ``tm.make<...>`` factory; the factory is called with
            ``3`` to build the frame's index.
        """
        from xarray import Dataset

        make_index = getattr(tm, "make{}".format(index))
        df = DataFrame(
            {
                "a": list("abc"),
                "b": list(range(1, 4)),
                "c": np.arange(3, 6).astype("u1"),
                "d": np.arange(4.0, 7.0, dtype="float64"),
                "e": [True, False, True],
                "f": pd.Categorical(list("abc")),
                "g": pd.date_range("20130101", periods=3),
                "h": pd.date_range("20130101", periods=3, tz="US/Eastern"),
            }
        )

        df.index = make_index(3)
        df.index.name = "foo"
        df.columns.name = "bar"
        result = df.to_xarray()
        assert result.dims["foo"] == 3
        assert len(result.coords) == 1
        assert len(result.data_vars) == 8
        assert_almost_equal(list(result.coords.keys()), ["foo"])
        assert isinstance(result, Dataset)

        # idempotency
        # categoricals are not preserved
        # datetimes w/tz are not preserved
        # column names are lost
        expected = df.copy()
        expected["f"] = expected["f"].astype(object)
        expected["h"] = expected["h"].astype("datetime64[ns]")
        expected.columns.name = None
        assert_frame_equal(
            result.to_dataframe(),
            expected,
            check_index_type=False,
            check_categorical=False,
        )

    @td.skip_if_no("xarray", min_version="0.7.0")
    def test_to_xarray(self):
        """Convert an empty slice and a MultiIndex-ed frame to xarray.

        The MultiIndex case is also converted back with ``to_dataframe`` and
        compared against the original, after adjusting the categorical and
        tz-aware columns and the columns name in the expected frame.
        """
        from xarray import Dataset

        df = DataFrame(
            {
                "a": list("abc"),
                "b": list(range(1, 4)),
                "c": np.arange(3, 6).astype("u1"),
                "d": np.arange(4.0, 7.0, dtype="float64"),
                "e": [True, False, True],
                "f": pd.Categorical(list("abc")),
                "g": pd.date_range("20130101", periods=3),
                "h": pd.date_range("20130101", periods=3, tz="US/Eastern"),
            }
        )

        df.index.name = "foo"
        result = df[0:0].to_xarray()
        assert result.dims["foo"] == 0
        assert isinstance(result, Dataset)

        # available in 0.7.1
        # MultiIndex
        df.index = pd.MultiIndex.from_product([["a"], range(3)], names=["one", "two"])
        result = df.to_xarray()
        assert result.dims["one"] == 1
        assert result.dims["two"] == 3
        assert len(result.coords) == 2
        assert len(result.data_vars) == 8
        assert_almost_equal(list(result.coords.keys()), ["one", "two"])
        assert isinstance(result, Dataset)

        result = result.to_dataframe()
        expected = df.copy()
        expected["f"] = expected["f"].astype(object)
        expected["h"] = expected["h"].astype("datetime64[ns]")
        expected.columns.name = None
        assert_frame_equal(result, expected, check_index_type=False)

    def test_deepcopy_empty(self):
        """``deepcopy`` of an empty frame that still has columns equals it."""
        # This test covers empty frame copying with non-empty column sets
        # as reported in issue GH15370
        empty_frame = DataFrame(data=[], index=[], columns=["A"])
        empty_frame_copy = deepcopy(empty_frame)

        self._compare(empty_frame_copy, empty_frame)