Why Gemfury? Push, build, and install: RubyGems, npm packages, Python packages, Maven artifacts, PHP packages, Go Modules, Bower components, Debian packages, RPM packages, NuGet packages

aaronreidsmith / pandas   python

Repository URL to install this package:

Version: 0.25.3 

/ tests / frame / common.py

import numpy as np

from pandas.util._decorators import cache_readonly

import pandas as pd
import pandas.util.testing as tm

# Module-level source data for the fixtures exposed through TestData.
# NOTE(review): the two tm.* calls are deliberately kept in this order --
# presumably they draw generated data, so swapping them could change the
# values each fixture receives; confirm before reordering.
_seriesd = tm.getSeriesData()
_tsd = tm.getTimeSeriesData()

_frame = pd.DataFrame(_seriesd)
# Same source mapping, with the columns requested as D, C, B, A.
_frame2 = pd.DataFrame(_seriesd, columns=list("DCBA"))
# Every entry of the source mapping cast to the platform ``int``.
_intframe = pd.DataFrame(
    {label: column.astype(int) for label, column in _seriesd.items()}
)

_tsframe = pd.DataFrame(_tsd)

# A copy of _frame with one extra constant string column named "foo".
_mixed_frame = _frame.assign(foo="bar")


class TestData:
    """
    Collection of frame/series fixtures exposed as attributes.

    Every attribute is wrapped in ``cache_readonly``, so (per that decorator)
    a given instance is expected to build each fixture on first access and
    hand back the same object afterwards.  Most fixtures are derived from the
    module-level ``_frame``/``_frame2``/``_intframe``/``_tsframe``/
    ``_mixed_frame`` objects.
    """

    @cache_readonly
    def frame(self):
        """Copy of the module-level ``_frame``."""
        snapshot = _frame.copy()
        return snapshot

    @cache_readonly
    def frame2(self):
        """Copy of the module-level ``_frame2``."""
        snapshot = _frame2.copy()
        return snapshot

    @cache_readonly
    def intframe(self):
        """``_intframe`` rebuilt from its columns with dtype ``np.int64``."""
        # force these all to int64 to avoid platform testing issues
        columns = dict(_intframe.items())
        return pd.DataFrame(columns, dtype=np.int64)

    @cache_readonly
    def tsframe(self):
        """Copy of the module-level ``_tsframe``."""
        snapshot = _tsframe.copy()
        return snapshot

    @cache_readonly
    def mixed_frame(self):
        """Copy of the module-level ``_mixed_frame``."""
        snapshot = _mixed_frame.copy()
        return snapshot

    @cache_readonly
    def mixed_float(self):
        """Columns A-D of ``_frame`` cast to float32/float32/float16/float64."""
        targets = {"A": "float32", "B": "float32", "C": "float16", "D": "float64"}
        return pd.DataFrame(
            {col: _frame[col].copy().astype(kind) for col, kind in targets.items()}
        )

    @cache_readonly
    def mixed_float2(self):
        """Columns A-D of ``_frame2`` cast to float32/float32/float16/float64."""
        targets = {"A": "float32", "B": "float32", "C": "float16", "D": "float64"}
        return pd.DataFrame(
            {col: _frame2[col].copy().astype(kind) for col, kind in targets.items()}
        )

    @cache_readonly
    def mixed_int(self):
        """
        Integer frame with dtypes int32/uint64/uint8/int64 for A/B/C/D.

        A, C and D are casts of the matching ``_intframe`` columns; B is a
        column of ones with the same length as ``_intframe["B"]``.
        """
        n_rows = len(_intframe["B"])
        data = {
            "A": _intframe["A"].copy().astype("int32"),
            "B": np.ones(n_rows, dtype="uint64"),
            "C": _intframe["C"].copy().astype("uint8"),
            "D": _intframe["D"].copy().astype("int64"),
        }
        return pd.DataFrame(data)

    @cache_readonly
    def all_mixed(self):
        """Ten-row frame mixing scalar, string, float32 and int32 columns."""
        n_rows = 10
        data = {
            "a": 1.0,
            "b": 2,
            "c": "foo",
            "float32": np.ones(n_rows, dtype="float32"),
            "int32": np.ones(n_rows, dtype="int32"),
        }
        return pd.DataFrame(data, index=np.arange(n_rows))

    @cache_readonly
    def tzframe(self):
        """
        Three-row datetime frame: A is naive, B is US/Eastern, C is CET.

        The middle row of the two tz-aware columns is set to ``NaT``.
        """

        def stamps(tz=None):
            # Three daily timestamps starting 2013-01-01 in the given zone.
            return pd.date_range("20130101", periods=3, tz=tz)

        result = pd.DataFrame(
            {"A": stamps(), "B": stamps("US/Eastern"), "C": stamps("CET")}
        )
        for col_pos in (1, 2):
            result.iloc[1, col_pos] = pd.NaT
        return result

    @cache_readonly
    def empty(self):
        """A ``DataFrame`` constructed with no arguments."""
        return pd.DataFrame()

    @cache_readonly
    def ts1(self):
        """Result of ``tm.makeTimeSeries(nper=30)``."""
        series = tm.makeTimeSeries(nper=30)
        return series

    @cache_readonly
    def ts2(self):
        """A separate ``tm.makeTimeSeries(nper=30)`` result, sliced ``[5:]``."""
        series = tm.makeTimeSeries(nper=30)
        return series[5:]

    @cache_readonly
    def simple(self):
        """3x3 float frame holding 1.0-9.0 row by row, index a-c."""
        values = np.arange(1.0, 10.0).reshape(3, 3)
        return pd.DataFrame(
            values, index=list("abc"), columns=["one", "two", "three"]
        )


# self.ts3 = tm.makeTimeSeries()[-5:]
# self.ts4 = tm.makeTimeSeries()[1:-1]


def _check_mixed_float(df, dtype=None):
    """
    Assert that columns "A"-"D" of ``df`` have the expected float dtypes.

    Parameters
    ----------
    df : object exposing ``dtypes`` indexable by column label (e.g. DataFrame)
        Must contain every column whose check is enabled.
    dtype : str, dict or None, default None
        * None: use the defaults A=float32, B=float32, C=float16, D=float64.
        * str: every column is expected to have this dtype.
        * dict: per-column overrides merged into the defaults; a falsy value
          (e.g. ``None``) disables the check for that column.

        A value of any other type is ignored and the defaults are used.

    Raises
    ------
    AssertionError
        If a checked column's dtype differs from the expected one; the
        message names the column, the expected dtype and the actual dtype.
    """
    # float16 are most likely to be upcasted to float32
    expected = dict(A="float32", B="float32", C="float16", D="float64")
    if isinstance(dtype, str):
        expected = dict.fromkeys(expected, dtype)
    elif isinstance(dtype, dict):
        expected.update(dtype)

    # Only A-D are ever inspected, even if ``dtype`` carried extra keys.
    for col in ("A", "B", "C", "D"):
        wanted = expected.get(col)
        if not wanted:
            # Falsy entry means "do not check this column".
            continue
        actual = df.dtypes[col]
        assert actual == wanted, "column {!r}: expected dtype {}, got {}".format(
            col, wanted, actual
        )


def _check_mixed_int(df, dtype=None):
    """
    Assert that columns "A"-"D" of ``df`` have the expected integer dtypes.

    Parameters
    ----------
    df : object exposing ``dtypes`` indexable by column label (e.g. DataFrame)
        Must contain every column whose check is enabled.
    dtype : str, dict or None, default None
        * None: use the defaults A=int32, B=uint64, C=uint8, D=int64.
        * str: every column is expected to have this dtype.
        * dict: per-column overrides merged into the defaults; a falsy value
          (e.g. ``None``) disables the check for that column.

        A value of any other type is ignored and the defaults are used.

    Raises
    ------
    AssertionError
        If a checked column's dtype differs from the expected one; the
        message names the column, the expected dtype and the actual dtype.
    """
    expected = dict(A="int32", B="uint64", C="uint8", D="int64")
    if isinstance(dtype, str):
        expected = dict.fromkeys(expected, dtype)
    elif isinstance(dtype, dict):
        expected.update(dtype)

    # Only A-D are ever inspected, even if ``dtype`` carried extra keys.
    for col in ("A", "B", "C", "D"):
        wanted = expected.get(col)
        if not wanted:
            # Falsy entry means "do not check this column".
            continue
        actual = df.dtypes[col]
        assert actual == wanted, "column {!r}: expected dtype {}, got {}".format(
            col, wanted, actual
        )