Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

agriconnect / pandas   python

Repository URL to install this package:

Version: 0.24.2 

/ tests / dtypes / test_common.py

# -*- coding: utf-8 -*-

import numpy as np
import pytest

import pandas.util._test_decorators as td

import pandas.core.dtypes.common as com
from pandas.core.dtypes.dtypes import (
    CategoricalDtype, CategoricalDtypeType, DatetimeTZDtype, IntervalDtype,
    PeriodDtype)

import pandas as pd
from pandas.conftest import (
    ALL_EA_INT_DTYPES, ALL_INT_DTYPES, SIGNED_EA_INT_DTYPES, SIGNED_INT_DTYPES,
    UNSIGNED_EA_INT_DTYPES, UNSIGNED_INT_DTYPES)
from pandas.core.sparse.api import SparseDtype
import pandas.util.testing as tm


# EA & Actual Dtypes
def to_ea_dtypes(dtypes):
    """ map each string name to the pandas attribute ``<name>Dtype`` """
    ea_classes = []
    for alias in dtypes:
        # e.g. 'Int8' -> pd.Int8Dtype
        ea_classes.append(getattr(pd, alias + 'Dtype'))
    return ea_classes


def to_numpy_dtypes(dtypes):
    """ look up each string entry on numpy; non-string entries are dropped """
    numpy_types = []
    for entry in dtypes:
        if not isinstance(entry, str):
            # only string names can be resolved via getattr on numpy
            continue
        numpy_types.append(getattr(np, entry))
    return numpy_types


class TestPandasDtype(object):
    """ Checks of ``com.pandas_dtype`` on valid and invalid inputs """

    # GH15520: an invalid dtype, whether given as a string or as an
    # object, has to surface as a TypeError.
    @pytest.mark.parametrize(
        'box',
        [pd.Timestamp, 'pd.Timestamp', list])
    def test_invalid_dtype_error(self, box):
        expected_msg = 'not understood'
        with pytest.raises(TypeError, match=expected_msg):
            com.pandas_dtype(box)

    @pytest.mark.parametrize(
        'dtype',
        [object, 'float64', np.object_, np.dtype('object'), 'O',
         np.float64, float, np.dtype('float64')])
    def test_pandas_dtype_valid(self, dtype):
        # the converted dtype must compare equal to whatever was passed in
        converted = com.pandas_dtype(dtype)
        assert converted == dtype

    @pytest.mark.parametrize(
        'dtype',
        ['M8[ns]', 'm8[ns]', 'object', 'float64', 'int64'])
    def test_numpy_dtype(self, dtype):
        converted = com.pandas_dtype(dtype)
        assert converted == np.dtype(dtype)

    def test_numpy_string_dtype(self):
        # 'U' and 'S' look like frequency strings but must stay numpy
        # string dtypes rather than being parsed as period dtypes
        for code in ('U', 'S'):
            assert com.pandas_dtype(code) == np.dtype(code)

    @pytest.mark.parametrize(
        'dtype',
        ['datetime64[ns, US/Eastern]',
         'datetime64[ns, Asia/Tokyo]',
         'datetime64[ns, UTC]'])
    def test_datetimetz_dtype(self, dtype):
        converted = com.pandas_dtype(dtype)
        expected = DatetimeTZDtype.construct_from_string(dtype)
        assert converted == expected
        assert converted == dtype

    def test_categorical_dtype(self):
        converted = com.pandas_dtype('category')
        assert converted == CategoricalDtype()

    @pytest.mark.parametrize(
        'dtype',
        ['period[D]', 'period[3M]', 'period[U]',
         'Period[D]', 'Period[3M]', 'Period[U]'])
    def test_period_dtype(self, dtype):
        converted = com.pandas_dtype(dtype)
        # identity, equality with the dtype object, equality with the string
        assert converted is PeriodDtype(dtype)
        assert converted == PeriodDtype(dtype)
        assert converted == dtype


# One representative dtype per kind, keyed by a short label.  Used to
# parametrize test_dtype_equal, which expects every entry to equal itself
# and to differ from every entry stored under another label.
#
# The object entry is built from the builtin ``object`` rather than the
# ``np.object`` alias: the alias was deprecated in NumPy 1.20 and removed
# in 1.24, while ``np.dtype(object)`` yields the same dtype on every
# NumPy version.
dtypes = dict(datetime_tz=com.pandas_dtype('datetime64[ns, US/Eastern]'),
              datetime=com.pandas_dtype('datetime64[ns]'),
              timedelta=com.pandas_dtype('timedelta64[ns]'),
              period=PeriodDtype('D'),
              integer=np.dtype(np.int64),
              float=np.dtype(np.float64),
              object=np.dtype(object),
              category=com.pandas_dtype('category'))


@pytest.mark.parametrize(
    'name1,dtype1', list(dtypes.items()), ids=str)
@pytest.mark.parametrize(
    'name2,dtype2', list(dtypes.items()), ids=str)
def test_dtype_equal(name1, dtype1, name2, dtype2):
    """ every dtype equals itself and differs from any other label's dtype """

    # reflexive check runs for every pairing
    assert com.is_dtype_equal(dtype1, dtype1)

    if name1 == name2:
        # same label on both sides: nothing further to compare
        return

    assert not com.is_dtype_equal(dtype1, dtype2)


@pytest.mark.parametrize(
    "dtype1,dtype2",
    [
        (np.int8, np.int64),
        (np.int16, np.int64),
        (np.int32, np.int64),
        (np.float32, np.float64),
        # PeriodType
        (PeriodDtype("D"), PeriodDtype("2D")),
        # Datetime
        (com.pandas_dtype("datetime64[ns, US/Eastern]"),
         com.pandas_dtype("datetime64[ns, CET]")),
        # gh-15941: no exception should be raised.
        (None, None),
    ])
def test_dtype_equal_strict(dtype1, dtype2):
    """ none of the listed pairs may be reported as equal """
    are_equal = com.is_dtype_equal(dtype1, dtype2)
    assert not are_equal


def get_is_dtype_funcs():
    """
    Collect the attributes of pandas.core.dtypes.common whose names
    start with 'is_' and end with 'dtype', in ``dir`` order.

    """

    checkers = []
    for attr_name in dir(com):
        if not attr_name.startswith('is_'):
            continue
        if attr_name.endswith('dtype'):
            checkers.append(getattr(com, attr_name))
    return checkers


@pytest.mark.parametrize(
    'func', get_is_dtype_funcs(), ids=lambda checker: checker.__name__)
def test_get_dtype_error_catch(func):
    """ each collected ``is_*dtype`` function gives a falsy result for None """
    # see gh-15941
    #
    # No exception should be raised.

    outcome = func(None)
    assert not outcome


def test_is_object():
    """ ``is_object_dtype`` on object inputs versus int and list inputs """
    for object_like in (object, np.array([], dtype=object)):
        assert com.is_object_dtype(object_like)

    for other in (int, np.array([], dtype=int), [1, 2, 3]):
        assert not com.is_object_dtype(other)


@pytest.mark.parametrize(
    "check_scipy",
    [False, pytest.param(True, marks=td.skip_if_no_scipy)])
def test_is_sparse(check_scipy):
    """ ``is_sparse`` accepts pandas sparse containers only """
    for make_sparse in (pd.SparseArray, pd.SparseSeries):
        assert com.is_sparse(make_sparse([1, 2, 3]))

    dense = np.array([1, 2, 3])
    assert not com.is_sparse(dense)

    if not check_scipy:
        return

    # scipy matrices are not sparse as far as is_sparse is concerned
    import scipy.sparse
    assert not com.is_sparse(scipy.sparse.bsr_matrix([1, 2, 3]))


@td.skip_if_no_scipy
def test_is_scipy_sparse():
    """ ``is_scipy_sparse`` accepts a scipy matrix, not pandas sparse data """
    from scipy.sparse import bsr_matrix

    scipy_matrix = bsr_matrix([1, 2, 3])
    assert com.is_scipy_sparse(scipy_matrix)

    for make_sparse in (pd.SparseArray, pd.SparseSeries):
        assert not com.is_scipy_sparse(make_sparse([1, 2, 3]))


def test_is_categorical():
    """ ``is_categorical`` on a Categorical, a Series of it, and an index """
    cat = pd.Categorical([1, 2, 3])
    categorical_likes = (cat,
                         pd.Series(cat),
                         pd.CategoricalIndex([1, 2, 3]))
    for obj in categorical_likes:
        assert com.is_categorical(obj)

    plain_list = [1, 2, 3]
    assert not com.is_categorical(plain_list)


def test_is_datetimetz():
    """ ``is_datetimetz`` results, checked under an expected FutureWarning """
    # every call and construction stays inside the warning context
    with tm.assert_produces_warning(FutureWarning):
        for naive in ([1, 2, 3], pd.DatetimeIndex([1, 2, 3])):
            assert not com.is_datetimetz(naive)

        aware_index = pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")
        assert com.is_datetimetz(aware_index)

        tz_dtype = DatetimeTZDtype("ns", tz="US/Eastern")
        empty_aware = pd.Series([], dtype=tz_dtype)
        assert com.is_datetimetz(empty_aware)


def test_is_period_deprecated():
    """ ``is_period`` results, checked under an expected FutureWarning """
    with tm.assert_produces_warning(FutureWarning):
        for non_period in ([1, 2, 3], pd.Index([1, 2, 3])):
            assert not com.is_period(non_period)

        period_index = pd.PeriodIndex(["2017-01-01"], freq="D")
        assert com.is_period(period_index)


def test_is_datetime64_dtype():
    """ ``is_datetime64_dtype`` on datetime64 inputs versus other inputs """
    rejected = (object, [1, 2, 3], np.array([], dtype=int))
    for obj in rejected:
        assert not com.is_datetime64_dtype(obj)

    accepted = (np.datetime64, np.array([], dtype=np.datetime64))
    for obj in accepted:
        assert com.is_datetime64_dtype(obj)


def test_is_datetime64tz_dtype():
    """ ``is_datetime64tz_dtype`` is true only for the tz-aware index """
    for obj in (object, [1, 2, 3], pd.DatetimeIndex([1, 2, 3])):
        assert not com.is_datetime64tz_dtype(obj)

    aware_index = pd.DatetimeIndex(['2000'], tz="US/Eastern")
    assert com.is_datetime64tz_dtype(aware_index)


def test_is_timedelta64_dtype():
    """ ``is_timedelta64_dtype`` on timedelta inputs versus other inputs """
    # includes strings that read like timedeltas but are not dtypes
    rejected = [
        object,
        None,
        [1, 2, 3],
        np.array([], dtype=np.datetime64),
        '0 days',
        "0 days 00:00:00",
        ["0 days 00:00:00"],
        "NO DATE",
    ]
    for obj in rejected:
        assert not com.is_timedelta64_dtype(obj)

    accepted = [
        np.timedelta64,
        pd.Series([], dtype="timedelta64[ns]"),
        pd.to_timedelta(['0 days', '1 days']),
    ]
    for obj in accepted:
        assert com.is_timedelta64_dtype(obj)


def test_is_period_dtype():
    """ ``is_period_dtype`` on a PeriodDtype / PeriodIndex versus others """
    # a scalar Period is among the rejected inputs
    for obj in (object, [1, 2, 3], pd.Period("2017-01-01")):
        assert not com.is_period_dtype(obj)

    daily = PeriodDtype(freq="D")
    assert com.is_period_dtype(daily)

    empty_annual_index = pd.PeriodIndex([], freq="A")
    assert com.is_period_dtype(empty_annual_index)


def test_is_interval_dtype():
    """ ``is_interval_dtype`` on IntervalDtype / IntervalIndex versus others """
    for obj in (object, [1, 2, 3]):
        assert not com.is_interval_dtype(obj)

    assert com.is_interval_dtype(IntervalDtype())

    # a scalar Interval is rejected, an index built from it is accepted
    scalar = pd.Interval(1, 2, closed="right")
    assert not com.is_interval_dtype(scalar)

    index = pd.IntervalIndex([scalar])
    assert com.is_interval_dtype(index)


def test_is_categorical_dtype():
    """ ``is_categorical_dtype`` on categorical inputs versus other inputs """
    for obj in (object, [1, 2, 3]):
        assert not com.is_categorical_dtype(obj)

    assert com.is_categorical_dtype(CategoricalDtype())

    for make_categorical in (pd.Categorical, pd.CategoricalIndex):
        assert com.is_categorical_dtype(make_categorical([1, 2, 3]))


def test_is_string_dtype():
    """ ``is_string_dtype`` on str/object/string-array versus int inputs """
    rejected = (int, pd.Series([1, 2]))
    for obj in rejected:
        assert not com.is_string_dtype(obj)

    accepted = (str, object, np.array(['a', 'b']))
    for obj in accepted:
        assert com.is_string_dtype(obj)


def test_is_period_arraylike():
    """ ``is_period_arraylike`` is true only for the PeriodIndex """
    for non_period in ([1, 2, 3], pd.Index([1, 2, 3])):
        assert not com.is_period_arraylike(non_period)

    period_index = pd.PeriodIndex(["2017-01-01"], freq="D")
    assert com.is_period_arraylike(period_index)


def test_is_datetime_arraylike():
    """ ``is_datetime_arraylike`` is true only for the DatetimeIndex """
    for non_datetime in ([1, 2, 3], pd.Index([1, 2, 3])):
        assert not com.is_datetime_arraylike(non_datetime)

    datetime_index = pd.DatetimeIndex([1, 2, 3])
    assert com.is_datetime_arraylike(datetime_index)


def test_is_datetimelike():
    """ ``is_datetimelike`` on datetime/period/timedelta inputs vs others """
    for obj in ([1, 2, 3], pd.Index([1, 2, 3])):
        assert not com.is_datetimelike(obj)

    accepted = [
        pd.DatetimeIndex([1, 2, 3]),
        pd.PeriodIndex([], freq="A"),
        np.array([], dtype=np.datetime64),
        pd.Series([], dtype="timedelta64[ns]"),
        pd.DatetimeIndex(["2000"], tz="US/Eastern"),
    ]
    for obj in accepted:
        assert com.is_datetimelike(obj)

    # empty Series carrying a tz-aware dtype
    tz_dtype = DatetimeTZDtype("ns", tz="US/Eastern")
    empty_aware = pd.Series([], dtype=tz_dtype)
    assert com.is_datetimelike(empty_aware)


@pytest.mark.parametrize(
    'dtype',
    [pd.Series([1, 2])] +
    ALL_INT_DTYPES +
    to_numpy_dtypes(ALL_INT_DTYPES) +
    ALL_EA_INT_DTYPES +
    to_ea_dtypes(ALL_EA_INT_DTYPES))
def test_is_integer_dtype(dtype):
    """ every listed integer dtype spelling passes ``is_integer_dtype`` """
    is_integer = com.is_integer_dtype(dtype)
    assert is_integer


@pytest.mark.parametrize(
    'dtype',
    [
        str,
        float,
        np.datetime64,
        np.timedelta64,
        pd.Index([1, 2.]),
        np.array(['a', 'b']),
        np.array([], dtype=np.timedelta64),
    ])
def test_is_not_integer_dtype(dtype):
    """ none of the listed inputs passes ``is_integer_dtype`` """
    is_integer = com.is_integer_dtype(dtype)
    assert not is_integer


@pytest.mark.parametrize(
    'dtype', [
        pd.Series([1, 2])] +
    SIGNED_INT_DTYPES + to_numpy_dtypes(SIGNED_INT_DTYPES) +
    SIGNED_EA_INT_DTYPES + to_ea_dtypes(SIGNED_EA_INT_DTYPES))
def test_is_signed_integer_dtype(dtype):
    """ every listed signed integer spelling passes the signed check """
    # Exercise the signed-specific check that this test is named after.
    # Asserting the broader ``is_integer_dtype`` here would still pass if
    # ``is_signed_integer_dtype`` wrongly rejected one of these inputs.
    assert com.is_signed_integer_dtype(dtype)


@pytest.mark.parametrize(
    'dtype',
    [str, float, np.datetime64, np.timedelta64,
     pd.Index([1, 2.]), np.array(['a', 'b']),
     np.array([], dtype=np.timedelta64)] +
    UNSIGNED_INT_DTYPES +
    to_numpy_dtypes(UNSIGNED_INT_DTYPES) +
    UNSIGNED_EA_INT_DTYPES +
    to_ea_dtypes(UNSIGNED_EA_INT_DTYPES))
def test_is_not_signed_integer_dtype(dtype):
    """ non-integer and unsigned inputs all fail the signed check """
    is_signed = com.is_signed_integer_dtype(dtype)
    assert not is_signed


@pytest.mark.parametrize(
    'dtype',
    [pd.Series([1, 2], dtype=np.uint32)] +
    UNSIGNED_INT_DTYPES +
    to_numpy_dtypes(UNSIGNED_INT_DTYPES) +
    UNSIGNED_EA_INT_DTYPES +
    to_ea_dtypes(UNSIGNED_EA_INT_DTYPES))
def test_is_unsigned_integer_dtype(dtype):
    """ every listed unsigned integer spelling passes the unsigned check """
    is_unsigned = com.is_unsigned_integer_dtype(dtype)
    assert is_unsigned


@pytest.mark.parametrize(
    'dtype',
    [
Loading ...