# -*- coding: utf-8 -*-
import re
import numpy as np
import pytest
from pandas.core.dtypes.common import (
is_bool_dtype, is_categorical, is_categorical_dtype,
is_datetime64_any_dtype, is_datetime64_dtype, is_datetime64_ns_dtype,
is_datetime64tz_dtype, is_datetimetz, is_dtype_equal, is_interval_dtype,
is_period, is_period_dtype, is_string_dtype)
from pandas.core.dtypes.dtypes import (
CategoricalDtype, DatetimeTZDtype, IntervalDtype, PeriodDtype, registry)
import pandas as pd
from pandas import (
Categorical, CategoricalIndex, IntervalIndex, Series, date_range)
from pandas.core.sparse.api import SparseDtype
import pandas.util.testing as tm
@pytest.fixture(params=[True, False, None])
def ordered(request):
return request.param
class Base(object):
def setup_method(self, method):
self.dtype = self.create()
def test_hash(self):
hash(self.dtype)
def test_equality_invalid(self):
assert not self.dtype == 'foo'
assert not is_dtype_equal(self.dtype, np.int64)
def test_numpy_informed(self):
pytest.raises(TypeError, np.dtype, self.dtype)
assert not self.dtype == np.str_
assert not np.str_ == self.dtype
def test_pickle(self):
# make sure our cache is NOT pickled
# clear the cache
type(self.dtype).reset_cache()
assert not len(self.dtype._cache)
# force back to the cache
result = tm.round_trip_pickle(self.dtype)
assert not len(self.dtype._cache)
assert result == self.dtype
class TestCategoricalDtype(Base):
def create(self):
return CategoricalDtype()
def test_pickle(self):
# make sure our cache is NOT pickled
# clear the cache
type(self.dtype).reset_cache()
assert not len(self.dtype._cache)
# force back to the cache
result = tm.round_trip_pickle(self.dtype)
assert result == self.dtype
def test_hash_vs_equality(self):
dtype = self.dtype
dtype2 = CategoricalDtype()
assert dtype == dtype2
assert dtype2 == dtype
assert hash(dtype) == hash(dtype2)
def test_equality(self):
assert is_dtype_equal(self.dtype, 'category')
assert is_dtype_equal(self.dtype, CategoricalDtype())
assert not is_dtype_equal(self.dtype, 'foo')
def test_construction_from_string(self):
result = CategoricalDtype.construct_from_string('category')
assert is_dtype_equal(self.dtype, result)
pytest.raises(
TypeError, lambda: CategoricalDtype.construct_from_string('foo'))
def test_constructor_invalid(self):
msg = "Parameter 'categories' must be list-like"
with pytest.raises(TypeError, match=msg):
CategoricalDtype("category")
dtype1 = CategoricalDtype(['a', 'b'], ordered=True)
dtype2 = CategoricalDtype(['x', 'y'], ordered=False)
c = Categorical([0, 1], dtype=dtype1, fastpath=True)
@pytest.mark.parametrize('values, categories, ordered, dtype, expected',
[
[None, None, None, None,
CategoricalDtype()],
[None, ['a', 'b'], True, None, dtype1],
[c, None, None, dtype2, dtype2],
[c, ['x', 'y'], False, None, dtype2],
])
def test_from_values_or_dtype(
self, values, categories, ordered, dtype, expected):
result = CategoricalDtype._from_values_or_dtype(values, categories,
ordered, dtype)
assert result == expected
@pytest.mark.parametrize('values, categories, ordered, dtype', [
[None, ['a', 'b'], True, dtype2],
[None, ['a', 'b'], None, dtype2],
[None, None, True, dtype2],
])
def test_from_values_or_dtype_raises(self, values, categories,
ordered, dtype):
msg = "Cannot specify `categories` or `ordered` together with `dtype`."
with pytest.raises(ValueError, match=msg):
CategoricalDtype._from_values_or_dtype(values, categories,
ordered, dtype)
def test_is_dtype(self):
assert CategoricalDtype.is_dtype(self.dtype)
assert CategoricalDtype.is_dtype('category')
assert CategoricalDtype.is_dtype(CategoricalDtype())
assert not CategoricalDtype.is_dtype('foo')
assert not CategoricalDtype.is_dtype(np.float64)
def test_basic(self):
assert is_categorical_dtype(self.dtype)
factor = Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'])
s = Series(factor, name='A')
# dtypes
assert is_categorical_dtype(s.dtype)
assert is_categorical_dtype(s)
assert not is_categorical_dtype(np.dtype('float64'))
assert is_categorical(s.dtype)
assert is_categorical(s)
assert not is_categorical(np.dtype('float64'))
assert not is_categorical(1.0)
def test_tuple_categories(self):
categories = [(1, 'a'), (2, 'b'), (3, 'c')]
result = CategoricalDtype(categories)
assert all(result.categories == categories)
@pytest.mark.parametrize("categories, expected", [
([True, False], True),
([True, False, None], True),
([True, False, "a", "b'"], False),
([0, 1], False),
])
def test_is_boolean(self, categories, expected):
cat = Categorical(categories)
assert cat.dtype._is_boolean is expected
assert is_bool_dtype(cat) is expected
assert is_bool_dtype(cat.dtype) is expected
class TestDatetimeTZDtype(Base):
def create(self):
return DatetimeTZDtype('ns', 'US/Eastern')
def test_alias_to_unit_raises(self):
# 23990
with tm.assert_produces_warning(FutureWarning):
DatetimeTZDtype('datetime64[ns, US/Central]')
def test_alias_to_unit_bad_alias_raises(self):
# 23990
with pytest.raises(TypeError, match=''):
DatetimeTZDtype('this is a bad string')
with pytest.raises(TypeError, match=''):
DatetimeTZDtype('datetime64[ns, US/NotATZ]')
def test_hash_vs_equality(self):
# make sure that we satisfy is semantics
dtype = self.dtype
dtype2 = DatetimeTZDtype('ns', 'US/Eastern')
dtype3 = DatetimeTZDtype(dtype2)
assert dtype == dtype2
assert dtype2 == dtype
assert dtype3 == dtype
assert hash(dtype) == hash(dtype2)
assert hash(dtype) == hash(dtype3)
dtype4 = DatetimeTZDtype("ns", "US/Central")
assert dtype2 != dtype4
assert hash(dtype2) != hash(dtype4)
def test_construction(self):
pytest.raises(ValueError,
lambda: DatetimeTZDtype('ms', 'US/Eastern'))
def test_subclass(self):
a = DatetimeTZDtype.construct_from_string('datetime64[ns, US/Eastern]')
b = DatetimeTZDtype.construct_from_string('datetime64[ns, CET]')
assert issubclass(type(a), type(a))
assert issubclass(type(a), type(b))
def test_compat(self):
assert is_datetime64tz_dtype(self.dtype)
assert is_datetime64tz_dtype('datetime64[ns, US/Eastern]')
assert is_datetime64_any_dtype(self.dtype)
assert is_datetime64_any_dtype('datetime64[ns, US/Eastern]')
assert is_datetime64_ns_dtype(self.dtype)
assert is_datetime64_ns_dtype('datetime64[ns, US/Eastern]')
assert not is_datetime64_dtype(self.dtype)
assert not is_datetime64_dtype('datetime64[ns, US/Eastern]')
def test_construction_from_string(self):
result = DatetimeTZDtype.construct_from_string(
'datetime64[ns, US/Eastern]')
assert is_dtype_equal(self.dtype, result)
pytest.raises(TypeError,
lambda: DatetimeTZDtype.construct_from_string('foo'))
def test_construct_from_string_raises(self):
with pytest.raises(TypeError, match="notatz"):
DatetimeTZDtype.construct_from_string('datetime64[ns, notatz]')
with pytest.raises(TypeError,
match="^Could not construct DatetimeTZDtype$"):
DatetimeTZDtype.construct_from_string(['datetime64[ns, notatz]'])
def test_is_dtype(self):
assert not DatetimeTZDtype.is_dtype(None)
assert DatetimeTZDtype.is_dtype(self.dtype)
assert DatetimeTZDtype.is_dtype('datetime64[ns, US/Eastern]')
assert not DatetimeTZDtype.is_dtype('foo')
assert DatetimeTZDtype.is_dtype(DatetimeTZDtype('ns', 'US/Pacific'))
assert not DatetimeTZDtype.is_dtype(np.float64)
def test_equality(self):
assert is_dtype_equal(self.dtype, 'datetime64[ns, US/Eastern]')
assert is_dtype_equal(self.dtype, DatetimeTZDtype('ns', 'US/Eastern'))
assert not is_dtype_equal(self.dtype, 'foo')
assert not is_dtype_equal(self.dtype, DatetimeTZDtype('ns', 'CET'))
assert not is_dtype_equal(DatetimeTZDtype('ns', 'US/Eastern'),
DatetimeTZDtype('ns', 'US/Pacific'))
# numpy compat
assert is_dtype_equal(np.dtype("M8[ns]"), "datetime64[ns]")
def test_basic(self):
assert is_datetime64tz_dtype(self.dtype)
dr = date_range('20130101', periods=3, tz='US/Eastern')
s = Series(dr, name='A')
# dtypes
assert is_datetime64tz_dtype(s.dtype)
assert is_datetime64tz_dtype(s)
assert not is_datetime64tz_dtype(np.dtype('float64'))
assert not is_datetime64tz_dtype(1.0)
with tm.assert_produces_warning(FutureWarning):
assert is_datetimetz(s)
assert is_datetimetz(s.dtype)
assert not is_datetimetz(np.dtype('float64'))
assert not is_datetimetz(1.0)
def test_dst(self):
dr1 = date_range('2013-01-01', periods=3, tz='US/Eastern')
s1 = Series(dr1, name='A')
assert is_datetime64tz_dtype(s1)
with tm.assert_produces_warning(FutureWarning):
assert is_datetimetz(s1)
dr2 = date_range('2013-08-01', periods=3, tz='US/Eastern')
s2 = Series(dr2, name='A')
assert is_datetime64tz_dtype(s2)
with tm.assert_produces_warning(FutureWarning):
assert is_datetimetz(s2)
assert s1.dtype == s2.dtype
@pytest.mark.parametrize('tz', ['UTC', 'US/Eastern'])
@pytest.mark.parametrize('constructor', ['M8', 'datetime64'])
def test_parser(self, tz, constructor):
# pr #11245
dtz_str = '{con}[ns, {tz}]'.format(con=constructor, tz=tz)
result = DatetimeTZDtype.construct_from_string(dtz_str)
expected = DatetimeTZDtype('ns', tz)
assert result == expected
def test_empty(self):
with pytest.raises(TypeError, match="A 'tz' is required."):
DatetimeTZDtype()
class TestPeriodDtype(Base):
def create(self):
return PeriodDtype('D')
def test_hash_vs_equality(self):
# make sure that we satisfy is semantics
dtype = self.dtype
dtype2 = PeriodDtype('D')
dtype3 = PeriodDtype(dtype2)
assert dtype == dtype2
assert dtype2 == dtype
assert dtype3 == dtype
assert dtype is dtype2
assert dtype2 is dtype
assert dtype3 is dtype
assert hash(dtype) == hash(dtype2)
assert hash(dtype) == hash(dtype3)
def test_construction(self):
with pytest.raises(ValueError):
PeriodDtype('xx')
for s in ['period[D]', 'Period[D]', 'D']:
dt = PeriodDtype(s)
assert dt.freq == pd.tseries.offsets.Day()
assert is_period_dtype(dt)
for s in ['period[3D]', 'Period[3D]', '3D']:
dt = PeriodDtype(s)
assert dt.freq == pd.tseries.offsets.Day(3)
assert is_period_dtype(dt)
for s in ['period[26H]', 'Period[26H]', '26H',
'period[1D2H]', 'Period[1D2H]', '1D2H']:
dt = PeriodDtype(s)
assert dt.freq == pd.tseries.offsets.Hour(26)
assert is_period_dtype(dt)
def test_subclass(self):
Loading ...