Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

agriconnect / pandas   python

Repository URL to install this package:

Version: 0.24.2 

/ tests / dtypes / test_dtypes.py

# -*- coding: utf-8 -*-
import re

import numpy as np
import pytest

from pandas.core.dtypes.common import (
    is_bool_dtype, is_categorical, is_categorical_dtype,
    is_datetime64_any_dtype, is_datetime64_dtype, is_datetime64_ns_dtype,
    is_datetime64tz_dtype, is_datetimetz, is_dtype_equal, is_interval_dtype,
    is_period, is_period_dtype, is_string_dtype)
from pandas.core.dtypes.dtypes import (
    CategoricalDtype, DatetimeTZDtype, IntervalDtype, PeriodDtype, registry)

import pandas as pd
from pandas import (
    Categorical, CategoricalIndex, IntervalIndex, Series, date_range)
from pandas.core.sparse.api import SparseDtype
import pandas.util.testing as tm


@pytest.fixture(params=[True, False, None])
def ordered(request):
    return request.param


class Base(object):

    def setup_method(self, method):
        self.dtype = self.create()

    def test_hash(self):
        hash(self.dtype)

    def test_equality_invalid(self):
        assert not self.dtype == 'foo'
        assert not is_dtype_equal(self.dtype, np.int64)

    def test_numpy_informed(self):
        pytest.raises(TypeError, np.dtype, self.dtype)

        assert not self.dtype == np.str_
        assert not np.str_ == self.dtype

    def test_pickle(self):
        # make sure our cache is NOT pickled

        # clear the cache
        type(self.dtype).reset_cache()
        assert not len(self.dtype._cache)

        # force back to the cache
        result = tm.round_trip_pickle(self.dtype)
        assert not len(self.dtype._cache)
        assert result == self.dtype


class TestCategoricalDtype(Base):

    def create(self):
        return CategoricalDtype()

    def test_pickle(self):
        # make sure our cache is NOT pickled

        # clear the cache
        type(self.dtype).reset_cache()
        assert not len(self.dtype._cache)

        # force back to the cache
        result = tm.round_trip_pickle(self.dtype)
        assert result == self.dtype

    def test_hash_vs_equality(self):
        dtype = self.dtype
        dtype2 = CategoricalDtype()
        assert dtype == dtype2
        assert dtype2 == dtype
        assert hash(dtype) == hash(dtype2)

    def test_equality(self):
        assert is_dtype_equal(self.dtype, 'category')
        assert is_dtype_equal(self.dtype, CategoricalDtype())
        assert not is_dtype_equal(self.dtype, 'foo')

    def test_construction_from_string(self):
        result = CategoricalDtype.construct_from_string('category')
        assert is_dtype_equal(self.dtype, result)
        pytest.raises(
            TypeError, lambda: CategoricalDtype.construct_from_string('foo'))

    def test_constructor_invalid(self):
        msg = "Parameter 'categories' must be list-like"
        with pytest.raises(TypeError, match=msg):
            CategoricalDtype("category")

    dtype1 = CategoricalDtype(['a', 'b'], ordered=True)
    dtype2 = CategoricalDtype(['x', 'y'], ordered=False)
    c = Categorical([0, 1], dtype=dtype1, fastpath=True)

    @pytest.mark.parametrize('values, categories, ordered, dtype, expected',
                             [
                                 [None, None, None, None,
                                  CategoricalDtype()],
                                 [None, ['a', 'b'], True, None, dtype1],
                                 [c, None, None, dtype2, dtype2],
                                 [c, ['x', 'y'], False, None, dtype2],
                             ])
    def test_from_values_or_dtype(
            self, values, categories, ordered, dtype, expected):
        result = CategoricalDtype._from_values_or_dtype(values, categories,
                                                        ordered, dtype)
        assert result == expected

    @pytest.mark.parametrize('values, categories, ordered, dtype', [
        [None, ['a', 'b'], True, dtype2],
        [None, ['a', 'b'], None, dtype2],
        [None, None, True, dtype2],
    ])
    def test_from_values_or_dtype_raises(self, values, categories,
                                         ordered, dtype):
        msg = "Cannot specify `categories` or `ordered` together with `dtype`."
        with pytest.raises(ValueError, match=msg):
            CategoricalDtype._from_values_or_dtype(values, categories,
                                                   ordered, dtype)

    def test_is_dtype(self):
        assert CategoricalDtype.is_dtype(self.dtype)
        assert CategoricalDtype.is_dtype('category')
        assert CategoricalDtype.is_dtype(CategoricalDtype())
        assert not CategoricalDtype.is_dtype('foo')
        assert not CategoricalDtype.is_dtype(np.float64)

    def test_basic(self):

        assert is_categorical_dtype(self.dtype)

        factor = Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'])

        s = Series(factor, name='A')

        # dtypes
        assert is_categorical_dtype(s.dtype)
        assert is_categorical_dtype(s)
        assert not is_categorical_dtype(np.dtype('float64'))

        assert is_categorical(s.dtype)
        assert is_categorical(s)
        assert not is_categorical(np.dtype('float64'))
        assert not is_categorical(1.0)

    def test_tuple_categories(self):
        categories = [(1, 'a'), (2, 'b'), (3, 'c')]
        result = CategoricalDtype(categories)
        assert all(result.categories == categories)

    @pytest.mark.parametrize("categories, expected", [
        ([True, False], True),
        ([True, False, None], True),
        ([True, False, "a", "b'"], False),
        ([0, 1], False),
    ])
    def test_is_boolean(self, categories, expected):
        cat = Categorical(categories)
        assert cat.dtype._is_boolean is expected
        assert is_bool_dtype(cat) is expected
        assert is_bool_dtype(cat.dtype) is expected


class TestDatetimeTZDtype(Base):

    def create(self):
        return DatetimeTZDtype('ns', 'US/Eastern')

    def test_alias_to_unit_raises(self):
        # 23990
        with tm.assert_produces_warning(FutureWarning):
            DatetimeTZDtype('datetime64[ns, US/Central]')

    def test_alias_to_unit_bad_alias_raises(self):
        # 23990
        with pytest.raises(TypeError, match=''):
            DatetimeTZDtype('this is a bad string')

        with pytest.raises(TypeError, match=''):
            DatetimeTZDtype('datetime64[ns, US/NotATZ]')

    def test_hash_vs_equality(self):
        # make sure that we satisfy is semantics
        dtype = self.dtype
        dtype2 = DatetimeTZDtype('ns', 'US/Eastern')
        dtype3 = DatetimeTZDtype(dtype2)
        assert dtype == dtype2
        assert dtype2 == dtype
        assert dtype3 == dtype
        assert hash(dtype) == hash(dtype2)
        assert hash(dtype) == hash(dtype3)

        dtype4 = DatetimeTZDtype("ns", "US/Central")
        assert dtype2 != dtype4
        assert hash(dtype2) != hash(dtype4)

    def test_construction(self):
        pytest.raises(ValueError,
                      lambda: DatetimeTZDtype('ms', 'US/Eastern'))

    def test_subclass(self):
        a = DatetimeTZDtype.construct_from_string('datetime64[ns, US/Eastern]')
        b = DatetimeTZDtype.construct_from_string('datetime64[ns, CET]')

        assert issubclass(type(a), type(a))
        assert issubclass(type(a), type(b))

    def test_compat(self):
        assert is_datetime64tz_dtype(self.dtype)
        assert is_datetime64tz_dtype('datetime64[ns, US/Eastern]')
        assert is_datetime64_any_dtype(self.dtype)
        assert is_datetime64_any_dtype('datetime64[ns, US/Eastern]')
        assert is_datetime64_ns_dtype(self.dtype)
        assert is_datetime64_ns_dtype('datetime64[ns, US/Eastern]')
        assert not is_datetime64_dtype(self.dtype)
        assert not is_datetime64_dtype('datetime64[ns, US/Eastern]')

    def test_construction_from_string(self):
        result = DatetimeTZDtype.construct_from_string(
            'datetime64[ns, US/Eastern]')
        assert is_dtype_equal(self.dtype, result)
        pytest.raises(TypeError,
                      lambda: DatetimeTZDtype.construct_from_string('foo'))

    def test_construct_from_string_raises(self):
        with pytest.raises(TypeError, match="notatz"):
            DatetimeTZDtype.construct_from_string('datetime64[ns, notatz]')

        with pytest.raises(TypeError,
                           match="^Could not construct DatetimeTZDtype$"):
            DatetimeTZDtype.construct_from_string(['datetime64[ns, notatz]'])

    def test_is_dtype(self):
        assert not DatetimeTZDtype.is_dtype(None)
        assert DatetimeTZDtype.is_dtype(self.dtype)
        assert DatetimeTZDtype.is_dtype('datetime64[ns, US/Eastern]')
        assert not DatetimeTZDtype.is_dtype('foo')
        assert DatetimeTZDtype.is_dtype(DatetimeTZDtype('ns', 'US/Pacific'))
        assert not DatetimeTZDtype.is_dtype(np.float64)

    def test_equality(self):
        assert is_dtype_equal(self.dtype, 'datetime64[ns, US/Eastern]')
        assert is_dtype_equal(self.dtype, DatetimeTZDtype('ns', 'US/Eastern'))
        assert not is_dtype_equal(self.dtype, 'foo')
        assert not is_dtype_equal(self.dtype, DatetimeTZDtype('ns', 'CET'))
        assert not is_dtype_equal(DatetimeTZDtype('ns', 'US/Eastern'),
                                  DatetimeTZDtype('ns', 'US/Pacific'))

        # numpy compat
        assert is_dtype_equal(np.dtype("M8[ns]"), "datetime64[ns]")

    def test_basic(self):

        assert is_datetime64tz_dtype(self.dtype)

        dr = date_range('20130101', periods=3, tz='US/Eastern')
        s = Series(dr, name='A')

        # dtypes
        assert is_datetime64tz_dtype(s.dtype)
        assert is_datetime64tz_dtype(s)
        assert not is_datetime64tz_dtype(np.dtype('float64'))
        assert not is_datetime64tz_dtype(1.0)

        with tm.assert_produces_warning(FutureWarning):
            assert is_datetimetz(s)
            assert is_datetimetz(s.dtype)
            assert not is_datetimetz(np.dtype('float64'))
            assert not is_datetimetz(1.0)

    def test_dst(self):

        dr1 = date_range('2013-01-01', periods=3, tz='US/Eastern')
        s1 = Series(dr1, name='A')
        assert is_datetime64tz_dtype(s1)
        with tm.assert_produces_warning(FutureWarning):
            assert is_datetimetz(s1)

        dr2 = date_range('2013-08-01', periods=3, tz='US/Eastern')
        s2 = Series(dr2, name='A')
        assert is_datetime64tz_dtype(s2)
        with tm.assert_produces_warning(FutureWarning):
            assert is_datetimetz(s2)
        assert s1.dtype == s2.dtype

    @pytest.mark.parametrize('tz', ['UTC', 'US/Eastern'])
    @pytest.mark.parametrize('constructor', ['M8', 'datetime64'])
    def test_parser(self, tz, constructor):
        # pr #11245
        dtz_str = '{con}[ns, {tz}]'.format(con=constructor, tz=tz)
        result = DatetimeTZDtype.construct_from_string(dtz_str)
        expected = DatetimeTZDtype('ns', tz)
        assert result == expected

    def test_empty(self):
        with pytest.raises(TypeError, match="A 'tz' is required."):
            DatetimeTZDtype()


class TestPeriodDtype(Base):

    def create(self):
        return PeriodDtype('D')

    def test_hash_vs_equality(self):
        # make sure that we satisfy is semantics
        dtype = self.dtype
        dtype2 = PeriodDtype('D')
        dtype3 = PeriodDtype(dtype2)
        assert dtype == dtype2
        assert dtype2 == dtype
        assert dtype3 == dtype
        assert dtype is dtype2
        assert dtype2 is dtype
        assert dtype3 is dtype
        assert hash(dtype) == hash(dtype2)
        assert hash(dtype) == hash(dtype3)

    def test_construction(self):
        with pytest.raises(ValueError):
            PeriodDtype('xx')

        for s in ['period[D]', 'Period[D]', 'D']:
            dt = PeriodDtype(s)
            assert dt.freq == pd.tseries.offsets.Day()
            assert is_period_dtype(dt)

        for s in ['period[3D]', 'Period[3D]', '3D']:
            dt = PeriodDtype(s)
            assert dt.freq == pd.tseries.offsets.Day(3)
            assert is_period_dtype(dt)

        for s in ['period[26H]', 'Period[26H]', '26H',
                  'period[1D2H]', 'Period[1D2H]', '1D2H']:
            dt = PeriodDtype(s)
            assert dt.freq == pd.tseries.offsets.Hour(26)
            assert is_period_dtype(dt)

    def test_subclass(self):
Loading ...