# -*- coding: utf-8 -*-
"""
These the test the public routines exposed in types/common.py
related to inference and not otherwise tested in types/test_common.py
"""
import collections
from datetime import date, datetime, time, timedelta
from decimal import Decimal
from fractions import Fraction
from numbers import Number
import re
from warnings import catch_warnings, simplefilter
import numpy as np
import pytest
import pytz
from pandas._libs import iNaT, lib, missing as libmissing
from pandas.compat import PY2, StringIO, lrange, u
import pandas.util._test_decorators as td
from pandas.core.dtypes import inference
from pandas.core.dtypes.common import (
ensure_categorical, ensure_int32, is_bool, is_datetime64_any_dtype,
is_datetime64_dtype, is_datetime64_ns_dtype, is_datetime64tz_dtype,
is_float, is_integer, is_number, is_scalar, is_scipy_sparse,
is_timedelta64_dtype, is_timedelta64_ns_dtype)
import pandas as pd
from pandas import (
Categorical, DataFrame, DateOffset, DatetimeIndex, Index, Interval, Panel,
Period, Series, Timedelta, TimedeltaIndex, Timestamp, compat, isna)
from pandas.util import testing as tm
@pytest.fixture(params=[True, False], ids=str)
def coerce(request):
return request.param
# collect all objects to be tested for list-like-ness; use tuples of objects,
# whether they are list-like or not (special casing for sets), and their ID
ll_params = [
([1], True, 'list'), # noqa: E241
([], True, 'list-empty'), # noqa: E241
((1, ), True, 'tuple'), # noqa: E241
(tuple(), True, 'tuple-empty'), # noqa: E241
({'a': 1}, True, 'dict'), # noqa: E241
(dict(), True, 'dict-empty'), # noqa: E241
({'a', 1}, 'set', 'set'), # noqa: E241
(set(), 'set', 'set-empty'), # noqa: E241
(frozenset({'a', 1}), 'set', 'frozenset'), # noqa: E241
(frozenset(), 'set', 'frozenset-empty'), # noqa: E241
(iter([1, 2]), True, 'iterator'), # noqa: E241
(iter([]), True, 'iterator-empty'), # noqa: E241
((x for x in [1, 2]), True, 'generator'), # noqa: E241
((x for x in []), True, 'generator-empty'), # noqa: E241
(Series([1]), True, 'Series'), # noqa: E241
(Series([]), True, 'Series-empty'), # noqa: E241
(Series(['a']).str, True, 'StringMethods'), # noqa: E241
(Series([], dtype='O').str, True, 'StringMethods-empty'), # noqa: E241
(Index([1]), True, 'Index'), # noqa: E241
(Index([]), True, 'Index-empty'), # noqa: E241
(DataFrame([[1]]), True, 'DataFrame'), # noqa: E241
(DataFrame(), True, 'DataFrame-empty'), # noqa: E241
(np.ndarray((2,) * 1), True, 'ndarray-1d'), # noqa: E241
(np.array([]), True, 'ndarray-1d-empty'), # noqa: E241
(np.ndarray((2,) * 2), True, 'ndarray-2d'), # noqa: E241
(np.array([[]]), True, 'ndarray-2d-empty'), # noqa: E241
(np.ndarray((2,) * 3), True, 'ndarray-3d'), # noqa: E241
(np.array([[[]]]), True, 'ndarray-3d-empty'), # noqa: E241
(np.ndarray((2,) * 4), True, 'ndarray-4d'), # noqa: E241
(np.array([[[[]]]]), True, 'ndarray-4d-empty'), # noqa: E241
(np.array(2), False, 'ndarray-0d'), # noqa: E241
(1, False, 'int'), # noqa: E241
(b'123', False, 'bytes'), # noqa: E241
(b'', False, 'bytes-empty'), # noqa: E241
('123', False, 'string'), # noqa: E241
('', False, 'string-empty'), # noqa: E241
(str, False, 'string-type'), # noqa: E241
(object(), False, 'object'), # noqa: E241
(np.nan, False, 'NaN'), # noqa: E241
(None, False, 'None') # noqa: E241
]
objs, expected, ids = zip(*ll_params)
@pytest.fixture(params=zip(objs, expected), ids=ids)
def maybe_list_like(request):
return request.param
def test_is_list_like(maybe_list_like):
obj, expected = maybe_list_like
expected = True if expected == 'set' else expected
assert inference.is_list_like(obj) == expected
def test_is_list_like_disallow_sets(maybe_list_like):
obj, expected = maybe_list_like
expected = False if expected == 'set' else expected
assert inference.is_list_like(obj, allow_sets=False) == expected
def test_is_sequence():
is_seq = inference.is_sequence
assert (is_seq((1, 2)))
assert (is_seq([1, 2]))
assert (not is_seq("abcd"))
assert (not is_seq(u("abcd")))
assert (not is_seq(np.int64))
class A(object):
def __getitem__(self):
return 1
assert (not is_seq(A()))
def test_is_array_like():
assert inference.is_array_like(Series([]))
assert inference.is_array_like(Series([1, 2]))
assert inference.is_array_like(np.array(["a", "b"]))
assert inference.is_array_like(Index(["2016-01-01"]))
class DtypeList(list):
dtype = "special"
assert inference.is_array_like(DtypeList())
assert not inference.is_array_like([1, 2, 3])
assert not inference.is_array_like(tuple())
assert not inference.is_array_like("foo")
assert not inference.is_array_like(123)
@pytest.mark.parametrize('inner', [
[], [1], (1, ), (1, 2), {'a': 1}, {1, 'a'}, Series([1]),
Series([]), Series(['a']).str, (x for x in range(5))
])
@pytest.mark.parametrize('outer', [
list, Series, np.array, tuple
])
def test_is_nested_list_like_passes(inner, outer):
result = outer([inner for _ in range(5)])
assert inference.is_list_like(result)
@pytest.mark.parametrize('obj', [
'abc', [], [1], (1,), ['a'], 'a', {'a'},
[1, 2, 3], Series([1]), DataFrame({"A": [1]}),
([1, 2] for _ in range(5)),
])
def test_is_nested_list_like_fails(obj):
assert not inference.is_nested_list_like(obj)
@pytest.mark.parametrize(
"ll", [{}, {'A': 1}, Series([1]), collections.defaultdict()])
def test_is_dict_like_passes(ll):
assert inference.is_dict_like(ll)
@pytest.mark.parametrize("ll", [
'1', 1, [1, 2], (1, 2), range(2), Index([1]),
dict, collections.defaultdict, Series
])
def test_is_dict_like_fails(ll):
assert not inference.is_dict_like(ll)
@pytest.mark.parametrize("has_keys", [True, False])
@pytest.mark.parametrize("has_getitem", [True, False])
@pytest.mark.parametrize("has_contains", [True, False])
def test_is_dict_like_duck_type(has_keys, has_getitem, has_contains):
class DictLike(object):
def __init__(self, d):
self.d = d
if has_keys:
def keys(self):
return self.d.keys()
if has_getitem:
def __getitem__(self, key):
return self.d.__getitem__(key)
if has_contains:
def __contains__(self, key):
return self.d.__contains__(key)
d = DictLike({1: 2})
result = inference.is_dict_like(d)
expected = has_keys and has_getitem and has_contains
assert result is expected
def test_is_file_like():
class MockFile(object):
pass
is_file = inference.is_file_like
data = StringIO("data")
assert is_file(data)
# No read / write attributes
# No iterator attributes
m = MockFile()
assert not is_file(m)
MockFile.write = lambda self: 0
# Write attribute but not an iterator
m = MockFile()
assert not is_file(m)
# gh-16530: Valid iterator just means we have the
# __iter__ attribute for our purposes.
MockFile.__iter__ = lambda self: self
# Valid write-only file
m = MockFile()
assert is_file(m)
del MockFile.write
MockFile.read = lambda self: 0
# Valid read-only file
m = MockFile()
assert is_file(m)
# Iterator but no read / write attributes
data = [1, 2, 3]
assert not is_file(data)
@pytest.mark.parametrize(
"ll", [collections.namedtuple('Test', list('abc'))(1, 2, 3)])
def test_is_names_tuple_passes(ll):
assert inference.is_named_tuple(ll)
@pytest.mark.parametrize(
"ll", [(1, 2, 3), 'a', Series({'pi': 3.14})])
def test_is_names_tuple_fails(ll):
assert not inference.is_named_tuple(ll)
def test_is_hashable():
# all new-style classes are hashable by default
class HashableClass(object):
pass
class UnhashableClass1(object):
__hash__ = None
class UnhashableClass2(object):
def __hash__(self):
raise TypeError("Not hashable")
hashable = (1,
3.14,
np.float64(3.14),
'a',
tuple(),
(1, ),
HashableClass(), )
not_hashable = ([], UnhashableClass1(), )
abc_hashable_not_really_hashable = (([], ), UnhashableClass2(), )
for i in hashable:
assert inference.is_hashable(i)
for i in not_hashable:
assert not inference.is_hashable(i)
for i in abc_hashable_not_really_hashable:
assert not inference.is_hashable(i)
# numpy.array is no longer collections.Hashable as of
# https://github.com/numpy/numpy/pull/5326, just test
# is_hashable()
assert not inference.is_hashable(np.array([]))
# old-style classes in Python 2 don't appear hashable to
# collections.Hashable but also seem to support hash() by default
if PY2:
class OldStyleClass():
pass
c = OldStyleClass()
assert not isinstance(c, compat.Hashable)
assert inference.is_hashable(c)
hash(c) # this will not raise
@pytest.mark.parametrize(
"ll", [re.compile('ad')])
def test_is_re_passes(ll):
assert inference.is_re(ll)
@pytest.mark.parametrize(
"ll", ['x', 2, 3, object()])
def test_is_re_fails(ll):
assert not inference.is_re(ll)
@pytest.mark.parametrize(
"ll", [r'a', u('x'),
r'asdf',
re.compile('adsf'),
u(r'\u2233\s*'),
re.compile(r'')])
def test_is_recompilable_passes(ll):
assert inference.is_re_compilable(ll)
@pytest.mark.parametrize(
"ll", [1, [], object()])
def test_is_recompilable_fails(ll):
assert not inference.is_re_compilable(ll)
class TestInference(object):
def test_infer_dtype_bytes(self):
compare = 'string' if PY2 else 'bytes'
# string array of bytes
arr = np.array(list('abc'), dtype='S1')
assert lib.infer_dtype(arr, skipna=True) == compare
# object array of bytes
arr = arr.astype(object)
assert lib.infer_dtype(arr, skipna=True) == compare
# object array of bytes with missing values
assert lib.infer_dtype([b'a', np.nan, b'c'], skipna=True) == compare
Loading ...