Repository URL to install this package:
|
Version:
0.21.1 ▾
|
# -*- coding: utf-8 -*-
"""
These the test the public routines exposed in types/common.py
related to inference and not otherwise tested in types/test_common.py
"""
from warnings import catch_warnings
import collections
import re
from datetime import datetime, date, timedelta, time
from decimal import Decimal
import numpy as np
import pytz
import pytest
import pandas as pd
from pandas._libs import tslib, lib
from pandas import (Series, Index, DataFrame, Timedelta,
DatetimeIndex, TimedeltaIndex, Timestamp,
Panel, Period, Categorical, isna)
from pandas.compat import u, PY2, PY3, StringIO, lrange
from pandas.core.dtypes import inference
from pandas.core.dtypes.common import (
is_timedelta64_dtype,
is_timedelta64_ns_dtype,
is_datetime64_dtype,
is_datetime64_ns_dtype,
is_datetime64_any_dtype,
is_datetime64tz_dtype,
is_number,
is_integer,
is_float,
is_bool,
is_scalar,
is_scipy_sparse,
_ensure_int32,
_ensure_categorical)
from pandas.util import testing as tm
@pytest.fixture(params=[True, False], ids=lambda val: str(val))
def coerce(request):
return request.param
def test_is_sequence():
is_seq = inference.is_sequence
assert (is_seq((1, 2)))
assert (is_seq([1, 2]))
assert (not is_seq("abcd"))
assert (not is_seq(u("abcd")))
assert (not is_seq(np.int64))
class A(object):
def __getitem__(self):
return 1
assert (not is_seq(A()))
def test_is_list_like():
passes = ([], [1], (1, ), (1, 2), {'a': 1}, set([1, 'a']), Series([1]),
Series([]), Series(['a']).str)
fails = (1, '2', object(), str)
for p in passes:
assert inference.is_list_like(p)
for f in fails:
assert not inference.is_list_like(f)
@pytest.mark.parametrize('inner', [
[], [1], (1, ), (1, 2), {'a': 1}, set([1, 'a']), Series([1]),
Series([]), Series(['a']).str, (x for x in range(5))
])
@pytest.mark.parametrize('outer', [
list, Series, np.array, tuple
])
def test_is_nested_list_like_passes(inner, outer):
result = outer([inner for _ in range(5)])
assert inference.is_list_like(result)
@pytest.mark.parametrize('obj', [
'abc', [], [1], (1,), ['a'], 'a', {'a'},
[1, 2, 3], Series([1]), DataFrame({"A": [1]}),
([1, 2] for _ in range(5)),
])
def test_is_nested_list_like_fails(obj):
assert not inference.is_nested_list_like(obj)
def test_is_dict_like():
passes = [{}, {'A': 1}, Series([1])]
fails = ['1', 1, [1, 2], (1, 2), range(2), Index([1])]
for p in passes:
assert inference.is_dict_like(p)
for f in fails:
assert not inference.is_dict_like(f)
def test_is_file_like():
class MockFile(object):
pass
is_file = inference.is_file_like
data = StringIO("data")
assert is_file(data)
# No read / write attributes
# No iterator attributes
m = MockFile()
assert not is_file(m)
MockFile.write = lambda self: 0
# Write attribute but not an iterator
m = MockFile()
assert not is_file(m)
# gh-16530: Valid iterator just means we have the
# __iter__ attribute for our purposes.
MockFile.__iter__ = lambda self: self
# Valid write-only file
m = MockFile()
assert is_file(m)
del MockFile.write
MockFile.read = lambda self: 0
# Valid read-only file
m = MockFile()
assert is_file(m)
# Iterator but no read / write attributes
data = [1, 2, 3]
assert not is_file(data)
if PY3:
from unittest import mock
assert not is_file(mock.Mock())
def test_is_named_tuple():
passes = (collections.namedtuple('Test', list('abc'))(1, 2, 3), )
fails = ((1, 2, 3), 'a', Series({'pi': 3.14}))
for p in passes:
assert inference.is_named_tuple(p)
for f in fails:
assert not inference.is_named_tuple(f)
def test_is_hashable():
# all new-style classes are hashable by default
class HashableClass(object):
pass
class UnhashableClass1(object):
__hash__ = None
class UnhashableClass2(object):
def __hash__(self):
raise TypeError("Not hashable")
hashable = (1,
3.14,
np.float64(3.14),
'a',
tuple(),
(1, ),
HashableClass(), )
not_hashable = ([], UnhashableClass1(), )
abc_hashable_not_really_hashable = (([], ), UnhashableClass2(), )
for i in hashable:
assert inference.is_hashable(i)
for i in not_hashable:
assert not inference.is_hashable(i)
for i in abc_hashable_not_really_hashable:
assert not inference.is_hashable(i)
# numpy.array is no longer collections.Hashable as of
# https://github.com/numpy/numpy/pull/5326, just test
# is_hashable()
assert not inference.is_hashable(np.array([]))
# old-style classes in Python 2 don't appear hashable to
# collections.Hashable but also seem to support hash() by default
if PY2:
class OldStyleClass():
pass
c = OldStyleClass()
assert not isinstance(c, collections.Hashable)
assert inference.is_hashable(c)
hash(c) # this will not raise
def test_is_re():
passes = re.compile('ad'),
fails = 'x', 2, 3, object()
for p in passes:
assert inference.is_re(p)
for f in fails:
assert not inference.is_re(f)
def test_is_recompilable():
passes = (r'a', u('x'), r'asdf', re.compile('adsf'), u(r'\u2233\s*'),
re.compile(r''))
fails = 1, [], object()
for p in passes:
assert inference.is_re_compilable(p)
for f in fails:
assert not inference.is_re_compilable(f)
class TestInference(object):
def test_infer_dtype_bytes(self):
compare = 'string' if PY2 else 'bytes'
# string array of bytes
arr = np.array(list('abc'), dtype='S1')
assert lib.infer_dtype(arr) == compare
# object array of bytes
arr = arr.astype(object)
assert lib.infer_dtype(arr) == compare
# object array of bytes with missing values
assert lib.infer_dtype([b'a', np.nan, b'c'], skipna=True) == compare
def test_isinf_scalar(self):
# GH 11352
assert lib.isposinf_scalar(float('inf'))
assert lib.isposinf_scalar(np.inf)
assert not lib.isposinf_scalar(-np.inf)
assert not lib.isposinf_scalar(1)
assert not lib.isposinf_scalar('a')
assert lib.isneginf_scalar(float('-inf'))
assert lib.isneginf_scalar(-np.inf)
assert not lib.isneginf_scalar(np.inf)
assert not lib.isneginf_scalar(1)
assert not lib.isneginf_scalar('a')
def test_maybe_convert_numeric_infinities(self):
# see gh-13274
infinities = ['inf', 'inF', 'iNf', 'Inf',
'iNF', 'InF', 'INf', 'INF']
na_values = set(['', 'NULL', 'nan'])
pos = np.array(['inf'], dtype=np.float64)
neg = np.array(['-inf'], dtype=np.float64)
msg = "Unable to parse string"
for infinity in infinities:
for maybe_int in (True, False):
out = lib.maybe_convert_numeric(
np.array([infinity], dtype=object),
na_values, maybe_int)
tm.assert_numpy_array_equal(out, pos)
out = lib.maybe_convert_numeric(
np.array(['-' + infinity], dtype=object),
na_values, maybe_int)
tm.assert_numpy_array_equal(out, neg)
out = lib.maybe_convert_numeric(
np.array([u(infinity)], dtype=object),
na_values, maybe_int)
tm.assert_numpy_array_equal(out, pos)
out = lib.maybe_convert_numeric(
np.array(['+' + infinity], dtype=object),
na_values, maybe_int)
tm.assert_numpy_array_equal(out, pos)
# too many characters
with tm.assert_raises_regex(ValueError, msg):
lib.maybe_convert_numeric(
np.array(['foo_' + infinity], dtype=object),
na_values, maybe_int)
def test_maybe_convert_numeric_post_floatify_nan(self):
# see gh-13314
data = np.array(['1.200', '-999.000', '4.500'], dtype=object)
expected = np.array([1.2, np.nan, 4.5], dtype=np.float64)
nan_values = set([-999, -999.0])
for coerce_type in (True, False):
out = lib.maybe_convert_numeric(data, nan_values, coerce_type)
tm.assert_numpy_array_equal(out, expected)
def test_convert_infs(self):
arr = np.array(['inf', 'inf', 'inf'], dtype='O')
result = lib.maybe_convert_numeric(arr, set(), False)
assert result.dtype == np.float64
arr = np.array(['-inf', '-inf', '-inf'], dtype='O')
result = lib.maybe_convert_numeric(arr, set(), False)
assert result.dtype == np.float64
def test_scientific_no_exponent(self):
# See PR 12215
arr = np.array(['42E', '2E', '99e', '6e'], dtype='O')
result = lib.maybe_convert_numeric(arr, set(), False, True)
assert np.all(np.isnan(result))
def test_convert_non_hashable(self):
# GH13324
# make sure that we are handing non-hashables
arr = np.array([[10.0, 2], 1.0, 'apple'])
result = lib.maybe_convert_numeric(arr, set(), False, True)
tm.assert_numpy_array_equal(result, np.array([np.nan, 1.0, np.nan]))
def test_convert_numeric_uint64(self):
arr = np.array([2**63], dtype=object)
exp = np.array([2**63], dtype=np.uint64)
tm.assert_numpy_array_equal(lib.maybe_convert_numeric(arr, set()), exp)
arr = np.array([str(2**63)], dtype=object)
exp = np.array([2**63], dtype=np.uint64)
tm.assert_numpy_array_equal(lib.maybe_convert_numeric(arr, set()), exp)
arr = np.array([np.uint64(2**63)], dtype=object)
exp = np.array([2**63], dtype=np.uint64)
tm.assert_numpy_array_equal(lib.maybe_convert_numeric(arr, set()), exp)
@pytest.mark.parametrize("arr", [
np.array([2**63, np.nan], dtype=object),
np.array([str(2**63), np.nan], dtype=object),
np.array([np.nan, 2**63], dtype=object),
np.array([np.nan, str(2**63)], dtype=object)])
def test_convert_numeric_uint64_nan(self, coerce, arr):
expected = arr.astype(float) if coerce else arr.copy()
result = lib.maybe_convert_numeric(arr, set(),
coerce_numeric=coerce)
tm.assert_almost_equal(result, expected)
def test_convert_numeric_uint64_nan_values(self, coerce):
arr = np.array([2**63, 2**63 + 1], dtype=object)
na_values = set([2**63])
expected = (np.array([np.nan, 2**63 + 1], dtype=float)
if coerce else arr.copy())
result = lib.maybe_convert_numeric(arr, na_values,
coerce_numeric=coerce)
tm.assert_almost_equal(result, expected)
@pytest.mark.parametrize("case", [
np.array([2**63, -1], dtype=object),
np.array([str(2**63), -1], dtype=object),
np.array([str(2**63), str(-1)], dtype=object),
np.array([-1, 2**63], dtype=object),
np.array([-1, str(2**63)], dtype=object),
np.array([str(-1), str(2**63)], dtype=object)])
def test_convert_numeric_int64_uint64(self, case, coerce):
expected = case.astype(float) if coerce else case.copy()
result = lib.maybe_convert_numeric(case, set(), coerce_numeric=coerce)
tm.assert_almost_equal(result, expected)
def test_maybe_convert_objects_uint64(self):
# see gh-4471
arr = np.array([2**63], dtype=object)
exp = np.array([2**63], dtype=np.uint64)
tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp)
# NumPy bug: can't compare uint64 to int64, as that
# results in both casting to float64, so we should
# make sure that this function is robust against it
arr = np.array([np.uint64(2**63)], dtype=object)
exp = np.array([2**63], dtype=np.uint64)
tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp)
arr = np.array([2, -1], dtype=object)
exp = np.array([2, -1], dtype=np.int64)
tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp)
arr = np.array([2**63, -1], dtype=object)
exp = np.array([2**63, -1], dtype=object)
tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp)
def test_mixed_dtypes_remain_object_array(self):
# GH14956
array = np.array([datetime(2015, 1, 1, tzinfo=pytz.utc), 1],
dtype=object)
result = lib.maybe_convert_objects(array, convert_datetime=1)
tm.assert_numpy_array_equal(result, array)
class TestTypeInference(object):
def test_length_zero(self):
result = lib.infer_dtype(np.array([], dtype='i4'))
assert result == 'integer'
result = lib.infer_dtype([])
assert result == 'empty'
# GH 18004
arr = np.array([np.array([], dtype=object),
np.array([], dtype=object)])
result = lib.infer_dtype(arr)
assert result == 'empty'
def test_integers(self):
arr = np.array([1, 2, 3, np.int64(4), np.int32(5)], dtype='O')
result = lib.infer_dtype(arr)
assert result == 'integer'
arr = np.array([1, 2, 3, np.int64(4), np.int32(5), 'foo'], dtype='O')
result = lib.infer_dtype(arr)
assert result == 'mixed-integer'
arr = np.array([1, 2, 3, 4, 5], dtype='i4')
result = lib.infer_dtype(arr)
assert result == 'integer'
def test_bools(self):
arr = np.array([True, False, True, True, True], dtype='O')
result = lib.infer_dtype(arr)
assert result == 'boolean'
arr = np.array([np.bool_(True), np.bool_(False)], dtype='O')
result = lib.infer_dtype(arr)
assert result == 'boolean'
arr = np.array([True, False, True, 'foo'], dtype='O')
result = lib.infer_dtype(arr)
assert result == 'mixed'
arr = np.array([True, False, True], dtype=bool)
result = lib.infer_dtype(arr)
assert result == 'boolean'
arr = np.array([True, np.nan, False], dtype='O')
result = lib.infer_dtype(arr, skipna=True)
assert result == 'boolean'
def test_floats(self):
arr = np.array([1., 2., 3., np.float64(4), np.float32(5)], dtype='O')
result = lib.infer_dtype(arr)
assert result == 'floating'
arr = np.array([1, 2, 3, np.float64(4), np.float32(5), 'foo'],
dtype='O')
result = lib.infer_dtype(arr)
assert result == 'mixed-integer'
arr = np.array([1, 2, 3, 4, 5], dtype='f4')
result = lib.infer_dtype(arr)
assert result == 'floating'
arr = np.array([1, 2, 3, 4, 5], dtype='f8')
result = lib.infer_dtype(arr)
assert result == 'floating'
def test_decimals(self):
# GH15690
arr = np.array([Decimal(1), Decimal(2), Decimal(3)])
result = lib.infer_dtype(arr)
assert result == 'decimal'
arr = np.array([1.0, 2.0, Decimal(3)])
result = lib.infer_dtype(arr)
assert result == 'mixed'
arr = np.array([Decimal(1), Decimal('NaN'), Decimal(3)])
result = lib.infer_dtype(arr)
assert result == 'decimal'
arr = np.array([Decimal(1), np.nan, Decimal(3)], dtype='O')
result = lib.infer_dtype(arr)
assert result == 'decimal'
def test_string(self):
pass
def test_unicode(self):
arr = [u'a', np.nan, u'c']
result = lib.infer_dtype(arr)
assert result == 'mixed'
arr = [u'a', np.nan, u'c']
result = lib.infer_dtype(arr, skipna=True)
expected = 'unicode' if PY2 else 'string'
assert result == expected
def test_datetime(self):
dates = [datetime(2012, 1, x) for x in range(1, 20)]
index = Index(dates)
assert index.inferred_type == 'datetime64'
def test_infer_dtype_datetime(self):
arr = np.array([Timestamp('2011-01-01'),
Timestamp('2011-01-02')])
assert lib.infer_dtype(arr) == 'datetime'
arr = np.array([np.datetime64('2011-01-01'),
np.datetime64('2011-01-01')], dtype=object)
assert lib.infer_dtype(arr) == 'datetime64'
arr = np.array([datetime(2011, 1, 1), datetime(2012, 2, 1)])
assert lib.infer_dtype(arr) == 'datetime'
# starts with nan
for n in [pd.NaT, np.nan]:
arr = np.array([n, pd.Timestamp('2011-01-02')])
assert lib.infer_dtype(arr) == 'datetime'
arr = np.array([n, np.datetime64('2011-01-02')])
assert lib.infer_dtype(arr) == 'datetime64'
arr = np.array([n, datetime(2011, 1, 1)])
assert lib.infer_dtype(arr) == 'datetime'
arr = np.array([n, pd.Timestamp('2011-01-02'), n])
assert lib.infer_dtype(arr) == 'datetime'
arr = np.array([n, np.datetime64('2011-01-02'), n])
assert lib.infer_dtype(arr) == 'datetime64'
arr = np.array([n, datetime(2011, 1, 1), n])
assert lib.infer_dtype(arr) == 'datetime'
# different type of nat
arr = np.array([np.timedelta64('nat'),
np.datetime64('2011-01-02')], dtype=object)
assert lib.infer_dtype(arr) == 'mixed'
arr = np.array([np.datetime64('2011-01-02'),
np.timedelta64('nat')], dtype=object)
assert lib.infer_dtype(arr) == 'mixed'
# mixed datetime
arr = np.array([datetime(2011, 1, 1),
pd.Timestamp('2011-01-02')])
assert lib.infer_dtype(arr) == 'datetime'
# should be datetime?
arr = np.array([np.datetime64('2011-01-01'),
pd.Timestamp('2011-01-02')])
assert lib.infer_dtype(arr) == 'mixed'
arr = np.array([pd.Timestamp('2011-01-02'),
np.datetime64('2011-01-01')])
assert lib.infer_dtype(arr) == 'mixed'
arr = np.array([np.nan, pd.Timestamp('2011-01-02'), 1])
assert lib.infer_dtype(arr) == 'mixed-integer'
arr = np.array([np.nan, pd.Timestamp('2011-01-02'), 1.1])
assert lib.infer_dtype(arr) == 'mixed'
arr = np.array([np.nan, '2011-01-01', pd.Timestamp('2011-01-02')])
assert lib.infer_dtype(arr) == 'mixed'
def test_infer_dtype_timedelta(self):
arr = np.array([pd.Timedelta('1 days'),
pd.Timedelta('2 days')])
assert lib.infer_dtype(arr) == 'timedelta'
arr = np.array([np.timedelta64(1, 'D'),
np.timedelta64(2, 'D')], dtype=object)
assert lib.infer_dtype(arr) == 'timedelta'
arr = np.array([timedelta(1), timedelta(2)])
assert lib.infer_dtype(arr) == 'timedelta'
# starts with nan
for n in [pd.NaT, np.nan]:
arr = np.array([n, Timedelta('1 days')])
assert lib.infer_dtype(arr) == 'timedelta'
arr = np.array([n, np.timedelta64(1, 'D')])
assert lib.infer_dtype(arr) == 'timedelta'
arr = np.array([n, timedelta(1)])
assert lib.infer_dtype(arr) == 'timedelta'
arr = np.array([n, pd.Timedelta('1 days'), n])
assert lib.infer_dtype(arr) == 'timedelta'
arr = np.array([n, np.timedelta64(1, 'D'), n])
assert lib.infer_dtype(arr) == 'timedelta'
arr = np.array([n, timedelta(1), n])
assert lib.infer_dtype(arr) == 'timedelta'
# different type of nat
arr = np.array([np.datetime64('nat'), np.timedelta64(1, 'D')],
dtype=object)
assert lib.infer_dtype(arr) == 'mixed'
arr = np.array([np.timedelta64(1, 'D'), np.datetime64('nat')],
dtype=object)
assert lib.infer_dtype(arr) == 'mixed'
def test_infer_dtype_period(self):
# GH 13664
arr = np.array([pd.Period('2011-01', freq='D'),
pd.Period('2011-02', freq='D')])
assert lib.infer_dtype(arr) == 'period'
arr = np.array([pd.Period('2011-01', freq='D'),
pd.Period('2011-02', freq='M')])
assert lib.infer_dtype(arr) == 'period'
# starts with nan
for n in [pd.NaT, np.nan]:
arr = np.array([n, pd.Period('2011-01', freq='D')])
assert lib.infer_dtype(arr) == 'period'
arr = np.array([n, pd.Period('2011-01', freq='D'), n])
assert lib.infer_dtype(arr) == 'period'
# different type of nat
arr = np.array([np.datetime64('nat'), pd.Period('2011-01', freq='M')],
dtype=object)
assert lib.infer_dtype(arr) == 'mixed'
arr = np.array([pd.Period('2011-01', freq='M'), np.datetime64('nat')],
dtype=object)
assert lib.infer_dtype(arr) == 'mixed'
def test_infer_dtype_all_nan_nat_like(self):
arr = np.array([np.nan, np.nan])
assert lib.infer_dtype(arr) == 'floating'
# nan and None mix are result in mixed
arr = np.array([np.nan, np.nan, None])
assert lib.infer_dtype(arr) == 'mixed'
arr = np.array([None, np.nan, np.nan])
assert lib.infer_dtype(arr) == 'mixed'
# pd.NaT
arr = np.array([pd.NaT])
assert lib.infer_dtype(arr) == 'datetime'
arr = np.array([pd.NaT, np.nan])
assert lib.infer_dtype(arr) == 'datetime'
arr = np.array([np.nan, pd.NaT])
assert lib.infer_dtype(arr) == 'datetime'
arr = np.array([np.nan, pd.NaT, np.nan])
assert lib.infer_dtype(arr) == 'datetime'
arr = np.array([None, pd.NaT, None])
assert lib.infer_dtype(arr) == 'datetime'
# np.datetime64(nat)
arr = np.array([np.datetime64('nat')])
assert lib.infer_dtype(arr) == 'datetime64'
for n in [np.nan, pd.NaT, None]:
arr = np.array([n, np.datetime64('nat'), n])
assert lib.infer_dtype(arr) == 'datetime64'
arr = np.array([pd.NaT, n, np.datetime64('nat'), n])
assert lib.infer_dtype(arr) == 'datetime64'
arr = np.array([np.timedelta64('nat')], dtype=object)
assert lib.infer_dtype(arr) == 'timedelta'
for n in [np.nan, pd.NaT, None]:
arr = np.array([n, np.timedelta64('nat'), n])
assert lib.infer_dtype(arr) == 'timedelta'
arr = np.array([pd.NaT, n, np.timedelta64('nat'), n])
assert lib.infer_dtype(arr) == 'timedelta'
# datetime / timedelta mixed
arr = np.array([pd.NaT, np.datetime64('nat'),
np.timedelta64('nat'), np.nan])
assert lib.infer_dtype(arr) == 'mixed'
arr = np.array([np.timedelta64('nat'), np.datetime64('nat')],
dtype=object)
assert lib.infer_dtype(arr) == 'mixed'
def test_is_datetimelike_array_all_nan_nat_like(self):
arr = np.array([np.nan, pd.NaT, np.datetime64('nat')])
assert lib.is_datetime_array(arr)
assert lib.is_datetime64_array(arr)
assert not lib.is_timedelta_array(arr)
assert not lib.is_timedelta64_array(arr)
assert not lib.is_timedelta_or_timedelta64_array(arr)
arr = np.array([np.nan, pd.NaT, np.timedelta64('nat')])
assert not lib.is_datetime_array(arr)
assert not lib.is_datetime64_array(arr)
assert lib.is_timedelta_array(arr)
assert lib.is_timedelta64_array(arr)
assert lib.is_timedelta_or_timedelta64_array(arr)
arr = np.array([np.nan, pd.NaT, np.datetime64('nat'),
np.timedelta64('nat')])
assert not lib.is_datetime_array(arr)
assert not lib.is_datetime64_array(arr)
assert not lib.is_timedelta_array(arr)
assert not lib.is_timedelta64_array(arr)
assert not lib.is_timedelta_or_timedelta64_array(arr)
arr = np.array([np.nan, pd.NaT])
assert lib.is_datetime_array(arr)
assert lib.is_datetime64_array(arr)
assert lib.is_timedelta_array(arr)
assert lib.is_timedelta64_array(arr)
assert lib.is_timedelta_or_timedelta64_array(arr)
arr = np.array([np.nan, np.nan], dtype=object)
assert not lib.is_datetime_array(arr)
assert not lib.is_datetime64_array(arr)
assert not lib.is_timedelta_array(arr)
assert not lib.is_timedelta64_array(arr)
assert not lib.is_timedelta_or_timedelta64_array(arr)
def test_date(self):
dates = [date(2012, 1, day) for day in range(1, 20)]
index = Index(dates)
assert index.inferred_type == 'date'
dates = [date(2012, 1, day) for day in range(1, 20)] + [np.nan]
result = lib.infer_dtype(dates)
assert result == 'mixed'
result = lib.infer_dtype(dates, skipna=True)
assert result == 'date'
def test_to_object_array_tuples(self):
r = (5, 6)
values = [r]
result = lib.to_object_array_tuples(values)
try:
# make sure record array works
from collections import namedtuple
record = namedtuple('record', 'x y')
r = record(5, 6)
values = [r]
result = lib.to_object_array_tuples(values) # noqa
except ImportError:
pass
def test_object(self):
# GH 7431
# cannot infer more than this as only a single element
arr = np.array([None], dtype='O')
result = lib.infer_dtype(arr)
assert result == 'mixed'
def test_to_object_array_width(self):
# see gh-13320
rows = [[1, 2, 3], [4, 5, 6]]
expected = np.array(rows, dtype=object)
out = lib.to_object_array(rows)
tm.assert_numpy_array_equal(out, expected)
expected = np.array(rows, dtype=object)
out = lib.to_object_array(rows, min_width=1)
tm.assert_numpy_array_equal(out, expected)
expected = np.array([[1, 2, 3, None, None],
[4, 5, 6, None, None]], dtype=object)
out = lib.to_object_array(rows, min_width=5)
tm.assert_numpy_array_equal(out, expected)
def test_is_period(self):
assert lib.is_period(pd.Period('2011-01', freq='M'))
assert not lib.is_period(pd.PeriodIndex(['2011-01'], freq='M'))
assert not lib.is_period(pd.Timestamp('2011-01'))
assert not lib.is_period(1)
assert not lib.is_period(np.nan)
def test_categorical(self):
# GH 8974
from pandas import Categorical, Series
arr = Categorical(list('abc'))
result = lib.infer_dtype(arr)
assert result == 'categorical'
result = lib.infer_dtype(Series(arr))
assert result == 'categorical'
arr = Categorical(list('abc'), categories=['cegfab'], ordered=True)
result = lib.infer_dtype(arr)
assert result == 'categorical'
result = lib.infer_dtype(Series(arr))
assert result == 'categorical'
class TestNumberScalar(object):
def test_is_number(self):
assert is_number(True)
assert is_number(1)
assert is_number(1.1)
assert is_number(1 + 3j)
assert is_number(np.bool(False))
assert is_number(np.int64(1))
assert is_number(np.float64(1.1))
assert is_number(np.complex128(1 + 3j))
assert is_number(np.nan)
assert not is_number(None)
assert not is_number('x')
assert not is_number(datetime(2011, 1, 1))
assert not is_number(np.datetime64('2011-01-01'))
assert not is_number(Timestamp('2011-01-01'))
assert not is_number(Timestamp('2011-01-01', tz='US/Eastern'))
assert not is_number(timedelta(1000))
assert not is_number(Timedelta('1 days'))
# questionable
assert not is_number(np.bool_(False))
assert is_number(np.timedelta64(1, 'D'))
def test_is_bool(self):
assert is_bool(True)
assert is_bool(np.bool(False))
assert is_bool(np.bool_(False))
assert not is_bool(1)
assert not is_bool(1.1)
assert not is_bool(1 + 3j)
assert not is_bool(np.int64(1))
assert not is_bool(np.float64(1.1))
assert not is_bool(np.complex128(1 + 3j))
assert not is_bool(np.nan)
assert not is_bool(None)
assert not is_bool('x')
assert not is_bool(datetime(2011, 1, 1))
assert not is_bool(np.datetime64('2011-01-01'))
assert not is_bool(Timestamp('2011-01-01'))
assert not is_bool(Timestamp('2011-01-01', tz='US/Eastern'))
assert not is_bool(timedelta(1000))
assert not is_bool(np.timedelta64(1, 'D'))
assert not is_bool(Timedelta('1 days'))
def test_is_integer(self):
assert is_integer(1)
assert is_integer(np.int64(1))
assert not is_integer(True)
assert not is_integer(1.1)
assert not is_integer(1 + 3j)
assert not is_integer(np.bool(False))
assert not is_integer(np.bool_(False))
assert not is_integer(np.float64(1.1))
assert not is_integer(np.complex128(1 + 3j))
assert not is_integer(np.nan)
assert not is_integer(None)
assert not is_integer('x')
assert not is_integer(datetime(2011, 1, 1))
assert not is_integer(np.datetime64('2011-01-01'))
assert not is_integer(Timestamp('2011-01-01'))
assert not is_integer(Timestamp('2011-01-01', tz='US/Eastern'))
assert not is_integer(timedelta(1000))
assert not is_integer(Timedelta('1 days'))
# questionable
assert is_integer(np.timedelta64(1, 'D'))
def test_is_float(self):
assert is_float(1.1)
assert is_float(np.float64(1.1))
assert is_float(np.nan)
assert not is_float(True)
assert not is_float(1)
assert not is_float(1 + 3j)
assert not is_float(np.bool(False))
assert not is_float(np.bool_(False))
assert not is_float(np.int64(1))
assert not is_float(np.complex128(1 + 3j))
assert not is_float(None)
assert not is_float('x')
assert not is_float(datetime(2011, 1, 1))
assert not is_float(np.datetime64('2011-01-01'))
assert not is_float(Timestamp('2011-01-01'))
assert not is_float(Timestamp('2011-01-01', tz='US/Eastern'))
assert not is_float(timedelta(1000))
assert not is_float(np.timedelta64(1, 'D'))
assert not is_float(Timedelta('1 days'))
def test_is_datetime_dtypes(self):
ts = pd.date_range('20130101', periods=3)
tsa = pd.date_range('20130101', periods=3, tz='US/Eastern')
assert is_datetime64_dtype('datetime64')
assert is_datetime64_dtype('datetime64[ns]')
assert is_datetime64_dtype(ts)
assert not is_datetime64_dtype(tsa)
assert not is_datetime64_ns_dtype('datetime64')
assert is_datetime64_ns_dtype('datetime64[ns]')
assert is_datetime64_ns_dtype(ts)
assert is_datetime64_ns_dtype(tsa)
assert is_datetime64_any_dtype('datetime64')
assert is_datetime64_any_dtype('datetime64[ns]')
assert is_datetime64_any_dtype(ts)
assert is_datetime64_any_dtype(tsa)
assert not is_datetime64tz_dtype('datetime64')
assert not is_datetime64tz_dtype('datetime64[ns]')
assert not is_datetime64tz_dtype(ts)
assert is_datetime64tz_dtype(tsa)
for tz in ['US/Eastern', 'UTC']:
dtype = 'datetime64[ns, {}]'.format(tz)
assert not is_datetime64_dtype(dtype)
assert is_datetime64tz_dtype(dtype)
assert is_datetime64_ns_dtype(dtype)
assert is_datetime64_any_dtype(dtype)
def test_is_timedelta(self):
assert is_timedelta64_dtype('timedelta64')
assert is_timedelta64_dtype('timedelta64[ns]')
assert not is_timedelta64_ns_dtype('timedelta64')
assert is_timedelta64_ns_dtype('timedelta64[ns]')
tdi = TimedeltaIndex([1e14, 2e14], dtype='timedelta64')
assert is_timedelta64_dtype(tdi)
assert is_timedelta64_ns_dtype(tdi)
assert is_timedelta64_ns_dtype(tdi.astype('timedelta64[ns]'))
# Conversion to Int64Index:
assert not is_timedelta64_ns_dtype(tdi.astype('timedelta64'))
assert not is_timedelta64_ns_dtype(tdi.astype('timedelta64[h]'))
class Testisscalar(object):
def test_isscalar_builtin_scalars(self):
assert is_scalar(None)
assert is_scalar(True)
assert is_scalar(False)
assert is_scalar(0.)
assert is_scalar(np.nan)
assert is_scalar('foobar')
assert is_scalar(b'foobar')
assert is_scalar(u('efoobar'))
assert is_scalar(datetime(2014, 1, 1))
assert is_scalar(date(2014, 1, 1))
assert is_scalar(time(12, 0))
assert is_scalar(timedelta(hours=1))
assert is_scalar(pd.NaT)
def test_isscalar_builtin_nonscalars(self):
assert not is_scalar({})
assert not is_scalar([])
assert not is_scalar([1])
assert not is_scalar(())
assert not is_scalar((1, ))
assert not is_scalar(slice(None))
assert not is_scalar(Ellipsis)
def test_isscalar_numpy_array_scalars(self):
assert is_scalar(np.int64(1))
assert is_scalar(np.float64(1.))
assert is_scalar(np.int32(1))
assert is_scalar(np.object_('foobar'))
assert is_scalar(np.str_('foobar'))
assert is_scalar(np.unicode_(u('foobar')))
assert is_scalar(np.bytes_(b'foobar'))
assert is_scalar(np.datetime64('2014-01-01'))
assert is_scalar(np.timedelta64(1, 'h'))
def test_isscalar_numpy_zerodim_arrays(self):
for zerodim in [np.array(1), np.array('foobar'),
np.array(np.datetime64('2014-01-01')),
np.array(np.timedelta64(1, 'h')),
np.array(np.datetime64('NaT'))]:
assert not is_scalar(zerodim)
assert is_scalar(lib.item_from_zerodim(zerodim))
def test_isscalar_numpy_arrays(self):
assert not is_scalar(np.array([]))
assert not is_scalar(np.array([[]]))
assert not is_scalar(np.matrix('1; 2'))
def test_isscalar_pandas_scalars(self):
assert is_scalar(Timestamp('2014-01-01'))
assert is_scalar(Timedelta(hours=1))
assert is_scalar(Period('2014-01-01'))
def test_lisscalar_pandas_containers(self):
assert not is_scalar(Series())
assert not is_scalar(Series([1]))
assert not is_scalar(DataFrame())
assert not is_scalar(DataFrame([[1]]))
with catch_warnings(record=True):
assert not is_scalar(Panel())
assert not is_scalar(Panel([[[1]]]))
assert not is_scalar(Index([]))
assert not is_scalar(Index([1]))
def test_datetimeindex_from_empty_datetime64_array():
for unit in ['ms', 'us', 'ns']:
idx = DatetimeIndex(np.array([], dtype='datetime64[%s]' % unit))
assert (len(idx) == 0)
def test_nan_to_nat_conversions():
df = DataFrame(dict({
'A': np.asarray(
lrange(10), dtype='float64'),
'B': Timestamp('20010101')
}))
df.iloc[3:6, :] = np.nan
result = df.loc[4, 'B'].value
assert (result == tslib.iNaT)
s = df['B'].copy()
s._data = s._data.setitem(indexer=tuple([slice(8, 9)]), value=np.nan)
assert (isna(s[8]))
# numpy < 1.7.0 is wrong
from distutils.version import LooseVersion
if LooseVersion(np.__version__) >= '1.7.0':
assert (s[8].value == np.datetime64('NaT').astype(np.int64))
def test_is_scipy_sparse(spmatrix): # noqa: F811
tm._skip_if_no_scipy()
assert is_scipy_sparse(spmatrix([[0, 1]]))
assert not is_scipy_sparse(np.array([1]))
def test_ensure_int32():
values = np.arange(10, dtype=np.int32)
result = _ensure_int32(values)
assert (result.dtype == np.int32)
values = np.arange(10, dtype=np.int64)
result = _ensure_int32(values)
assert (result.dtype == np.int32)
def test_ensure_categorical():
values = np.arange(10, dtype=np.int32)
result = _ensure_categorical(values)
assert (result.dtype == 'category')
values = Categorical(values)
result = _ensure_categorical(values)
tm.assert_categorical_equal(result, values)