tests/dtypes/test_inference.py · alkaline-ml/pandas

alkaline-ml / pandas python

Repository URL to install this package:
Version: 1.1.1

/ tests / dtypes / test_inference.py

"""
These test the public routines exposed in types/common.py
related to inference and not otherwise tested in types/test_common.py

"""
import collections
from collections import namedtuple
from datetime import date, datetime, time, timedelta
from decimal import Decimal
from fractions import Fraction
from io import StringIO
from numbers import Number
import re

import numpy as np
import pytest
import pytz

from pandas._libs import lib, missing as libmissing
import pandas.util._test_decorators as td

from pandas.core.dtypes import inference
from pandas.core.dtypes.common import (
    ensure_int32,
    is_bool,
    is_datetime64_any_dtype,
    is_datetime64_dtype,
    is_datetime64_ns_dtype,
    is_datetime64tz_dtype,
    is_float,
    is_integer,
    is_number,
    is_scalar,
    is_scipy_sparse,
    is_timedelta64_dtype,
    is_timedelta64_ns_dtype,
)

import pandas as pd
from pandas import (
    Categorical,
    DataFrame,
    DateOffset,
    DatetimeIndex,
    Index,
    Interval,
    Period,
    Series,
    Timedelta,
    TimedeltaIndex,
    Timestamp,
)
import pandas._testing as tm
from pandas.core.arrays import IntegerArray


@pytest.fixture(params=[True, False], ids=str)
def coerce(request):
    """Parametrized fixture supplying ``True`` and then ``False``.

    Tests in this module forward the value as the ``coerce_numeric`` argument
    of ``lib.maybe_convert_numeric``.
    """
    flag = request.param
    return flag


# Collect every object to be tested for list-like-ness. Each entry is a tuple
# of (object, expected, test id). ``expected`` is True/False, or the marker
# string "set" for set-like objects, whose expected result depends on the
# ``allow_sets`` argument and is resolved inside each test.
ll_params = [
    ([1], True, "list"),
    ([], True, "list-empty"),
    ((1,), True, "tuple"),
    (tuple(), True, "tuple-empty"),
    ({"a": 1}, True, "dict"),
    (dict(), True, "dict-empty"),
    ({"a", 1}, "set", "set"),
    (set(), "set", "set-empty"),
    (frozenset({"a", 1}), "set", "frozenset"),
    (frozenset(), "set", "frozenset-empty"),
    (iter([1, 2]), True, "iterator"),
    (iter([]), True, "iterator-empty"),
    ((x for x in [1, 2]), True, "generator"),
    ((_ for _ in []), True, "generator-empty"),
    (Series([1]), True, "Series"),
    (Series([], dtype=object), True, "Series-empty"),
    (Series(["a"]).str, True, "StringMethods"),
    (Series([], dtype="O").str, True, "StringMethods-empty"),
    (Index([1]), True, "Index"),
    (Index([]), True, "Index-empty"),
    (DataFrame([[1]]), True, "DataFrame"),
    (DataFrame(), True, "DataFrame-empty"),
    (np.ndarray((2,) * 1), True, "ndarray-1d"),
    (np.array([]), True, "ndarray-1d-empty"),
    (np.ndarray((2,) * 2), True, "ndarray-2d"),
    (np.array([[]]), True, "ndarray-2d-empty"),
    (np.ndarray((2,) * 3), True, "ndarray-3d"),
    (np.array([[[]]]), True, "ndarray-3d-empty"),
    (np.ndarray((2,) * 4), True, "ndarray-4d"),
    (np.array([[[[]]]]), True, "ndarray-4d-empty"),
    (np.array(2), False, "ndarray-0d"),
    (1, False, "int"),
    (b"123", False, "bytes"),
    (b"", False, "bytes-empty"),
    ("123", False, "string"),
    ("", False, "string-empty"),
    (str, False, "string-type"),
    (object(), False, "object"),
    (np.nan, False, "NaN"),
    (None, False, "None"),
]
# Transpose the table into three parallel tuples: objects, expectations, ids.
objs, expected, ids = zip(*ll_params)


@pytest.fixture(params=zip(objs, expected), ids=ids)
def maybe_list_like(request):
    """Provide one ``(object, expected)`` pair from ``ll_params`` per test."""
    pair = request.param
    return pair


def test_is_list_like(maybe_list_like):
    obj, expected = maybe_list_like
    expected = True if expected == "set" else expected
    assert inference.is_list_like(obj) == expected


def test_is_list_like_disallow_sets(maybe_list_like):
    obj, expected = maybe_list_like
    expected = False if expected == "set" else expected
    assert inference.is_list_like(obj, allow_sets=False) == expected


def test_is_list_like_recursion():
    """``is_list_like`` must surface ``RecursionError`` rather than crash.

    GH 33721: the interpreter would crash with SIGABRT.
    """
    import sys

    def recurse():
        inference.is_list_like([])
        recurse()

    saved_limit = sys.getrecursionlimit()
    try:
        # Use a small limit so the RecursionError is reached long before the
        # native stack can be exhausted on platforms with small stacks.
        sys.setrecursionlimit(100)
        with pytest.raises(RecursionError):
            recurse()
    finally:
        # always restore the interpreter-wide limit for subsequent tests
        sys.setrecursionlimit(saved_limit)


def test_is_sequence():
    """Tuples and lists are sequences; strings, types and a bare
    ``__getitem__`` holder are not."""

    class GetItemOnly:
        def __getitem__(self):
            return 1

    for seq in ((1, 2), [1, 2]):
        assert inference.is_sequence(seq)

    for non_seq in ("abcd", np.int64, GetItemOnly()):
        assert not inference.is_sequence(non_seq)


def test_is_array_like():
    """Check ``is_array_like`` against pandas/NumPy containers and builtins."""

    class DtypeList(list):
        # a plain list subclass that merely carries a ``dtype`` attribute
        dtype = "special"

    array_likes = [
        Series([], dtype=object),
        Series([1, 2]),
        np.array(["a", "b"]),
        Index(["2016-01-01"]),
        DtypeList(),
    ]
    for candidate in array_likes:
        assert inference.is_array_like(candidate)

    not_array_likes = [[1, 2, 3], tuple(), "foo", 123]
    for candidate in not_array_likes:
        assert not inference.is_array_like(candidate)


@pytest.mark.parametrize(
    "inner",
    [
        [],
        [1],
        (1,),
        (1, 2),
        {"a": 1},
        {1, "a"},
        Series([1]),
        Series([], dtype=object),
        Series(["a"]).str,
        (x for x in range(5)),
    ],
)
@pytest.mark.parametrize("outer", [list, Series, np.array, tuple])
def test_is_nested_list_like_passes(inner, outer):
    """Wrap five references to ``inner`` in ``outer`` and check the result.

    NOTE(review): despite the test name, the assertion calls
    ``is_list_like`` rather than ``is_nested_list_like`` -- confirm whether
    that is intentional before changing it.
    """
    nested = outer([inner] * 5)
    assert inference.is_list_like(nested)


@pytest.mark.parametrize(
    "obj",
    [
        "abc",
        [],
        [1],
        (1,),
        ["a"],
        "a",
        {"a"},
        [1, 2, 3],
        Series([1]),
        DataFrame({"A": [1]}),
        ([1, 2] for _ in range(5)),
    ],
)
def test_is_nested_list_like_fails(obj):
    """None of the parametrized objects counts as a nested list-like."""
    verdict = inference.is_nested_list_like(obj)
    assert not verdict


@pytest.mark.parametrize("ll", [{}, {"A": 1}, Series([1]), collections.defaultdict()])
def test_is_dict_like_passes(ll):
    """Dict instances, a Series and a defaultdict instance are dict-like."""
    verdict = inference.is_dict_like(ll)
    assert verdict


@pytest.mark.parametrize(
    "ll",
    [
        "1",
        1,
        [1, 2],
        (1, 2),
        range(2),
        Index([1]),
        dict,
        collections.defaultdict,
        Series,
    ],
)
def test_is_dict_like_fails(ll):
    """Scalars, sequences, an Index and bare classes are not dict-like."""
    verdict = inference.is_dict_like(ll)
    assert not verdict


@pytest.mark.parametrize("has_keys", [True, False])
@pytest.mark.parametrize("has_getitem", [True, False])
@pytest.mark.parametrize("has_contains", [True, False])
def test_is_dict_like_duck_type(has_keys, has_getitem, has_contains):
    """An object is dict-like only when it defines ``keys``, ``__getitem__``
    and ``__contains__`` together."""

    class DuckDict:
        # each method is defined only when its flag is switched on
        def __init__(self, mapping):
            self.mapping = mapping

        if has_keys:

            def keys(self):
                return self.mapping.keys()

        if has_getitem:

            def __getitem__(self, key):
                return self.mapping[key]

        if has_contains:

            def __contains__(self, key) -> bool:
                return key in self.mapping

    candidate = DuckDict({1: 2})
    expected = has_keys and has_getitem and has_contains

    assert inference.is_dict_like(candidate) is expected


def test_is_file_like():
    """Step a mock class through the attribute combinations that decide
    whether ``is_file_like`` accepts an instance."""

    class FakeHandle:
        pass

    check = inference.is_file_like

    # a real in-memory text buffer is file-like
    assert check(StringIO("data"))

    # no read / write attributes and no iterator attributes
    assert not check(FakeHandle())

    # write attribute but still not an iterator
    FakeHandle.write = lambda self: 0
    assert not check(FakeHandle())

    # gh-16530: Valid iterator just means we have the
    # __iter__ attribute for our purposes.
    FakeHandle.__iter__ = lambda self: self

    # valid write-only file
    assert check(FakeHandle())

    # swap the write attribute for a read attribute: valid read-only file
    del FakeHandle.write
    FakeHandle.read = lambda self: 0
    assert check(FakeHandle())

    # iterable but without read / write attributes
    assert not check([1, 2, 3])


# Module-level namedtuple type; an instance of it is the positive case for
# ``inference.is_named_tuple``.
test_tuple = collections.namedtuple("Test", ["a", "b", "c"])


@pytest.mark.parametrize("ll", [test_tuple(1, 2, 3)])
def test_is_names_tuple_passes(ll):
    """A namedtuple instance is recognised by ``is_named_tuple``."""
    verdict = inference.is_named_tuple(ll)
    assert verdict


@pytest.mark.parametrize("ll", [(1, 2, 3), "a", Series({"pi": 3.14})])
def test_is_names_tuple_fails(ll):
    """Plain tuples, strings and Series are not named tuples."""
    verdict = inference.is_named_tuple(ll)
    assert not verdict


def test_is_hashable():
    """``is_hashable`` must agree with what ``hash()`` can actually do."""

    # all new-style classes are hashable by default
    class PlainClass:
        pass

    class HashIsNone:
        __hash__ = None

    class HashRaises:
        def __hash__(self):
            raise TypeError("Not hashable")

    hashable = (1, 3.14, np.float64(3.14), "a", tuple(), (1,), PlainClass())
    for item in hashable:
        assert inference.is_hashable(item)

    unhashable = (
        [],
        HashIsNone(),
        # the next two define ``__hash__`` but hashing them still fails
        ([],),
        HashRaises(),
        # numpy.array is no longer collections.abc.Hashable as of
        # https://github.com/numpy/numpy/pull/5326, just test
        # is_hashable()
        np.array([]),
    )
    for item in unhashable:
        assert not inference.is_hashable(item)


@pytest.mark.parametrize("ll", [re.compile("ad")])
def test_is_re_passes(ll):
    """A compiled pattern is recognised by ``is_re``."""
    verdict = inference.is_re(ll)
    assert verdict


@pytest.mark.parametrize("ll", ["x", 2, 3, object()])
def test_is_re_fails(ll):
    """Strings, ints and arbitrary objects are not compiled patterns."""
    verdict = inference.is_re(ll)
    assert not verdict


@pytest.mark.parametrize(
    "ll", [r"a", "x", r"asdf", re.compile("adsf"), r"\u2233\s*", re.compile(r"")]
)
def test_is_recompilable_passes(ll):
    """Pattern strings and compiled patterns are accepted by
    ``is_re_compilable``."""
    verdict = inference.is_re_compilable(ll)
    assert verdict


@pytest.mark.parametrize("ll", [1, [], object()])
def test_is_recompilable_fails(ll):
    """Ints, lists and arbitrary objects are rejected by
    ``is_re_compilable``."""
    verdict = inference.is_re_compilable(ll)
    assert not verdict


class TestInference:
    @pytest.mark.parametrize(
        "arr",
        [
            np.array(list("abc"), dtype="S1"),
            np.array(list("abc"), dtype="S1").astype(object),
            [b"a", np.nan, b"c"],
        ],
    )
    def test_infer_dtype_bytes(self, arr):
        result = lib.infer_dtype(arr, skipna=True)
        assert result == "bytes"

    @pytest.mark.parametrize(
        "value, expected",
        [
            (float("inf"), True),
            (np.inf, True),
            (-np.inf, False),
            (1, False),
            ("a", False),
        ],
    )
    def test_isposinf_scalar(self, value, expected):
        # GH 11352
        result = libmissing.isposinf_scalar(value)
        assert result is expected

    @pytest.mark.parametrize(
        "value, expected",
        [
            (float("-inf"), True),
            (-np.inf, True),
            (np.inf, False),
            (1, False),
            ("a", False),
        ],
    )
    def test_isneginf_scalar(self, value, expected):
        result = libmissing.isneginf_scalar(value)
        assert result is expected

    @pytest.mark.parametrize("coerce_numeric", [True, False])
    @pytest.mark.parametrize(
        "infinity", ["inf", "inF", "iNf", "Inf", "iNF", "InF", "INf", "INF"]
    )
    @pytest.mark.parametrize("prefix", ["", "-", "+"])
    def test_maybe_convert_numeric_infinities(self, coerce_numeric, infinity, prefix):
        # see gh-13274
        result = lib.maybe_convert_numeric(
            np.array([prefix + infinity], dtype=object),
            na_values={"", "NULL", "nan"},
            coerce_numeric=coerce_numeric,
        )
        expected = np.array([np.inf if prefix in ["", "+"] else -np.inf])
        tm.assert_numpy_array_equal(result, expected)

    def test_maybe_convert_numeric_infinities_raises(self):
        msg = "Unable to parse string"
        with pytest.raises(ValueError, match=msg):
            lib.maybe_convert_numeric(
                np.array(["foo_inf"], dtype=object),
                na_values={"", "NULL", "nan"},
                coerce_numeric=False,
            )

    def test_maybe_convert_numeric_post_floatify_nan(self, coerce):
        # see gh-13314
        data = np.array(["1.200", "-999.000", "4.500"], dtype=object)
        expected = np.array([1.2, np.nan, 4.5], dtype=np.float64)
        nan_values = {-999, -999.0}

        out = lib.maybe_convert_numeric(data, nan_values, coerce)
        tm.assert_numpy_array_equal(out, expected)

    def test_convert_infs(self):
        arr = np.array(["inf", "inf", "inf"], dtype="O")
        result = lib.maybe_convert_numeric(arr, set(), False)
        assert result.dtype == np.float64

        arr = np.array(["-inf", "-inf", "-inf"], dtype="O")
        result = lib.maybe_convert_numeric(arr, set(), False)
        assert result.dtype == np.float64

    def test_scientific_no_exponent(self):
        # See PR 12215
        arr = np.array(["42E", "2E", "99e", "6e"], dtype="O")
        result = lib.maybe_convert_numeric(arr, set(), False, True)
        assert np.all(np.isnan(result))

    def test_convert_non_hashable(self):
        # GH13324
        # make sure that we are handing non-hashables
        arr = np.array([[10.0, 2], 1.0, "apple"], dtype=object)
        result = lib.maybe_convert_numeric(arr, set(), False, True)
        tm.assert_numpy_array_equal(result, np.array([np.nan, 1.0, np.nan]))

    def test_convert_numeric_uint64(self):
        arr = np.array([2 ** 63], dtype=object)
        exp = np.array([2 ** 63], dtype=np.uint64)
        tm.assert_numpy_array_equal(lib.maybe_convert_numeric(arr, set()), exp)

        arr = np.array([str(2 ** 63)], dtype=object)
        exp = np.array([2 ** 63], dtype=np.uint64)
        tm.assert_numpy_array_equal(lib.maybe_convert_numeric(arr, set()), exp)

        arr = np.array([np.uint64(2 ** 63)], dtype=object)
        exp = np.array([2 ** 63], dtype=np.uint64)
        tm.assert_numpy_array_equal(lib.maybe_convert_numeric(arr, set()), exp)

    @pytest.mark.parametrize(
        "arr",
        [
            np.array([2 ** 63, np.nan], dtype=object),
            np.array([str(2 ** 63), np.nan], dtype=object),
            np.array([np.nan, 2 ** 63], dtype=object),
            np.array([np.nan, str(2 ** 63)], dtype=object),
        ],
    )
    def test_convert_numeric_uint64_nan(self, coerce, arr):
        expected = arr.astype(float) if coerce else arr.copy()
        result = lib.maybe_convert_numeric(arr, set(), coerce_numeric=coerce)
        tm.assert_almost_equal(result, expected)

    def test_convert_numeric_uint64_nan_values(self, coerce):
        arr = np.array([2 ** 63, 2 ** 63 + 1], dtype=object)
        na_values = {2 ** 63}

        expected = (
            np.array([np.nan, 2 ** 63 + 1], dtype=float) if coerce else arr.copy()
        )
        result = lib.maybe_convert_numeric(arr, na_values, coerce_numeric=coerce)
        tm.assert_almost_equal(result, expected)

    @pytest.mark.parametrize(
        "case",
        [
            np.array([2 ** 63, -1], dtype=object),
            np.array([str(2 ** 63), -1], dtype=object),
            np.array([str(2 ** 63), str(-1)], dtype=object),
            np.array([-1, 2 ** 63], dtype=object),
            np.array([-1, str(2 ** 63)], dtype=object),
            np.array([str(-1), str(2 ** 63)], dtype=object),
        ],
    )
    def test_convert_numeric_int64_uint64(self, case, coerce):
        expected = case.astype(float) if coerce else case.copy()
        result = lib.maybe_convert_numeric(case, set(), coerce_numeric=coerce)
        tm.assert_almost_equal(result, expected)

    def test_convert_numeric_string_uint64(self):
        # GH32394
        result = lib.maybe_convert_numeric(
            np.array(["uint64"], dtype=object), set(), coerce_numeric=True
        )
        assert np.isnan(result)

    @pytest.mark.parametrize("value", [-(2 ** 63) - 1, 2 ** 64])
    def test_convert_int_overflow(self, value):
        # see gh-18584
        arr = np.array([value], dtype=object)
        result = lib.maybe_convert_objects(arr)
        tm.assert_numpy_array_equal(arr, result)

    def test_maybe_convert_objects_uint64(self):
        # see gh-4471
        arr = np.array([2 ** 63], dtype=object)
        exp = np.array([2 ** 63], dtype=np.uint64)
        tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp)

        # NumPy bug: can't compare uint64 to int64, as that
        # results in both casting to float64, so we should
        # make sure that this function is robust against it
        arr = np.array([np.uint64(2 ** 63)], dtype=object)
        exp = np.array([2 ** 63], dtype=np.uint64)
        tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp)

        arr = np.array([2, -1], dtype=object)
        exp = np.array([2, -1], dtype=np.int64)
        tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp)

        arr = np.array([2 ** 63, -1], dtype=object)
        exp = np.array([2 ** 63, -1], dtype=object)
        tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp)

    def test_maybe_convert_objects_datetime(self):
        # GH27438
        arr = np.array(
            [np.datetime64("2000-01-01"), np.timedelta64(1, "s")], dtype=object
        )
        exp = arr.copy()
        out = lib.maybe_convert_objects(arr, convert_datetime=1, convert_timedelta=1)
        tm.assert_numpy_array_equal(out, exp)

        arr = np.array([pd.NaT, np.timedelta64(1, "s")], dtype=object)
        exp = np.array([np.timedelta64("NaT"), np.timedelta64(1, "s")], dtype="m8[ns]")
        out = lib.maybe_convert_objects(arr, convert_datetime=1, convert_timedelta=1)
        tm.assert_numpy_array_equal(out, exp)

        arr = np.array([np.timedelta64(1, "s"), np.nan], dtype=object)
        exp = arr.copy()
        out = lib.maybe_convert_objects(arr, convert_datetime=1, convert_timedelta=1)
        tm.assert_numpy_array_equal(out, exp)

    @pytest.mark.parametrize(
        "exp",
        [
            IntegerArray(np.array([2, 0], dtype="i8"), np.array([False, True])),
            IntegerArray(np.array([2, 0], dtype="int64"), np.array([False, True])),
        ],
    )
    def test_maybe_convert_objects_nullable_integer(self, exp):
        # GH27335
        arr = np.array([2, np.NaN], dtype=object)
        result = lib.maybe_convert_objects(arr, convert_to_nullable_integer=1)

        tm.assert_extension_array_equal(result, exp)

    def test_maybe_convert_objects_bool_nan(self):
        # GH32146
        ind = pd.Index([True, False, np.nan], dtype=object)
        exp = np.array([True, False, np.nan], dtype=object)
        out = lib.maybe_convert_objects(ind.values, safe=1)
        tm.assert_numpy_array_equal(out, exp)

    def test_mixed_dtypes_remain_object_array(self):
        # GH14956
        array = np.array([datetime(2015, 1, 1, tzinfo=pytz.utc), 1], dtype=object)
        result = lib.maybe_convert_objects(array, convert_datetime=1)
        tm.assert_numpy_array_equal(result, array)


class TestTypeInference:

    # Dummy class used for testing with Python objects
    class Dummy:
        """Empty placeholder type standing in for an arbitrary Python object."""

        pass

    def test_inferred_dtype_fixture(self, any_skipna_inferred_dtype):
        # see pandas/conftest.py
        inferred_dtype, values = any_skipna_inferred_dtype

        # make sure the inferred dtype of the fixture is as requested
        assert inferred_dtype == lib.infer_dtype(values, skipna=True)

    @pytest.mark.parametrize("skipna", [True, False])
    def test_length_zero(self, skipna):
        result = lib.infer_dtype(np.array([], dtype="i4"), skipna=skipna)
        assert result == "integer"

        result = lib.infer_dtype([], skipna=skipna)
        assert result == "empty"

        # GH 18004
        arr = np.array([np.array([], dtype=object), np.array([], dtype=object)])
        result = lib.infer_dtype(arr, skipna=skipna)
        assert result == "empty"

    def test_integers(self):
        arr = np.array([1, 2, 3, np.int64(4), np.int32(5)], dtype="O")
        result = lib.infer_dtype(arr, skipna=True)
        assert result == "integer"

        arr = np.array([1, 2, 3, np.int64(4), np.int32(5), "foo"], dtype="O")
        result = lib.infer_dtype(arr, skipna=True)
        assert result == "mixed-integer"

        arr = np.array([1, 2, 3, 4, 5], dtype="i4")
        result = lib.infer_dtype(arr, skipna=True)
        assert result == "integer"

    @pytest.mark.parametrize(
        "arr, skipna",
        [
            (np.array([1, 2, np.nan, np.nan, 3], dtype="O"), False),
            (np.array([1, 2, np.nan, np.nan, 3], dtype="O"), True),
            (np.array([1, 2, 3, np.int64(4), np.int32(5), np.nan], dtype="O"), False),
            (np.array([1, 2, 3, np.int64(4), np.int32(5), np.nan], dtype="O"), True),
        ],
    )
    def test_integer_na(self, arr, skipna):
        # GH 27392
        result = lib.infer_dtype(arr, skipna=skipna)
        expected = "integer" if skipna else "integer-na"
        assert result == expected

    def test_infer_dtype_skipna_default(self):
        # infer_dtype `skipna` default deprecated in GH#24050,
        #  changed to True in GH#29876
        arr = np.array([1, 2, 3, np.nan], dtype=object)

        result = lib.infer_dtype(arr)
        assert result == "integer"

    def test_bools(self):
        arr = np.array([True, False, True, True, True], dtype="O")
        result = lib.infer_dtype(arr, skipna=True)
        assert result == "boolean"

        arr = np.array([np.bool_(True), np.bool_(False)], dtype="O")
        result = lib.infer_dtype(arr, skipna=True)
        assert result == "boolean"

        arr = np.array([True, False, True, "foo"], dtype="O")
        result = lib.infer_dtype(arr, skipna=True)
        assert result == "mixed"

        arr = np.array([True, False, True], dtype=bool)
        result = lib.infer_dtype(arr, skipna=True)
        assert result == "boolean"

        arr = np.array([True, np.nan, False], dtype="O")
        result = lib.infer_dtype(arr, skipna=True)
        assert result == "boolean"

        result = lib.infer_dtype(arr, skipna=False)
        assert result == "mixed"

    def test_floats(self):
        arr = np.array([1.0, 2.0, 3.0, np.float64(4), np.float32(5)], dtype="O")
        result = lib.infer_dtype(arr, skipna=True)
        assert result == "floating"

        arr = np.array([1, 2, 3, np.float64(4), np.float32(5), "foo"], dtype="O")
        result = lib.infer_dtype(arr, skipna=True)
        assert result == "mixed-integer"

        arr = np.array([1, 2, 3, 4, 5], dtype="f4")
        result = lib.infer_dtype(arr, skipna=True)
        assert result == "floating"

        arr = np.array([1, 2, 3, 4, 5], dtype="f8")
        result = lib.infer_dtype(arr, skipna=True)
        assert result == "floating"

    def test_decimals(self):
        # GH15690
        arr = np.array([Decimal(1), Decimal(2), Decimal(3)])
        result = lib.infer_dtype(arr, skipna=True)
        assert result == "decimal"

        arr = np.array([1.0, 2.0, Decimal(3)])
        result = lib.infer_dtype(arr, skipna=True)
        assert result == "mixed"

        arr = np.array([Decimal(1), Decimal("NaN"), Decimal(3)])
        result = lib.infer_dtype(arr, skipna=True)
        assert result == "decimal"

        arr = np.array([Decimal(1), np.nan, Decimal(3)], dtype="O")
        result = lib.infer_dtype(arr, skipna=True)
        assert result == "decimal"

    # complex is compatible with nan, so skipna has no effect
    @pytest.mark.parametrize("skipna", [True, False])
    def test_complex(self, skipna):
        # gets cast to complex on array construction
        arr = np.array([1.0, 2.0, 1 + 1j])
        result = lib.infer_dtype(arr, skipna=skipna)
        assert result == "complex"

        arr = np.array([1.0, 2.0, 1 + 1j], dtype="O")
        result = lib.infer_dtype(arr, skipna=skipna)
        assert result == "mixed"

        # gets cast to complex on array construction
        arr = np.array([1, np.nan, 1 + 1j])
        result = lib.infer_dtype(arr, skipna=skipna)
        assert result == "complex"

        arr = np.array([1.0, np.nan, 1 + 1j], dtype="O")
        result = lib.infer_dtype(arr, skipna=skipna)
        assert result == "mixed"

        # complex with nans stays complex
        arr = np.array([1 + 1j, np.nan, 3 + 3j], dtype="O")
        result = lib.infer_dtype(arr, skipna=skipna)
        assert result == "complex"

        # test smaller complex dtype; will pass through _try_infer_map fastpath
        arr = np.array([1 + 1j, np.nan, 3 + 3j], dtype=np.complex64)
        result = lib.infer_dtype(arr, skipna=skipna)
        assert result == "complex"

    def test_string(self):
        # NOTE(review): empty placeholder -- this test asserts nothing.
        pass

    def test_unicode(self):
        arr = ["a", np.nan, "c"]
        result = lib.infer_dtype(arr, skipna=False)
        # This currently returns "mixed", but it's not clear that's optimal.
        # This could also return "string" or "mixed-string"
        assert result == "mixed"

        arr = ["a", np.nan, "c"]
        result = lib.infer_dtype(arr, skipna=True)
        assert result == "string"

        arr = ["a", "c"]
        result = lib.infer_dtype(arr, skipna=False)
        assert result == "string"

    @pytest.mark.parametrize(
        "dtype, missing, skipna, expected",
        [
            (float, np.nan, False, "floating"),
            (float, np.nan, True, "floating"),
            (object, np.nan, False, "floating"),
            (object, np.nan, True, "empty"),
            (object, None, False, "mixed"),
            (object, None, True, "empty"),
        ],
    )
    @pytest.mark.parametrize("box", [pd.Series, np.array])
    def test_object_empty(self, box, missing, dtype, skipna, expected):
        # GH 23421
        arr = box([missing, missing], dtype=dtype)

        result = lib.infer_dtype(arr, skipna=skipna)
        assert result == expected

    def test_datetime(self):

        dates = [datetime(2012, 1, x) for x in range(1, 20)]
        index = Index(dates)
        assert index.inferred_type == "datetime64"

    def test_infer_dtype_datetime64(self):
        arr = np.array(
            [np.datetime64("2011-01-01"), np.datetime64("2011-01-01")], dtype=object
        )
        assert lib.infer_dtype(arr, skipna=True) == "datetime64"

    @pytest.mark.parametrize("na_value", [pd.NaT, np.nan])
    def test_infer_dtype_datetime64_with_na(self, na_value):
        # starts with nan
        arr = np.array([na_value, np.datetime64("2011-01-02")])
        assert lib.infer_dtype(arr, skipna=True) == "datetime64"

        arr = np.array([na_value, np.datetime64("2011-01-02"), na_value])
        assert lib.infer_dtype(arr, skipna=True) == "datetime64"

    @pytest.mark.parametrize(
        "arr",
        [
            np.array(
                [np.timedelta64("nat"), np.datetime64("2011-01-02")], dtype=object
            ),
            np.array(
                [np.datetime64("2011-01-02"), np.timedelta64("nat")], dtype=object
            ),
            np.array([np.datetime64("2011-01-01"), pd.Timestamp("2011-01-02")]),
            np.array([pd.Timestamp("2011-01-02"), np.datetime64("2011-01-01")]),
            np.array([np.nan, pd.Timestamp("2011-01-02"), 1.1]),
            np.array([np.nan, "2011-01-01", pd.Timestamp("2011-01-02")]),
            np.array([np.datetime64("nat"), np.timedelta64(1, "D")], dtype=object),
            np.array([np.timedelta64(1, "D"), np.datetime64("nat")], dtype=object),
        ],
    )
    def test_infer_datetimelike_dtype_mixed(self, arr):
        assert lib.infer_dtype(arr, skipna=False) == "mixed"

    def test_infer_dtype_mixed_integer(self):
        arr = np.array([np.nan, pd.Timestamp("2011-01-02"), 1])
        assert lib.infer_dtype(arr, skipna=True) == "mixed-integer"

    @pytest.mark.parametrize(
        "arr",
        [
            np.array([Timestamp("2011-01-01"), Timestamp("2011-01-02")]),
            np.array([datetime(2011, 1, 1), datetime(2012, 2, 1)]),
            np.array([datetime(2011, 1, 1), pd.Timestamp("2011-01-02")]),
        ],
    )
    def test_infer_dtype_datetime(self, arr):
        assert lib.infer_dtype(arr, skipna=True) == "datetime"

    @pytest.mark.parametrize("na_value", [pd.NaT, np.nan])
    @pytest.mark.parametrize(
        "time_stamp", [pd.Timestamp("2011-01-01"), datetime(2011, 1, 1)]
    )
    def test_infer_dtype_datetime_with_na(self, na_value, time_stamp):
        # starts with nan
        arr = np.array([na_value, time_stamp])
        assert lib.infer_dtype(arr, skipna=True) == "datetime"

        arr = np.array([na_value, time_stamp, na_value])
        assert lib.infer_dtype(arr, skipna=True) == "datetime"

    @pytest.mark.parametrize(
        "arr",
        [
            np.array([pd.Timedelta("1 days"), pd.Timedelta("2 days")]),
            np.array([np.timedelta64(1, "D"), np.timedelta64(2, "D")], dtype=object),
            np.array([timedelta(1), timedelta(2)]),
        ],
    )
    def test_infer_dtype_timedelta(self, arr):
        assert lib.infer_dtype(arr, skipna=True) == "timedelta"

    @pytest.mark.parametrize("na_value", [pd.NaT, np.nan])
    @pytest.mark.parametrize(
        "delta", [Timedelta("1 days"), np.timedelta64(1, "D"), timedelta(1)]
    )
    def test_infer_dtype_timedelta_with_na(self, na_value, delta):
        # starts with nan
        arr = np.array([na_value, delta])
        assert lib.infer_dtype(arr, skipna=True) == "timedelta"

        arr = np.array([na_value, delta, na_value])
        assert lib.infer_dtype(arr, skipna=True) == "timedelta"

    def test_infer_dtype_period(self):
        # GH 13664
        arr = np.array([pd.Period("2011-01", freq="D"), pd.Period("2011-02", freq="D")])
        assert lib.infer_dtype(arr, skipna=True) == "period"

        arr = np.array([pd.Period("2011-01", freq="D"), pd.Period("2011-02", freq="M")])
        assert lib.infer_dtype(arr, skipna=True) == "period"

    def test_infer_dtype_period_mixed(self):
        arr = np.array(
            [pd.Period("2011-01", freq="M"), np.datetime64("nat")], dtype=object
        )
        assert lib.infer_dtype(arr, skipna=False) == "mixed"

        arr = np.array(
            [np.datetime64("nat"), pd.Period("2011-01", freq="M")], dtype=object
        )
        assert lib.infer_dtype(arr, skipna=False) == "mixed"

    @pytest.mark.parametrize("na_value", [pd.NaT, np.nan])
    def test_infer_dtype_period_with_na(self, na_value):
        # starts with nan
        arr = np.array([na_value, pd.Period("2011-01", freq="D")])
        assert lib.infer_dtype(arr, skipna=True) == "period"

        arr = np.array([na_value, pd.Period("2011-01", freq="D"), na_value])
        assert lib.infer_dtype(arr, skipna=True) == "period"

    @pytest.mark.parametrize(
        "data",
        [
            [datetime(2017, 6, 12, 19, 30), datetime(2017, 3, 11, 1, 15)],
            [Timestamp("20170612"), Timestamp("20170311")],
            [
                Timestamp("20170612", tz="US/Eastern"),
                Timestamp("20170311", tz="US/Eastern"),
            ],
            [date(2017, 6, 12), Timestamp("20170311", tz="US/Eastern")],
            [np.datetime64("2017-06-12"), np.datetime64("2017-03-11")],
            [np.datetime64("2017-06-12"), datetime(2017, 3, 11, 1, 15)],
        ],
    )
    def test_infer_datetimelike_array_datetime(self, data):
        assert lib.infer_datetimelike_array(data) == "datetime"

    @pytest.mark.parametrize(
        "data",
        [
            [timedelta(2017, 6, 12), timedelta(2017, 3, 11)],
            [timedelta(2017, 6, 12), date(2017, 3, 11)],
            [np.timedelta64(2017, "D"), np.timedelta64(6, "s")],
            [np.timedelta64(2017, "D"), timedelta(2017, 3, 11)],
        ],
    )
    def test_infer_datetimelike_array_timedelta(self, data):
        assert lib.infer_datetimelike_array(data) == "timedelta"

    def test_infer_datetimelike_array_date(self):
        arr = [date(2017, 6, 12), date(2017, 3, 11)]
        assert lib.infer_datetimelike_array(arr) == "date"

    @pytest.mark.parametrize(
        "data",
        [
            ["2017-06-12", "2017-03-11"],
            [20170612, 20170311],
            [20170612.5, 20170311.8],
            [Dummy(), Dummy()],
            [Timestamp("20170612"), Timestamp("20170311", tz="US/Eastern")],
            [Timestamp("20170612"), 20170311],
            [timedelta(2017, 6, 12), Timestamp("20170311", tz="US/Eastern")],
        ],
    )
    def test_infer_datetimelike_array_mixed(self, data):
        assert lib.infer_datetimelike_array(data) == "mixed"

    @pytest.mark.parametrize(
        "first, expected",
        [
            [[None], "mixed"],
            [[np.nan], "mixed"],
            [[pd.NaT], "nat"],
            [[datetime(2017, 6, 12, 19, 30), pd.NaT], "datetime"],
            [[np.datetime64("2017-06-12"), pd.NaT], "datetime"],
            [[date(2017, 6, 12), pd.NaT], "date"],
            [[timedelta(2017, 6, 12), pd.NaT], "timedelta"],
            [[np.timedelta64(2017, "D"), pd.NaT], "timedelta"],
        ],
    )
    @pytest.mark.parametrize("second", [None, np.nan])
    def test_infer_datetimelike_array_nan_nat_like(self, first, second, expected):
        first.append(second)
        assert lib.infer_datetimelike_array(first) == expected

    def test_infer_dtype_all_nan_nat_like(self):
        arr = np.array([np.nan, np.nan])
        assert lib.infer_dtype(arr, skipna=True) == "floating"

        # nan and None mix are result in mixed
        arr = np.array([np.nan, np.nan, None])
        assert lib.infer_dtype(arr, skipna=True) == "empty"
        assert lib.infer_dtype(arr, skipna=False) == "mixed"

        arr = np.array([None, np.nan, np.nan])
        assert lib.infer_dtype(arr, skipna=True) == "empty"
        assert lib.infer_dtype(arr, skipna=False) == "mixed"

        # pd.NaT
        arr = np.array([pd.NaT])
        assert lib.infer_dtype(arr, skipna=False) == "datetime"

        arr = np.array([pd.NaT, np.nan])
        assert lib.infer_dtype(arr, skipna=False) == "datetime"

        arr = np.array([np.nan, pd.NaT])
        assert lib.infer_dtype(arr, skipna=False) == "datetime"

        arr = np.array([np.nan, pd.NaT, np.nan])
        assert lib.infer_dtype(arr, skipna=False) == "datetime"

        arr = np.array([None, pd.NaT, None])
        assert lib.infer_dtype(arr, skipna=False) == "datetime"

        # np.datetime64(nat)
        arr = np.array([np.datetime64("nat")])
        assert lib.infer_dtype(arr, skipna=False) == "datetime64"

        for n in [np.nan, pd.NaT, None]:
            arr = np.array([n, np.datetime64("nat"), n])
            assert lib.infer_dtype(arr, skipna=False) == "datetime64"

            arr = np.array([pd.NaT, n, np.datetime64("nat"), n])
            assert lib.infer_dtype(arr, skipna=False) == "datetime64"

        arr = np.array([np.timedelta64("nat")], dtype=object)
        assert lib.infer_dtype(arr, skipna=False) == "timedelta"

        for n in [np.nan, pd.NaT, None]:
            arr = np.array([n, np.timedelta64("nat"), n])
            assert lib.infer_dtype(arr, skipna=False) == "timedelta"

            arr = np.array([pd.NaT, n, np.timedelta64("nat"), n])
            assert lib.infer_dtype(arr, skipna=False) == "timedelta"

        # datetime / timedelta mixed
        arr = np.array([pd.NaT, np.datetime64("nat"), np.timedelta64("nat"), np.nan])
        assert lib.infer_dtype(arr, skipna=False) == "mixed"

        arr = np.array([np.timedelta64("nat"), np.datetime64("nat")], dtype=object)
        assert lib.infer_dtype(arr, skipna=False) == "mixed"

    def test_is_datetimelike_array_all_nan_nat_like(self):
        arr = np.array([np.nan, pd.NaT, np.datetime64("nat")])
        assert lib.is_datetime_array(arr)
        assert lib.is_datetime64_array(arr)
        assert not lib.is_timedelta_or_timedelta64_array(arr)

        arr = np.array([np.nan, pd.NaT, np.timedelta64("nat")])
        assert not lib.is_datetime_array(arr)
        assert not lib.is_datetime64_array(arr)
        assert lib.is_timedelta_or_timedelta64_array(arr)

        arr = np.array([np.nan, pd.NaT, np.datetime64("nat"), np.timedelta64("nat")])
        assert not lib.is_datetime_array(arr)
        assert not lib.is_datetime64_array(arr)
        assert not lib.is_timedelta_or_timedelta64_array(arr)

        arr = np.array([np.nan, pd.NaT])
        assert lib.is_datetime_array(arr)
        assert lib.is_datetime64_array(arr)
        assert lib.is_timedelta_or_timedelta64_array(arr)

        arr = np.array([np.nan, np.nan], dtype=object)
        assert not lib.is_datetime_array(arr)
        assert not lib.is_datetime64_array(arr)
        assert not lib.is_timedelta_or_timedelta64_array(arr)

        assert lib.is_datetime_with_singletz_array(
            np.array(
                [
                    pd.Timestamp("20130101", tz="US/Eastern"),
                    pd.Timestamp("20130102", tz="US/Eastern"),
                ],
                dtype=object,
            )
        )
        assert not lib.is_datetime_with_singletz_array(
            np.array(
                [
                    pd.Timestamp("20130101", tz="US/Eastern"),
                    pd.Timestamp("20130102", tz="CET"),
                ],
                dtype=object,
            )
        )

    @pytest.mark.parametrize(
        "func",
        [
            "is_datetime_array",
            "is_datetime64_array",
            "is_bool_array",
            "is_timedelta_or_timedelta64_array",
            "is_date_array",
            "is_time_array",
            "is_interval_array",
            "is_period_array",
        ],
    )
    def test_other_dtypes_for_array(self, func):
        func = getattr(lib, func)
        arr = np.array(["foo", "bar"])
        assert not func(arr)

        arr = np.array([1, 2])
        assert not func(arr)

    def test_date(self):

        dates = [date(2012, 1, day) for day in range(1, 20)]
        index = Index(dates)
        assert index.inferred_type == "date"

        dates = [date(2012, 1, day) for day in range(1, 20)] + [np.nan]
        result = lib.infer_dtype(dates, skipna=False)
        assert result == "mixed"

        result = lib.infer_dtype(dates, skipna=True)
        assert result == "date"

    @pytest.mark.parametrize(
        "values",
        [
            [date(2020, 1, 1), pd.Timestamp("2020-01-01")],
            [pd.Timestamp("2020-01-01"), date(2020, 1, 1)],
            [date(2020, 1, 1), pd.NaT],
            [pd.NaT, date(2020, 1, 1)],
        ],
    )
    @pytest.mark.parametrize("skipna", [True, False])
    def test_infer_dtype_date_order_invariant(self, values, skipna):
        # https://github.com/pandas-dev/pandas/issues/33741
        result = lib.infer_dtype(values, skipna=skipna)
        assert result == "date"

    def test_is_numeric_array(self):

        assert lib.is_float_array(np.array([1, 2.0]))
        assert lib.is_float_array(np.array([1, 2.0, np.nan]))
        assert not lib.is_float_array(np.array([1, 2]))

        assert lib.is_integer_array(np.array([1, 2]))
        assert not lib.is_integer_array(np.array([1, 2.0]))

    def test_is_string_array(self):

        assert lib.is_string_array(np.array(["foo", "bar"]))
        assert not lib.is_string_array(
            np.array(["foo", "bar", pd.NA], dtype=object), skipna=False
        )
        assert lib.is_string_array(
            np.array(["foo", "bar", pd.NA], dtype=object), skipna=True
        )
        # NaN is not valid for string array, just NA
        assert not lib.is_string_array(
            np.array(["foo", "bar", np.nan], dtype=object), skipna=True
        )

        assert not lib.is_string_array(np.array([1, 2]))

    def test_to_object_array_tuples(self):
        r = (5, 6)
        values = [r]
        lib.to_object_array_tuples(values)

        # make sure record array works
        record = namedtuple("record", "x y")
        r = record(5, 6)
        values = [r]
        lib.to_object_array_tuples(values)

    def test_object(self):

        # GH 7431
        # cannot infer more than this as only a single element
        arr = np.array([None], dtype="O")
        result = lib.infer_dtype(arr, skipna=False)
        assert result == "mixed"
        result = lib.infer_dtype(arr, skipna=True)
        assert result == "empty"

    def test_to_object_array_width(self):
        # see gh-13320
        rows = [[1, 2, 3], [4, 5, 6]]

        expected = np.array(rows, dtype=object)
        out = lib.to_object_array(rows)
        tm.assert_numpy_array_equal(out, expected)

        expected = np.array(rows, dtype=object)
        out = lib.to_object_array(rows, min_width=1)
        tm.assert_numpy_array_equal(out, expected)

        expected = np.array(
            [[1, 2, 3, None, None], [4, 5, 6, None, None]], dtype=object
        )
        out = lib.to_object_array(rows, min_width=5)
        tm.assert_numpy_array_equal(out, expected)

    def test_is_period(self):
        assert lib.is_period(pd.Period("2011-01", freq="M"))
        assert not lib.is_period(pd.PeriodIndex(["2011-01"], freq="M"))
        assert not lib.is_period(pd.Timestamp("2011-01"))
        assert not lib.is_period(1)
        assert not lib.is_period(np.nan)

    def test_categorical(self):

        # GH 8974
        arr = Categorical(list("abc"))
        result = lib.infer_dtype(arr, skipna=True)
        assert result == "categorical"

        result = lib.infer_dtype(Series(arr), skipna=True)
        assert result == "categorical"

        arr = Categorical(list("abc"), categories=["cegfab"], ordered=True)
        result = lib.infer_dtype(arr, skipna=True)
        assert result == "categorical"

        result = lib.infer_dtype(Series(arr), skipna=True)
        assert result == "categorical"

    def test_interval(self):
        idx = pd.IntervalIndex.from_breaks(range(5), closed="both")
        inferred = lib.infer_dtype(idx, skipna=False)
        assert inferred == "interval"

        inferred = lib.infer_dtype(idx._data, skipna=False)
        assert inferred == "interval"

        inferred = lib.infer_dtype(pd.Series(idx), skipna=False)
        assert inferred == "interval"

    @pytest.mark.parametrize("klass", [pd.array, pd.Series])
    @pytest.mark.parametrize("skipna", [True, False])
    @pytest.mark.parametrize("data", [["a", "b", "c"], ["a", "b", pd.NA]])
    def test_string_dtype(self, data, skipna, klass):
        # StringArray
        val = klass(data, dtype="string")
        inferred = lib.infer_dtype(val, skipna=skipna)
        assert inferred == "string"

    @pytest.mark.parametrize("klass", [pd.array, pd.Series])
    @pytest.mark.parametrize("skipna", [True, False])
    @pytest.mark.parametrize("data", [[True, False, True], [True, False, pd.NA]])
    def test_boolean_dtype(self, data, skipna, klass):
        # BooleanArray
        val = klass(data, dtype="boolean")
        inferred = lib.infer_dtype(val, skipna=skipna)
        assert inferred == "boolean"


class TestNumberScalar:
    def test_is_number(self):

        assert is_number(True)
        assert is_number(1)
        assert is_number(1.1)
        assert is_number(1 + 3j)
        assert is_number(np.int64(1))
        assert is_number(np.float64(1.1))
        assert is_number(np.complex128(1 + 3j))
        assert is_number(np.nan)

        assert not is_number(None)
        assert not is_number("x")
        assert not is_number(datetime(2011, 1, 1))
        assert not is_number(np.datetime64("2011-01-01"))
        assert not is_number(Timestamp("2011-01-01"))
        assert not is_number(Timestamp("2011-01-01", tz="US/Eastern"))
        assert not is_number(timedelta(1000))
        assert not is_number(Timedelta("1 days"))

        # questionable
        assert not is_number(np.bool_(False))
        assert is_number(np.timedelta64(1, "D"))

    def test_is_bool(self):
        assert is_bool(True)
        assert is_bool(False)
        assert is_bool(np.bool_(False))

        assert not is_bool(1)
        assert not is_bool(1.1)
        assert not is_bool(1 + 3j)
        assert not is_bool(np.int64(1))
        assert not is_bool(np.float64(1.1))
        assert not is_bool(np.complex128(1 + 3j))
        assert not is_bool(np.nan)
        assert not is_bool(None)
        assert not is_bool("x")
        assert not is_bool(datetime(2011, 1, 1))
        assert not is_bool(np.datetime64("2011-01-01"))
        assert not is_bool(Timestamp("2011-01-01"))
        assert not is_bool(Timestamp("2011-01-01", tz="US/Eastern"))
        assert not is_bool(timedelta(1000))
        assert not is_bool(np.timedelta64(1, "D"))
        assert not is_bool(Timedelta("1 days"))

    def test_is_integer(self):
        assert is_integer(1)
        assert is_integer(np.int64(1))

        assert not is_integer(True)
        assert not is_integer(1.1)
        assert not is_integer(1 + 3j)
        assert not is_integer(False)
        assert not is_integer(np.bool_(False))
        assert not is_integer(np.float64(1.1))
        assert not is_integer(np.complex128(1 + 3j))
        assert not is_integer(np.nan)
        assert not is_integer(None)
        assert not is_integer("x")
        assert not is_integer(datetime(2011, 1, 1))
        assert not is_integer(np.datetime64("2011-01-01"))
        assert not is_integer(Timestamp("2011-01-01"))
        assert not is_integer(Timestamp("2011-01-01", tz="US/Eastern"))
        assert not is_integer(timedelta(1000))
        assert not is_integer(Timedelta("1 days"))
        assert not is_integer(np.timedelta64(1, "D"))

    def test_is_float(self):
        assert is_float(1.1)
        assert is_float(np.float64(1.1))
        assert is_float(np.nan)

        assert not is_float(True)
        assert not is_float(1)
        assert not is_float(1 + 3j)
        assert not is_float(False)
        assert not is_float(np.bool_(False))
        assert not is_float(np.int64(1))
        assert not is_float(np.complex128(1 + 3j))
        assert not is_float(None)
        assert not is_float("x")
        assert not is_float(datetime(2011, 1, 1))
        assert not is_float(np.datetime64("2011-01-01"))
        assert not is_float(Timestamp("2011-01-01"))
        assert not is_float(Timestamp("2011-01-01", tz="US/Eastern"))
        assert not is_float(timedelta(1000))
        assert not is_float(np.timedelta64(1, "D"))
        assert not is_float(Timedelta("1 days"))

    def test_is_datetime_dtypes(self):

        ts = pd.date_range("20130101", periods=3)
        tsa = pd.date_range("20130101", periods=3, tz="US/Eastern")

        assert is_datetime64_dtype("datetime64")
        assert is_datetime64_dtype("datetime64[ns]")
        assert is_datetime64_dtype(ts)
        assert not is_datetime64_dtype(tsa)

        assert not is_datetime64_ns_dtype("datetime64")
        assert is_datetime64_ns_dtype("datetime64[ns]")
        assert is_datetime64_ns_dtype(ts)
        assert is_datetime64_ns_dtype(tsa)

        assert is_datetime64_any_dtype("datetime64")
        assert is_datetime64_any_dtype("datetime64[ns]")
        assert is_datetime64_any_dtype(ts)
        assert is_datetime64_any_dtype(tsa)

        assert not is_datetime64tz_dtype("datetime64")
        assert not is_datetime64tz_dtype("datetime64[ns]")
        assert not is_datetime64tz_dtype(ts)
        assert is_datetime64tz_dtype(tsa)

        for tz in ["US/Eastern", "UTC"]:
            dtype = f"datetime64[ns, {tz}]"
            assert not is_datetime64_dtype(dtype)
            assert is_datetime64tz_dtype(dtype)
            assert is_datetime64_ns_dtype(dtype)
            assert is_datetime64_any_dtype(dtype)

    def test_is_timedelta(self):
        assert is_timedelta64_dtype("timedelta64")
        assert is_timedelta64_dtype("timedelta64[ns]")
        assert not is_timedelta64_ns_dtype("timedelta64")
        assert is_timedelta64_ns_dtype("timedelta64[ns]")

        tdi = TimedeltaIndex([1e14, 2e14], dtype="timedelta64[ns]")
        assert is_timedelta64_dtype(tdi)
        assert is_timedelta64_ns_dtype(tdi)
        assert is_timedelta64_ns_dtype(tdi.astype("timedelta64[ns]"))

        # Conversion to Int64Index:
        assert not is_timedelta64_ns_dtype(tdi.astype("timedelta64"))
        assert not is_timedelta64_ns_dtype(tdi.astype("timedelta64[h]"))


class TestIsScalar:
    """Checks of ``is_scalar`` against builtin, numpy and pandas objects."""

    def test_is_scalar_builtin_scalars(self):
        scalars = (
            None,
            True,
            False,
            Fraction(),
            0.0,
            1,
            complex(2),
            float("NaN"),
            np.nan,
            "foobar",
            b"foobar",
            datetime(2014, 1, 1),
            date(2014, 1, 1),
            time(12, 0),
            timedelta(hours=1),
            pd.NaT,
            pd.NA,
        )
        for obj in scalars:
            assert is_scalar(obj)

    def test_is_scalar_builtin_nonscalars(self):
        non_scalars = ({}, [], [1], (), (1,), slice(None), Ellipsis)
        for obj in non_scalars:
            assert not is_scalar(obj)

    def test_is_scalar_numpy_array_scalars(self):
        numpy_scalars = (
            np.int64(1),
            np.float64(1.0),
            np.int32(1),
            np.complex64(2),
            np.object_("foobar"),
            np.str_("foobar"),
            np.unicode_("foobar"),
            np.bytes_(b"foobar"),
            np.datetime64("2014-01-01"),
            np.timedelta64(1, "h"),
        )
        for obj in numpy_scalars:
            assert is_scalar(obj)

    def test_is_scalar_numpy_zerodim_arrays(self):
        zero_dim_arrays = (
            np.array(1),
            np.array("foobar"),
            np.array(np.datetime64("2014-01-01")),
            np.array(np.timedelta64(1, "h")),
            np.array(np.datetime64("NaT")),
        )
        for arr in zero_dim_arrays:
            # the 0-d array itself is not a scalar, the item unpacked from it is
            assert not is_scalar(arr)
            assert is_scalar(lib.item_from_zerodim(arr))

    @pytest.mark.filterwarnings("ignore::PendingDeprecationWarning")
    def test_is_scalar_numpy_arrays(self):
        for obj in (np.array([]), np.array([[]]), np.matrix("1; 2")):
            assert not is_scalar(obj)

    def test_is_scalar_pandas_scalars(self):
        pandas_scalars = (
            Timestamp("2014-01-01"),
            Timedelta(hours=1),
            Period("2014-01-01"),
            Interval(left=0, right=1),
            DateOffset(days=1),
            pd.offsets.Minute(3),
        )
        for obj in pandas_scalars:
            assert is_scalar(obj)

    def test_is_scalar_pandas_containers(self):
        containers = (
            Series(dtype=object),
            Series([1]),
            DataFrame(),
            DataFrame([[1]]),
            Index([]),
            Index([1]),
            Categorical([]),
            DatetimeIndex([])._data,
            TimedeltaIndex([])._data,
            DatetimeIndex([])._data.to_period("D"),
            pd.array([1, 2, 3]),
        )
        for obj in containers:
            assert not is_scalar(obj)

    def test_is_scalar_number(self):
        # Number() is not recognized by PyNumber_Check, so by extension
        #  is not recognized by is_scalar, but instances of non-abstract
        #  subclasses are.

        class Numeric(Number):
            def __init__(self, value):
                self.value = value

            def __int__(self):
                return self.value

        assert is_scalar(Numeric(1))


def test_datetimeindex_from_empty_datetime64_array():
    """An empty datetime64 array of any tested unit gives an empty DatetimeIndex."""
    for resolution in ("ms", "us", "ns"):
        empty = np.array([], dtype=f"datetime64[{resolution}]")
        assert len(DatetimeIndex(empty)) == 0


def test_nan_to_nat_conversions():
    """Assigning NaN into a datetime column must store ``pd.NaT``."""
    frame = DataFrame(
        {"A": np.asarray(range(10), dtype="float64"), "B": Timestamp("20010101")}
    )

    # frame-level assignment through iloc
    frame.iloc[3:6, :] = np.nan
    assert frame.loc[4, "B"] is pd.NaT

    # series-level assignment through a slice
    column = frame["B"].copy()
    column[8:9] = np.nan
    assert column[8] is pd.NaT


@td.skip_if_no_scipy
@pytest.mark.filterwarnings("ignore::PendingDeprecationWarning")
def test_is_scipy_sparse(spmatrix):  # noqa: F811
    """``is_scipy_sparse`` accepts a scipy sparse matrix and rejects an ndarray."""
    sparse_obj = spmatrix([[0, 1]])
    dense_obj = np.array([1])

    assert is_scipy_sparse(sparse_obj)
    assert not is_scipy_sparse(dense_obj)


def test_ensure_int32():
    """``ensure_int32`` returns int32 data for both int32 and int64 input."""
    for source_dtype in (np.int32, np.int64):
        converted = ensure_int32(np.arange(10, dtype=source_dtype))
        assert converted.dtype == np.int32
# Source listing: alkaline-ml / pandas (python), version 1.1.1,
# file: / tests / dtypes / test_inference.py
# (package-index page footer: Products / About / Resources / Contact Gemfury)