Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

agriconnect / pandas   python

Repository URL to install this package:

Version: 0.24.2 

/ tests / dtypes / test_inference.py

# -*- coding: utf-8 -*-

"""
These tests cover the public routines exposed in types/common.py
related to inference and not otherwise tested in types/test_common.py

"""
import collections
from datetime import date, datetime, time, timedelta
from decimal import Decimal
from fractions import Fraction
from numbers import Number
import re
from warnings import catch_warnings, simplefilter

import numpy as np
import pytest
import pytz

from pandas._libs import iNaT, lib, missing as libmissing
from pandas.compat import PY2, StringIO, lrange, u
import pandas.util._test_decorators as td

from pandas.core.dtypes import inference
from pandas.core.dtypes.common import (
    ensure_categorical, ensure_int32, is_bool, is_datetime64_any_dtype,
    is_datetime64_dtype, is_datetime64_ns_dtype, is_datetime64tz_dtype,
    is_float, is_integer, is_number, is_scalar, is_scipy_sparse,
    is_timedelta64_dtype, is_timedelta64_ns_dtype)

import pandas as pd
from pandas import (
    Categorical, DataFrame, DateOffset, DatetimeIndex, Index, Interval, Panel,
    Period, Series, Timedelta, TimedeltaIndex, Timestamp, compat, isna)
from pandas.util import testing as tm


@pytest.fixture(params=[True, False], ids=str)
def coerce(request):
    """Parametrized fixture that supplies ``True`` and then ``False``."""
    return request.param


# Table of objects checked for list-like-ness. Every row is a tuple of
# (object, expectation, test ID); the expectation is True / False, or the
# marker 'set' for set types, which the tests special-case.
ll_params = [
    ([1], True, 'list'),
    ([], True, 'list-empty'),
    ((1, ), True, 'tuple'),
    (tuple(), True, 'tuple-empty'),
    ({'a': 1}, True, 'dict'),
    (dict(), True, 'dict-empty'),
    ({'a', 1}, 'set', 'set'),
    (set(), 'set', 'set-empty'),
    (frozenset({'a', 1}), 'set', 'frozenset'),
    (frozenset(), 'set', 'frozenset-empty'),
    (iter([1, 2]), True, 'iterator'),
    (iter([]), True, 'iterator-empty'),
    ((x for x in [1, 2]), True, 'generator'),
    ((x for x in []), True, 'generator-empty'),
    (Series([1]), True, 'Series'),
    (Series([]), True, 'Series-empty'),
    (Series(['a']).str, True, 'StringMethods'),
    (Series([], dtype='O').str, True, 'StringMethods-empty'),
    (Index([1]), True, 'Index'),
    (Index([]), True, 'Index-empty'),
    (DataFrame([[1]]), True, 'DataFrame'),
    (DataFrame(), True, 'DataFrame-empty'),
    (np.ndarray((2,) * 1), True, 'ndarray-1d'),
    (np.array([]), True, 'ndarray-1d-empty'),
    (np.ndarray((2,) * 2), True, 'ndarray-2d'),
    (np.array([[]]), True, 'ndarray-2d-empty'),
    (np.ndarray((2,) * 3), True, 'ndarray-3d'),
    (np.array([[[]]]), True, 'ndarray-3d-empty'),
    (np.ndarray((2,) * 4), True, 'ndarray-4d'),
    (np.array([[[[]]]]), True, 'ndarray-4d-empty'),
    (np.array(2), False, 'ndarray-0d'),
    (1, False, 'int'),
    (b'123', False, 'bytes'),
    (b'', False, 'bytes-empty'),
    ('123', False, 'string'),
    ('', False, 'string-empty'),
    (str, False, 'string-type'),
    (object(), False, 'object'),
    (np.nan, False, 'NaN'),
    (None, False, 'None'),
]
# Split the table column-wise into three parallel tuples.
objs = tuple(row[0] for row in ll_params)
expected = tuple(row[1] for row in ll_params)
ids = tuple(row[2] for row in ll_params)


@pytest.fixture(params=zip(objs, expected), ids=ids)
def maybe_list_like(request):
    """
    Parametrized fixture returning one ``(obj, expected)`` pair per case.

    The pairs and their test IDs come from the module-level ``objs``,
    ``expected`` and ``ids`` sequences.
    """
    return request.param


def test_is_list_like(maybe_list_like):
    obj, expected = maybe_list_like
    expected = True if expected == 'set' else expected
    assert inference.is_list_like(obj) == expected


def test_is_list_like_disallow_sets(maybe_list_like):
    obj, expected = maybe_list_like
    expected = False if expected == 'set' else expected
    assert inference.is_list_like(obj, allow_sets=False) == expected


def test_is_sequence():
    """Tuples and lists are sequences; strings, types and ``A()`` are not."""
    is_seq = inference.is_sequence

    class A(object):

        # takes no key argument; instances must not be reported as sequences
        def __getitem__(self):
            return 1

    for sequence in [(1, 2), [1, 2]]:
        assert is_seq(sequence)

    for non_sequence in ["abcd", u("abcd"), np.int64, A()]:
        assert not is_seq(non_sequence)


def test_is_array_like():
    """Check ``is_array_like`` on pandas/numpy objects and plain Python ones."""

    class DtypeList(list):
        # a plain list subclass that merely carries a ``dtype`` attribute
        dtype = "special"

    array_likes = [
        Series([]),
        Series([1, 2]),
        np.array(["a", "b"]),
        Index(["2016-01-01"]),
        DtypeList(),
    ]
    for candidate in array_likes:
        assert inference.is_array_like(candidate)

    for candidate in ([1, 2, 3], tuple(), "foo", 123):
        assert not inference.is_array_like(candidate)


@pytest.mark.parametrize('inner', [
    [], [1], (1, ), (1, 2), {'a': 1}, {1, 'a'}, Series([1]),
    Series([]), Series(['a']).str, (x for x in range(5))
])
@pytest.mark.parametrize('outer', [
    list, Series, np.array, tuple
])
def test_is_nested_list_like_passes(inner, outer):
    result = outer([inner for _ in range(5)])
    assert inference.is_list_like(result)


@pytest.mark.parametrize('obj', [
    'abc',
    [],
    [1],
    (1,),
    ['a'],
    'a',
    {'a'},
    [1, 2, 3],
    Series([1]),
    DataFrame({"A": [1]}),
    ([1, 2] for _ in range(5)),
])
def test_is_nested_list_like_fails(obj):
    """None of the parametrized objects is reported as nested list-like."""
    nested = inference.is_nested_list_like(obj)
    assert not nested


@pytest.mark.parametrize(
    "ll", [
        {},
        {'A': 1},
        Series([1]),
        collections.defaultdict(),
    ])
def test_is_dict_like_passes(ll):
    """Dict instances and a Series are reported as dict-like."""
    is_dict_like = inference.is_dict_like
    assert is_dict_like(ll)


@pytest.mark.parametrize("ll", [
    '1',
    1,
    [1, 2],
    (1, 2),
    range(2),
    Index([1]),
    # the classes themselves, as opposed to instances of them
    dict,
    collections.defaultdict,
    Series,
])
def test_is_dict_like_fails(ll):
    """Scalars, sequences, an Index and bare classes are not dict-like."""
    is_dict_like = inference.is_dict_like
    assert not is_dict_like(ll)


@pytest.mark.parametrize("has_keys", [True, False])
@pytest.mark.parametrize("has_getitem", [True, False])
@pytest.mark.parametrize("has_contains", [True, False])
def test_is_dict_like_duck_type(has_keys, has_getitem, has_contains):
    """
    A duck-typed object is dict-like only when it offers all three of
    ``keys``, ``__getitem__`` and ``__contains__``.
    """
    def _init(self, d):
        self.d = d

    # assemble the class body, adding each method only when requested
    namespace = {'__init__': _init}
    if has_keys:
        namespace['keys'] = lambda self: self.d.keys()
    if has_getitem:
        namespace['__getitem__'] = (
            lambda self, key: self.d.__getitem__(key))
    if has_contains:
        namespace['__contains__'] = (
            lambda self, key: self.d.__contains__(key))
    DictLike = type('DictLike', (object,), namespace)

    wanted = has_keys and has_getitem and has_contains
    outcome = inference.is_dict_like(DictLike({1: 2}))

    assert outcome is wanted


def test_is_file_like():
    """
    Walk ``is_file_like`` through a mock class that gains and loses the
    ``write``, ``__iter__`` and ``read`` attributes step by step.
    """
    class MockFile(object):
        pass

    is_file = inference.is_file_like

    # an in-memory text buffer is file-like
    assert is_file(StringIO("data"))

    # neither read / write nor iterator attributes
    assert not is_file(MockFile())

    # write attribute, but still not an iterator
    setattr(MockFile, 'write', lambda self: 0)
    assert not is_file(MockFile())

    # gh-16530: Valid iterator just means we have the
    # __iter__ attribute for our purposes.
    setattr(MockFile, '__iter__', lambda self: self)

    # write-only file with __iter__ is accepted
    assert is_file(MockFile())

    # swap write for read: a read-only file with __iter__ is accepted too
    delattr(MockFile, 'write')
    setattr(MockFile, 'read', lambda self: 0)
    assert is_file(MockFile())

    # iterable, but without read / write attributes
    assert not is_file([1, 2, 3])


@pytest.mark.parametrize(
    "ll", [
        collections.namedtuple('Test', list('abc'))(1, 2, 3),
    ])
def test_is_names_tuple_passes(ll):
    """A ``collections.namedtuple`` instance is reported as a named tuple."""
    is_named_tuple = inference.is_named_tuple
    assert is_named_tuple(ll)


@pytest.mark.parametrize(
    "ll", [
        (1, 2, 3),
        'a',
        Series({'pi': 3.14}),
    ])
def test_is_names_tuple_fails(ll):
    """A plain tuple, a string and a Series are not named tuples."""
    is_named_tuple = inference.is_named_tuple
    assert not is_named_tuple(ll)


def test_is_hashable():
    """Check ``is_hashable`` on hashable and unhashable objects."""

    # all new-style classes are hashable by default
    class HashableClass(object):
        pass

    # opts out of hashing by clearing ``__hash__``
    class UnhashableClass1(object):
        __hash__ = None

    # defines ``__hash__``, but calling it always raises
    class UnhashableClass2(object):

        def __hash__(self):
            raise TypeError("Not hashable")

    hashable = (1, 3.14, np.float64(3.14), 'a', tuple(), (1, ),
                HashableClass())
    not_hashable = ([], UnhashableClass1())
    # these provide a ``__hash__`` method, yet hash() on them raises
    abc_hashable_not_really_hashable = (([], ), UnhashableClass2())

    for candidate in hashable:
        assert inference.is_hashable(candidate)
    for candidate in not_hashable + abc_hashable_not_really_hashable:
        assert not inference.is_hashable(candidate)

    # numpy.array is no longer collections.Hashable as of
    # https://github.com/numpy/numpy/pull/5326, just test
    # is_hashable()
    assert not inference.is_hashable(np.array([]))

    # old-style classes in Python 2 don't appear hashable to
    # collections.Hashable but also seem to support hash() by default
    if PY2:

        class OldStyleClass():
            pass

        instance = OldStyleClass()
        assert not isinstance(instance, compat.Hashable)
        assert inference.is_hashable(instance)
        hash(instance)  # this will not raise


@pytest.mark.parametrize("ll", [re.compile('ad')])
def test_is_re_passes(ll):
    """A compiled pattern object is recognised by ``is_re``."""
    is_re = inference.is_re
    assert is_re(ll)


@pytest.mark.parametrize("ll", ['x', 2, 3, object()])
def test_is_re_fails(ll):
    """Strings, integers and plain objects are not compiled patterns."""
    is_re = inference.is_re
    assert not is_re(ll)


@pytest.mark.parametrize(
    "ll", [
        r'a',
        u('x'),
        r'asdf',
        re.compile('adsf'),
        u(r'\u2233\s*'),
        re.compile(r''),
    ])
def test_is_recompilable_passes(ll):
    """Pattern strings and compiled patterns are regex-compilable."""
    compilable = inference.is_re_compilable(ll)
    assert compilable


@pytest.mark.parametrize("ll", [1, [], object()])
def test_is_recompilable_fails(ll):
    """An integer, a list and a plain object are not regex-compilable."""
    compilable = inference.is_re_compilable(ll)
    assert not compilable


class TestInference(object):

    def test_infer_dtype_bytes(self):
        """
        ``lib.infer_dtype`` labels bytes data as 'string' on Python 2 and
        as 'bytes' otherwise.
        """
        if PY2:
            compare = 'string'
        else:
            compare = 'bytes'

        fixed_width = np.array(list('abc'), dtype='S1')
        inputs = [
            # string array of bytes
            fixed_width,
            # object array of bytes
            fixed_width.astype(object),
            # list of bytes with a missing value in the middle
            [b'a', np.nan, b'c'],
        ]
        for values in inputs:
            assert lib.infer_dtype(values, skipna=True) == compare
Loading ...