Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

agriconnect / pandas   python

Repository URL to install this package:

Version: 0.24.2 

/ tests / indexes / multi / test_integrity.py

# -*- coding: utf-8 -*-

import re

import numpy as np
import pytest

from pandas.compat import lrange, range

from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike

import pandas as pd
from pandas import IntervalIndex, MultiIndex, RangeIndex
import pandas.util.testing as tm


def test_labels_dtypes():

    # GH 8456
    i = MultiIndex.from_tuples([('A', 1), ('A', 2)])
    assert i.codes[0].dtype == 'int8'
    assert i.codes[1].dtype == 'int8'

    i = MultiIndex.from_product([['a'], range(40)])
    assert i.codes[1].dtype == 'int8'
    i = MultiIndex.from_product([['a'], range(400)])
    assert i.codes[1].dtype == 'int16'
    i = MultiIndex.from_product([['a'], range(40000)])
    assert i.codes[1].dtype == 'int32'

    i = pd.MultiIndex.from_product([['a'], range(1000)])
    assert (i.codes[0] >= 0).all()
    assert (i.codes[1] >= 0).all()


def test_values_boxed():
    tuples = [(1, pd.Timestamp('2000-01-01')), (2, pd.NaT),
              (3, pd.Timestamp('2000-01-03')),
              (1, pd.Timestamp('2000-01-04')),
              (2, pd.Timestamp('2000-01-02')),
              (3, pd.Timestamp('2000-01-03'))]
    result = pd.MultiIndex.from_tuples(tuples)
    expected = construct_1d_object_array_from_listlike(tuples)
    tm.assert_numpy_array_equal(result.values, expected)
    # Check that code branches for boxed values produce identical results
    tm.assert_numpy_array_equal(result.values[:4], result[:4].values)


def test_values_multiindex_datetimeindex():
    # Test to ensure we hit the boxing / nobox part of MI.values
    ints = np.arange(10 ** 18, 10 ** 18 + 5)
    naive = pd.DatetimeIndex(ints)
    # TODO(GH-24559): Remove the FutureWarning
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        aware = pd.DatetimeIndex(ints, tz='US/Central')

    idx = pd.MultiIndex.from_arrays([naive, aware])
    result = idx.values

    outer = pd.DatetimeIndex([x[0] for x in result])
    tm.assert_index_equal(outer, naive)

    inner = pd.DatetimeIndex([x[1] for x in result])
    tm.assert_index_equal(inner, aware)

    # n_lev > n_lab
    result = idx[:2].values

    outer = pd.DatetimeIndex([x[0] for x in result])
    tm.assert_index_equal(outer, naive[:2])

    inner = pd.DatetimeIndex([x[1] for x in result])
    tm.assert_index_equal(inner, aware[:2])


def test_values_multiindex_periodindex():
    # Test to ensure we hit the boxing / nobox part of MI.values
    ints = np.arange(2007, 2012)
    pidx = pd.PeriodIndex(ints, freq='D')

    idx = pd.MultiIndex.from_arrays([ints, pidx])
    result = idx.values

    outer = pd.Int64Index([x[0] for x in result])
    tm.assert_index_equal(outer, pd.Int64Index(ints))

    inner = pd.PeriodIndex([x[1] for x in result])
    tm.assert_index_equal(inner, pidx)

    # n_lev > n_lab
    result = idx[:2].values

    outer = pd.Int64Index([x[0] for x in result])
    tm.assert_index_equal(outer, pd.Int64Index(ints[:2]))

    inner = pd.PeriodIndex([x[1] for x in result])
    tm.assert_index_equal(inner, pidx[:2])


def test_consistency():
    # need to construct an overflow
    major_axis = lrange(70000)
    minor_axis = lrange(10)

    major_codes = np.arange(70000)
    minor_codes = np.repeat(lrange(10), 7000)

    # the fact that is works means it's consistent
    index = MultiIndex(levels=[major_axis, minor_axis],
                       codes=[major_codes, minor_codes])

    # inconsistent
    major_codes = np.array([0, 0, 1, 1, 1, 2, 2, 3, 3])
    minor_codes = np.array([0, 1, 0, 1, 1, 0, 1, 0, 1])
    index = MultiIndex(levels=[major_axis, minor_axis],
                       codes=[major_codes, minor_codes])

    assert index.is_unique is False


def test_hash_collisions():
    # non-smoke test that we don't get hash collisions

    index = MultiIndex.from_product([np.arange(1000), np.arange(1000)],
                                    names=['one', 'two'])
    result = index.get_indexer(index.values)
    tm.assert_numpy_array_equal(result, np.arange(
        len(index), dtype='intp'))

    for i in [0, 1, len(index) - 2, len(index) - 1]:
        result = index.get_loc(index[i])
        assert result == i


def test_dims():
    pass


def take_invalid_kwargs():
    vals = [['A', 'B'],
            [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')]]
    idx = pd.MultiIndex.from_product(vals, names=['str', 'dt'])
    indices = [1, 2]

    msg = r"take\(\) got an unexpected keyword argument 'foo'"
    with pytest.raises(TypeError, match=msg):
        idx.take(indices, foo=2)

    msg = "the 'out' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        idx.take(indices, out=indices)

    msg = "the 'mode' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        idx.take(indices, mode='clip')


def test_isna_behavior(idx):
    # should not segfault GH5123
    # NOTE: if MI representation changes, may make sense to allow
    # isna(MI)
    with pytest.raises(NotImplementedError):
        pd.isna(idx)


def test_large_multiindex_error():
    # GH12527
    df_below_1000000 = pd.DataFrame(
        1, index=pd.MultiIndex.from_product([[1, 2], range(499999)]),
        columns=['dest'])
    with pytest.raises(KeyError):
        df_below_1000000.loc[(-1, 0), 'dest']
    with pytest.raises(KeyError):
        df_below_1000000.loc[(3, 0), 'dest']
    df_above_1000000 = pd.DataFrame(
        1, index=pd.MultiIndex.from_product([[1, 2], range(500001)]),
        columns=['dest'])
    with pytest.raises(KeyError):
        df_above_1000000.loc[(-1, 0), 'dest']
    with pytest.raises(KeyError):
        df_above_1000000.loc[(3, 0), 'dest']


def test_million_record_attribute_error():
    # GH 18165
    r = list(range(1000000))
    df = pd.DataFrame({'a': r, 'b': r},
                      index=pd.MultiIndex.from_tuples([(x, x) for x in r]))

    msg = "'Series' object has no attribute 'foo'"
    with pytest.raises(AttributeError, match=msg):
        df['a'].foo()


def test_can_hold_identifiers(idx):
    key = idx[0]
    assert idx._can_hold_identifiers_and_holds_name(key) is True


def test_metadata_immutable(idx):
    levels, codes = idx.levels, idx.codes
    # shouldn't be able to set at either the top level or base level
    mutable_regex = re.compile('does not support mutable operations')
    with pytest.raises(TypeError, match=mutable_regex):
        levels[0] = levels[0]
    with pytest.raises(TypeError, match=mutable_regex):
        levels[0][0] = levels[0][0]
    # ditto for labels
    with pytest.raises(TypeError, match=mutable_regex):
        codes[0] = codes[0]
    with pytest.raises(TypeError, match=mutable_regex):
        codes[0][0] = codes[0][0]
    # and for names
    names = idx.names
    with pytest.raises(TypeError, match=mutable_regex):
        names[0] = names[0]


def test_level_setting_resets_attributes():
    ind = pd.MultiIndex.from_arrays([
        ['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3]
    ])
    assert ind.is_monotonic
    ind.set_levels([['A', 'B'], [1, 3, 2]], inplace=True)
    # if this fails, probably didn't reset the cache correctly.
    assert not ind.is_monotonic


def test_rangeindex_fallback_coercion_bug():
    # GH 12893
    foo = pd.DataFrame(np.arange(100).reshape((10, 10)))
    bar = pd.DataFrame(np.arange(100).reshape((10, 10)))
    df = pd.concat({'foo': foo.stack(), 'bar': bar.stack()}, axis=1)
    df.index.names = ['fizz', 'buzz']

    str(df)
    expected = pd.DataFrame({'bar': np.arange(100),
                             'foo': np.arange(100)},
                            index=pd.MultiIndex.from_product(
                                [range(10), range(10)],
                                names=['fizz', 'buzz']))
    tm.assert_frame_equal(df, expected, check_like=True)

    result = df.index.get_level_values('fizz')
    expected = pd.Int64Index(np.arange(10), name='fizz').repeat(10)
    tm.assert_index_equal(result, expected)

    result = df.index.get_level_values('buzz')
    expected = pd.Int64Index(np.tile(np.arange(10), 10), name='buzz')
    tm.assert_index_equal(result, expected)


def test_hash_error(indices):
    index = indices
    with pytest.raises(TypeError, match=("unhashable type: %r" %
                                         type(index).__name__)):
        hash(indices)


def test_mutability(indices):
    if not len(indices):
        return
    pytest.raises(TypeError, indices.__setitem__, 0, indices[0])


def test_wrong_number_names(indices):
    with pytest.raises(ValueError, match="^Length"):
        indices.names = ["apple", "banana", "carrot"]


def test_memory_usage(idx):
    result = idx.memory_usage()
    if len(idx):
        idx.get_loc(idx[0])
        result2 = idx.memory_usage()
        result3 = idx.memory_usage(deep=True)

        # RangeIndex, IntervalIndex
        # don't have engines
        if not isinstance(idx, (RangeIndex, IntervalIndex)):
            assert result2 > result

        if idx.inferred_type == 'object':
            assert result3 > result2

    else:

        # we report 0 for no-length
        assert result == 0


def test_nlevels(idx):
    assert idx.nlevels == 2