import itertools
from warnings import catch_warnings
import numpy as np
import pytest
import pandas as pd
from pandas import DataFrame, Index, MultiIndex, Series
from pandas.util import testing as tm
@pytest.fixture
def single_level_multiindex():
"""single level MultiIndex"""
return MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']],
codes=[[0, 1, 2, 3]], names=['first'])
@pytest.fixture
def frame_random_data_integer_multi_index():
levels = [[0, 1], [0, 1, 2]]
codes = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]
index = MultiIndex(levels=levels, codes=codes)
return DataFrame(np.random.randn(6, 2), index=index)
@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
class TestMultiIndexLoc(object):
def test_loc_getitem_series(self):
# GH14730
# passing a series as a key with a MultiIndex
index = MultiIndex.from_product([[1, 2, 3], ['A', 'B', 'C']])
x = Series(index=index, data=range(9), dtype=np.float64)
y = Series([1, 3])
expected = Series(
data=[0, 1, 2, 6, 7, 8],
index=MultiIndex.from_product([[1, 3], ['A', 'B', 'C']]),
dtype=np.float64)
result = x.loc[y]
tm.assert_series_equal(result, expected)
result = x.loc[[1, 3]]
tm.assert_series_equal(result, expected)
# GH15424
y1 = Series([1, 3], index=[1, 2])
result = x.loc[y1]
tm.assert_series_equal(result, expected)
empty = Series(data=[], dtype=np.float64)
expected = Series([], index=MultiIndex(
levels=index.levels, codes=[[], []], dtype=np.float64))
result = x.loc[empty]
tm.assert_series_equal(result, expected)
def test_loc_getitem_array(self):
# GH15434
# passing an array as a key with a MultiIndex
index = MultiIndex.from_product([[1, 2, 3], ['A', 'B', 'C']])
x = Series(index=index, data=range(9), dtype=np.float64)
y = np.array([1, 3])
expected = Series(
data=[0, 1, 2, 6, 7, 8],
index=MultiIndex.from_product([[1, 3], ['A', 'B', 'C']]),
dtype=np.float64)
result = x.loc[y]
tm.assert_series_equal(result, expected)
# empty array:
empty = np.array([])
expected = Series([], index=MultiIndex(
levels=index.levels, codes=[[], []], dtype=np.float64))
result = x.loc[empty]
tm.assert_series_equal(result, expected)
# 0-dim array (scalar):
scalar = np.int64(1)
expected = Series(
data=[0, 1, 2],
index=['A', 'B', 'C'],
dtype=np.float64)
result = x.loc[scalar]
tm.assert_series_equal(result, expected)
def test_loc_multiindex(self):
mi_labels = DataFrame(np.random.randn(3, 3),
columns=[['i', 'i', 'j'], ['A', 'A', 'B']],
index=[['i', 'i', 'j'], ['X', 'X', 'Y']])
mi_int = DataFrame(np.random.randn(3, 3),
columns=[[2, 2, 4], [6, 8, 10]],
index=[[4, 4, 8], [8, 10, 12]])
# the first row
rs = mi_labels.loc['i']
with catch_warnings(record=True):
xp = mi_labels.ix['i']
tm.assert_frame_equal(rs, xp)
# 2nd (last) columns
rs = mi_labels.loc[:, 'j']
with catch_warnings(record=True):
xp = mi_labels.ix[:, 'j']
tm.assert_frame_equal(rs, xp)
# corner column
rs = mi_labels.loc['j'].loc[:, 'j']
with catch_warnings(record=True):
xp = mi_labels.ix['j'].ix[:, 'j']
tm.assert_frame_equal(rs, xp)
# with a tuple
rs = mi_labels.loc[('i', 'X')]
with catch_warnings(record=True):
xp = mi_labels.ix[('i', 'X')]
tm.assert_frame_equal(rs, xp)
rs = mi_int.loc[4]
with catch_warnings(record=True):
xp = mi_int.ix[4]
tm.assert_frame_equal(rs, xp)
# missing label
pytest.raises(KeyError, lambda: mi_int.loc[2])
with catch_warnings(record=True):
# GH 21593
pytest.raises(KeyError, lambda: mi_int.ix[2])
def test_loc_multiindex_indexer_none(self):
# GH6788
# multi-index indexer is None (meaning take all)
attributes = ['Attribute' + str(i) for i in range(1)]
attribute_values = ['Value' + str(i) for i in range(5)]
index = MultiIndex.from_product([attributes, attribute_values])
df = 0.1 * np.random.randn(10, 1 * 5) + 0.5
df = DataFrame(df, columns=index)
result = df[attributes]
tm.assert_frame_equal(result, df)
# GH 7349
# loc with a multi-index seems to be doing fallback
df = DataFrame(np.arange(12).reshape(-1, 1),
index=MultiIndex.from_product([[1, 2, 3, 4],
[1, 2, 3]]))
expected = df.loc[([1, 2], ), :]
result = df.loc[[1, 2]]
tm.assert_frame_equal(result, expected)
def test_loc_multiindex_incomplete(self):
# GH 7399
# incomplete indexers
s = Series(np.arange(15, dtype='int64'),
MultiIndex.from_product([range(5), ['a', 'b', 'c']]))
expected = s.loc[:, 'a':'c']
result = s.loc[0:4, 'a':'c']
tm.assert_series_equal(result, expected)
tm.assert_series_equal(result, expected)
result = s.loc[:4, 'a':'c']
tm.assert_series_equal(result, expected)
tm.assert_series_equal(result, expected)
result = s.loc[0:, 'a':'c']
tm.assert_series_equal(result, expected)
tm.assert_series_equal(result, expected)
# GH 7400
# multiindexer gettitem with list of indexers skips wrong element
s = Series(np.arange(15, dtype='int64'),
MultiIndex.from_product([range(5), ['a', 'b', 'c']]))
expected = s.iloc[[6, 7, 8, 12, 13, 14]]
result = s.loc[2:4:2, 'a':'c']
tm.assert_series_equal(result, expected)
def test_get_loc_single_level(self, single_level_multiindex):
single_level = single_level_multiindex
s = Series(np.random.randn(len(single_level)),
index=single_level)
for k in single_level.values:
s[k]
def test_loc_getitem_int_slice(self):
# GH 3053
# loc should treat integer slices like label slices
index = MultiIndex.from_tuples([t for t in itertools.product(
[6, 7, 8], ['a', 'b'])])
df = DataFrame(np.random.randn(6, 6), index, index)
result = df.loc[6:8, :]
expected = df
tm.assert_frame_equal(result, expected)
index = MultiIndex.from_tuples([t
for t in itertools.product(
[10, 20, 30], ['a', 'b'])])
df = DataFrame(np.random.randn(6, 6), index, index)
result = df.loc[20:30, :]
expected = df.iloc[2:]
tm.assert_frame_equal(result, expected)
# doc examples
result = df.loc[10, :]
expected = df.iloc[0:2]
expected.index = ['a', 'b']
tm.assert_frame_equal(result, expected)
result = df.loc[:, 10]
# expected = df.ix[:,10] (this fails)
expected = df[10]
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
'indexer_type_1',
(list, tuple, set, slice, np.ndarray, Series, Index))
@pytest.mark.parametrize(
'indexer_type_2',
(list, tuple, set, slice, np.ndarray, Series, Index))
def test_loc_getitem_nested_indexer(self, indexer_type_1, indexer_type_2):
# GH #19686
# .loc should work with nested indexers which can be
# any list-like objects (see `pandas.api.types.is_list_like`) or slices
def convert_nested_indexer(indexer_type, keys):
if indexer_type == np.ndarray:
return np.array(keys)
if indexer_type == slice:
return slice(*keys)
return indexer_type(keys)
a = [10, 20, 30]
b = [1, 2, 3]
index = MultiIndex.from_product([a, b])
df = DataFrame(
np.arange(len(index), dtype='int64'),
index=index, columns=['Data'])
keys = ([10, 20], [2, 3])
types = (indexer_type_1, indexer_type_2)
# check indexers with all the combinations of nested objects
# of all the valid types
indexer = tuple(
convert_nested_indexer(indexer_type, k)
for indexer_type, k in zip(types, keys))
result = df.loc[indexer, 'Data']
expected = Series(
[1, 2, 4, 5], name='Data',
index=MultiIndex.from_product(keys))
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize('indexer, is_level1, expected_error', [
([], False, None), # empty ok
(['A'], False, None),
(['A', 'D'], False, None),
(['D'], False, r"\['D'\] not in index"), # not any values found
(pd.IndexSlice[:, ['foo']], True, None),
(pd.IndexSlice[:, ['foo', 'bah']], True, None)
])
def test_loc_getitem_duplicates_multiindex_missing_indexers(indexer, is_level1,
expected_error):
# GH 7866
# multi-index slicing with missing indexers
idx = MultiIndex.from_product([['A', 'B', 'C'],
['foo', 'bar', 'baz']],
names=['one', 'two'])
s = Series(np.arange(9, dtype='int64'), index=idx).sort_index()
if indexer == []:
expected = s.iloc[[]]
elif is_level1:
expected = Series([0, 3, 6], index=MultiIndex.from_product(
[['A', 'B', 'C'], ['foo']], names=['one', 'two'])).sort_index()
else:
exp_idx = MultiIndex.from_product([['A'], ['foo', 'bar', 'baz']],
names=['one', 'two'])
expected = Series(np.arange(3, dtype='int64'),
index=exp_idx).sort_index()
if expected_error is not None:
with pytest.raises(KeyError, match=expected_error):
s.loc[indexer]
else:
result = s.loc[indexer]
tm.assert_series_equal(result, expected)
@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
@pytest.mark.parametrize('indexer', [
lambda s: s.loc[[(2000, 3, 10), (2000, 3, 13)]],
lambda s: s.ix[[(2000, 3, 10), (2000, 3, 13)]]
])
def test_series_loc_getitem_fancy(
multiindex_year_month_day_dataframe_random_data, indexer):
s = multiindex_year_month_day_dataframe_random_data['A']
expected = s.reindex(s.index[49:51])
result = indexer(s)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize('columns_indexer', [
([], slice(None)),
(['foo'], [])
])
def test_loc_getitem_duplicates_multiindex_empty_indexer(columns_indexer):
# GH 8737
# empty indexer
multi_index = MultiIndex.from_product((['foo', 'bar', 'baz'],
['alpha', 'beta']))
df = DataFrame(np.random.randn(5, 6), index=range(5), columns=multi_index)
df = df.sort_index(level=0, axis=1)
expected = DataFrame(index=range(5), columns=multi_index.reindex([])[0])
result = df.loc[:, columns_indexer]
tm.assert_frame_equal(result, expected)
def test_loc_getitem_duplicates_multiindex_non_scalar_type_object():
# regression from < 0.14.0
# GH 7914
df = DataFrame([[np.mean, np.median], ['mean', 'median']],
columns=MultiIndex.from_tuples([('functs', 'mean'),
('functs', 'median')]),
index=['function', 'name'])
result = df.loc['function', ('functs', 'mean')]
expected = np.mean
assert result == expected
def test_loc_getitem_tuple_plus_slice():
# GH 671
df = DataFrame({'a': np.arange(10),
'b': np.arange(10),
'c': np.random.randn(10),
'd': np.random.randn(10)}
Loading ...