from warnings import catch_warnings, simplefilter
import numpy as np
from numpy.random import randn
import pytest
import pandas as pd
from pandas import (
DataFrame, MultiIndex, Series, Timestamp, date_range, isna, notna)
import pandas.core.common as com
from pandas.util import testing as tm
class TestMultiIndexSetItem(object):
def test_setitem_multiindex(self):
with catch_warnings(record=True):
for index_fn in ('ix', 'loc'):
def assert_equal(a, b):
assert a == b
def check(target, indexers, value, compare_fn, expected=None):
fn = getattr(target, index_fn)
fn.__setitem__(indexers, value)
result = fn.__getitem__(indexers)
if expected is None:
expected = value
compare_fn(result, expected)
# GH7190
index = MultiIndex.from_product([np.arange(0, 100),
np.arange(0, 80)],
names=['time', 'firm'])
t, n = 0, 2
df = DataFrame(np.nan, columns=['A', 'w', 'l', 'a', 'x',
'X', 'd', 'profit'],
check(target=df, indexers=((t, n), 'X'), value=0,
df = DataFrame(-999, columns=['A', 'w', 'l', 'a', 'x',
'X', 'd', 'profit'],
check(target=df, indexers=((t, n), 'X'), value=1,
df = DataFrame(columns=['A', 'w', 'l', 'a', 'x',
'X', 'd', 'profit'],
check(target=df, indexers=((t, n), 'X'), value=2,
# gh-7218: assigning with 0-dim arrays
df = DataFrame(-999, columns=['A', 'w', 'l', 'a', 'x',
'X', 'd', 'profit'],
indexers=((t, n), 'X'),
expected=3, )
# GH5206
df = DataFrame(np.arange(25).reshape(5, 5),
columns='A,B,C,D,E'.split(','), dtype=float)
df['F'] = 99
row_selection = df['A'] % 2 == 0
col_selection = ['B', 'C']
with catch_warnings(record=True):
df.ix[row_selection, col_selection] = df['F']
output = DataFrame(99., index=[0, 2, 4], columns=['B', 'C'])
with catch_warnings(record=True):
tm.assert_frame_equal(df.ix[row_selection, col_selection],
indexers=(row_selection, col_selection),
expected=output, )
# GH11372
idx = MultiIndex.from_product([
['A', 'B', 'C'],
date_range('2015-01-01', '2015-04-01', freq='MS')])
cols = MultiIndex.from_product([
['foo', 'bar'],
date_range('2016-01-01', '2016-02-01', freq='MS')])
df = DataFrame(np.random.random((12, 4)),
index=idx, columns=cols)
subidx = MultiIndex.from_tuples(
[('A', Timestamp('2015-01-01')),
('A', Timestamp('2015-02-01'))])
subcols = MultiIndex.from_tuples(
[('foo', Timestamp('2016-01-01')),
('foo', Timestamp('2016-02-01'))])
vals = DataFrame(np.random.random((2, 2)),
index=subidx, columns=subcols)
indexers=(subidx, subcols),
compare_fn=tm.assert_frame_equal, )
# set all columns
vals = DataFrame(
np.random.random((2, 4)), index=subidx, columns=cols)
indexers=(subidx, slice(None, None, None)),
compare_fn=tm.assert_frame_equal, )
# identity
copy = df.copy()
check(target=df, indexers=(df.index, df.columns), value=df,
compare_fn=tm.assert_frame_equal, expected=copy)
def test_multiindex_setitem(self):
# GH 3738
# setting with a multi-index right hand side
arrays = [np.array(['bar', 'bar', 'baz', 'qux', 'qux', 'bar']),
np.array(['one', 'two', 'one', 'one', 'two', 'one']),
np.arange(0, 6, 1)]
df_orig = DataFrame(np.random.randn(6, 3), index=arrays,
columns=['A', 'B', 'C']).sort_index()
expected = df_orig.loc[['bar']] * 2
df = df_orig.copy()
df.loc[['bar']] *= 2
tm.assert_frame_equal(df.loc[['bar']], expected)
# raise because these have differing levels
with pytest.raises(TypeError):
df.loc['bar'] *= 2
# from SO
df_orig = DataFrame.from_dict({'price': {
('DE', 'Coal', 'Stock'): 2,
('DE', 'Gas', 'Stock'): 4,
('DE', 'Elec', 'Demand'): 1,
('FR', 'Gas', 'Stock'): 5,
('FR', 'Solar', 'SupIm'): 0,
('FR', 'Wind', 'SupIm'): 0
df_orig.index = MultiIndex.from_tuples(df_orig.index,
names=['Sit', 'Com', 'Type'])
expected = df_orig.copy()
expected.iloc[[0, 2, 3]] *= 2
idx = pd.IndexSlice
df = df_orig.copy()
df.loc[idx[:, :, 'Stock'], :] *= 2
tm.assert_frame_equal(df, expected)
df = df_orig.copy()
df.loc[idx[:, :, 'Stock'], 'price'] *= 2
tm.assert_frame_equal(df, expected)
def test_multiindex_assignment(self):
# GH3777 part 2
# mixed dtype
df = DataFrame(np.random.randint(5, 10, size=9).reshape(3, 3),
index=[[4, 4, 8], [8, 10, 12]])
df['d'] = np.nan
arr = np.array([0., 1.])
with catch_warnings(record=True):
df.ix[4, 'd'] = arr
tm.assert_series_equal(df.ix[4, 'd'],
Series(arr, index=[8, 10], name='d'))
# single dtype
df = DataFrame(np.random.randint(5, 10, size=9).reshape(3, 3),
index=[[4, 4, 8], [8, 10, 12]])
with catch_warnings(record=True):
df.ix[4, 'c'] = arr
exp = Series(arr, index=[8, 10], name='c', dtype='float64')
tm.assert_series_equal(df.ix[4, 'c'], exp)
# scalar ok
with catch_warnings(record=True):
df.ix[4, 'c'] = 10
exp = Series(10, index=[8, 10], name='c', dtype='float64')
tm.assert_series_equal(df.ix[4, 'c'], exp)
# invalid assignments
with pytest.raises(ValueError):
with catch_warnings(record=True):
df.ix[4, 'c'] = [0, 1, 2, 3]
with pytest.raises(ValueError):
with catch_warnings(record=True):
df.ix[4, 'c'] = [0]
# groupby example
NUM_ROWS = 100
col_names = ['A' + num for num in
map(str, np.arange(NUM_COLS).tolist())]
index_cols = col_names[:5]
df = DataFrame(np.random.randint(5, size=(NUM_ROWS, NUM_COLS)),
dtype=np.int64, columns=col_names)
df = df.set_index(index_cols).sort_index()
grp = df.groupby(level=index_cols[:4])
df['new_col'] = np.nan
f_index = np.arange(5)
def f(name, df2):
return Series(np.arange(df2.shape[0]),
# TODO(wesm): unused?
# new_df = pd.concat([f(name, df2) for name, df2 in grp], axis=1).T
# we are actually operating on a copy here
# but in this case, that's ok
for name, df2 in grp:
new_vals = np.arange(df2.shape[0])
with catch_warnings(record=True):
df.ix[name, 'new_col'] = new_vals
def test_series_setitem(
self, multiindex_year_month_day_dataframe_random_data):
ymd = multiindex_year_month_day_dataframe_random_data
s = ymd['A']
s[2000, 3] = np.nan
assert isna(s.values[42:65]).all()
assert notna(s.values[:42]).all()
assert notna(s.values[65:]).all()
s[2000, 3, 10] = np.nan
assert isna(s[49])
def test_frame_getitem_setitem_boolean(
self, multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data
df = frame.T.copy()
values = df.values
result = df[df > 0]
expected = df.where(df > 0)
tm.assert_frame_equal(result, expected)
df[df > 0] = 5
values[values > 0] = 5
tm.assert_almost_equal(df.values, values)
df[df == 5] = 0
values[values == 5] = 0
tm.assert_almost_equal(df.values, values)
# a df that needs alignment first
df[df[:-1] < 0] = 2
np.putmask(values[:-1], values[:-1] < 0, 2)
tm.assert_almost_equal(df.values, values)
with pytest.raises(TypeError, match='boolean values only'):
df[df * 0] = 2
def test_frame_getitem_setitem_multislice(self):
levels = [['t1', 't2'], ['a', 'b', 'c']]
codes = [[0, 0, 0, 1, 1], [0, 1, 2, 0, 1]]
midx = MultiIndex(codes=codes, levels=levels, names=[None, 'id'])
df = DataFrame({'value': [1, 2, 3, 7, 8]}, index=midx)
result = df.loc[:, 'value']
tm.assert_series_equal(df['value'], result)
with catch_warnings(record=True):
simplefilter("ignore", DeprecationWarning)
result = df.ix[:, 'value']
tm.assert_series_equal(df['value'], result)
result = df.loc[df.index[1:3], 'value']
tm.assert_series_equal(df['value'][1:3], result)
result = df.loc[:, :]
tm.assert_frame_equal(df, result)
result = df
df.loc[:, 'value'] = 10
result['value'] = 10
tm.assert_frame_equal(df, result)
df.loc[:, :] = 10
tm.assert_frame_equal(df, result)
def test_frame_setitem_multi_column(self):
df = DataFrame(randn(10, 4), columns=[['a', 'a', 'b', 'b'],
[0, 1, 0, 1]])
cp = df.copy()
cp['a'] = cp['b']
tm.assert_frame_equal(cp['a'], cp['b'])
# set with ndarray
cp = df.copy()
cp['a'] = cp['b'].values
tm.assert_frame_equal(cp['a'], cp['b'])
# ---------------------------------------
# #1803
columns = MultiIndex.from_tuples([('A', '1'), ('A', '2'), ('B', '1')])
df = DataFrame(index=[1, 3, 5], columns=columns)
# Works, but adds a column instead of updating the two existing ones
df['A'] = 0.0 # Doesn't work
assert (df['A'].values == 0).all()
# it broadcasts
df['B', '1'] = [1, 2, 3]
df['A'] = df['B', '1']
sliced_a1 = df['A', '1']
sliced_a2 = df['A', '2']
sliced_b1 = df['B', '1']
tm.assert_series_equal(sliced_a1, sliced_b1, check_names=False)
tm.assert_series_equal(sliced_a2, sliced_b1, check_names=False)
assert == ('A', '1')
assert == ('A', '2')
assert == ('B', '1')
def test_getitem_setitem_tuple_plus_columns(
self, multiindex_year_month_day_dataframe_random_data):
# GH #1013
ymd = multiindex_year_month_day_dataframe_random_data
df = ymd[:5]
result = df.loc[(2000, 1, 6), ['A', 'B', 'C']]
expected = df.loc[2000, 1, 6][['A', 'B', 'C']]
tm.assert_series_equal(result, expected)
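# NOTE: a stray "Loading ..." page-extraction artifact was removed here; the file may continue beyond this point.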