Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

agriconnect / pandas   python

Repository URL to install this package:

Version: 0.24.2 

/ tests / indexing / multiindex / test_setitem.py

from warnings import catch_warnings, simplefilter

import numpy as np
from numpy.random import randn
import pytest

import pandas as pd
from pandas import (
    DataFrame, MultiIndex, Series, Timestamp, date_range, isna, notna)
import pandas.core.common as com
from pandas.util import testing as tm


@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
class TestMultiIndexSetItem(object):

    def test_setitem_multiindex(self):
        with catch_warnings(record=True):

            for index_fn in ('ix', 'loc'):

                def assert_equal(a, b):
                    assert a == b

                def check(target, indexers, value, compare_fn, expected=None):
                    fn = getattr(target, index_fn)
                    fn.__setitem__(indexers, value)
                    result = fn.__getitem__(indexers)
                    if expected is None:
                        expected = value
                    compare_fn(result, expected)
                # GH7190
                index = MultiIndex.from_product([np.arange(0, 100),
                                                 np.arange(0, 80)],
                                                names=['time', 'firm'])
                t, n = 0, 2
                df = DataFrame(np.nan, columns=['A', 'w', 'l', 'a', 'x',
                                                'X', 'd', 'profit'],
                               index=index)
                check(target=df, indexers=((t, n), 'X'), value=0,
                      compare_fn=assert_equal)

                df = DataFrame(-999, columns=['A', 'w', 'l', 'a', 'x',
                                              'X', 'd', 'profit'],
                               index=index)
                check(target=df, indexers=((t, n), 'X'), value=1,
                      compare_fn=assert_equal)

                df = DataFrame(columns=['A', 'w', 'l', 'a', 'x',
                                        'X', 'd', 'profit'],
                               index=index)
                check(target=df, indexers=((t, n), 'X'), value=2,
                      compare_fn=assert_equal)

                # gh-7218: assigning with 0-dim arrays
                df = DataFrame(-999, columns=['A', 'w', 'l', 'a', 'x',
                                              'X', 'd', 'profit'],
                               index=index)
                check(target=df,
                      indexers=((t, n), 'X'),
                      value=np.array(3),
                      compare_fn=assert_equal,
                      expected=3, )

                # GH5206
                df = DataFrame(np.arange(25).reshape(5, 5),
                               columns='A,B,C,D,E'.split(','), dtype=float)
                df['F'] = 99
                row_selection = df['A'] % 2 == 0
                col_selection = ['B', 'C']
                with catch_warnings(record=True):
                    df.ix[row_selection, col_selection] = df['F']
                output = DataFrame(99., index=[0, 2, 4], columns=['B', 'C'])
                with catch_warnings(record=True):
                    tm.assert_frame_equal(df.ix[row_selection, col_selection],
                                          output)
                check(target=df,
                      indexers=(row_selection, col_selection),
                      value=df['F'],
                      compare_fn=tm.assert_frame_equal,
                      expected=output, )

                # GH11372
                idx = MultiIndex.from_product([
                    ['A', 'B', 'C'],
                    date_range('2015-01-01', '2015-04-01', freq='MS')])
                cols = MultiIndex.from_product([
                    ['foo', 'bar'],
                    date_range('2016-01-01', '2016-02-01', freq='MS')])

                df = DataFrame(np.random.random((12, 4)),
                               index=idx, columns=cols)

                subidx = MultiIndex.from_tuples(
                    [('A', Timestamp('2015-01-01')),
                     ('A', Timestamp('2015-02-01'))])
                subcols = MultiIndex.from_tuples(
                    [('foo', Timestamp('2016-01-01')),
                     ('foo', Timestamp('2016-02-01'))])

                vals = DataFrame(np.random.random((2, 2)),
                                 index=subidx, columns=subcols)
                check(target=df,
                      indexers=(subidx, subcols),
                      value=vals,
                      compare_fn=tm.assert_frame_equal, )
                # set all columns
                vals = DataFrame(
                    np.random.random((2, 4)), index=subidx, columns=cols)
                check(target=df,
                      indexers=(subidx, slice(None, None, None)),
                      value=vals,
                      compare_fn=tm.assert_frame_equal, )
                # identity
                copy = df.copy()
                check(target=df, indexers=(df.index, df.columns), value=df,
                      compare_fn=tm.assert_frame_equal, expected=copy)

    def test_multiindex_setitem(self):

        # GH 3738
        # setting with a multi-index right hand side
        arrays = [np.array(['bar', 'bar', 'baz', 'qux', 'qux', 'bar']),
                  np.array(['one', 'two', 'one', 'one', 'two', 'one']),
                  np.arange(0, 6, 1)]

        df_orig = DataFrame(np.random.randn(6, 3), index=arrays,
                            columns=['A', 'B', 'C']).sort_index()

        expected = df_orig.loc[['bar']] * 2
        df = df_orig.copy()
        df.loc[['bar']] *= 2
        tm.assert_frame_equal(df.loc[['bar']], expected)

        # raise because these have differing levels
        with pytest.raises(TypeError):
            df.loc['bar'] *= 2

        # from SO
        # http://stackoverflow.com/questions/24572040/pandas-access-the-level-of-multiindex-for-inplace-operation
        df_orig = DataFrame.from_dict({'price': {
            ('DE', 'Coal', 'Stock'): 2,
            ('DE', 'Gas', 'Stock'): 4,
            ('DE', 'Elec', 'Demand'): 1,
            ('FR', 'Gas', 'Stock'): 5,
            ('FR', 'Solar', 'SupIm'): 0,
            ('FR', 'Wind', 'SupIm'): 0
        }})
        df_orig.index = MultiIndex.from_tuples(df_orig.index,
                                               names=['Sit', 'Com', 'Type'])

        expected = df_orig.copy()
        expected.iloc[[0, 2, 3]] *= 2

        idx = pd.IndexSlice
        df = df_orig.copy()
        df.loc[idx[:, :, 'Stock'], :] *= 2
        tm.assert_frame_equal(df, expected)

        df = df_orig.copy()
        df.loc[idx[:, :, 'Stock'], 'price'] *= 2
        tm.assert_frame_equal(df, expected)

    def test_multiindex_assignment(self):

        # GH3777 part 2

        # mixed dtype
        df = DataFrame(np.random.randint(5, 10, size=9).reshape(3, 3),
                       columns=list('abc'),
                       index=[[4, 4, 8], [8, 10, 12]])
        df['d'] = np.nan
        arr = np.array([0., 1.])

        with catch_warnings(record=True):
            df.ix[4, 'd'] = arr
            tm.assert_series_equal(df.ix[4, 'd'],
                                   Series(arr, index=[8, 10], name='d'))

        # single dtype
        df = DataFrame(np.random.randint(5, 10, size=9).reshape(3, 3),
                       columns=list('abc'),
                       index=[[4, 4, 8], [8, 10, 12]])

        with catch_warnings(record=True):
            df.ix[4, 'c'] = arr
            exp = Series(arr, index=[8, 10], name='c', dtype='float64')
            tm.assert_series_equal(df.ix[4, 'c'], exp)

        # scalar ok
        with catch_warnings(record=True):
            df.ix[4, 'c'] = 10
            exp = Series(10, index=[8, 10], name='c', dtype='float64')
            tm.assert_series_equal(df.ix[4, 'c'], exp)

        # invalid assignments
        with pytest.raises(ValueError):
            with catch_warnings(record=True):
                df.ix[4, 'c'] = [0, 1, 2, 3]

        with pytest.raises(ValueError):
            with catch_warnings(record=True):
                df.ix[4, 'c'] = [0]

        # groupby example
        NUM_ROWS = 100
        NUM_COLS = 10
        col_names = ['A' + num for num in
                     map(str, np.arange(NUM_COLS).tolist())]
        index_cols = col_names[:5]

        df = DataFrame(np.random.randint(5, size=(NUM_ROWS, NUM_COLS)),
                       dtype=np.int64, columns=col_names)
        df = df.set_index(index_cols).sort_index()
        grp = df.groupby(level=index_cols[:4])
        df['new_col'] = np.nan

        f_index = np.arange(5)

        def f(name, df2):
            return Series(np.arange(df2.shape[0]),
                          name=df2.index.values[0]).reindex(f_index)

        # TODO(wesm): unused?
        # new_df = pd.concat([f(name, df2) for name, df2 in grp], axis=1).T

        # we are actually operating on a copy here
        # but in this case, that's ok
        for name, df2 in grp:
            new_vals = np.arange(df2.shape[0])
            with catch_warnings(record=True):
                df.ix[name, 'new_col'] = new_vals

    def test_series_setitem(
            self, multiindex_year_month_day_dataframe_random_data):
        ymd = multiindex_year_month_day_dataframe_random_data
        s = ymd['A']

        s[2000, 3] = np.nan
        assert isna(s.values[42:65]).all()
        assert notna(s.values[:42]).all()
        assert notna(s.values[65:]).all()

        s[2000, 3, 10] = np.nan
        assert isna(s[49])

    def test_frame_getitem_setitem_boolean(
            self, multiindex_dataframe_random_data):
        frame = multiindex_dataframe_random_data
        df = frame.T.copy()
        values = df.values

        result = df[df > 0]
        expected = df.where(df > 0)
        tm.assert_frame_equal(result, expected)

        df[df > 0] = 5
        values[values > 0] = 5
        tm.assert_almost_equal(df.values, values)

        df[df == 5] = 0
        values[values == 5] = 0
        tm.assert_almost_equal(df.values, values)

        # a df that needs alignment first
        df[df[:-1] < 0] = 2
        np.putmask(values[:-1], values[:-1] < 0, 2)
        tm.assert_almost_equal(df.values, values)

        with pytest.raises(TypeError, match='boolean values only'):
            df[df * 0] = 2

    def test_frame_getitem_setitem_multislice(self):
        levels = [['t1', 't2'], ['a', 'b', 'c']]
        codes = [[0, 0, 0, 1, 1], [0, 1, 2, 0, 1]]
        midx = MultiIndex(codes=codes, levels=levels, names=[None, 'id'])
        df = DataFrame({'value': [1, 2, 3, 7, 8]}, index=midx)

        result = df.loc[:, 'value']
        tm.assert_series_equal(df['value'], result)

        with catch_warnings(record=True):
            simplefilter("ignore", DeprecationWarning)
            result = df.ix[:, 'value']
        tm.assert_series_equal(df['value'], result)

        result = df.loc[df.index[1:3], 'value']
        tm.assert_series_equal(df['value'][1:3], result)

        result = df.loc[:, :]
        tm.assert_frame_equal(df, result)

        result = df
        df.loc[:, 'value'] = 10
        result['value'] = 10
        tm.assert_frame_equal(df, result)

        df.loc[:, :] = 10
        tm.assert_frame_equal(df, result)

    def test_frame_setitem_multi_column(self):
        df = DataFrame(randn(10, 4), columns=[['a', 'a', 'b', 'b'],
                                              [0, 1, 0, 1]])

        cp = df.copy()
        cp['a'] = cp['b']
        tm.assert_frame_equal(cp['a'], cp['b'])

        # set with ndarray
        cp = df.copy()
        cp['a'] = cp['b'].values
        tm.assert_frame_equal(cp['a'], cp['b'])

        # ---------------------------------------
        # #1803
        columns = MultiIndex.from_tuples([('A', '1'), ('A', '2'), ('B', '1')])
        df = DataFrame(index=[1, 3, 5], columns=columns)

        # Works, but adds a column instead of updating the two existing ones
        df['A'] = 0.0  # Doesn't work
        assert (df['A'].values == 0).all()

        # it broadcasts
        df['B', '1'] = [1, 2, 3]
        df['A'] = df['B', '1']

        sliced_a1 = df['A', '1']
        sliced_a2 = df['A', '2']
        sliced_b1 = df['B', '1']
        tm.assert_series_equal(sliced_a1, sliced_b1, check_names=False)
        tm.assert_series_equal(sliced_a2, sliced_b1, check_names=False)
        assert sliced_a1.name == ('A', '1')
        assert sliced_a2.name == ('A', '2')
        assert sliced_b1.name == ('B', '1')

    def test_getitem_setitem_tuple_plus_columns(
            self, multiindex_year_month_day_dataframe_random_data):
        # GH #1013
        ymd = multiindex_year_month_day_dataframe_random_data
        df = ymd[:5]

        result = df.loc[(2000, 1, 6), ['A', 'B', 'C']]
        expected = df.loc[2000, 1, 6][['A', 'B', 'C']]
        tm.assert_series_equal(result, expected)
Loading ...