# Gemfury | steminc / pandas (python) | pandas/tests/indexing/test_indexing.py
# -*- coding: utf-8 -*-
# pylint: disable-msg=W0612,E1101

""" test fancy indexing & misc """

import pytest

from warnings import catch_warnings
from datetime import datetime

from pandas.core.dtypes.common import (
    is_integer_dtype,
    is_float_dtype)
from pandas.compat import range, lrange, lzip, StringIO
import numpy as np

import pandas as pd
from pandas.core.indexing import _non_reducing_slice, _maybe_numeric_slice
from pandas import NaT, DataFrame, Index, Series, MultiIndex
import pandas.util.testing as tm

from pandas.tests.indexing.common import Base, _mklbl


# ------------------------------------------------------------------------
# Indexing test cases


class TestFancy(Base):
    """ pure get/set item & fancy indexing """

    def test_setitem_ndarray_1d(self):
        """Assigning a 1d ndarray must match the indexer length (GH5508)."""

        def make_frame():
            # Frame labelled 1..10 with one float and one complex column.
            # ``np.complex128`` is spelled out because the ``np.complex``
            # alias of the builtin ``complex`` is gone from current NumPy.
            frame = DataFrame(index=Index(lrange(1, 11)))
            frame['foo'] = np.zeros(10, dtype=np.float64)
            frame['bar'] = np.zeros(10, dtype=np.complex128)
            return frame

        values = np.array([2.33j, 1.23 + 0.1j, 2.2, 1.0])

        # len of indexer vs length of the 1d ndarray
        df = make_frame()

        # invalid: three labels selected, four values supplied
        with pytest.raises(ValueError):
            df.loc[df.index[2:5], 'bar'] = values

        # valid: four labels selected, four values supplied
        df.loc[df.index[2:6], 'bar'] = values

        result = df.loc[df.index[2:6], 'bar']
        expected = Series([2.33j, 1.23 + 0.1j, 2.2, 1.0], index=[3, 4, 5, 6],
                          name='bar')
        tm.assert_series_equal(result, expected)

        # dtype getting changed?
        df = make_frame()

        # setting three values onto a three-row slice of the two-column
        # frame is expected to raise
        with pytest.raises(ValueError):
            df[2:5] = np.arange(1, 4) * 1j

    def test_inf_upcast(self):
        """``np.inf`` works as a label and turns the axis into a float index.

        See GH 16957.
        """
        # rows: enlarge an empty frame, finishing with an np.inf label
        frame = pd.DataFrame(columns=[0])
        for label, value in [(1, 1), (2, 2), (np.inf, 3)]:
            frame.loc[label] = value

        # the value stored under np.inf must be retrievable
        assert frame.loc[np.inf, 0] == 3

        tm.assert_index_equal(frame.index, pd.Float64Index([1, 2, np.inf]))

        # columns: same idea, enlarging along the other axis
        frame = pd.DataFrame()
        for row, column, value in [(0, 0, 1), (1, 1, 2), (0, np.inf, 3)]:
            frame.loc[row, column] = value

        tm.assert_index_equal(frame.columns,
                              pd.Float64Index([0, 1, np.inf]))

    def test_setitem_dtype_upcast(self):
        """Scalar ``.loc`` assignment of a foreign type into one cell."""
        # GH3216
        frame = DataFrame([{"a": 1}, {"a": 3, "b": 2}])
        frame['c'] = np.nan
        assert frame['c'].dtype == np.float64

        frame.loc[0, 'c'] = 'foo'
        tm.assert_frame_equal(
            frame,
            DataFrame([{"a": 1, "c": 'foo'},
                       {"a": 3, "b": 2, "c": np.nan}]))

        # GH10280
        row_labels = list('ab')
        col_labels = ['foo', 'bar', 'baz']
        ints = DataFrame(np.arange(6, dtype='int64').reshape(2, 3),
                         index=row_labels,
                         columns=col_labels)

        for new_value in (3.14, 'wxyz'):
            left = ints.copy()
            left.loc['a', 'bar'] = new_value
            right = DataFrame([[0, new_value, 2], [3, 4, 5]],
                              index=row_labels,
                              columns=col_labels)

            tm.assert_frame_equal(left, right)
            # the columns that were not assigned to stay integer
            for untouched in ('foo', 'baz'):
                assert is_integer_dtype(left[untouched])

        left = DataFrame(np.arange(6, dtype='int64').reshape(2, 3) / 10.0,
                         index=row_labels,
                         columns=col_labels)
        left.loc['a', 'bar'] = 'wxyz'

        right = DataFrame([[0, 'wxyz', .2], [.3, .4, .5]],
                          index=row_labels,
                          columns=col_labels)

        tm.assert_frame_equal(left, right)
        # the columns that were not assigned to stay float
        for untouched in ('foo', 'baz'):
            assert is_float_dtype(left[untouched])

    def test_dups_fancy_indexing(self):
        """List-like selection on frames with duplicated or missing labels.

        Each section below builds its own frame and compares a ``[]`` /
        ``.loc`` / ``.ix`` selection with an explicitly built expected frame.
        Lookups that include a label absent from the axis are wrapped in
        ``assert_produces_warning(FutureWarning)``.
        """

        # GH 3455
        from pandas.util.testing import makeCustomDataframe as mkdf
        df = mkdf(10, 3)
        df.columns = ['a', 'a', 'b']
        result = df[['b', 'a']].columns
        expected = Index(['b', 'a', 'a'])
        tm.assert_index_equal(result, expected)

        # across dtypes
        df = DataFrame([[1, 2, 1., 2., 3., 'foo', 'bar']],
                       columns=list('aaaaaaa'))
        # results discarded -- presumably smoke checks that these do not raise
        df.head()
        str(df)
        result = DataFrame([[1, 2, 1., 2., 3., 'foo', 'bar']])
        result.columns = list('aaaaaaa')

        # TODO(wesm): unused?
        df_v = df.iloc[:, 4]  # noqa
        res_v = result.iloc[:, 4]  # noqa

        tm.assert_frame_equal(df, result)

        # GH 3561, dups not in selected order
        df = DataFrame(
            {'test': [5, 7, 9, 11],
             'test1': [4., 5, 6, 7],
             'other': list('abcd')}, index=['A', 'A', 'B', 'C'])
        rows = ['C', 'B']
        expected = DataFrame(
            {'test': [11, 9],
             'test1': [7., 6],
             'other': ['d', 'c']}, index=rows)
        result = df.loc[rows]
        tm.assert_frame_equal(result, expected)

        # same selection, passed as an Index instead of a list
        result = df.loc[Index(rows)]
        tm.assert_frame_equal(result, expected)

        # 'E' is not in the index: expected row is all-NaN
        rows = ['C', 'B', 'E']
        expected = DataFrame(
            {'test': [11, 9, np.nan],
             'test1': [7., 6, np.nan],
             'other': ['d', 'c', np.nan]}, index=rows)

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = df.loc[rows]
        tm.assert_frame_equal(result, expected)

        # see GH5553, make sure we use the right indexer
        rows = ['F', 'G', 'H', 'C', 'B', 'E']
        expected = DataFrame({'test': [np.nan, np.nan, np.nan, 11, 9, np.nan],
                              'test1': [np.nan, np.nan, np.nan, 7., 6, np.nan],
                              'other': [np.nan, np.nan, np.nan,
                                        'd', 'c', np.nan]},
                             index=rows)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = df.loc[rows]
        tm.assert_frame_equal(result, expected)

        # inconsistent returns for unique/duplicate indices when values are
        # missing
        df = DataFrame(np.random.randn(4, 3), index=list('ABCD'))
        expected = df.reindex(['E'])

        dfnu = DataFrame(np.random.randn(5, 3), index=list('AABCD'))
        # any warnings raised by the .ix access are recorded, not surfaced
        with catch_warnings(record=True):
            result = dfnu.ix[['E']]
        tm.assert_frame_equal(result, expected)

        # ToDo: check_index_type can be True after GH 11497

        # GH 4619; duplicate indexer with missing label
        df = DataFrame({"A": [0, 1, 2]})
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = df.loc[[0, 8, 0]]
        expected = DataFrame({"A": [0, np.nan, 0]}, index=[0, 8, 0])
        tm.assert_frame_equal(result, expected, check_index_type=False)

        # same indexer against a string-valued column
        df = DataFrame({"A": list('abc')})
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = df.loc[[0, 8, 0]]
        expected = DataFrame({"A": ['a', np.nan, 'a']}, index=[0, 8, 0])
        tm.assert_frame_equal(result, expected, check_index_type=False)

        # non unique with non unique selector
        df = DataFrame({'test': [5, 7, 9, 11]}, index=['A', 'A', 'B', 'C'])
        expected = DataFrame(
            {'test': [5, 7, 5, 7, np.nan]}, index=['A', 'A', 'A', 'A', 'E'])
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = df.loc[['A', 'A', 'E']]
        tm.assert_frame_equal(result, expected)

        # GH 5835
        # dups on index and missing values
        df = DataFrame(
            np.random.randn(5, 5), columns=['A', 'B', 'B', 'B', 'A'])

        # expected: the existing 'A'/'B' columns plus an all-NaN 'C' column
        expected = pd.concat(
            [df.loc[:, ['A', 'B']], DataFrame(np.nan, columns=['C'],
                                              index=df.index)], axis=1)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = df.loc[:, ['A', 'B', 'C']]
        tm.assert_frame_equal(result, expected)

        # GH 6504, multi-axis indexing
        df = DataFrame(np.random.randn(9, 2),
                       index=[1, 1, 1, 2, 2, 2, 3, 3, 3], columns=['a', 'b'])

        # rows only
        expected = df.iloc[0:6]
        result = df.loc[[1, 2]]
        tm.assert_frame_equal(result, expected)

        # columns only
        expected = df
        result = df.loc[:, ['a', 'b']]
        tm.assert_frame_equal(result, expected)

        # rows and columns together
        expected = df.iloc[0:6, :]
        result = df.loc[[1, 2], ['a', 'b']]
        tm.assert_frame_equal(result, expected)

    def test_indexing_mixed_frame_bug(self):
        """Masked ``.loc`` assignment into a mixed frame sticks (GH3492)."""
        frame = DataFrame({'a': {1: 'aaa', 2: 'bbb', 3: 'ccc'},
                           'b': {1: 111, 2: 222, 3: 333}})

        def to_underscore(value):
            return '_' if value == 'aaa' else value

        def to_dashes(value):
            return '-----' if value == 'aaa' else value

        # creating the new column was never the problem
        frame['test'] = frame['a'].apply(to_underscore)

        # the reported failure: assigning through the mask left column
        # 'test' unchanged
        mask = frame['test'] == '_'
        replacement = frame.loc[mask, 'a'].apply(to_dashes)
        frame.loc[mask, 'test'] = replacement

        # In the report the frame display and df.iloc[0, 2] still showed
        # '_' while df.ix[idx, 'test'] showed '-----'; the positional
        # lookup has to see the new value.
        assert frame.iloc[0, 2] == '-----'

    def test_multitype_list_index_access(self):
        """A list key containing absent column labels raises (GH 10610)."""
        labels = ["a", 20, 21, 22, 23]
        frame = pd.DataFrame(np.random.random((10, 5)), columns=labels)

        # 26 and -8 are not column labels
        absent_mix = [22, 26, -8]
        with pytest.raises(KeyError):
            frame[absent_mix]

        # a single existing integer label still selects a full column
        column = frame[21]
        assert column.shape[0] == frame.shape[0]

    def test_set_index_nan(self):
        """``set_index`` / ``reset_index`` round trip with NaN key values.

        The 'QC' column used as an index level is mostly NaN; after moving
        ``['year', 'PRuid', 'QC']`` into the index and back out again (and
        restoring the original column order) the frame must be unchanged.
        """

        # GH 3586
        # fixture keyed by the original row labels 17..30; reset_index()
        # turns those labels into an ordinary column
        df = DataFrame({'PRuid': {17: 'nonQC',
                                  18: 'nonQC',
                                  19: 'nonQC',
                                  20: '10',
                                  21: '11',
                                  22: '12',
                                  23: '13',
                                  24: '24',
                                  25: '35',
                                  26: '46',
                                  27: '47',
                                  28: '48',
                                  29: '59',
                                  30: '10'},
                        'QC': {17: 0.0,
                               18: 0.0,
                               19: 0.0,
                               20: np.nan,
                               21: np.nan,
                               22: np.nan,
                               23: np.nan,
                               24: 1.0,
                               25: np.nan,
                               26: np.nan,
                               27: np.nan,
                               28: np.nan,
                               29: np.nan,
                               30: np.nan},
                        'data': {17: 7.9544899999999998,
                                 18: 8.0142609999999994,
                                 19: 7.8591520000000008,
                                 20: 0.86140349999999999,
                                 21: 0.87853110000000001,
                                 22: 0.8427041999999999,
                                 23: 0.78587700000000005,
                                 24: 0.73062459999999996,
                                 25: 0.81668560000000001,
                                 26: 0.81927080000000008,
                                 27: 0.80705009999999999,
                                 28: 0.81440240000000008,
                                 29: 0.80140849999999997,
                                 30: 0.81307740000000006},
                        'year': {17: 2006,
                                 18: 2007,
                                 19: 2008,
                                 20: 1985,
                                 21: 1985,
                                 22: 1985,
                                 23: 1985,
                                 24: 1985,
                                 25: 1985,
                                 26: 1985,
                                 27: 1985,
                                 28: 1985,
                                 29: 1985,
                                 30: 1986}}).reset_index()

        # reindex(columns=...) puts the columns back in their original order
        # before comparing
        result = df.set_index(['year', 'PRuid', 'QC']).reset_index().reindex(
            columns=df.columns)
        tm.assert_frame_equal(result, df)

    def test_multi_nan_indexing(self):
        """``set_index`` on two columns keeps a NaN label (GH 3588)."""
        data = {"a": ['R1', 'R2', np.nan, 'R4'],
                'b': ["C1", "C2", "C3", "C4"],
                "c": [10, 15, np.nan, 20]}

        result = DataFrame(data).set_index(['a', 'b'], drop=False)

        # the expected frame carries the same columns plus a two-level index
        # built from columns 'a' and 'b'
        level_a = Index(['R1', 'R2', np.nan, 'R4'], name='a')
        level_b = Index(['C1', 'C2', 'C3', 'C4'], name='b')
        expected = DataFrame(data, index=[level_a, level_b])

        tm.assert_frame_equal(result, expected)

    def test_multi_assign(self):
        """Masked ``.loc`` assignment of a frame, an ndarray and a Series."""
        # GH 3626, an assignment of a sub-df to a df
        base = DataFrame({'FC': ['a', 'b', 'a', 'b', 'a', 'b'],
                          'PF': [0, 0, 0, 0, 1, 1],
                          'col1': lrange(6),
                          'col2': lrange(6, 12)})
        base.iloc[1, 0] = np.nan
        target = base.copy()

        keep = ~target.FC.isna()
        value_cols = ['col1', 'col2']

        doubled = target * 2
        doubled.iloc[3, 3] = np.nan

        fc_values = ['a', np.nan, 'a', 'b', 'a', 'b']
        pf_values = [0, 0, 0, 0, 1, 1]
        col2_values = [12, 7, 16, np.nan, 20, 22]

        expected = DataFrame({'FC': fc_values,
                              'PF': pf_values,
                              'col1': Series([0, 1, 4, 6, 8, 10]),
                              'col2': col2_values})

        # frame on rhs; the assignment is performed twice and has to give
        # the same frame both times
        for _ in range(2):
            target.loc[keep, value_cols] = doubled.loc[keep, value_cols]
            tm.assert_frame_equal(target, expected)

        # with an ndarray on rhs
        # coerces to float64 because values has float64 dtype
        # GH 14001
        expected = DataFrame({'FC': fc_values,
                              'PF': pf_values,
                              'col1': [0., 1., 4., 6., 8., 10.],
                              'col2': col2_values})
        target = base.copy()
        for _ in range(2):
            rhs_values = doubled.loc[keep, value_cols].values
            target.loc[keep, value_cols] = rhs_values
            tm.assert_frame_equal(target, expected)

        # broadcasting on the rhs is required
        frame = DataFrame(dict(A=[1, 2, 0, 0, 0], B=[0, 0, 0, 10, 11], C=[
                          0, 0, 0, 10, 11], D=[3, 4, 5, 6, 7]))

        # build the expectation one column at a time
        expected = frame.copy()
        zero_rows = expected['A'] == 0
        for name in ['A', 'B']:
            expected.loc[zero_rows, name] = frame['D']

        frame.loc[frame['A'] == 0, ['A', 'B']] = frame['D']
        tm.assert_frame_equal(frame, expected)

    def test_setitem_list(self):
        """``.ix`` cell assignment of lists and arbitrary objects (GH 6043)."""

        def blank():
            # two rows, one column, nothing filled in
            return DataFrame(index=[0, 1], columns=[0])

        # ix with a list: overwriting must equal setting the final value once
        df = blank()
        with catch_warnings(record=True):
            df.ix[1, 0] = [1, 2, 3]
            df.ix[1, 0] = [1, 2]

        result = blank()
        with catch_warnings(record=True):
            result.ix[1, 0] = [1, 2]

        tm.assert_frame_equal(result, df)

        # ix with an object
        class Boxed(object):
            """Value holder compared by its ``value`` attribute."""

            def __init__(self, value):
                self.value = value

            def __repr__(self):
                return "[{0}]".format(self.value)

            __str__ = __repr__

            def __eq__(self, other):
                return self.value == other.value

            def view(self):
                return self

        df = blank()
        with catch_warnings(record=True):
            df.ix[1, 0] = Boxed(1)
            df.ix[1, 0] = Boxed(2)

        result = blank()
        with catch_warnings(record=True):
            result.ix[1, 0] = Boxed(2)

        tm.assert_frame_equal(result, df)

        # remains object dtype even after setting it back
        df = blank()
        with catch_warnings(record=True):
            df.ix[1, 0] = Boxed(1)
            df.ix[1, 0] = np.nan
        result = blank()

        tm.assert_frame_equal(result, df)

    def test_string_slice(self):
        """String keys on an object-dtype index raise KeyError (GH 14424)."""

        def assert_year_key_missing(frame):
            # both plain getitem and .loc have to reject the string key
            with pytest.raises(KeyError):
                frame['2011']

            with pytest.raises(KeyError):
                frame.loc['2011', 0]

        # a Timestamp stored in an object-dtype index
        stamps = pd.Index([pd.Timestamp('2011-01-01')], dtype=object)
        dated = pd.DataFrame([1], stamps)
        assert dated.index.is_all_dates
        assert_year_key_missing(dated)

        # an empty frame
        empty = pd.DataFrame()
        assert not empty.index.is_all_dates
        assert_year_key_missing(empty)

    def test_mi_access(self):
        """Column selection on a frame with MultiIndex columns."""
        # GH 4145
        raw = ("h1 main  h3 sub  h5\n"
               "0  a    A   1  A1   1\n"
               "1  b    B   2  B1   2\n"
               "2  c    B   3  A1   3\n"
               "3  d    A   4  B2   4\n"
               "4  e    A   5  B2   5\n"
               "5  f    B   6  A2   6\n")

        parsed = pd.read_csv(StringIO(raw), sep=r'\s+', index_col=0)
        wide = parsed.set_index(['main', 'sub']).T.sort_index(1)

        row_names = Index(['h1', 'h3', 'h5'])
        key = ('A', 'A1')
        key_index = MultiIndex.from_tuples([key], names=['main', 'sub'])
        expected = DataFrame([['a', 1, 1]],
                             index=key_index, columns=row_names).T

        # the full tuple key gives the same frame through .loc and getitem
        tm.assert_frame_equal(wide.loc[:, key], expected)
        tm.assert_frame_equal(wide[key], expected)

        # GH 4146, not returning a block manager when selecting a unique index
        # from a duplicate index
        # as of 4879, this returns a Series (which is similar to what happens
        # with a non-unique)
        expected = Series(['a', 1, 1], index=['h1', 'h3', 'h5'], name='A1')
        tm.assert_series_equal(wide['A']['A1'], expected)

        # selecting a non_unique from the 2nd level
        duplicated = Index(['B2', 'B2'], name='sub')
        expected = DataFrame([['d', 4, 4], ['e', 5, 5]],
                             index=duplicated,
                             columns=['h1', 'h3', 'h5']).T
        tm.assert_frame_equal(wide['A']['B2'], expected)

    def test_astype_assignment(self):
        """Assigning converted columns back through ``iloc`` and ``loc``."""
        letters = list('ABCDEFG')

        def one_row(values):
            return DataFrame([values], columns=letters)

        original = one_row(['1', '2', '3', '.4', 5, 6., 'foo'])

        # GH4312 (iloc)
        work = original.copy()
        work.iloc[:, 0:2] = work.iloc[:, 0:2].astype(np.int64)
        tm.assert_frame_equal(work,
                              one_row([1, 2, '3', '.4', 5, 6., 'foo']))

        work = original.copy()
        converted = work.iloc[:, 0:2]._convert(datetime=True, numeric=True)
        work.iloc[:, 0:2] = converted
        tm.assert_frame_equal(work,
                              one_row([1, 2, '3', '.4', 5, 6., 'foo']))

        # GH5702 (loc)
        work = original.copy()
        work.loc[:, 'A'] = work.loc[:, 'A'].astype(np.int64)
        tm.assert_frame_equal(work,
                              one_row([1, '2', '3', '.4', 5, 6., 'foo']))

        work = original.copy()
        pair = ['B', 'C']
        work.loc[:, pair] = work.loc[:, pair].astype(np.int64)
        tm.assert_frame_equal(work,
                              one_row(['1', 2, 3, '.4', 5, 6., 'foo']))

        # full replacements / no nans
        as_ints = DataFrame({'A': [1, 2, 3, 4]})

        work = DataFrame({'A': [1., 2., 3., 4.]})
        work.iloc[:, 0] = work['A'].astype(np.int64)
        tm.assert_frame_equal(work, as_ints)

        work = DataFrame({'A': [1., 2., 3., 4.]})
        work.loc[:, 'A'] = work['A'].astype(np.int64)
        tm.assert_frame_equal(work, as_ints)

    def test_astype_assignment_with_dups(self):
        """Dtype-changing assignment under a repeated top-level column key.

        GH 4686: the row index must be left as it was.
        """
        column_keys = pd.MultiIndex.from_tuples(
            [('A', '1'), ('B', '1'), ('A', '2')])
        frame = DataFrame(np.arange(3).reshape((1, 3)),
                          columns=column_keys, dtype=object)
        index_before = frame.index.copy()

        # 'A' addresses two of the three columns
        as_float = frame['A'].astype(np.float64)
        frame['A'] = as_float
        tm.assert_index_equal(frame.index, index_before)

        # TODO(wesm): unused variables
        # result = df.get_dtype_counts().sort_index()
        # expected = Series({'float64': 2, 'object': 1}).sort_index()

    # Each case pairs an index with a value that is one of its labels.
    @pytest.mark.parametrize("index,val", [
        # integer labels / mixed integer and string labels
        (pd.Index([0, 1, 2]), 2),
        (pd.Index([0, 1, '2']), '2'),
        # an ordinary label sitting next to inf / nan
        (pd.Index([0, 1, 2, np.inf, 4]), 4),
        (pd.Index([0, 1, 2, np.nan, 4]), 4),
        # inf / nan themselves as the looked-up label
        (pd.Index([0, 1, 2, np.inf]), np.inf),
        (pd.Index([0, 1, 2, np.nan]), np.nan),
    ])
    def test_index_contains(self, index, val):
        """``val in index`` must be true for every parametrized pair."""
        assert val in index

    # Each case pairs an index with a value that is NOT one of its labels.
    @pytest.mark.parametrize("index,val", [
        # string vs integer spelling of the same number must not match
        (pd.Index([0, 1, 2]), '2'),
        (pd.Index([0, 1, '2']), 2),
        # a plain number absent from an index that holds inf / nan
        (pd.Index([0, 1, 2, np.inf]), 4),
        (pd.Index([0, 1, 2, np.nan]), 4),
        # inf and nan must not be mistaken for one another
        (pd.Index([0, 1, 2, np.inf]), np.nan),
        (pd.Index([0, 1, 2, np.nan]), np.inf),
        # Checking if np.inf in Int64Index should not cause an OverflowError
        # Related to GH 16957
        (pd.Int64Index([0, 1, 2]), np.inf),
        (pd.Int64Index([0, 1, 2]), np.nan),
        (pd.UInt64Index([0, 1, 2]), np.inf),
        (pd.UInt64Index([0, 1, 2]), np.nan),
    ])
    def test_index_not_contains(self, index, val):
        """``val in index`` must be false for every parametrized pair."""
        assert val not in index

    def test_index_type_coercion(self):
        """Setting with a new float / string key changes the index kind."""

        with catch_warnings(record=True):

            # GH 11836
            # if we have an index type and set it with something that looks
            # to numpy like the same, but is actually, not
            # (e.g. setting with a float or string '0')
            # then we need to coerce to object

            # the three ways of reaching setitem / getitem that are checked
            accessors = [lambda obj: obj.ix,
                         lambda obj: obj.loc,
                         lambda obj: obj]

            # integer indexes
            int_indexed = [Series(range(5)),
                           Series(range(5), index=range(1, 6))]
            for ser in int_indexed:

                assert ser.index.is_integer()

                for access in accessors:
                    # a fractional key makes the index floating
                    work = ser.copy()
                    access(work)[0.1] = 0
                    assert work.index.is_floating()
                    assert access(work)[0.1] == 0

                    # 0.0 is treated like the integer label 0: the index is
                    # unchanged when 0 exists, otherwise 0 is appended
                    work = ser.copy()
                    access(work)[0.0] = 0
                    if 0 in ser:
                        wanted = ser.index
                    else:
                        wanted = Index(ser.index.tolist() + [0])
                    tm.assert_index_equal(work.index, wanted)

                    # a string key makes the index object
                    work = ser.copy()
                    access(work)['0'] = 0
                    assert work.index.is_object()

            # float index
            float_indexed = Series(range(5), index=np.arange(5.))
            assert float_indexed.index.is_floating()

            for access in accessors:

                work = float_indexed.copy()
                access(work)[0.1] = 0
                assert work.index.is_floating()
                assert access(work)[0.1] == 0

                # 0.0 is already a label, so nothing is added
                work = float_indexed.copy()
                access(work)[0.0] = 0
                tm.assert_index_equal(work.index, float_indexed.index)

                work = float_indexed.copy()
                access(work)['0'] = 0
                assert work.index.is_object()


class TestMisc(Base):

    def test_indexer_caching(self):
        """Boolean setitem on a Series with a very large MultiIndex."""
        # GH5727
        # make sure that indexers are in the _internal_names_set
        size = 1000001
        big_index = MultiIndex.from_tuples(lzip(lrange(size), lrange(size)))

        # render a series on that index once before the real check
        warmup = Series(np.zeros(size), index=big_index)
        str(warmup)

        # setitem
        ser = Series(np.zeros(size), index=big_index)
        ser[ser == 0] = 1
        tm.assert_series_equal(ser, Series(np.ones(size), index=big_index))

    def test_float_index_to_mixed(self):
        """A string column can be added next to float column labels."""
        frame = DataFrame({0.0: np.random.rand(10),
                           1.0: np.random.rand(10)})
        frame['a'] = 10

        # rebuild the frame from its own float columns plus the constant
        rebuilt = DataFrame({0.0: frame[0.0],
                             1.0: frame[1.0],
                             'a': [10] * 10})
        tm.assert_frame_equal(rebuilt, frame)

    def test_float_index_non_scalar_assignment(self):
        """``.loc`` assignment with a list-like of float labels."""

        def make_frame():
            return DataFrame({'a': [1, 2, 3], 'b': [3, 4, 5]},
                             index=[1., 2., 3.])

        # a scalar is written to the first two rows
        frame = make_frame()
        first_two = frame.index[:2]
        frame.loc[first_two] = 1
        wanted = DataFrame({'a': [1, 1, 3], 'b': [1, 1, 5]},
                           index=frame.index)
        tm.assert_frame_equal(wanted, frame)

        # assigning the frame's own rows back leaves it unchanged
        frame = make_frame()
        snapshot = frame.copy()
        frame.loc[frame.index] = frame.loc[frame.index]
        tm.assert_frame_equal(frame, snapshot)

    def test_float_index_at_iat(self):
        """``.at`` / ``.iat`` scalar access on a float-labelled Series."""
        values = [1, 2, 3]
        s = pd.Series(values, index=[0.1, 0.2, 0.3])

        # label-based scalar access; ``items`` is used rather than the
        # deprecated ``iteritems`` spelling
        for label, item in s.items():
            assert s.at[label] == item

        # position-based scalar access
        for position, item in enumerate(values):
            assert s.iat[position] == item

    def test_rhs_alignment(self):
        """The assigned frame is aligned on rows and columns before setting.

        GH8258; covers both uniform data-type & multi-type cases.
        """

        def run_tests(df, rhs, right):
            # the same 3 x 2 target block, addressed three different ways
            row_labels, row_pos, row_slice = (list('bcd'), [1, 2, 3],
                                              slice(1, 4))
            col_labels, col_pos, col_slice = (['joe', 'jolie'], [1, 2],
                                              slice(1, 3))

            left = df.copy()
            left.loc[row_labels, col_labels] = rhs
            tm.assert_frame_equal(left, right)

            left = df.copy()
            left.iloc[row_pos, col_pos] = rhs
            tm.assert_frame_equal(left, right)

            # .ix with slices, positions and labels in turn
            ix_keys = [(row_slice, col_slice),
                       (row_pos, col_pos),
                       (row_labels, col_labels)]
            for rows, columns in ix_keys:
                left = df.copy()
                with catch_warnings(record=True):
                    left.ix[rows, columns] = rhs
                tm.assert_frame_equal(left, right)

        names = ['jim', 'joe', 'jolie', 'joline']
        df = pd.DataFrame(np.arange(20).reshape(5, 4),
                          columns=names, index=list('abcde'))

        # right hand side; permute the indices and multiply by -2
        rhs = -2 * df.iloc[3:0:-1, 2:0:-1]

        # expected `right` result; just multiply by -2
        right = df.copy()
        right.iloc[1:4, 1:3] *= -2

        # run tests with uniform dtypes
        run_tests(df, rhs, right)

        # make frames multi-type & re-run tests
        for frame in (df, rhs, right):
            frame['joe'] = frame['joe'].astype('float64')
            frame['jolie'] = frame['jolie'].map('@{0}'.format)

        run_tests(df, rhs, right)

    def test_str_label_slicing_with_negative_step(self):
        """Label slices with a negative step match the positional slices.

        Run against a string-labelled, an integer and a float index.
        """
        SLC = pd.IndexSlice

        def assert_slices_equivalent(ser, l_slc, i_slc):
            # ``l_slc`` is a slice of labels, ``i_slc`` the slice of
            # positions it is expected to select
            expected = ser.iloc[i_slc]
            tm.assert_series_equal(ser.loc[l_slc], expected)

            # ``is_integer`` is a method and has to be called: the bare
            # attribute is always truthy, so ``not idx.is_integer`` was
            # always False and the getitem check below never ran.
            if not ser.index.is_integer():
                # Plain getitem is only compared for non-integer indexes;
                # for integer indexes it is position-based.
                tm.assert_series_equal(ser[l_slc], expected)

        for labels in [_mklbl('A', 20), np.arange(20) + 100,
                       np.linspace(100, 150, 20)]:
            idx = Index(labels)
            s = Series(np.arange(20), index=idx)
            assert_slices_equivalent(s, SLC[idx[9]::-1], SLC[9::-1])
            assert_slices_equivalent(s, SLC[:idx[9]:-1], SLC[:8:-1])
            assert_slices_equivalent(s, SLC[idx[13]:idx[9]:-1],
                                     SLC[13:8:-1])
            # start before stop with a negative step selects nothing
            assert_slices_equivalent(s, SLC[idx[9]:idx[13]:-1], SLC[:0])

    def test_slice_with_zero_step_raises(self):
        """A slice step of zero is rejected by getitem, ``.loc`` and ``.ix``."""
        ser = Series(np.arange(20), index=_mklbl('A', 20))
        message = 'slice step cannot be zero'

        def via_getitem():
            return ser[::0]

        def via_loc():
            return ser.loc[::0]

        def via_ix():
            return ser.ix[::0]

        tm.assert_raises_regex(ValueError, message, via_getitem)
        tm.assert_raises_regex(ValueError, message, via_loc)
        # warnings raised while going through .ix are recorded, not surfaced
        with catch_warnings(record=True):
            tm.assert_raises_regex(ValueError, message, via_ix)

    def test_indexing_assignment_dict_already_exists(self):
        """Assigning a dict to an existing row label sets values by key."""
        columns = {'x': [1, 2, 6],
                   'y': [2, 2, 8],
                   'z': [-5, 0, 5]}
        frame = pd.DataFrame(columns).set_index('z')
        wanted = frame.copy()

        # row 5 already exists; the dict is compared with a plain list
        new_row = dict(x=9, y=99)
        frame.loc[5] = new_row
        wanted.loc[5] = [9, 99]

        tm.assert_frame_equal(frame, wanted)

    def test_indexing_dtypes_on_empty(self):
        """An empty row selection keeps the column dtype (GH9983)."""
        # Check that .iloc and .ix return correct dtypes GH9983
        source = DataFrame({'a': [1, 2, 3], 'b': ['b', 'b2', 'b3']})
        with catch_warnings(record=True):
            empty = source.ix[[], :]

        by_label = empty.loc[:, 'a']
        assert by_label.dtype == np.int64

        # the positional and .ix lookups must agree with the label lookup
        tm.assert_series_equal(by_label, empty.iloc[:, 0])
        with catch_warnings(record=True):
            tm.assert_series_equal(by_label, empty.ix[:, 0])

    def test_range_in_series_indexing(self):
        """``range`` objects are accepted as ``.loc`` keys (GH 11652)."""
        # range can cause an indexing error
        for size in (5, 999999, 1000000):
            ser = pd.Series(index=range(size))

            # set, then read back, the first one and the first two labels
            for stop, fill in ((1, 42), (2, 43)):
                ser.loc[range(stop)] = fill
                wanted = Series(float(fill), index=lrange(stop))
                tm.assert_series_equal(ser.loc[range(stop)], wanted)

    def test_non_reducing_slice(self):
        """Keys passed through ``_non_reducing_slice`` select a DataFrame."""
        frame = pd.DataFrame([[0, 1], [2, 3]])
        idx = pd.IndexSlice

        candidates = [
            # idx[:, :],
            idx[:, 1],
            idx[1, :],
            idx[[1], [1]],
            idx[1, [1]],
            idx[[1], 1],
            idx[1],
            idx[1, 1],
            slice(None, None, None),
            [0, 1],
            np.array([0, 1]),
            pd.Series([0, 1]),
        ]
        for candidate in candidates:
            converted = _non_reducing_slice(candidate)
            selection = frame.loc[converted]
            assert isinstance(selection, DataFrame)

    def test_list_slice(self):
        """A bare list-like is treated as a column selection."""
        # like dataframe getitem
        frame = pd.DataFrame({'A': [1, 2], 'B': [3, 4]}, index=['A', 'B'])
        wanted = frame.loc[pd.IndexSlice[:, ['A']]]

        # the same single label as a list, a Series and an ndarray
        for subset in (['A'], pd.Series(['A']), np.array(['A'])):
            converted = _non_reducing_slice(subset)
            tm.assert_frame_equal(frame.loc[converted], wanted)

    def test_maybe_numeric_slice(self):
        """Default and pass-through behaviour of ``_maybe_numeric_slice``."""
        df = pd.DataFrame({'A': [1, 2], 'B': ['c', 'd'], 'C': [True, False]})

        # no slice given: only the integer column 'A' is expected
        result = _maybe_numeric_slice(df, slice_=None)
        expected = pd.IndexSlice[:, ['A']]
        assert result == expected

        # no slice given, booleans included: 'A' and 'C' are expected.
        # This result used to be overwritten without ever being checked.
        result = _maybe_numeric_slice(df, None, include_bool=True)
        expected = pd.IndexSlice[:, ['A', 'C']]
        # Compare piece by piece: ``result == expected`` on the tuples would
        # need the truth value of a two-element comparison and raise.
        assert result[0] == expected[0]
        assert list(result[1]) == expected[1]

        # an explicit slice comes back as given
        result = _maybe_numeric_slice(df, [1])
        expected = [1]
        assert result == expected

    def test_partial_boolean_frame_indexing(self):
        """Indexing with a boolean frame covering part of the axes."""
        # GH 17170
        rows, cols = list('abc'), list('ABC')
        frame = pd.DataFrame(np.arange(9.).reshape(3, 3),
                             index=rows,
                             columns=cols)

        # the mask only has rows a-b and columns A-B
        partial = pd.DataFrame(1, index=list('ab'), columns=list('AB'))
        mask = partial.notnull()
        result = frame[mask]

        # everything outside the mask's labels is expected to be NaN
        wanted_values = np.array([[0., 1., np.nan],
                                  [3., 4., np.nan],
                                  [np.nan] * 3])
        wanted = pd.DataFrame(wanted_values, index=rows, columns=cols)
        tm.assert_frame_equal(result, wanted)


class TestSeriesNoneCoercion(object):
    """Assign ``None`` into a Series through several setitem paths.

    Every test walks ``EXPECTED_RESULTS``, sets the first element of a fresh
    Series to ``None`` and compares against the expected Series.
    """

    # Pairs of (initial values, values expected once element 0 is None).
    EXPECTED_RESULTS = [
        # Numeric data: None is expected to become NaN.
        ([1, 2, 3], [np.nan, 2, 3]),
        ([1.0, 2.0, 3.0], [np.nan, 2.0, 3.0]),

        # Datetime data: None is expected to become NaT.
        ([datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
         [NaT, datetime(2000, 1, 2), datetime(2000, 1, 3)]),

        # Object data: None is expected to be kept as-is.
        (["foo", "bar", "baz"], [None, "bar", "baz"]),
    ]

    def test_coercion_with_setitem(self):
        """Assign through plain ``ser[0] = None``."""
        for initial, wanted in self.EXPECTED_RESULTS:
            ser = Series(initial)
            ser[0] = None
            tm.assert_series_equal(ser, Series(wanted))

    def test_coercion_with_loc_setitem(self):
        """Assign through ``ser.loc[0] = None``."""
        for initial, wanted in self.EXPECTED_RESULTS:
            ser = Series(initial)
            ser.loc[0] = None
            tm.assert_series_equal(ser, Series(wanted))

    def test_coercion_with_setitem_and_series(self):
        """Assign through ``ser[mask] = None`` with a boolean Series mask."""
        for initial, wanted in self.EXPECTED_RESULTS:
            ser = Series(initial)
            # Mask of the elements equal to the first one.
            mask = ser == ser[0]
            ser[mask] = None
            tm.assert_series_equal(ser, Series(wanted))

    def test_coercion_with_loc_and_series(self):
        """Assign through ``ser.loc[mask] = None`` with a boolean mask."""
        for initial, wanted in self.EXPECTED_RESULTS:
            ser = Series(initial)
            # Mask of the elements equal to the first one.
            mask = ser == ser[0]
            ser.loc[mask] = None
            tm.assert_series_equal(ser, Series(wanted))


class TestDataframeNoneCoercion(object):
    """Assign ``None`` into a DataFrame through several indexers.

    The single-column tests walk ``EXPECTED_SINGLE_ROW_RESULTS``, set the
    first row of a fresh one-column frame to ``None`` and compare against
    the expected frame.
    """

    # Pairs of (column values, column values expected once row 0 is None).
    EXPECTED_SINGLE_ROW_RESULTS = [
        # Numeric data: None is expected to become NaN.
        ([1, 2, 3], [np.nan, 2, 3]),
        ([1.0, 2.0, 3.0], [np.nan, 2.0, 3.0]),

        # Datetime data: None is expected to become NaT.
        ([datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
         [NaT, datetime(2000, 1, 2), datetime(2000, 1, 3)]),

        # Object data: None is expected to be kept as-is.
        (["foo", "bar", "baz"], [None, "bar", "baz"]),
    ]

    def test_coercion_with_loc(self):
        """Assign through ``frame.loc[0, ['foo']] = None``."""
        for initial, wanted in self.EXPECTED_SINGLE_ROW_RESULTS:
            frame = DataFrame({'foo': initial})
            frame.loc[0, ['foo']] = None
            tm.assert_frame_equal(frame, DataFrame({'foo': wanted}))

    def test_coercion_with_setitem_and_dataframe(self):
        """Assign through ``frame[mask] = None`` with a boolean mask."""
        for initial, wanted in self.EXPECTED_SINGLE_ROW_RESULTS:
            frame = DataFrame({'foo': initial})
            # Mask of the rows whose 'foo' equals the first row's value.
            mask = frame['foo'] == frame['foo'][0]
            frame[mask] = None
            tm.assert_frame_equal(frame, DataFrame({'foo': wanted}))

    def test_none_coercion_loc_and_dataframe(self):
        """Assign through ``frame.loc[mask] = None`` with a boolean mask."""
        for initial, wanted in self.EXPECTED_SINGLE_ROW_RESULTS:
            frame = DataFrame({'foo': initial})
            # Mask of the rows whose 'foo' equals the first row's value.
            mask = frame['foo'] == frame['foo'][0]
            frame.loc[mask] = None
            tm.assert_frame_equal(frame, DataFrame({'foo': wanted}))

    def test_none_coercion_mixed_dtypes(self):
        """Set a whole row of a mixed-dtype frame to ``None`` via ``iloc``."""
        days = [datetime(2000, 1, 1),
                datetime(2000, 1, 2),
                datetime(2000, 1, 3)]
        frame = DataFrame({
            'a': [1, 2, 3],
            'b': [1.0, 2.0, 3.0],
            'c': days,
            'd': ['a', 'b', 'c']
        })
        frame.iloc[0] = None

        # Expected per column: NaN for numeric, NaT for datetime and
        # None for object.
        wanted = DataFrame({
            'a': [np.nan, 2, 3],
            'b': [np.nan, 2.0, 3.0],
            'c': [NaT] + days[1:],
            'd': [None, 'b', 'c']
        })
        tm.assert_frame_equal(frame, wanted)
steminc / pandas python

Products

About

Resources

Contact Gemfury