Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

agriconnect / pandas   python

Repository URL to install this package:

Version: 0.24.2 

/ tests / indexing / test_loc.py

""" test label based indexing with loc """

from warnings import catch_warnings, filterwarnings

import numpy as np
import pytest

from pandas.compat import PY2, StringIO, lrange

import pandas as pd
from pandas import DataFrame, Series, Timestamp, date_range
from pandas.api.types import is_scalar
from pandas.tests.indexing.common import Base
from pandas.util import testing as tm


class TestLoc(Base):

    def test_loc_getitem_dups(self):
        """Chained ``.loc`` on a duplicated index equals one-shot ``.loc``."""
        # GH 5678
        # repeated getitems on a dup index returning a ndarray

        # 20 rows labelled 'A'..'E' in rotation, so every label repeats
        labels = ['ABCDE'[pos % 5] for pos in range(20)]
        values = np.random.random_sample((20, 5))
        frame = DataFrame(values, index=labels)

        # row label and column label in a single .loc call ...
        direct = frame.loc['A', 0]
        # ... versus selecting the column first, then the row label
        column = frame.loc[:, 0]
        chained = column.loc['A']
        tm.assert_series_equal(chained, direct)

    def test_loc_getitem_dups2(self):
        """``iloc`` and ``loc`` return the same row with all-dup columns."""
        # GH4726
        # dup indexing with iloc/loc
        row_values = [1, 2, 'foo', 'bar', Timestamp('20130101')]
        dup_columns = ['a', 'a', 'a', 'a', 'a']

        # a single row, labelled 1, whose five columns are all named 'a'
        frame = DataFrame([row_values], columns=dup_columns, index=[1])
        expected = Series(row_values, index=dup_columns, name=1)

        # positional lookup first, then label lookup; each is checked
        # before the next one is attempted
        for accessor, key in (('iloc', 0), ('loc', 1)):
            picked = getattr(frame, accessor)[key]
            tm.assert_series_equal(picked, expected)

    def test_loc_setitem_dups(self):
        """In-place ``*=`` through ``.loc`` works on a duplicated index."""
        # GH 6541
        steps = np.arange(5, dtype='float64')
        raw = DataFrame({'me': list('rttti'),
                         'foo': list('aaade'),
                         'bar': steps * 1.34 + 2,
                         'bar2': steps * -.34 + 2})
        # 'me' becomes the index: 'r' and 'i' occur once, 't' three times
        df_orig = raw.set_index('me')

        def doubled(key):
            # double the selection in place on a fresh copy and read it back
            scaled = df_orig.copy()
            scaled.loc[key] *= 2.0
            return scaled.loc[key]

        # unique row label, two columns: compared as a Series
        key = ('r', ['bar', 'bar2'])
        tm.assert_series_equal(doubled(key), 2.0 * df_orig.loc[key])

        # unique row label, one column: compared as a scalar
        key = ('r', 'bar')
        assert doubled(key) == 2.0 * df_orig.loc[key]

        # repeated row label, two columns: compared as a DataFrame
        key = ('t', ['bar', 'bar2'])
        tm.assert_frame_equal(doubled(key), 2.0 * df_orig.loc[key])

    def test_loc_setitem_slice(self):
        """Masked ``.loc`` assignment keeps or upcasts the column dtype."""
        # GH10503

        # assigning the same type should not change the type
        df1 = DataFrame({'a': [0, 1, 1],
                         'b': Series([100, 200, 300], dtype='uint32')})
        ix = df1['a'] == 1
        newb1 = df1.loc[ix, 'b'] + 1
        df1.loc[ix, 'b'] = newb1
        expected = DataFrame({'a': [0, 1, 1],
                              'b': Series([100, 201, 301], dtype='uint32')})
        tm.assert_frame_equal(df1, expected)

        # assigning a new type should get the inferred type
        df2 = DataFrame({'a': [0, 1, 1], 'b': [100, 200, 300]},
                        dtype='uint64')
        ix = df1['a'] == 1
        newb2 = df2.loc[ix, 'b']
        df1.loc[ix, 'b'] = newb2

        # the frame that only supplied the values must be unchanged
        expected = DataFrame({'a': [0, 1, 1], 'b': [100, 200, 300]},
                             dtype='uint64')
        tm.assert_frame_equal(df2, expected)

        # NOTE(review): this assertion is new. The original test stopped at
        # the df2 comparison above, which never looks at the frame that was
        # actually assigned into. The target column should now hold the
        # uint64 values and carry the uint64 dtype; column 'a' was not part
        # of the assignment and keeps its original dtype.
        expected = DataFrame({'a': [0, 1, 1],
                              'b': Series([100, 200, 300], dtype='uint64')})
        tm.assert_frame_equal(df1, expected)

    def test_loc_getitem_int(self):
        """Look up a single integer label with ``.loc`` on each axis."""

        # int label, one axis per call, on the integer-typed objects
        self.check_result('int label', 'loc', 2, 'ix', 2,
                          typs=['ints', 'uints'], axes=0)
        self.check_result('int label', 'loc', 3, 'ix', 3,
                          typs=['ints', 'uints'], axes=1)
        self.check_result('int label', 'loc', 4, 'ix', 4,
                          typs=['ints', 'uints'], axes=2)

        # int label against the string-labelled objects.
        # NOTE(review): the typ used to be spelled 'label', a name no other
        # check in this class uses ('labels' everywhere else), so this check
        # was presumably never exercised -- confirm against Base. With the
        # spelling corrected the expected error is TypeError, matching the
        # integer-key-on-'labels' check in
        # test_loc_getitem_label_out_of_range.
        self.check_result('int label', 'loc', 2, 'ix', 2,
                          typs=['labels'], fails=TypeError)

    def test_loc_getitem_label(self):
        """Single-label ``.loc`` lookups for several index typs."""

        # label
        # (key given to 'loc', key given to 'ix', typ), all on axis 0
        axis0_cases = [
            ('c', 'c', 'labels'),
            ('null', 'null', 'mixed'),
            (8, 8, 'mixed'),
            (Timestamp('20130102'), 1, 'ts'),
        ]
        for loc_key, ix_key, typ in axis0_cases:
            self.check_result('label', 'loc', loc_key, 'ix', ix_key,
                              typs=[typ], axes=0)

        # the 'empty' typ is checked with KeyError as the accepted failure;
        # no `axes` argument is given for this one
        self.check_result('label', 'loc', 'c', 'ix', 'c',
                          typs=['empty'], fails=KeyError)

    def test_loc_getitem_label_out_of_range(self):
        """``.loc`` with a label that lies outside the axis."""

        # out of range label
        # each entry: the key (used for both 'loc' and 'ix') and the
        # keyword arguments forwarded unchanged to check_result
        cases = [
            ('f', dict(typs=['ints', 'uints', 'labels', 'mixed', 'ts'],
                       fails=KeyError)),
            ('f', dict(typs=['floats'], fails=KeyError)),
            (20, dict(typs=['ints', 'uints', 'mixed'], fails=KeyError)),
            (20, dict(typs=['labels'], fails=TypeError)),
            (20, dict(typs=['ts'], axes=0, fails=TypeError)),
            (20, dict(typs=['floats'], axes=0, fails=KeyError)),
        ]
        for key, options in cases:
            self.check_result('label range', 'loc', key, 'ix', key,
                              **options)

    def test_loc_getitem_label_list(self):
        """``.loc`` with a list of labels, per typ and per axis."""

        # list of labels
        # each entry: (labels, typs, axis)
        cases = [
            ([0, 2, 4], ['ints', 'uints'], 0),
            ([3, 6, 9], ['ints', 'uints'], 1),
            ([4, 8, 12], ['ints', 'uints'], 2),
            (['a', 'b', 'd'], ['labels'], 0),
            (['A', 'B', 'C'], ['labels'], 1),
            (['Z', 'Y', 'W'], ['labels'], 2),
            ([2, 8, 'null'], ['mixed'], 0),
            ([Timestamp('20130102'), Timestamp('20130103')], ['ts'], 0),
        ]
        for labels, typs, axis in cases:
            # 'loc' and 'ix' each get their own list object
            self.check_result('list lbl', 'loc', labels, 'ix', list(labels),
                              typs=typs, axes=axis)

    @pytest.mark.skipif(
        PY2,
        reason='Catching warnings unreliable with Python 2 (GH #20770)')
    def test_loc_getitem_label_list_with_missing(self):
        """``.loc`` with label lists, run under ``FutureWarning`` checks."""
        # the 'empty' typ is checked outside any warning expectation
        self.check_result('list lbl', 'loc', [0, 1, 2], 'indexer', [0, 1, 2],
                          typs=['empty'], fails=KeyError)

        # one integer-label list per axis; each call must emit a
        # FutureWarning (stack level is not checked)
        per_axis = (([0, 2, 10], 0), ([3, 6, 7], 1), ([4, 8, 10], 2))
        for labels, axis in per_axis:
            with tm.assert_produces_warning(FutureWarning,
                                            check_stacklevel=False):
                self.check_result('list lbl', 'loc', labels,
                                  'ix', list(labels),
                                  typs=['ints', 'uints', 'floats'],
                                  axes=axis, fails=KeyError)

        # GH 17758 - MultiIndex and missing keys
        tuples = [(1, 3), (1, 4), (2, 5)]
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            self.check_result('list lbl', 'loc', tuples, 'ix', list(tuples),
                              typs=['multi'], axes=0)

    def test_getitem_label_list_with_missing(self):
        """``Series[list]`` warns when the list has an absent label."""
        # consistency
        # each entry: (index for the Series, list key with one absent label);
        # index=None gives the default integer index
        cases = (
            (['a', 'b', 'c'], ['a', 'd']),
            (None, [0, 3]),
        )
        for index, keys in cases:
            ser = Series(range(3), index=index)
            with tm.assert_produces_warning(FutureWarning,
                                            check_stacklevel=False):
                ser[keys]

    def test_loc_getitem_label_list_fails(self):
        """``.loc`` with the label list [20, 30, 40] on axes 1 and 2."""
        # fails
        # same key and typs on both axes; KeyError is the accepted failure
        for axis in (1, 2):
            self.check_result('list lbl', 'loc', [20, 30, 40],
                              'ix', [20, 30, 40],
                              typs=['ints', 'uints'], axes=axis,
                              fails=KeyError)

    def test_loc_getitem_label_array_like(self):
        """``.loc`` keyed by an Index object instead of a plain list."""
        # array like
        # each entry: (labels, axis)
        cases = (([0, 2, 4], 0), ([3, 6, 9], 1), ([4, 8, 12], 2))
        for labels, axis in cases:
            # the 'loc' key is the index of a throw-away Series built on
            # the labels; the 'ix' key is a plain list of the same labels
            index_key = Series(index=labels).index
            self.check_result('array like', 'loc', index_key,
                              'ix', list(labels),
                              typs=['ints', 'uints'], axes=axis)

    def test_loc_getitem_bool(self):
        """``.loc`` with a boolean list as the key."""
        # boolean indexers
        mask = [True, False, True, False]

        # the same list object serves as both the 'loc' and the 'ix' key
        populated = ['ints', 'uints', 'labels', 'mixed', 'ts', 'floats']
        self.check_result('bool', 'loc', mask, 'ix', mask, typs=populated)

        # for the 'empty' typ a KeyError is the accepted failure
        self.check_result('bool', 'loc', mask, 'ix', mask,
                          typs=['empty'], fails=KeyError)

    def test_loc_getitem_int_slice(self):
        """``.loc`` with an integer slice versus ``ix`` with its bounds."""

        # ok
        # each entry: ((start, stop), axis); 'loc' receives the slice and
        # 'ix' receives the two bounds as a list
        cases = (((2, 4), 0), ((3, 6), 1), ((4, 8), 2))
        for (start, stop), axis in cases:
            self.check_result('int slice2', 'loc', slice(start, stop),
                              'ix', [start, stop],
                              typs=['ints', 'uints'], axes=axis)

    def test_loc_to_fail(self):
        """``.loc`` raises ``KeyError`` on absent labels (no fallback)."""

        # GH3449
        frame = DataFrame(np.random.random((3, 3)),
                          index=['a', 'b', 'c'],
                          columns=['e', 'f', 'g'])

        # raise a KeyError?
        # integer lists on both axes of a frame labelled with strings
        with pytest.raises(KeyError):
            frame.loc[[1, 2], [1, 2]]

        # GH  7496
        # loc should not fallback

        ser = Series()
        ser.loc[1] = 1
        ser.loc['a'] = 2

        # none of these keys has been set on the Series so far
        for missing in (-1, [-1, -2], ['4']):
            with pytest.raises(KeyError):
                ser.loc[missing]

        # once -1 exists, a list mixing it with the still-absent -2 warns
        # and yields NaN for the absent label
        ser.loc[-1] = 3
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            partial = ser.loc[[-1, -2]]
        tm.assert_series_equal(partial,
                               Series([3, np.nan], index=[-1, -2]))

        # a list holding only the absent label still raises, on read ...
        ser['a'] = 2
        with pytest.raises(KeyError):
            ser.loc[[-2]]

        del ser['a']

        # ... and on assignment
        with pytest.raises(KeyError):
            ser.loc[[-2]] = 0

        # inconsistency between .loc[values] and .loc[values,:]
        # GH 7999
        frame = DataFrame([['a'], ['b']], index=[1, 2], columns=['value'])

        with pytest.raises(KeyError):
            frame.loc[[3], :]

        with pytest.raises(KeyError):
            frame.loc[[3]]

    def test_loc_getitem_list_with_fail(self):
        """List keys: all missing raises; partly missing warns."""
        # 15747
        ser = Series([1, 2, 3])

        # every requested label exists: must simply not raise
        ser.loc[[2]]

        # the only requested label is absent: KeyError
        with pytest.raises(KeyError):
            ser.loc[[3]]

        # a non-match and a match: FutureWarning, and the outcome equals
        # what reindex gives for the same labels
        with tm.assert_produces_warning(FutureWarning):
            via_loc = ser.loc[[2, 3]]
        via_reindex = ser.reindex([2, 3])
        tm.assert_series_equal(via_reindex, via_loc)

    def test_loc_getitem_label_slice(self):
        """``.loc`` with label slices of several kinds."""

        def same_slice(name, start, stop, step=None, **options):
            # give 'loc' and 'ix' equal but separately built slices and
            # forward every other keyword argument untouched
            self.check_result(name, 'loc', slice(start, stop, step),
                              'ix', slice(start, stop, step), **options)

        # label slices (with ints)
        same_slice('lab slice', 1, 3,
                   typs=['labels', 'mixed', 'empty', 'ts', 'floats'],
                   fails=TypeError)

        # real label slices
        bounds_by_axis = ((('a', 'c'), 0), (('A', 'C'), 1), (('W', 'Z'), 2))
        for (first, last), axis in bounds_by_axis:
            same_slice('lab slice', first, last, typs=['labels'], axes=axis)

        # date-string slice: plain check on axis 0, TypeError accepted on
        # axes 1 and 2
        same_slice('ts  slice', '20130102', '20130104',
                   typs=['ts'], axes=0)
        for axis in (1, 2):
            same_slice('ts  slice', '20130102', '20130104',
                       typs=['ts'], axes=axis, fails=TypeError)

        # GH 14316
        self.check_result('ts slice rev', 'loc',
                          slice('20130104', '20130102'),
                          'indexer', [0, 1, 2], typs=['ts_rev'], axes=0)

        # integer slice on the 'mixed' typ: the accepted error differs
        # between axis 0 and the other two axes
        for axis, error in ((0, TypeError), (1, KeyError), (2, KeyError)):
            same_slice('mixed slice', 2, 8,
                       typs=['mixed'], axes=axis, fails=error)

        # the same kind of slice, now with a step
        same_slice('mixed slice', 2, 4, 2,
                   typs=['mixed'], axes=0, fails=TypeError)

    def test_loc_index(self):
        # gh-17131
        # a boolean index should index like a boolean numpy array

        df = DataFrame(
            np.random.random(size=(5, 10)),
            index=["alpha_0", "alpha_1", "alpha_2", "beta_0", "beta_1"])
Loading ...