Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

agriconnect / pandas   python

Repository URL to install this package:

Version: 0.24.2 

/ tests / frame / test_alter_axes.py

# -*- coding: utf-8 -*-

from __future__ import print_function

from datetime import datetime, timedelta
import inspect

import numpy as np
import pytest

from pandas.compat import PY2, lrange

from pandas.core.dtypes.common import (
    is_categorical_dtype, is_interval_dtype, is_object_dtype)

from pandas import (
    Categorical, DataFrame, DatetimeIndex, Index, IntervalIndex, MultiIndex,
    RangeIndex, Series, Timestamp, cut, date_range, to_datetime)
import pandas.util.testing as tm


class TestDataFrameAlterAxes():

    def test_set_index_directly(self, float_string_frame):
        """Assign to ``.index`` directly; a wrong length must raise."""
        frame = float_string_frame
        reversed_positions = np.arange(len(frame))[::-1]
        new_index = Index(reversed_positions)

        frame.index = new_index
        tm.assert_index_equal(frame.index, new_index)

        # every other label only -> length no longer matches the frame
        too_short = new_index[::2]
        with pytest.raises(ValueError, match='Length mismatch'):
            frame.index = too_short

    def test_set_index(self, float_string_frame):
        """Pass an Index object to ``set_index``; a wrong length must raise."""
        source = float_string_frame
        new_index = Index(np.arange(len(source))[::-1])

        reindexed = source.set_index(new_index)
        tm.assert_index_equal(reindexed.index, new_index)

        # every other label only -> length no longer matches the frame
        too_short = new_index[::2]
        with pytest.raises(ValueError, match='Length mismatch'):
            reindexed.set_index(too_short)

    def test_set_index_cast(self):
        """Set an int32-cast version of the frame's own index."""
        # issue casting an index then set_index
        columns = {'A': [1.1, 2.2, 3.3], 'B': [5.0, 6.1, 7.2]}
        original = DataFrame(columns, index=[2010, 2011, 2012])

        int32_index = original.index.astype(np.int32)
        recast = original.set_index(int32_index)

        tm.assert_frame_equal(original, recast)

    # A has duplicate values, C does not
    @pytest.mark.parametrize('keys', ['A', 'C', ['A', 'B'],
                                      ('tuple', 'as', 'label')])
    @pytest.mark.parametrize('inplace', [True, False])
    @pytest.mark.parametrize('drop', [True, False])
    def test_set_index_drop_inplace(self, frame_of_index_cols,
                                    drop, inplace, keys):
        """Check ``set_index`` on column keys, with and without ``inplace``.

        ``keys`` is a single label (string or tuple) or a list of labels.
        The expected frame is built by hand: the key columns become the
        index and are removed from the columns only when ``drop`` is set.
        """
        df = frame_of_index_cols

        if isinstance(keys, list):
            idx = MultiIndex.from_arrays([df[x] for x in keys], names=keys)
        else:
            idx = Index(df[keys], name=keys)
        # copy when nothing is dropped, so that assigning the expected index
        # below does not modify ``df`` itself before set_index is exercised
        expected = df.drop(keys, axis=1) if drop else df.copy()
        expected.index = idx

        if inplace:
            # operate on a copy to leave the fixture frame untouched
            result = df.copy()
            result.set_index(keys, drop=drop, inplace=True)
        else:
            result = df.set_index(keys, drop=drop)

        tm.assert_frame_equal(result, expected)

    # A has duplicate values, C does not
    @pytest.mark.parametrize('keys', ['A', 'C', ['A', 'B'],
                                      ('tuple', 'as', 'label')])
    @pytest.mark.parametrize('drop', [True, False])
    def test_set_index_append(self, frame_of_index_cols, drop, keys):
        """Check that ``append=True`` adds the keys after the current index."""
        frame = frame_of_index_cols

        # wrap a single label (string or tuple) in a list
        if not isinstance(keys, list):
            keys = [keys]

        levels = [frame.index] + [frame[label] for label in keys]
        level_names = [None] + keys
        appended_index = MultiIndex.from_arrays(levels, names=level_names)

        if drop:
            expected = frame.drop(keys, axis=1)
        else:
            expected = frame.copy()
        expected.index = appended_index

        result = frame.set_index(keys, drop=drop, append=True)

        tm.assert_frame_equal(result, expected)

    # A has duplicate values, C does not
    @pytest.mark.parametrize('keys', ['A', 'C', ['A', 'B'],
                                      ('tuple', 'as', 'label')])
    @pytest.mark.parametrize('drop', [True, False])
    def test_set_index_append_to_multiindex(self, frame_of_index_cols,
                                            drop, keys):
        """Append keys to a frame that already had 'D' appended to its index.

        The two-step result must equal one call appending 'D' plus the keys.
        """
        base = frame_of_index_cols
        key_list = keys if isinstance(keys, list) else [keys]

        # append to existing multiindex
        with_d_level = base.set_index(['D'], drop=drop, append=True)
        result = with_d_level.set_index(key_list, drop=drop, append=True)

        all_at_once = ['D'] + key_list
        expected = base.set_index(all_at_once, drop=drop, append=True)

        tm.assert_frame_equal(result, expected)

    def test_set_index_after_mutation(self):
        """Call ``set_index`` on a frame produced by a ``.loc`` row filter."""
        # GH1590
        full = DataFrame({'val': [0, 1, 2], 'key': ['a', 'b', 'c']})

        row_mask = full.index.map(lambda indx: indx >= 1)
        filtered = full.loc[row_mask]
        result = filtered.set_index('key')

        expected_index = Index(['b', 'c'], name='key')
        expected = DataFrame({'val': [1, 2]}, expected_index)
        tm.assert_frame_equal(result, expected)

    # MultiIndex constructor does not work directly on Series -> lambda
    # Add list-of-list constructor because list is ambiguous -> lambda
    # also test index name if append=True (name is duplicate here for B)
    @pytest.mark.parametrize('box', [Series, Index, np.array,
                                     list, lambda x: [list(x)],
                                     lambda x: MultiIndex.from_arrays([x])])
    @pytest.mark.parametrize('append, index_name', [(True, None),
                             (True, 'B'), (True, 'test'), (False, None)])
    @pytest.mark.parametrize('drop', [True, False])
    def test_set_index_pass_single_array(self, frame_of_index_cols,
                                         drop, append, index_name, box):
        """Pass column 'B', wrapped by ``box``, as the only ``set_index`` key.

        A plain list is expected to raise KeyError; every other container
        is compared against ``set_index(['B'], drop=False)`` with adapted
        level names.
        """
        frame = frame_of_index_cols
        frame.index.name = index_name

        key = box(frame['B'])

        if box is list:
            # list of strings gets interpreted as list of keys
            # NOTE: pytest applies ``match`` as a regular expression, so the
            # brackets below form a character class
            msg = "['one', 'two', 'three', 'one', 'two']"
            with pytest.raises(KeyError, match=msg):
                frame.set_index(key, drop=drop, append=append)
            return

        # np.array/list-of-list "forget" the name of B
        level_names = getattr(key, 'names', None)
        if level_names is None:
            level_names = [getattr(key, 'name', None)]

        result = frame.set_index(key, drop=drop, append=append)

        # only valid column keys are dropped
        # since B is always passed as array above, nothing is dropped
        expected = frame.set_index(['B'], drop=False, append=append)
        if append:
            expected.index.names = [index_name] + level_names
        else:
            expected.index.names = level_names

        tm.assert_frame_equal(result, expected)

    # MultiIndex constructor does not work directly on Series -> lambda
    # also test index name if append=True (name is duplicate here for A & B)
    @pytest.mark.parametrize('box', [Series, Index, np.array, list,
                                     lambda x: MultiIndex.from_arrays([x])])
    @pytest.mark.parametrize('append, index_name',
                             [(True, None), (True, 'A'), (True, 'B'),
                              (True, 'test'), (False, None)])
    @pytest.mark.parametrize('drop', [True, False])
    def test_set_index_pass_arrays(self, frame_of_index_cols,
                                   drop, append, index_name, box):
        """Combine the label 'A' with column 'B' wrapped by ``box`` as keys."""
        frame = frame_of_index_cols
        frame.index.name = index_name

        boxed_b = box(frame['B'])

        # np.array/list "forget" the name of B
        if box in [np.array, list, tuple, iter]:
            level_names = ['A', None]
        else:
            level_names = ['A', 'B']

        result = frame.set_index(['A', boxed_b], drop=drop, append=append)

        # only valid column keys are dropped
        # since B is always passed as array above, only A is dropped, if at all
        expected = frame.set_index(['A', 'B'], drop=False, append=append)
        if drop:
            expected = expected.drop('A', axis=1)
        if append:
            level_names = [index_name] + level_names
        expected.index.names = level_names

        tm.assert_frame_equal(result, expected)

    # MultiIndex constructor does not work directly on Series -> lambda
    # We also emulate a "constructor" for the label -> lambda
    # also test index name if append=True (name is duplicate here for A)
    @pytest.mark.parametrize('box2', [Series, Index, np.array, list,
                                      lambda x: MultiIndex.from_arrays([x]),
                                      lambda x: x.name])
    @pytest.mark.parametrize('box1', [Series, Index, np.array, list,
                                      lambda x: MultiIndex.from_arrays([x]),
                                      lambda x: x.name])
    @pytest.mark.parametrize('append, index_name', [(True, None),
                             (True, 'A'), (True, 'test'), (False, None)])
    @pytest.mark.parametrize('drop', [True, False])
    def test_set_index_pass_arrays_duplicate(self, frame_of_index_cols, drop,
                                             append, index_name, box1, box2):
        """Pass column 'A' twice to ``set_index``, each wrapped by its own box.

        Both keys are built from ``df['A']`` via ``box1`` and ``box2``; one
        of the boxes reduces the column to its bare label.  The result of
        the single call is compared against setting the two keys one after
        the other.
        """
        df = frame_of_index_cols
        df.index.name = index_name

        keys = [box1(df['A']), box2(df['A'])]
        result = df.set_index(keys, drop=drop, append=append)

        # need to adapt first drop for case that both keys are 'A' --
        # cannot drop the same column twice;
        # check the type before comparing, because == on a Series, Index or
        # array key is element-wise and has an ambiguous truth value.
        # Identity checks against a string literal are avoided: they depend
        # on interning and trigger a SyntaxWarning on Python >= 3.8.
        both_labels = all(isinstance(key, str) and key == 'A'
                          for key in keys)
        first_drop = False if both_labels else drop

        # to test against already-tested behaviour, we add sequentially,
        # hence second append always True; must wrap keys in list, otherwise
        # box = list would be interpreted as keys
        expected = df.set_index([keys[0]], drop=first_drop, append=append)
        expected = expected.set_index([keys[1]], drop=drop, append=True)
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize('append', [True, False])
    @pytest.mark.parametrize('drop', [True, False])
    def test_set_index_pass_multiindex(self, frame_of_index_cols,
                                       drop, append):
        """Pass a MultiIndex built from columns 'A' and 'B' as the key."""
        frame = frame_of_index_cols
        mi_key = MultiIndex.from_arrays([frame['A'], frame['B']],
                                        names=['A', 'B'])

        result = frame.set_index(mi_key, drop=drop, append=append)

        # setting with a MultiIndex will never drop columns
        expected = frame.set_index(['A', 'B'], drop=False, append=append)

        tm.assert_frame_equal(result, expected)

    def test_set_index_verify_integrity(self, frame_of_index_cols):
        """``verify_integrity=True`` must raise for keys with duplicates."""
        frame = frame_of_index_cols
        msg = 'Index has duplicate keys'

        with pytest.raises(ValueError, match=msg):
            frame.set_index('A', verify_integrity=True)

        # with MultiIndex
        with pytest.raises(ValueError, match=msg):
            repeated_column = [frame['A'], frame['A']]
            frame.set_index(repeated_column, verify_integrity=True)

    @pytest.mark.parametrize('append', [True, False])
    @pytest.mark.parametrize('drop', [True, False])
    def test_set_index_raise_keys(self, frame_of_index_cols, drop, append):
        """Keys that are not column labels must raise KeyError."""
        frame = frame_of_index_cols
        options = dict(drop=drop, append=append)

        # column names are A-E, as well as one tuple
        with pytest.raises(KeyError, match="['foo', 'bar', 'baz']"):
            frame.set_index(['foo', 'bar', 'baz'], **options)

        # non-existent key in list with arrays
        with pytest.raises(KeyError, match='X'):
            frame.set_index([frame['A'], frame['B'], 'X'], **options)

        # tuples always raise KeyError
        msg = "[('foo', 'foo', 'foo', 'bar', 'bar')]"
        with pytest.raises(KeyError, match=msg):
            frame.set_index(tuple(frame['A']), **options)

        # also within a list
        with pytest.raises(KeyError, match=msg):
            frame.set_index(['A', frame['A'], tuple(frame['A'])], **options)

    @pytest.mark.xfail(reason='broken due to revert, see GH 25085')
    @pytest.mark.parametrize('append', [True, False])
    @pytest.mark.parametrize('drop', [True, False])
    @pytest.mark.parametrize('box', [set, iter, lambda x: (y for y in x)],
                             ids=['set', 'iter', 'generator'])
    def test_set_index_raise_on_type(self, frame_of_index_cols, box,
                                     drop, append):
        """Keys wrapped in a set, iterator or generator must raise TypeError."""
        frame = frame_of_index_cols
        msg = 'The parameter "keys" may be a column key, .*'
        options = dict(drop=drop, append=append)

        # forbidden type, e.g. set/iter/generator
        with pytest.raises(TypeError, match=msg):
            frame.set_index(box(frame['A']), **options)

        # forbidden type in list, e.g. set/iter/generator
        with pytest.raises(TypeError, match=msg):
            frame.set_index(['A', frame['A'], box(frame['A'])], **options)

    def test_set_index_custom_label_type(self):
        """Use instances of a custom class as column labels and index keys."""
        # GH 24969

        class Thing(object):
            def __init__(self, name, color):
                self.name = name
                self.color = color

            # necessary for pretty KeyError
            def __repr__(self):
                return "<Thing %r>" % (self.name,)

            __str__ = __repr__

        first = Thing('One', 'red')
        second = Thing('Two', 'blue')
        frame = DataFrame({first: [0, 1], second: [2, 3]})
        expected_index = Index([2, 3], name=second)
        expected = DataFrame({first: [0, 1]}, index=expected_index)

        # use custom label directly, then wrapped in list
        for key in (second, [second]):
            result = frame.set_index(key)
            tm.assert_frame_equal(result, expected)

        # missing key: label passed directly, then inside a list
        absent = Thing('Three', 'pink')
        msg = "<Thing 'Three'>"
        for key in (absent, [absent]):
            with pytest.raises(KeyError, match=msg):
                frame.set_index(key)

    def test_set_index_custom_label_hashable_iterable(self):
        # GH 24969

        # actual example discussed in GH 24984 was e.g. for shapely.geometry
        # objects (e.g. a collection of Points) that can be both hashable and
        # iterable; using frozenset as a stand-in for testing here

        class Thing(frozenset):
            # need to stabilize repr for KeyError (due to random order in sets)
            def __repr__(self):
                tmp = sorted(list(self))
                # double curly brace prints one brace in format string
                return "frozenset({{{}}})".format(', '.join(map(repr, tmp)))

        thing1 = Thing(['One', 'red'])
        thing2 = Thing(['Two', 'blue'])
        df = DataFrame({thing1: [0, 1], thing2: [2, 3]})
        expected = DataFrame({thing1: [0, 1]},
                             index=Index([2, 3], name=thing2))

        # use custom label directly
        result = df.set_index(thing2)
        tm.assert_frame_equal(result, expected)

        # custom label wrapped in list
        result = df.set_index([thing2])
        tm.assert_frame_equal(result, expected)

        # missing key
        thing3 = Thing(['Three', 'pink'])
        msg = '.*'  # due to revert, see GH 25085
        with pytest.raises(KeyError, match=msg):
Loading ...