Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

agriconnect / pandas   python

Repository URL to install this package:

Version: 0.24.2 

/ tests / indexing / test_chaining_and_caching.py

import numpy as np
import pytest

import pandas as pd
from pandas import (
    DataFrame, Series, Timestamp, compat, date_range, option_context)
from pandas.core import common as com
from pandas.util import testing as tm


class TestCaching(object):
    """Tests that cached column references of a DataFrame stay in sync."""

    def test_slice_consolidate_invalidate_item_cache(self):
        """Write through a previously cached column after a repr (#3970)."""

        # this is chained assignment, but will 'work'
        with option_context('chained_assignment', None):

            # #3970
            df = DataFrame({"aa": compat.lrange(5), "bb": [2.2] * 5})

            # Creates a second float block
            df["cc"] = 0.0

            # caches a reference to the 'bb' series
            df["bb"]

            # repr machinery triggers consolidation
            repr(df)

            # Assignment to wrong series
            df['bb'].iloc[0] = 0.17
            df._clear_item_cache()
            tm.assert_almost_equal(df['bb'][0], 0.17)

    def test_setitem_cache_updating(self):
        """Values read back after ``.loc`` / chained writes must be current.

        Covers GH 5424 (setting a new row label, with and without touching
        column 'c' first) and GH 7084 (slice-based updates of a column).
        """
        # GH 5424
        cont = ['one', 'two', 'three', 'four', 'five', 'six', 'seven']

        # Run once with column 'c' read beforehand and once without; the
        # ``if do_ref`` branch below is only exercised when True is included.
        for do_ref in (True, False):
            df = DataFrame({'a': cont,
                            "b": cont[3:] + cont[:3],
                            'c': np.arange(7)})

            # ref the cache
            if do_ref:
                df.loc[0, "c"]

            # set it (label 7 is not in the index built from 7 rows)
            df.loc[7, 'c'] = 1

            assert df.loc[0, 'c'] == 0.0
            assert df.loc[7, 'c'] == 1.0

        # GH 7084
        # not updating cache on series setting with slices
        expected = DataFrame({'A': [600, 600, 600]},
                             index=date_range('5/7/2014', '5/9/2014'))
        out = DataFrame({'A': [0, 0, 0]},
                        index=date_range('5/7/2014', '5/9/2014'))
        df = DataFrame({'C': ['A', 'A', 'A'], 'D': [100, 200, 300]})

        # loop through df to update out
        six = Timestamp('5/7/2014')
        eix = Timestamp('5/9/2014')
        for _, row in df.iterrows():
            out.loc[six:eix, row['C']] = out.loc[six:eix, row['C']] + row['D']

        tm.assert_frame_equal(out, expected)
        tm.assert_series_equal(out['A'], expected['A'])

        # try via a chain indexing
        # this actually works
        out = DataFrame({'A': [0, 0, 0]},
                        index=date_range('5/7/2014', '5/9/2014'))
        for _, row in df.iterrows():
            v = out[row['C']][six:eix] + row['D']
            out[row['C']][six:eix] = v

        tm.assert_frame_equal(out, expected)
        tm.assert_series_equal(out['A'], expected['A'])

        # same accumulation, written as an in-place ``+=`` on the .loc slice
        out = DataFrame({'A': [0, 0, 0]},
                        index=date_range('5/7/2014', '5/9/2014'))
        for _, row in df.iterrows():
            out.loc[six:eix, row['C']] += row['D']

        tm.assert_frame_equal(out, expected)
        tm.assert_series_equal(out['A'], expected['A'])


class TestChaining(object):

    def test_setitem_chained_setfault(self):
        """Chained writes on a column must be visible in the parent frame."""

        # GH6026
        raw = ['right', 'left', 'left', 'left', 'right', 'left', 'timeout']
        patched = raw[:-1] + ['none']
        records = np.rec.fromarrays([raw], names=['response'])

        # Each entry pairs a builder for a fresh frame with the column data
        # expected once 'timeout' has been overwritten in 'response'.
        scenarios = [
            (lambda: DataFrame({'response': np.array(raw)}),
             {'response': patched}),
            (lambda: DataFrame(records),
             {'response': patched}),
            (lambda: DataFrame({'response': raw, 'response1': raw}),
             {'response': patched, 'response1': raw}),
        ]
        for build, wanted in scenarios:
            frame = build()
            timed_out = frame.response == 'timeout'
            frame.response[timed_out] = 'none'
            tm.assert_frame_equal(frame, DataFrame(wanted))

        # GH 6056
        def fresh():
            # new frame per check so the two writes below are independent
            return DataFrame(
                dict(A=np.array(['foo', 'bar', 'bah', 'foo', 'bar'])))

        expected = DataFrame(dict(A=[np.nan, 'bar', 'bah', 'foo', 'bar']))

        # write through item access
        frame = fresh()
        frame['A'].iloc[0] = np.nan
        tm.assert_frame_equal(frame.head(), expected)

        # write through attribute access
        frame = fresh()
        frame.A.iloc[0] = np.nan
        tm.assert_frame_equal(frame.head(), expected)

    def test_detect_chained_assignment(self):
        """Check which writes raise ``SettingWithCopyError`` in 'raise' mode.

        Exercises chained writes that are expected to raise as well as
        whole-column and ``.loc`` writes that are expected to pass, all with
        the ``chained_assignment`` option set to ``'raise'``.
        """
        # Scope the option to this test: a bare ``pd.set_option`` would leave
        # the global setting at 'raise' after the test returns.
        with option_context('chained_assignment', 'raise'):

            # work with the chain
            expected = DataFrame([[-5, 1], [-6, 3]], columns=list('AB'))
            df = DataFrame(np.arange(4).reshape(2, 2),
                           columns=list('AB'), dtype='int64')
            assert df._is_copy is None

            df['A'][0] = -5
            df['A'][1] = -6
            tm.assert_frame_equal(df, expected)

            # test with the chaining
            df = DataFrame({'A': Series(range(2), dtype='int64'),
                            'B': np.array(np.arange(2, 4), dtype=np.float64)})
            assert df._is_copy is None

            with pytest.raises(com.SettingWithCopyError):
                df['A'][0] = -5

            with pytest.raises(com.SettingWithCopyError):
                df['A'][1] = np.nan

            assert df['A']._is_copy is None

            # Using a copy (the chain), fails
            df = DataFrame({'A': Series(range(2), dtype='int64'),
                            'B': np.array(np.arange(2, 4), dtype=np.float64)})

            with pytest.raises(com.SettingWithCopyError):
                df.loc[0]['A'] = -5

            # Doc example
            df = DataFrame({'a': ['one', 'one', 'two', 'three',
                                  'two', 'one', 'six'],
                            'c': Series(range(7), dtype='int64')})
            assert df._is_copy is None

            # Build the mask outside the raises-block so that only the
            # chained write itself is expected to raise.
            indexer = df.a.str.startswith('o')
            with pytest.raises(com.SettingWithCopyError):
                df[indexer]['c'] = 42

            expected = DataFrame({'A': [111, 'bbb', 'ccc'], 'B': [1, 2, 3]})
            df = DataFrame({'A': ['aaa', 'bbb', 'ccc'], 'B': [1, 2, 3]})

            with pytest.raises(com.SettingWithCopyError):
                df['A'][0] = 111

            with pytest.raises(com.SettingWithCopyError):
                df.loc[0]['A'] = 111

            df.loc[0, 'A'] = 111
            tm.assert_frame_equal(df, expected)

            # gh-5475: Make sure that is_copy is picked up reconstruction
            df = DataFrame({"A": [1, 2]})
            assert df._is_copy is None

            with tm.ensure_clean('__tmp__pickle') as path:
                df.to_pickle(path)
                df2 = pd.read_pickle(path)
                # the same column is assigned twice on the unpickled frame
                df2["B"] = df2["A"]
                df2["B"] = df2["A"]

            # gh-5597: a spurious raise as we are setting the entire column
            # here
            from string import ascii_letters as letters

            def random_text(nobs=100):
                # One-column frame of ``nobs`` random slices of ``letters``.
                rows = []
                for _ in range(nobs):
                    idx = np.random.randint(len(letters), size=2)
                    idx.sort()

                    rows.append([letters[idx[0]:idx[1]]])

                return DataFrame(rows, columns=['letters'])

            df = random_text(100000)

            # Always a copy
            x = df.iloc[[0, 1, 2]]
            assert x._is_copy is not None

            x = df.iloc[[0, 1, 2, 4]]
            assert x._is_copy is not None

            # Explicitly copy
            indexer = df.letters.apply(lambda x: len(x) > 10)
            df = df.loc[indexer].copy()

            assert df._is_copy is None
            df['letters'] = df['letters'].apply(str.lower)

            # Implicitly take
            df = random_text(100000)
            indexer = df.letters.apply(lambda x: len(x) > 10)
            df = df.loc[indexer]

            assert df._is_copy is not None
            df['letters'] = df['letters'].apply(str.lower)

            # Implicitly take 2
            df = random_text(100000)
            indexer = df.letters.apply(lambda x: len(x) > 10)

            df = df.loc[indexer]
            assert df._is_copy is not None
            df.loc[:, 'letters'] = df['letters'].apply(str.lower)

            # Should be ok even though it's a copy!
            assert df._is_copy is None

            df['letters'] = df['letters'].apply(str.lower)
            assert df._is_copy is None

            df = random_text(100000)
            indexer = df.letters.apply(lambda x: len(x) > 10)
            df.loc[indexer, 'letters'] = (
                df.loc[indexer, 'letters'].apply(str.lower))

            # an identical take, so no copy
            df = DataFrame({'a': [1]}).dropna()
            assert df._is_copy is None
            df['a'] += 1

            df = DataFrame(np.random.randn(10, 4))
            s = df.iloc[:, 0].sort_values()

            tm.assert_series_equal(s, df.iloc[:, 0].sort_values())
            tm.assert_series_equal(s, df[0].sort_values())

            # see gh-6025: false positives
            df = DataFrame({'column1': ['a', 'a', 'a'],
                            'column2': [4, 8, 9]})
            str(df)

            df['column1'] = df['column1'] + 'b'
            str(df)

            df = df[df['column2'] != 8]
            str(df)

            df['column1'] = df['column1'] + 'c'
            str(df)

            # from SO:
            # http://stackoverflow.com/questions/24054495/potential-bug-setting-value-for-undefined-column-using-iloc
            df = DataFrame(np.arange(0, 9), columns=['count'])
            df['group'] = 'b'

            with pytest.raises(com.SettingWithCopyError):
                df.iloc[0:5]['group'] = 'a'

            # Mixed type setting but same dtype & changing dtype
            df = DataFrame(dict(A=date_range('20130101', periods=5),
                                B=np.random.randn(5),
                                C=np.arange(5, dtype='int64'),
                                D=list('abcde')))

            with pytest.raises(com.SettingWithCopyError):
                df.loc[2]['D'] = 'foo'

            with pytest.raises(com.SettingWithCopyError):
                df.loc[2]['C'] = 'foo'

            with pytest.raises(com.SettingWithCopyError):
                df['C'][2] = 'foo'

    def test_setting_with_copy_bug(self):
        """Masked write into a column sub-frame raises; adding a column to a
        column sub-frame does not.

        NOTE(review): the first check presumably relies on the
        ``chained_assignment`` option being 'raise', which is not set in this
        method -- confirm where it is configured.
        """

        # operating on a copy
        frame = DataFrame({'a': list(range(4)),
                           'b': list('ab..'),
                           'c': ['a', 'b', np.nan, 'd']})
        missing = pd.isna(frame.c)

        with pytest.raises(com.SettingWithCopyError):
            frame[['c']][missing] = frame[['b']][missing]

        # invalid warning as we are returning a new object
        # GH 8730
        parent = DataFrame({'x': Series(['a', 'b', 'c']),
                            'y': Series(['d', 'e', 'f'])})
        subset = parent[['x']]

        # this should not raise
        subset['y'] = ['g', 'h', 'i']

    def test_detect_chained_assignment_warnings(self):
        """In 'warn' mode a ``.loc[row][col]`` write emits
        ``SettingWithCopyWarning``."""
        frame = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]})

        with option_context("chained_assignment", "warn"):
            with tm.assert_produces_warning(com.SettingWithCopyWarning):
                frame.loc[0]["A"] = 111

    def test_detect_chained_assignment_warnings_filter_and_dupe_cols(self):
        """With duplicate column labels, a filtered chained write warns and
        leaves the frame unchanged (xref gh-13017)."""

        def make_frame():
            # fresh frame with the duplicated "a" label on every call
            return pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, -9]],
                                columns=["a", "a", "c"])

        with option_context("chained_assignment", "warn"):
            frame = make_frame()

            with tm.assert_produces_warning(com.SettingWithCopyWarning):
                frame.c.loc[frame.c > 0] = None

            # the write above must not have altered the frame
            tm.assert_frame_equal(frame, make_frame())

    def test_chained_getitem_with_lists(self):

        # GH6394
        # Regression in chained getitem indexing with embedded list-like from
        # 0.12
        def check(result, expected):
            tm.assert_numpy_array_equal(result, expected)
Loading ...