import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame, Series, Timestamp, compat, date_range, option_context)
from pandas.core import common as com
from pandas.util import testing as tm
class TestCaching(object):
def test_slice_consolidate_invalidate_item_cache(self):
# this is chained assignment, but will 'work'
with option_context('chained_assignment', None):
# #3970
df = DataFrame({"aa": compat.lrange(5), "bb": [2.2] * 5})
# Creates a second float block
df["cc"] = 0.0
# caches a reference to the 'bb' series
df["bb"]
# repr machinery triggers consolidation
repr(df)
# Assignment to wrong series
df['bb'].iloc[0] = 0.17
df._clear_item_cache()
tm.assert_almost_equal(df['bb'][0], 0.17)
def test_setitem_cache_updating(self):
# GH 5424
cont = ['one', 'two', 'three', 'four', 'five', 'six', 'seven']
for do_ref in [False, False]:
df = DataFrame({'a': cont,
"b": cont[3:] + cont[:3],
'c': np.arange(7)})
# ref the cache
if do_ref:
df.loc[0, "c"]
# set it
df.loc[7, 'c'] = 1
assert df.loc[0, 'c'] == 0.0
assert df.loc[7, 'c'] == 1.0
# GH 7084
# not updating cache on series setting with slices
expected = DataFrame({'A': [600, 600, 600]},
index=date_range('5/7/2014', '5/9/2014'))
out = DataFrame({'A': [0, 0, 0]},
index=date_range('5/7/2014', '5/9/2014'))
df = DataFrame({'C': ['A', 'A', 'A'], 'D': [100, 200, 300]})
# loop through df to update out
six = Timestamp('5/7/2014')
eix = Timestamp('5/9/2014')
for ix, row in df.iterrows():
out.loc[six:eix, row['C']] = out.loc[six:eix, row['C']] + row['D']
tm.assert_frame_equal(out, expected)
tm.assert_series_equal(out['A'], expected['A'])
# try via a chain indexing
# this actually works
out = DataFrame({'A': [0, 0, 0]},
index=date_range('5/7/2014', '5/9/2014'))
for ix, row in df.iterrows():
v = out[row['C']][six:eix] + row['D']
out[row['C']][six:eix] = v
tm.assert_frame_equal(out, expected)
tm.assert_series_equal(out['A'], expected['A'])
out = DataFrame({'A': [0, 0, 0]},
index=date_range('5/7/2014', '5/9/2014'))
for ix, row in df.iterrows():
out.loc[six:eix, row['C']] += row['D']
tm.assert_frame_equal(out, expected)
tm.assert_series_equal(out['A'], expected['A'])
class TestChaining(object):
def test_setitem_chained_setfault(self):
# GH6026
data = ['right', 'left', 'left', 'left', 'right', 'left', 'timeout']
mdata = ['right', 'left', 'left', 'left', 'right', 'left', 'none']
df = DataFrame({'response': np.array(data)})
mask = df.response == 'timeout'
df.response[mask] = 'none'
tm.assert_frame_equal(df, DataFrame({'response': mdata}))
recarray = np.rec.fromarrays([data], names=['response'])
df = DataFrame(recarray)
mask = df.response == 'timeout'
df.response[mask] = 'none'
tm.assert_frame_equal(df, DataFrame({'response': mdata}))
df = DataFrame({'response': data, 'response1': data})
mask = df.response == 'timeout'
df.response[mask] = 'none'
tm.assert_frame_equal(df, DataFrame({'response': mdata,
'response1': data}))
# GH 6056
expected = DataFrame(dict(A=[np.nan, 'bar', 'bah', 'foo', 'bar']))
df = DataFrame(dict(A=np.array(['foo', 'bar', 'bah', 'foo', 'bar'])))
df['A'].iloc[0] = np.nan
result = df.head()
tm.assert_frame_equal(result, expected)
df = DataFrame(dict(A=np.array(['foo', 'bar', 'bah', 'foo', 'bar'])))
df.A.iloc[0] = np.nan
result = df.head()
tm.assert_frame_equal(result, expected)
def test_detect_chained_assignment(self):
pd.set_option('chained_assignment', 'raise')
# work with the chain
expected = DataFrame([[-5, 1], [-6, 3]], columns=list('AB'))
df = DataFrame(np.arange(4).reshape(2, 2),
columns=list('AB'), dtype='int64')
assert df._is_copy is None
df['A'][0] = -5
df['A'][1] = -6
tm.assert_frame_equal(df, expected)
# test with the chaining
df = DataFrame({'A': Series(range(2), dtype='int64'),
'B': np.array(np.arange(2, 4), dtype=np.float64)})
assert df._is_copy is None
with pytest.raises(com.SettingWithCopyError):
df['A'][0] = -5
with pytest.raises(com.SettingWithCopyError):
df['A'][1] = np.nan
assert df['A']._is_copy is None
# Using a copy (the chain), fails
df = DataFrame({'A': Series(range(2), dtype='int64'),
'B': np.array(np.arange(2, 4), dtype=np.float64)})
with pytest.raises(com.SettingWithCopyError):
df.loc[0]['A'] = -5
# Doc example
df = DataFrame({'a': ['one', 'one', 'two', 'three',
'two', 'one', 'six'],
'c': Series(range(7), dtype='int64')})
assert df._is_copy is None
with pytest.raises(com.SettingWithCopyError):
indexer = df.a.str.startswith('o')
df[indexer]['c'] = 42
expected = DataFrame({'A': [111, 'bbb', 'ccc'], 'B': [1, 2, 3]})
df = DataFrame({'A': ['aaa', 'bbb', 'ccc'], 'B': [1, 2, 3]})
with pytest.raises(com.SettingWithCopyError):
df['A'][0] = 111
with pytest.raises(com.SettingWithCopyError):
df.loc[0]['A'] = 111
df.loc[0, 'A'] = 111
tm.assert_frame_equal(df, expected)
# gh-5475: Make sure that is_copy is picked up reconstruction
df = DataFrame({"A": [1, 2]})
assert df._is_copy is None
with tm.ensure_clean('__tmp__pickle') as path:
df.to_pickle(path)
df2 = pd.read_pickle(path)
df2["B"] = df2["A"]
df2["B"] = df2["A"]
# gh-5597: a spurious raise as we are setting the entire column here
from string import ascii_letters as letters
def random_text(nobs=100):
df = []
for i in range(nobs):
idx = np.random.randint(len(letters), size=2)
idx.sort()
df.append([letters[idx[0]:idx[1]]])
return DataFrame(df, columns=['letters'])
df = random_text(100000)
# Always a copy
x = df.iloc[[0, 1, 2]]
assert x._is_copy is not None
x = df.iloc[[0, 1, 2, 4]]
assert x._is_copy is not None
# Explicitly copy
indexer = df.letters.apply(lambda x: len(x) > 10)
df = df.loc[indexer].copy()
assert df._is_copy is None
df['letters'] = df['letters'].apply(str.lower)
# Implicitly take
df = random_text(100000)
indexer = df.letters.apply(lambda x: len(x) > 10)
df = df.loc[indexer]
assert df._is_copy is not None
df['letters'] = df['letters'].apply(str.lower)
# Implicitly take 2
df = random_text(100000)
indexer = df.letters.apply(lambda x: len(x) > 10)
df = df.loc[indexer]
assert df._is_copy is not None
df.loc[:, 'letters'] = df['letters'].apply(str.lower)
# Should be ok even though it's a copy!
assert df._is_copy is None
df['letters'] = df['letters'].apply(str.lower)
assert df._is_copy is None
df = random_text(100000)
indexer = df.letters.apply(lambda x: len(x) > 10)
df.loc[indexer, 'letters'] = (
df.loc[indexer, 'letters'].apply(str.lower))
# an identical take, so no copy
df = DataFrame({'a': [1]}).dropna()
assert df._is_copy is None
df['a'] += 1
df = DataFrame(np.random.randn(10, 4))
s = df.iloc[:, 0].sort_values()
tm.assert_series_equal(s, df.iloc[:, 0].sort_values())
tm.assert_series_equal(s, df[0].sort_values())
# see gh-6025: false positives
df = DataFrame({'column1': ['a', 'a', 'a'], 'column2': [4, 8, 9]})
str(df)
df['column1'] = df['column1'] + 'b'
str(df)
df = df[df['column2'] != 8]
str(df)
df['column1'] = df['column1'] + 'c'
str(df)
# from SO:
# http://stackoverflow.com/questions/24054495/potential-bug-setting-value-for-undefined-column-using-iloc
df = DataFrame(np.arange(0, 9), columns=['count'])
df['group'] = 'b'
with pytest.raises(com.SettingWithCopyError):
df.iloc[0:5]['group'] = 'a'
# Mixed type setting but same dtype & changing dtype
df = DataFrame(dict(A=date_range('20130101', periods=5),
B=np.random.randn(5),
C=np.arange(5, dtype='int64'),
D=list('abcde')))
with pytest.raises(com.SettingWithCopyError):
df.loc[2]['D'] = 'foo'
with pytest.raises(com.SettingWithCopyError):
df.loc[2]['C'] = 'foo'
with pytest.raises(com.SettingWithCopyError):
df['C'][2] = 'foo'
def test_setting_with_copy_bug(self):
# operating on a copy
df = DataFrame({'a': list(range(4)),
'b': list('ab..'),
'c': ['a', 'b', np.nan, 'd']})
mask = pd.isna(df.c)
def f():
df[['c']][mask] = df[['b']][mask]
pytest.raises(com.SettingWithCopyError, f)
# invalid warning as we are returning a new object
# GH 8730
df1 = DataFrame({'x': Series(['a', 'b', 'c']),
'y': Series(['d', 'e', 'f'])})
df2 = df1[['x']]
# this should not raise
df2['y'] = ['g', 'h', 'i']
def test_detect_chained_assignment_warnings(self):
with option_context("chained_assignment", "warn"):
df = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]})
with tm.assert_produces_warning(com.SettingWithCopyWarning):
df.loc[0]["A"] = 111
def test_detect_chained_assignment_warnings_filter_and_dupe_cols(self):
# xref gh-13017.
with option_context("chained_assignment", "warn"):
df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, -9]],
columns=["a", "a", "c"])
with tm.assert_produces_warning(com.SettingWithCopyWarning):
df.c.loc[df.c > 0] = None
expected = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, -9]],
columns=["a", "a", "c"])
tm.assert_frame_equal(df, expected)
def test_chained_getitem_with_lists(self):
# GH6394
# Regression in chained getitem indexing with embedded list-like from
# 0.12
def check(result, expected):
tm.assert_numpy_array_equal(result, expected)
Loading ...