Repository URL to install this package:
Version:
0.24.2 ▾
|
# -*- coding: utf-8 -*-
from __future__ import print_function
# pylint: disable-msg=W0612,E1101
from copy import deepcopy
import pydoc
import numpy as np
import pytest
from pandas.compat import long, lrange, range
import pandas as pd
from pandas import (
Categorical, DataFrame, Series, SparseDataFrame, compat, date_range,
timedelta_range)
import pandas.util.testing as tm
from pandas.util.testing import (
assert_almost_equal, assert_frame_equal, assert_series_equal)
class SharedWithSparse(object):
    """
    A collection of tests DataFrame and SparseDataFrame can share.

    In generic tests on this class, use ``self._assert_frame_equal()`` and
    ``self._assert_series_equal()`` which are implemented in sub-classes
    and dispatch correctly.

    Tests that build their own frame do so through ``self.klass``, which
    sub-classes must set to the frame class under test.
    """

    def _assert_frame_equal(self, left, right):
        """Dispatch to frame class dependent assertion"""
        raise NotImplementedError

    def _assert_series_equal(self, left, right):
        """Dispatch to series class dependent assertion"""
        raise NotImplementedError

    def test_copy_index_name_checking(self, float_frame):
        """Renaming an axis on a copy must not rename the original's axis."""
        # don't want to be able to modify the index stored elsewhere after
        # making a copy
        for attr in ('index', 'columns'):
            ind = getattr(float_frame, attr)
            ind.name = None
            cp = float_frame.copy()
            getattr(cp, attr).name = 'foo'
            assert getattr(float_frame, attr).name is None

    def test_getitem_pop_assign_name(self, float_frame):
        """Column access via ``[]``, ``pop`` and ``loc`` names the Series."""
        s = float_frame['A']
        assert s.name == 'A'

        s = float_frame.pop('A')
        assert s.name == 'A'

        s = float_frame.loc[:, 'B']
        assert s.name == 'B'

        s2 = s.loc[:]
        assert s2.name == 'B'

    def test_get_value(self, float_frame):
        """``get_value`` warns (FutureWarning) and matches ``frame[col][idx]``."""
        for idx in float_frame.index:
            for col in float_frame.columns:
                with tm.assert_produces_warning(FutureWarning,
                                                check_stacklevel=False):
                    result = float_frame.get_value(idx, col)
                expected = float_frame[col][idx]
                tm.assert_almost_equal(result, expected)

    def test_add_prefix_suffix(self, float_frame):
        """``add_prefix`` / ``add_suffix`` relabel columns, including '%'."""
        with_prefix = float_frame.add_prefix('foo#')
        expected = pd.Index(['foo#%s' % c for c in float_frame.columns])
        tm.assert_index_equal(with_prefix.columns, expected)

        with_suffix = float_frame.add_suffix('#foo')
        expected = pd.Index(['%s#foo' % c for c in float_frame.columns])
        tm.assert_index_equal(with_suffix.columns, expected)

        # a literal '%' must survive (no accidental %-formatting)
        with_pct_prefix = float_frame.add_prefix('%')
        expected = pd.Index(['%{}'.format(c) for c in float_frame.columns])
        tm.assert_index_equal(with_pct_prefix.columns, expected)

        with_pct_suffix = float_frame.add_suffix('%')
        expected = pd.Index(['{}%'.format(c) for c in float_frame.columns])
        tm.assert_index_equal(with_pct_suffix.columns, expected)

    def test_get_axis(self, float_frame):
        """Axis number/name/object lookups accept ints and aliases."""
        f = float_frame
        assert f._get_axis_number(0) == 0
        assert f._get_axis_number(1) == 1
        assert f._get_axis_number('index') == 0
        assert f._get_axis_number('rows') == 0
        assert f._get_axis_number('columns') == 1

        assert f._get_axis_name(0) == 'index'
        assert f._get_axis_name(1) == 'columns'
        assert f._get_axis_name('index') == 'index'
        assert f._get_axis_name('rows') == 'index'
        assert f._get_axis_name('columns') == 'columns'

        assert f._get_axis(0) is f.index
        assert f._get_axis(1) is f.columns

        with pytest.raises(ValueError, match='No axis named'):
            f._get_axis_number(2)

        with pytest.raises(ValueError, match='No axis.*foo'):
            f._get_axis_name('foo')

        with pytest.raises(ValueError, match='No axis.*None'):
            f._get_axis_name(None)

        with pytest.raises(ValueError, match='No axis named'):
            f._get_axis_number(None)

    def test_keys(self, float_frame):
        """``keys()`` returns the very same object as ``columns``."""
        getkeys = float_frame.keys
        assert getkeys() is float_frame.columns

    def test_column_contains_typeerror(self, float_frame):
        """Testing whether the columns Index is ``in`` the frame raises."""
        # Use pytest.raises rather than try/except/pass: the latter also
        # passes when no exception is raised at all.
        with pytest.raises(TypeError):
            float_frame.columns in float_frame

    def test_tab_completion(self):
        """Identifier-like column labels show up in ``dir(df)``."""
        # DataFrame whose columns are identifiers shall have them in __dir__.
        df = pd.DataFrame([list('abcd'), list('efgh')], columns=list('ABCD'))
        for key in list('ABCD'):
            assert key in dir(df)
        assert isinstance(df.__getitem__('A'), pd.Series)

        # DataFrame whose first-level columns are identifiers shall have
        # them in __dir__.
        df = pd.DataFrame(
            [list('abcd'), list('efgh')],
            columns=pd.MultiIndex.from_tuples(list(zip('ABCD', 'EFGH'))))
        for key in list('ABCD'):
            assert key in dir(df)
        for key in list('EFGH'):
            assert key not in dir(df)
        assert isinstance(df.__getitem__('A'), pd.DataFrame)

    def test_not_hashable(self, empty_frame):
        """``hash()`` of a frame raises TypeError, empty or not."""
        df = self.klass([1])
        for frame in (df, empty_frame):
            with pytest.raises(TypeError):
                hash(frame)

    def test_new_empty_index(self):
        """Naming one empty frame's index must not leak to another frame."""
        df1 = self.klass(np.random.randn(0, 3))
        df2 = self.klass(np.random.randn(0, 3))
        df1.index.name = 'foo'
        assert df2.index.name is None

    def test_array_interface(self, float_frame):
        """A NumPy ufunc on a frame returns a frame sharing its axes."""
        with np.errstate(all='ignore'):
            result = np.sqrt(float_frame)
        assert isinstance(result, type(float_frame))
        assert result.index is float_frame.index
        assert result.columns is float_frame.columns

        self._assert_frame_equal(result, float_frame.apply(np.sqrt))

    def test_get_agg_axis(self, float_frame):
        """``_get_agg_axis`` maps 0 -> columns, 1 -> index, else ValueError."""
        cols = float_frame._get_agg_axis(0)
        assert cols is float_frame.columns

        idx = float_frame._get_agg_axis(1)
        assert idx is float_frame.index

        with pytest.raises(ValueError):
            float_frame._get_agg_axis(2)

    def test_nonzero(self, float_frame, float_string_frame, empty_frame):
        """``empty`` is True only for the empty fixture."""
        assert empty_frame.empty

        assert not float_frame.empty
        assert not float_string_frame.empty

        # corner case
        df = DataFrame({'A': [1., 2., 3.],
                        'B': ['a', 'b', 'c']},
                       index=np.arange(3))
        del df['A']
        assert not df.empty

    def test_iteritems(self):
        """Iterating items yields sliced objects even with duplicate labels."""
        df = self.klass([[1, 2, 3], [4, 5, 6]], columns=['a', 'a', 'b'])
        for k, v in compat.iteritems(df):
            assert isinstance(v, self.klass._constructor_sliced)

    def test_items(self):
        """``items()`` yields (label, Series) pairs in column order."""
        # GH 17213, GH 13918
        cols = ['a', 'b', 'c']
        df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=cols)
        for c, (k, v) in zip(cols, df.items()):
            assert c == k
            assert isinstance(v, Series)
            assert (df[k] == v).all()

    def test_iter(self, float_frame):
        """Iterating a frame yields its column labels."""
        assert tm.equalContents(list(float_frame), float_frame.columns)

    def test_iterrows(self, float_frame, float_string_frame):
        """``iterrows`` yields rows equal to ``loc[label]``."""
        for k, v in float_frame.iterrows():
            exp = float_frame.loc[k]
            self._assert_series_equal(v, exp)

        for k, v in float_string_frame.iterrows():
            exp = float_string_frame.loc[k]
            self._assert_series_equal(v, exp)

    def test_iterrows_iso8601(self):
        """``iterrows`` on a frame mixing strings and datetimes (GH 19671)."""
        # GH 19671
        if self.klass == SparseDataFrame:
            pytest.xfail(reason='SparseBlock datetime type not implemented.')

        s = self.klass(
            {'non_iso8601': ['M1701', 'M1802', 'M1903', 'M2004'],
             'iso8601': date_range('2000-01-01', periods=4, freq='M')})
        for k, v in s.iterrows():
            exp = s.loc[k]
            self._assert_series_equal(v, exp)

    def test_itertuples(self, float_frame):
        """``itertuples`` values, naming, and plain-tuple fallback."""
        for i, tup in enumerate(float_frame.itertuples()):
            s = self.klass._constructor_sliced(tup[1:])
            s.name = tup[0]
            expected = float_frame.iloc[i, :].reset_index(drop=True)
            self._assert_series_equal(s, expected)

        df = self.klass({'floats': np.random.randn(5),
                         'ints': lrange(5)}, columns=['floats', 'ints'])

        for tup in df.itertuples(index=False):
            assert isinstance(tup[1], (int, long))

        df = self.klass(data={"a": [1, 2, 3], "b": [4, 5, 6]})
        dfaa = df[['a', 'a']]

        assert (list(dfaa.itertuples()) ==
                [(0, 1, 1), (1, 2, 2), (2, 3, 3)])

        # repr will be int/long on 32-bit/windows
        if not (compat.is_platform_windows() or compat.is_platform_32bit()):
            assert (repr(list(df.itertuples(name=None))) ==
                    '[(0, 1, 4), (1, 2, 5), (2, 3, 6)]')

        tup = next(df.itertuples(name='TestName'))
        assert tup._fields == ('Index', 'a', 'b')
        assert (tup.Index, tup.a, tup.b) == tup
        assert type(tup).__name__ == 'TestName'

        # column labels that are Python keywords get positional field names
        df.columns = ['def', 'return']
        tup2 = next(df.itertuples(name='TestName'))
        assert tup2 == (0, 1, 4)
        assert tup2._fields == ('Index', '_1', '_2')

        df3 = DataFrame({'f' + str(i): [i] for i in range(1024)})
        # will raise SyntaxError if trying to create namedtuple
        tup3 = next(df3.itertuples())
        assert not hasattr(tup3, '_fields')
        assert isinstance(tup3, tuple)

    def test_sequence_like_with_categorical(self):
        """A frame holding a Categorical column can be iterated (GH 7839)."""
        # GH 7839
        # make sure can iterate
        df = DataFrame({"id": [1, 2, 3, 4, 5, 6],
                        "raw_grade": ['a', 'b', 'b', 'a', 'a', 'e']})
        df['grade'] = Categorical(df['raw_grade'])

        # basic sequencing testing
        result = list(df.grade.values)
        expected = np.array(df.grade.values).tolist()
        tm.assert_almost_equal(result, expected)

        # iteration
        for t in df.itertuples(index=False):
            str(t)

        for _, s in df.iterrows():
            str(s)

        # stringify the column yielded by this loop, not the stale row
        # left over from the iterrows() loop above
        for _, col in df.iteritems():
            str(col)

    def test_len(self, float_frame):
        """``len(frame)`` equals the length of its index."""
        assert len(float_frame) == len(float_frame.index)

    def test_values(self, float_frame, float_string_frame):
        """``.values`` agrees element-wise with column/position lookup."""
        frame = float_frame
        arr = frame.values

        frame_cols = frame.columns
        for i, row in enumerate(arr):
            for j, value in enumerate(row):
                col = frame_cols[j]
                if np.isnan(value):
                    assert np.isnan(frame[col][i])
                else:
                    assert value == frame[col][i]

        # mixed type
        arr = float_string_frame[['foo', 'A']].values
        assert arr[0, 0] == 'bar'

        df = self.klass({'complex': [1j, 2j, 3j], 'real': [1, 2, 3]})
        arr = df.values
        assert arr[0, 0] == 1j

        # single block corner case
        arr = float_frame[['A', 'B']].values
        expected = float_frame.reindex(columns=['A', 'B']).values
        assert_almost_equal(arr, expected)

    def test_to_numpy(self):
        """``to_numpy`` on int + float columns returns the 2-D values."""
        df = pd.DataFrame({"A": [1, 2], "B": [3, 4.5]})
        expected = np.array([[1, 3], [2, 4.5]])
        result = df.to_numpy()
        tm.assert_numpy_array_equal(result, expected)

    def test_to_numpy_dtype(self):
        """``to_numpy(dtype=...)`` casts to the requested dtype."""
        df = pd.DataFrame({"A": [1, 2], "B": [3, 4.5]})
        expected = np.array([[1, 3], [2, 4]], dtype="int64")
        result = df.to_numpy(dtype="int64")
        tm.assert_numpy_array_equal(result, expected)

    def test_to_numpy_copy(self):
        """``to_numpy(copy=...)`` controls whether the source array is shared."""
        arr = np.random.randn(4, 3)
        df = pd.DataFrame(arr)
        assert df.values.base is arr
        assert df.to_numpy(copy=False).base is arr
        assert df.to_numpy(copy=True).base is None

    def test_transpose(self, float_frame):
        """``.T`` swaps labels; a mixed-dtype frame transposes to object."""
        frame = float_frame
        dft = frame.T
        for idx, series in compat.iteritems(dft):
            for col, value in compat.iteritems(series):
                if np.isnan(value):
                    assert np.isnan(frame[col][idx])
                else:
                    assert value == frame[col][idx]

        # mixed type
        index, data = tm.getMixedTypeDict()
        mixed = self.klass(data, index=index)

        mixed_T = mixed.T
        for col, s in compat.iteritems(mixed_T):
            assert s.dtype == np.object_

    def test_swapaxes(self):
        """``swapaxes`` matches ``.T`` / identity; bad axes raise ValueError."""
        df = self.klass(np.random.randn(10, 5))
        self._assert_frame_equal(df.T, df.swapaxes(0, 1))
        self._assert_frame_equal(df.T, df.swapaxes(1, 0))
        self._assert_frame_equal(df, df.swapaxes(0, 0))
        with pytest.raises(ValueError):
            df.swapaxes(2, 5)

    def test_axis_aliases(self, float_frame):
        """``axis='index'`` / ``'columns'`` behave like ``axis=0`` / ``1``."""
        f = float_frame

        # reg name
        expected = f.sum(axis=0)
        result = f.sum(axis='index')
        assert_series_equal(result, expected)

        expected = f.sum(axis=1)
        result = f.sum(axis='columns')
        assert_series_equal(result, expected)

    def test_class_axis(self):
        """Class-level ``index`` / ``columns`` have docstrings (GH 18147)."""
        # GH 18147
        # no exception and no empty docstring
        assert pydoc.getdoc(DataFrame.index)
        assert pydoc.getdoc(DataFrame.columns)

    def test_more_values(self, float_string_frame):
        """``.values`` of a mixed frame has one column per frame column."""
        values = float_string_frame.values
        assert values.shape[1] == len(float_string_frame.columns)

    def test_repr_with_mi_nat(self, float_string_frame):
        """``repr`` of a frame whose MultiIndex contains NaT."""
        df = self.klass({'X': [1, 2]},
                        index=[[pd.NaT, pd.Timestamp('20130101')], ['a', 'b']])
        result = repr(df)
        # The expected text is column-aligned; the padding is significant.
        expected = '              X\nNaT        a  1\n2013-01-01 b  2'
        assert result == expected

    def test_iteritems_names(self, float_string_frame):
        """Each Series yielded by item iteration is named after its key."""
        for k, v in compat.iteritems(float_string_frame):
            assert v.name == k

    def test_series_put_names(self, float_string_frame):
        """Each Series in the ``_series`` mapping is named after its key."""
        series = float_string_frame._series
        for k, v in compat.iteritems(series):
            assert v.name == k

    def test_empty_nonzero(self):
        """``empty`` for assorted constructor inputs, and for transposes."""
        df = self.klass([1, 2, 3])
        assert not df.empty
        df = self.klass(index=[1], columns=[1])
        assert not df.empty
        df = self.klass(index=['a', 'b'], columns=['c', 'd']).dropna()
        assert df.empty
        assert df.T.empty
        empty_frames = [self.klass(),
                        self.klass(index=[1]),
                        self.klass(columns=[1]),
                        self.klass({1: []})]
        for df in empty_frames:
            assert df.empty
            assert df.T.empty

    def test_with_datetimelikes(self):
        """Transposing datetime + timedelta columns gives object columns."""
        df = self.klass({'A': date_range('20130101', periods=10),
                         'B': timedelta_range('1 day', periods=10)})
        t = df.T

        result = t.get_dtype_counts()
        if self.klass is DataFrame:
            expected = Series({'object': 10})
        else:
            expected = Series({'Sparse[object, nan]': 10})
        tm.assert_series_equal(result, expected)
class TestDataFrameMisc(SharedWithSparse):
    """Shared API tests bound to ``DataFrame``, plus DataFrame-only tests."""

    klass = DataFrame

    # SharedWithSparse tests use generic, klass-agnostic assertion
    _assert_frame_equal = staticmethod(assert_frame_equal)
    _assert_series_equal = staticmethod(assert_series_equal)

    def test_values_write_through(self, float_frame):
        """A write into ``.values`` is visible on the next ``.values`` read."""
        # Deliberately not named ``test_values``: that name overrides the
        # inherited ``SharedWithSparse.test_values`` and would silently stop
        # the shared test from running against DataFrame.
        float_frame.values[:, 0] = 5.
        assert (float_frame.values[:, 0] == 5).all()

    def test_as_matrix_deprecated(self, float_frame):
        """``as_matrix`` warns (FutureWarning) and matches ``.values``."""
        # GH 18458
        cols = float_frame.columns.tolist()
        # only the deprecated call belongs inside the warning check
        with tm.assert_produces_warning(FutureWarning):
            result = float_frame.as_matrix(columns=cols)
        expected = float_frame.values
        tm.assert_numpy_array_equal(result, expected)

    def test_deepcopy(self, float_frame):
        """Mutating a ``deepcopy`` leaves the original frame untouched."""
        cp = deepcopy(float_frame)
        series = cp['A']
        series[:] = 10
        for idx, value in compat.iteritems(series):
            assert float_frame['A'][idx] != value

    def test_transpose_get_view(self, float_frame):
        """Writing through ``frame.T.values`` shows up in ``frame.values``."""
        dft = float_frame.T
        dft.values[:, 5:10] = 5

        assert (float_frame.values[5:10] == 5).all()

    def test_inplace_return_self(self):
        """Methods called with ``inplace=True`` return None (GH 1893)."""
        # GH 1893

        data = DataFrame({'a': ['foo', 'bar', 'baz', 'qux'],
                          'b': [0, 0, 1, 1],
                          'c': [1, 2, 3, 4]})

        def _check_f(base, f):
            result = f(base)
            assert result is None

        # Each case gets its own base object, so the in-place mutations
        # cannot interfere with one another.

        # -----DataFrame-----
        frame_cases = [
            # set_index
            (data.copy(), lambda x: x.set_index('a', inplace=True)),
            # reset_index
            (data.set_index('a'), lambda x: x.reset_index(inplace=True)),
            # drop_duplicates
            (data.copy(), lambda x: x.drop_duplicates(inplace=True)),
            # sort
            (data.copy(), lambda x: x.sort_values('b', inplace=True)),
            # sort_index
            (data.copy(), lambda x: x.sort_index(inplace=True)),
            # fillna
            (data.copy(), lambda x: x.fillna(0, inplace=True)),
            # replace
            (data.copy(), lambda x: x.replace(1, 0, inplace=True)),
            # rename
            (data.copy(), lambda x: x.rename({1: 'foo'}, inplace=True)),
        ]
        for base, func in frame_cases:
            _check_f(base, func)

        # -----Series-----
        d = data.copy()['c']

        series_cases = [
            # reset_index
            (data.set_index('a')['c'],
             lambda x: x.reset_index(inplace=True, drop=True)),
            # fillna
            (d.copy(), lambda x: x.fillna(0, inplace=True)),
            # replace
            (d.copy(), lambda x: x.replace(1, 0, inplace=True)),
            # rename
            (d.copy(), lambda x: x.rename({1: 'foo'}, inplace=True)),
        ]
        for base, func in series_cases:
            _check_f(base, func)

    def test_tab_complete_warning(self, ip):
        """IPython tab completion on a frame emits no warning (GH 16409)."""
        # GH 16409
        pytest.importorskip('IPython', minversion="6.0.0")
        from IPython.core.completer import provisionalcompleter

        code = "import pandas as pd; df = pd.DataFrame()"
        ip.run_code(code)
        with tm.assert_produces_warning(None):
            with provisionalcompleter('ignore'):
                list(ip.Completer.completions('df.', 1))