import operator
import re
import warnings
import numpy as np
import pytest
from pandas._libs.sparse import IntIndex
from pandas.compat import range
import pandas.util._test_decorators as td
import pandas as pd
from pandas import isna
from pandas.core.sparse.api import SparseArray, SparseDtype, SparseSeries
import pandas.util.testing as tm
from pandas.util.testing import assert_almost_equal
@pytest.fixture(params=["integer", "block"])
def kind(request):
return request.param
class TestSparseArray(object):
def setup_method(self, method):
self.arr_data = np.array([np.nan, np.nan, 1, 2, 3,
np.nan, 4, 5, np.nan, 6])
self.arr = SparseArray(self.arr_data)
self.zarr = SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0)
def test_constructor_dtype(self):
arr = SparseArray([np.nan, 1, 2, np.nan])
assert arr.dtype == SparseDtype(np.float64, np.nan)
assert arr.dtype.subtype == np.float64
assert np.isnan(arr.fill_value)
arr = SparseArray([np.nan, 1, 2, np.nan], fill_value=0)
assert arr.dtype == SparseDtype(np.float64, 0)
assert arr.fill_value == 0
arr = SparseArray([0, 1, 2, 4], dtype=np.float64)
assert arr.dtype == SparseDtype(np.float64, np.nan)
assert np.isnan(arr.fill_value)
arr = SparseArray([0, 1, 2, 4], dtype=np.int64)
assert arr.dtype == SparseDtype(np.int64, 0)
assert arr.fill_value == 0
arr = SparseArray([0, 1, 2, 4], fill_value=0, dtype=np.int64)
assert arr.dtype == SparseDtype(np.int64, 0)
assert arr.fill_value == 0
arr = SparseArray([0, 1, 2, 4], dtype=None)
assert arr.dtype == SparseDtype(np.int64, 0)
assert arr.fill_value == 0
arr = SparseArray([0, 1, 2, 4], fill_value=0, dtype=None)
assert arr.dtype == SparseDtype(np.int64, 0)
assert arr.fill_value == 0
def test_constructor_dtype_str(self):
result = SparseArray([1, 2, 3], dtype='int')
expected = SparseArray([1, 2, 3], dtype=int)
tm.assert_sp_array_equal(result, expected)
def test_constructor_sparse_dtype(self):
result = SparseArray([1, 0, 0, 1], dtype=SparseDtype('int64', -1))
expected = SparseArray([1, 0, 0, 1], fill_value=-1, dtype=np.int64)
tm.assert_sp_array_equal(result, expected)
assert result.sp_values.dtype == np.dtype('int64')
def test_constructor_sparse_dtype_str(self):
result = SparseArray([1, 0, 0, 1], dtype='Sparse[int32]')
expected = SparseArray([1, 0, 0, 1], dtype=np.int32)
tm.assert_sp_array_equal(result, expected)
assert result.sp_values.dtype == np.dtype('int32')
def test_constructor_object_dtype(self):
# GH 11856
arr = SparseArray(['A', 'A', np.nan, 'B'], dtype=np.object)
assert arr.dtype == SparseDtype(np.object)
assert np.isnan(arr.fill_value)
arr = SparseArray(['A', 'A', np.nan, 'B'], dtype=np.object,
fill_value='A')
assert arr.dtype == SparseDtype(np.object, 'A')
assert arr.fill_value == 'A'
# GH 17574
data = [False, 0, 100.0, 0.0]
arr = SparseArray(data, dtype=np.object, fill_value=False)
assert arr.dtype == SparseDtype(np.object, False)
assert arr.fill_value is False
arr_expected = np.array(data, dtype=np.object)
it = (type(x) == type(y) and x == y for x, y in zip(arr, arr_expected))
assert np.fromiter(it, dtype=np.bool).all()
@pytest.mark.parametrize("dtype", [SparseDtype(int, 0), int])
def test_constructor_na_dtype(self, dtype):
with pytest.raises(ValueError, match="Cannot convert"):
SparseArray([0, 1, np.nan], dtype=dtype)
def test_constructor_spindex_dtype(self):
arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2]))
# XXX: Behavior change: specifying SparseIndex no longer changes the
# fill_value
expected = SparseArray([0, 1, 2, 0], kind='integer')
tm.assert_sp_array_equal(arr, expected)
assert arr.dtype == SparseDtype(np.int64)
assert arr.fill_value == 0
arr = SparseArray(data=[1, 2, 3],
sparse_index=IntIndex(4, [1, 2, 3]),
dtype=np.int64, fill_value=0)
exp = SparseArray([0, 1, 2, 3], dtype=np.int64, fill_value=0)
tm.assert_sp_array_equal(arr, exp)
assert arr.dtype == SparseDtype(np.int64)
assert arr.fill_value == 0
arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2]),
fill_value=0, dtype=np.int64)
exp = SparseArray([0, 1, 2, 0], fill_value=0, dtype=np.int64)
tm.assert_sp_array_equal(arr, exp)
assert arr.dtype == SparseDtype(np.int64)
assert arr.fill_value == 0
arr = SparseArray(data=[1, 2, 3],
sparse_index=IntIndex(4, [1, 2, 3]),
dtype=None, fill_value=0)
exp = SparseArray([0, 1, 2, 3], dtype=None)
tm.assert_sp_array_equal(arr, exp)
assert arr.dtype == SparseDtype(np.int64)
assert arr.fill_value == 0
@pytest.mark.parametrize("sparse_index", [
None, IntIndex(1, [0]),
])
def test_constructor_spindex_dtype_scalar(self, sparse_index):
# scalar input
arr = SparseArray(data=1, sparse_index=sparse_index, dtype=None)
exp = SparseArray([1], dtype=None)
tm.assert_sp_array_equal(arr, exp)
assert arr.dtype == SparseDtype(np.int64)
assert arr.fill_value == 0
arr = SparseArray(data=1, sparse_index=IntIndex(1, [0]), dtype=None)
exp = SparseArray([1], dtype=None)
tm.assert_sp_array_equal(arr, exp)
assert arr.dtype == SparseDtype(np.int64)
assert arr.fill_value == 0
def test_constructor_spindex_dtype_scalar_broadcasts(self):
arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2]),
fill_value=0, dtype=None)
exp = SparseArray([0, 1, 2, 0], fill_value=0, dtype=None)
tm.assert_sp_array_equal(arr, exp)
assert arr.dtype == SparseDtype(np.int64)
assert arr.fill_value == 0
@pytest.mark.parametrize('data, fill_value', [
(np.array([1, 2]), 0),
(np.array([1.0, 2.0]), np.nan),
([True, False], False),
([pd.Timestamp('2017-01-01')], pd.NaT),
])
def test_constructor_inferred_fill_value(self, data, fill_value):
result = SparseArray(data).fill_value
if pd.isna(fill_value):
assert pd.isna(result)
else:
assert result == fill_value
@pytest.mark.parametrize('scalar,dtype', [
(False, SparseDtype(bool, False)),
(0.0, SparseDtype('float64', 0)),
(1, SparseDtype('int64', 1)),
('z', SparseDtype('object', 'z'))])
def test_scalar_with_index_infer_dtype(self, scalar, dtype):
# GH 19163
arr = SparseArray(scalar, index=[1, 2, 3], fill_value=scalar)
exp = SparseArray([scalar, scalar, scalar], fill_value=scalar)
tm.assert_sp_array_equal(arr, exp)
assert arr.dtype == dtype
assert exp.dtype == dtype
@pytest.mark.parametrize("fill", [1, np.nan, 0])
def test_sparse_series_round_trip(self, kind, fill):
# see gh-13999
arr = SparseArray([np.nan, 1, np.nan, 2, 3],
kind=kind, fill_value=fill)
res = SparseArray(SparseSeries(arr))
tm.assert_sp_array_equal(arr, res)
arr = SparseArray([0, 0, 0, 1, 1, 2], dtype=np.int64,
kind=kind, fill_value=fill)
res = SparseArray(SparseSeries(arr), dtype=np.int64)
tm.assert_sp_array_equal(arr, res)
res = SparseArray(SparseSeries(arr))
tm.assert_sp_array_equal(arr, res)
@pytest.mark.parametrize("fill", [True, False, np.nan])
def test_sparse_series_round_trip2(self, kind, fill):
# see gh-13999
arr = SparseArray([True, False, True, True], dtype=np.bool,
kind=kind, fill_value=fill)
res = SparseArray(SparseSeries(arr))
tm.assert_sp_array_equal(arr, res)
res = SparseArray(SparseSeries(arr))
tm.assert_sp_array_equal(arr, res)
def test_get_item(self):
assert np.isnan(self.arr[1])
assert self.arr[2] == 1
assert self.arr[7] == 5
assert self.zarr[0] == 0
assert self.zarr[2] == 1
assert self.zarr[7] == 5
errmsg = re.compile("bounds")
with pytest.raises(IndexError, match=errmsg):
self.arr[11]
with pytest.raises(IndexError, match=errmsg):
self.arr[-11]
assert self.arr[-1] == self.arr[len(self.arr) - 1]
def test_take_scalar_raises(self):
msg = "'indices' must be an array, not a scalar '2'."
with pytest.raises(ValueError, match=msg):
self.arr.take(2)
def test_take(self):
exp = SparseArray(np.take(self.arr_data, [2, 3]))
tm.assert_sp_array_equal(self.arr.take([2, 3]), exp)
exp = SparseArray(np.take(self.arr_data, [0, 1, 2]))
tm.assert_sp_array_equal(self.arr.take([0, 1, 2]), exp)
def test_take_fill_value(self):
data = np.array([1, np.nan, 0, 3, 0])
sparse = SparseArray(data, fill_value=0)
exp = SparseArray(np.take(data, [0]), fill_value=0)
tm.assert_sp_array_equal(sparse.take([0]), exp)
exp = SparseArray(np.take(data, [1, 3, 4]), fill_value=0)
tm.assert_sp_array_equal(sparse.take([1, 3, 4]), exp)
def test_take_negative(self):
exp = SparseArray(np.take(self.arr_data, [-1]))
tm.assert_sp_array_equal(self.arr.take([-1]), exp)
exp = SparseArray(np.take(self.arr_data, [-4, -3, -2]))
tm.assert_sp_array_equal(self.arr.take([-4, -3, -2]), exp)
@pytest.mark.parametrize('fill_value', [0, None, np.nan])
def test_shift_fill_value(self, fill_value):
# GH #24128
sparse = SparseArray(np.array([1, 0, 0, 3, 0]),
fill_value=8.0)
res = sparse.shift(1, fill_value=fill_value)
if isna(fill_value):
fill_value = res.dtype.na_value
exp = SparseArray(np.array([fill_value, 1, 0, 0, 3]),
fill_value=8.0)
tm.assert_sp_array_equal(res, exp)
def test_bad_take(self):
with pytest.raises(IndexError, match="bounds"):
self.arr.take([11])
def test_take_filling(self):
# similar tests as GH 12631
sparse = SparseArray([np.nan, np.nan, 1, np.nan, 4])
result = sparse.take(np.array([1, 0, -1]))
expected = SparseArray([np.nan, np.nan, 4])
tm.assert_sp_array_equal(result, expected)
# XXX: test change: fill_value=True -> allow_fill=True
result = sparse.take(np.array([1, 0, -1]), allow_fill=True)
expected = SparseArray([np.nan, np.nan, np.nan])
tm.assert_sp_array_equal(result, expected)
# allow_fill=False
result = sparse.take(np.array([1, 0, -1]),
allow_fill=False, fill_value=True)
expected = SparseArray([np.nan, np.nan, 4])
tm.assert_sp_array_equal(result, expected)
msg = "Invalid value in 'indices'"
with pytest.raises(ValueError, match=msg):
sparse.take(np.array([1, 0, -2]), allow_fill=True)
with pytest.raises(ValueError, match=msg):
sparse.take(np.array([1, 0, -5]), allow_fill=True)
with pytest.raises(IndexError):
sparse.take(np.array([1, -6]))
with pytest.raises(IndexError):
sparse.take(np.array([1, 5]))
with pytest.raises(IndexError):
sparse.take(np.array([1, 5]), allow_fill=True)
def test_take_filling_fill_value(self):
# same tests as GH 12631
sparse = SparseArray([np.nan, 0, 1, 0, 4], fill_value=0)
result = sparse.take(np.array([1, 0, -1]))
expected = SparseArray([0, np.nan, 4], fill_value=0)
tm.assert_sp_array_equal(result, expected)
# fill_value
result = sparse.take(np.array([1, 0, -1]), allow_fill=True)
# XXX: behavior change.
# the old way of filling self.fill_value doesn't follow EA rules.
# It's supposed to be self.dtype.na_value (nan in this case)
expected = SparseArray([0, np.nan, np.nan], fill_value=0)
tm.assert_sp_array_equal(result, expected)
# allow_fill=False
result = sparse.take(np.array([1, 0, -1]),
allow_fill=False, fill_value=True)
expected = SparseArray([0, np.nan, 4], fill_value=0)
tm.assert_sp_array_equal(result, expected)
msg = ("Invalid value in 'indices'.")
with pytest.raises(ValueError, match=msg):
sparse.take(np.array([1, 0, -2]), allow_fill=True)
with pytest.raises(ValueError, match=msg):
sparse.take(np.array([1, 0, -5]), allow_fill=True)
with pytest.raises(IndexError):
sparse.take(np.array([1, -6]))
with pytest.raises(IndexError):
sparse.take(np.array([1, 5]))
with pytest.raises(IndexError):
Loading ...