# pylint: disable-msg=E1101,W0612
from datetime import datetime
import operator
import numpy as np
from numpy import nan
import pytest
from pandas._libs.sparse import BlockIndex, IntIndex
from pandas.compat import PY36, range
from pandas.errors import PerformanceWarning
import pandas.util._test_decorators as td
import pandas as pd
from pandas import (
DataFrame, Series, SparseDtype, SparseSeries, bdate_range, compat, isna)
from pandas.core.reshape.util import cartesian_product
import pandas.core.sparse.frame as spf
from pandas.tests.series.test_api import SharedWithSparse
import pandas.util.testing as tm
from pandas.tseries.offsets import BDay
def _test_data1():
# nan-based
arr = np.arange(20, dtype=float)
index = np.arange(20)
arr[:2] = nan
arr[5:10] = nan
arr[-3:] = nan
return arr, index
def _test_data2():
# nan-based
arr = np.arange(15, dtype=float)
index = np.arange(15)
arr[7:12] = nan
arr[-1:] = nan
return arr, index
def _test_data1_zero():
# zero-based
arr, index = _test_data1()
arr[np.isnan(arr)] = 0
return arr, index
def _test_data2_zero():
# zero-based
arr, index = _test_data2()
arr[np.isnan(arr)] = 0
return arr, index
class TestSparseSeries(SharedWithSparse):
series_klass = SparseSeries
# SharedWithSparse tests use generic, series_klass-agnostic assertion
_assert_series_equal = staticmethod(tm.assert_sp_series_equal)
def setup_method(self, method):
arr, index = _test_data1()
date_index = bdate_range('1/1/2011', periods=len(index))
self.bseries = SparseSeries(arr, index=index, kind='block',
name='bseries')
self.ts = self.bseries
self.btseries = SparseSeries(arr, index=date_index, kind='block')
self.iseries = SparseSeries(arr, index=index, kind='integer',
name='iseries')
arr, index = _test_data2()
self.bseries2 = SparseSeries(arr, index=index, kind='block')
self.iseries2 = SparseSeries(arr, index=index, kind='integer')
arr, index = _test_data1_zero()
self.zbseries = SparseSeries(arr, index=index, kind='block',
fill_value=0, name='zbseries')
self.ziseries = SparseSeries(arr, index=index, kind='integer',
fill_value=0)
arr, index = _test_data2_zero()
self.zbseries2 = SparseSeries(arr, index=index, kind='block',
fill_value=0)
self.ziseries2 = SparseSeries(arr, index=index, kind='integer',
fill_value=0)
def test_constructor_dict_input(self):
# gh-16905
constructor_dict = {1: 1.}
index = [0, 1, 2]
# Series with index passed in
series = pd.Series(constructor_dict)
expected = SparseSeries(series, index=index)
result = SparseSeries(constructor_dict, index=index)
tm.assert_sp_series_equal(result, expected)
# Series with index and dictionary with no index
expected = SparseSeries(series)
result = SparseSeries(constructor_dict)
tm.assert_sp_series_equal(result, expected)
def test_constructor_dict_order(self):
# GH19018
# initialization ordering: by insertion order if python>= 3.6, else
# order by value
d = {'b': 1, 'a': 0, 'c': 2}
result = SparseSeries(d)
if PY36:
expected = SparseSeries([1, 0, 2], index=list('bac'))
else:
expected = SparseSeries([0, 1, 2], index=list('abc'))
tm.assert_sp_series_equal(result, expected)
def test_constructor_dtype(self):
arr = SparseSeries([np.nan, 1, 2, np.nan])
assert arr.dtype == SparseDtype(np.float64)
assert np.isnan(arr.fill_value)
arr = SparseSeries([np.nan, 1, 2, np.nan], fill_value=0)
assert arr.dtype == SparseDtype(np.float64, 0)
assert arr.fill_value == 0
arr = SparseSeries([0, 1, 2, 4], dtype=np.int64, fill_value=np.nan)
assert arr.dtype == SparseDtype(np.int64, np.nan)
assert np.isnan(arr.fill_value)
arr = SparseSeries([0, 1, 2, 4], dtype=np.int64)
assert arr.dtype == SparseDtype(np.int64, 0)
assert arr.fill_value == 0
arr = SparseSeries([0, 1, 2, 4], fill_value=0, dtype=np.int64)
assert arr.dtype == SparseDtype(np.int64, 0)
assert arr.fill_value == 0
def test_iteration_and_str(self):
[x for x in self.bseries]
str(self.bseries)
def test_construct_DataFrame_with_sp_series(self):
# it works!
df = DataFrame({'col': self.bseries})
# printing & access
df.iloc[:1]
df['col']
df.dtypes
str(df)
# blocking
expected = Series({'col': 'float64:sparse'})
result = df.ftypes
tm.assert_series_equal(expected, result)
def test_constructor_preserve_attr(self):
arr = pd.SparseArray([1, 0, 3, 0], dtype=np.int64, fill_value=0)
assert arr.dtype == SparseDtype(np.int64)
assert arr.fill_value == 0
s = pd.SparseSeries(arr, name='x')
assert s.dtype == SparseDtype(np.int64)
assert s.fill_value == 0
def test_series_density(self):
# GH2803
ts = Series(np.random.randn(10))
ts[2:-2] = nan
sts = ts.to_sparse()
density = sts.density # don't die
assert density == 4 / 10.0
def test_sparse_to_dense(self):
arr, index = _test_data1()
series = self.bseries.to_dense()
tm.assert_series_equal(series, Series(arr, name='bseries'))
series = self.iseries.to_dense()
tm.assert_series_equal(series, Series(arr, name='iseries'))
arr, index = _test_data1_zero()
series = self.zbseries.to_dense()
tm.assert_series_equal(series, Series(arr, name='zbseries'))
series = self.ziseries.to_dense()
tm.assert_series_equal(series, Series(arr))
def test_to_dense_fill_value(self):
s = pd.Series([1, np.nan, np.nan, 3, np.nan])
res = SparseSeries(s).to_dense()
tm.assert_series_equal(res, s)
res = SparseSeries(s, fill_value=0).to_dense()
tm.assert_series_equal(res, s)
s = pd.Series([1, np.nan, 0, 3, 0])
res = SparseSeries(s, fill_value=0).to_dense()
tm.assert_series_equal(res, s)
res = SparseSeries(s, fill_value=0).to_dense()
tm.assert_series_equal(res, s)
s = pd.Series([np.nan, np.nan, np.nan, np.nan, np.nan])
res = SparseSeries(s).to_dense()
tm.assert_series_equal(res, s)
s = pd.Series([np.nan, np.nan, np.nan, np.nan, np.nan])
res = SparseSeries(s, fill_value=0).to_dense()
tm.assert_series_equal(res, s)
def test_dense_to_sparse(self):
series = self.bseries.to_dense()
bseries = series.to_sparse(kind='block')
iseries = series.to_sparse(kind='integer')
tm.assert_sp_series_equal(bseries, self.bseries)
tm.assert_sp_series_equal(iseries, self.iseries, check_names=False)
assert iseries.name == self.bseries.name
assert len(series) == len(bseries)
assert len(series) == len(iseries)
assert series.shape == bseries.shape
assert series.shape == iseries.shape
# non-NaN fill value
series = self.zbseries.to_dense()
zbseries = series.to_sparse(kind='block', fill_value=0)
ziseries = series.to_sparse(kind='integer', fill_value=0)
tm.assert_sp_series_equal(zbseries, self.zbseries)
tm.assert_sp_series_equal(ziseries, self.ziseries, check_names=False)
assert ziseries.name == self.zbseries.name
assert len(series) == len(zbseries)
assert len(series) == len(ziseries)
assert series.shape == zbseries.shape
assert series.shape == ziseries.shape
def test_to_dense_preserve_name(self):
assert (self.bseries.name is not None)
result = self.bseries.to_dense()
assert result.name == self.bseries.name
def test_constructor(self):
# test setup guys
assert np.isnan(self.bseries.fill_value)
assert isinstance(self.bseries.sp_index, BlockIndex)
assert np.isnan(self.iseries.fill_value)
assert isinstance(self.iseries.sp_index, IntIndex)
assert self.zbseries.fill_value == 0
tm.assert_numpy_array_equal(self.zbseries.values.values,
self.bseries.to_dense().fillna(0).values)
# pass SparseSeries
def _check_const(sparse, name):
# use passed series name
result = SparseSeries(sparse)
tm.assert_sp_series_equal(result, sparse)
assert sparse.name == name
assert result.name == name
# use passed name
result = SparseSeries(sparse, name='x')
tm.assert_sp_series_equal(result, sparse, check_names=False)
assert result.name == 'x'
_check_const(self.bseries, 'bseries')
_check_const(self.iseries, 'iseries')
_check_const(self.zbseries, 'zbseries')
# Sparse time series works
date_index = bdate_range('1/1/2000', periods=len(self.bseries))
s5 = SparseSeries(self.bseries, index=date_index)
assert isinstance(s5, SparseSeries)
# pass Series
bseries2 = SparseSeries(self.bseries.to_dense())
tm.assert_numpy_array_equal(self.bseries.sp_values, bseries2.sp_values)
# pass dict?
# don't copy the data by default
values = np.ones(self.bseries.npoints)
sp = SparseSeries(values, sparse_index=self.bseries.sp_index)
sp.sp_values[:5] = 97
assert values[0] == 97
assert len(sp) == 20
assert sp.shape == (20, )
# but can make it copy!
sp = SparseSeries(values, sparse_index=self.bseries.sp_index,
copy=True)
sp.sp_values[:5] = 100
assert values[0] == 97
assert len(sp) == 20
assert sp.shape == (20, )
def test_constructor_scalar(self):
data = 5
sp = SparseSeries(data, np.arange(100))
sp = sp.reindex(np.arange(200))
assert (sp.loc[:99] == data).all()
assert isna(sp.loc[100:]).all()
data = np.nan
sp = SparseSeries(data, np.arange(100))
assert len(sp) == 100
assert sp.shape == (100, )
def test_constructor_ndarray(self):
pass
def test_constructor_nonnan(self):
arr = [0, 0, 0, nan, nan]
sp_series = SparseSeries(arr, fill_value=0)
tm.assert_numpy_array_equal(sp_series.values.values, np.array(arr))
assert len(sp_series) == 5
assert sp_series.shape == (5, )
def test_constructor_empty(self):
# see gh-9272
sp = SparseSeries()
assert len(sp.index) == 0
assert sp.shape == (0, )
def test_copy_astype(self):
cop = self.bseries.astype(np.float64)
assert cop is not self.bseries
assert cop.sp_index is self.bseries.sp_index
assert cop.dtype == SparseDtype(np.float64)
cop2 = self.iseries.copy()
tm.assert_sp_series_equal(cop, self.bseries)
Loading ...