# pylint: disable-msg=E1101,W0612
import itertools
import numpy as np
import pytest
from pandas.errors import PerformanceWarning
import pandas as pd
import pandas.util.testing as tm
class TestSparseArrayConcat(object):
@pytest.mark.parametrize('kind', ['integer', 'block'])
def test_basic(self, kind):
a = pd.SparseArray([1, 0, 0, 2], kind=kind)
b = pd.SparseArray([1, 0, 2, 2], kind=kind)
result = pd.SparseArray._concat_same_type([a, b])
# Can't make any assertions about the sparse index itself
# since we aren't don't merge sparse blocs across arrays
# in to_concat
expected = np.array([1, 2, 1, 2, 2], dtype='int64')
tm.assert_numpy_array_equal(result.sp_values, expected)
assert result.kind == kind
@pytest.mark.parametrize('kind', ['integer', 'block'])
def test_uses_first_kind(self, kind):
other = 'integer' if kind == 'block' else 'block'
a = pd.SparseArray([1, 0, 0, 2], kind=kind)
b = pd.SparseArray([1, 0, 2, 2], kind=other)
result = pd.SparseArray._concat_same_type([a, b])
expected = np.array([1, 2, 1, 2, 2], dtype='int64')
tm.assert_numpy_array_equal(result.sp_values, expected)
assert result.kind == kind
class TestSparseSeriesConcat(object):
@pytest.mark.parametrize('kind', [
'integer',
'block',
])
def test_concat(self, kind):
val1 = np.array([1, 2, np.nan, np.nan, 0, np.nan])
val2 = np.array([3, np.nan, 4, 0, 0])
sparse1 = pd.SparseSeries(val1, name='x', kind=kind)
sparse2 = pd.SparseSeries(val2, name='y', kind=kind)
res = pd.concat([sparse1, sparse2])
exp = pd.concat([pd.Series(val1), pd.Series(val2)])
exp = pd.SparseSeries(exp, kind=kind)
tm.assert_sp_series_equal(res, exp, consolidate_block_indices=True)
sparse1 = pd.SparseSeries(val1, fill_value=0, name='x', kind=kind)
sparse2 = pd.SparseSeries(val2, fill_value=0, name='y', kind=kind)
res = pd.concat([sparse1, sparse2])
exp = pd.concat([pd.Series(val1), pd.Series(val2)])
exp = pd.SparseSeries(exp, fill_value=0, kind=kind)
tm.assert_sp_series_equal(res, exp, consolidate_block_indices=True)
def test_concat_axis1(self):
val1 = np.array([1, 2, np.nan, np.nan, 0, np.nan])
val2 = np.array([3, np.nan, 4, 0, 0])
sparse1 = pd.SparseSeries(val1, name='x')
sparse2 = pd.SparseSeries(val2, name='y')
res = pd.concat([sparse1, sparse2], axis=1)
exp = pd.concat([pd.Series(val1, name='x'),
pd.Series(val2, name='y')], axis=1)
exp = pd.SparseDataFrame(exp)
tm.assert_sp_frame_equal(res, exp, consolidate_block_indices=True)
def test_concat_different_fill(self):
val1 = np.array([1, 2, np.nan, np.nan, 0, np.nan])
val2 = np.array([3, np.nan, 4, 0, 0])
for kind in ['integer', 'block']:
sparse1 = pd.SparseSeries(val1, name='x', kind=kind)
sparse2 = pd.SparseSeries(val2, name='y', kind=kind, fill_value=0)
with tm.assert_produces_warning(PerformanceWarning):
res = pd.concat([sparse1, sparse2])
exp = pd.concat([pd.Series(val1), pd.Series(val2)])
exp = pd.SparseSeries(exp, kind=kind)
tm.assert_sp_series_equal(res, exp)
with tm.assert_produces_warning(PerformanceWarning):
res = pd.concat([sparse2, sparse1])
exp = pd.concat([pd.Series(val2), pd.Series(val1)])
exp = pd.SparseSeries(exp, kind=kind, fill_value=0)
tm.assert_sp_series_equal(res, exp)
def test_concat_axis1_different_fill(self):
val1 = np.array([1, 2, np.nan, np.nan, 0, np.nan])
val2 = np.array([3, np.nan, 4, 0, 0])
sparse1 = pd.SparseSeries(val1, name='x')
sparse2 = pd.SparseSeries(val2, name='y', fill_value=0)
res = pd.concat([sparse1, sparse2], axis=1)
exp = pd.concat([pd.Series(val1, name='x'),
pd.Series(val2, name='y')], axis=1)
assert isinstance(res, pd.SparseDataFrame)
tm.assert_frame_equal(res.to_dense(), exp)
def test_concat_different_kind(self):
val1 = np.array([1, 2, np.nan, np.nan, 0, np.nan])
val2 = np.array([3, np.nan, 4, 0, 0])
sparse1 = pd.SparseSeries(val1, name='x', kind='integer')
sparse2 = pd.SparseSeries(val2, name='y', kind='block')
res = pd.concat([sparse1, sparse2])
exp = pd.concat([pd.Series(val1), pd.Series(val2)])
exp = pd.SparseSeries(exp, kind=sparse1.kind)
tm.assert_sp_series_equal(res, exp)
res = pd.concat([sparse2, sparse1])
exp = pd.concat([pd.Series(val2), pd.Series(val1)])
exp = pd.SparseSeries(exp, kind=sparse2.kind)
tm.assert_sp_series_equal(res, exp, consolidate_block_indices=True)
@pytest.mark.parametrize('kind', [
'integer',
'block',
])
def test_concat_sparse_dense(self, kind):
# use first input's fill_value
val1 = np.array([1, 2, np.nan, np.nan, 0, np.nan])
val2 = np.array([3, np.nan, 4, 0, 0])
sparse = pd.SparseSeries(val1, name='x', kind=kind)
dense = pd.Series(val2, name='y')
res = pd.concat([sparse, dense])
exp = pd.SparseSeries(pd.concat([pd.Series(val1), dense]), kind=kind)
tm.assert_sp_series_equal(res, exp)
res = pd.concat([dense, sparse, dense])
exp = pd.concat([dense, pd.Series(val1), dense])
# XXX: changed from SparseSeries to Series[sparse]
exp = pd.Series(
pd.SparseArray(exp, kind=kind),
index=exp.index,
name=exp.name,
)
tm.assert_series_equal(res, exp)
sparse = pd.SparseSeries(val1, name='x', kind=kind, fill_value=0)
dense = pd.Series(val2, name='y')
res = pd.concat([sparse, dense])
# XXX: changed from SparseSeries to Series[sparse]
exp = pd.concat([pd.Series(val1), dense])
exp = pd.Series(
pd.SparseArray(exp, kind=kind, fill_value=0),
index=exp.index,
name=exp.name,
)
tm.assert_series_equal(res, exp)
res = pd.concat([dense, sparse, dense])
exp = pd.concat([dense, pd.Series(val1), dense])
# XXX: changed from SparseSeries to Series[sparse]
exp = pd.Series(
pd.SparseArray(exp, kind=kind, fill_value=0),
index=exp.index,
name=exp.name,
)
tm.assert_series_equal(res, exp)
class TestSparseDataFrameConcat(object):
def setup_method(self, method):
self.dense1 = pd.DataFrame({'A': [0., 1., 2., np.nan],
'B': [0., 0., 0., 0.],
'C': [np.nan, np.nan, np.nan, np.nan],
'D': [1., 2., 3., 4.]})
self.dense2 = pd.DataFrame({'A': [5., 6., 7., 8.],
'B': [np.nan, 0., 7., 8.],
'C': [5., 6., np.nan, np.nan],
'D': [np.nan, np.nan, np.nan, np.nan]})
self.dense3 = pd.DataFrame({'E': [5., 6., 7., 8.],
'F': [np.nan, 0., 7., 8.],
'G': [5., 6., np.nan, np.nan],
'H': [np.nan, np.nan, np.nan, np.nan]})
def test_concat(self):
# fill_value = np.nan
sparse = self.dense1.to_sparse()
sparse2 = self.dense2.to_sparse()
res = pd.concat([sparse, sparse])
exp = pd.concat([self.dense1, self.dense1]).to_sparse()
tm.assert_sp_frame_equal(res, exp, consolidate_block_indices=True)
res = pd.concat([sparse2, sparse2])
exp = pd.concat([self.dense2, self.dense2]).to_sparse()
tm.assert_sp_frame_equal(res, exp, consolidate_block_indices=True)
res = pd.concat([sparse, sparse2])
exp = pd.concat([self.dense1, self.dense2]).to_sparse()
tm.assert_sp_frame_equal(res, exp, consolidate_block_indices=True)
res = pd.concat([sparse2, sparse])
exp = pd.concat([self.dense2, self.dense1]).to_sparse()
tm.assert_sp_frame_equal(res, exp, consolidate_block_indices=True)
# fill_value = 0
sparse = self.dense1.to_sparse(fill_value=0)
sparse2 = self.dense2.to_sparse(fill_value=0)
res = pd.concat([sparse, sparse])
exp = pd.concat([self.dense1, self.dense1]).to_sparse(fill_value=0)
exp._default_fill_value = np.nan
tm.assert_sp_frame_equal(res, exp, consolidate_block_indices=True)
res = pd.concat([sparse2, sparse2])
exp = pd.concat([self.dense2, self.dense2]).to_sparse(fill_value=0)
exp._default_fill_value = np.nan
tm.assert_sp_frame_equal(res, exp, consolidate_block_indices=True)
res = pd.concat([sparse, sparse2])
exp = pd.concat([self.dense1, self.dense2]).to_sparse(fill_value=0)
exp._default_fill_value = np.nan
tm.assert_sp_frame_equal(res, exp, consolidate_block_indices=True)
res = pd.concat([sparse2, sparse])
exp = pd.concat([self.dense2, self.dense1]).to_sparse(fill_value=0)
exp._default_fill_value = np.nan
tm.assert_sp_frame_equal(res, exp, consolidate_block_indices=True)
def test_concat_different_fill_value(self):
# 1st fill_value will be used
sparse = self.dense1.to_sparse()
sparse2 = self.dense2.to_sparse(fill_value=0)
with tm.assert_produces_warning(PerformanceWarning):
res = pd.concat([sparse, sparse2])
exp = pd.concat([self.dense1, self.dense2]).to_sparse()
tm.assert_sp_frame_equal(res, exp, consolidate_block_indices=True)
with tm.assert_produces_warning(PerformanceWarning):
res = pd.concat([sparse2, sparse])
exp = pd.concat([self.dense2, self.dense1]).to_sparse(fill_value=0)
exp._default_fill_value = np.nan
tm.assert_sp_frame_equal(res, exp, consolidate_block_indices=True)
def test_concat_different_columns_sort_warns(self):
sparse = self.dense1.to_sparse()
sparse3 = self.dense3.to_sparse()
with tm.assert_produces_warning(FutureWarning):
res = pd.concat([sparse, sparse3])
with tm.assert_produces_warning(FutureWarning):
exp = pd.concat([self.dense1, self.dense3])
exp = exp.to_sparse()
tm.assert_sp_frame_equal(res, exp, check_kind=False)
def test_concat_different_columns(self):
# fill_value = np.nan
sparse = self.dense1.to_sparse()
sparse3 = self.dense3.to_sparse()
res = pd.concat([sparse, sparse3], sort=True)
exp = pd.concat([self.dense1, self.dense3], sort=True).to_sparse()
tm.assert_sp_frame_equal(res, exp, check_kind=False)
res = pd.concat([sparse3, sparse], sort=True)
exp = pd.concat([self.dense3, self.dense1], sort=True).to_sparse()
exp._default_fill_value = np.nan
tm.assert_sp_frame_equal(res, exp, check_kind=False)
def test_concat_bug(self):
from pandas.core.sparse.api import SparseDtype
x = pd.SparseDataFrame({"A": pd.SparseArray([np.nan, np.nan],
fill_value=0)})
y = pd.SparseDataFrame({"B": []})
res = pd.concat([x, y], sort=False)[['A']]
exp = pd.DataFrame({"A": pd.SparseArray([np.nan, np.nan],
dtype=SparseDtype(float, 0))})
tm.assert_frame_equal(res, exp)
def test_concat_different_columns_buggy(self):
sparse = self.dense1.to_sparse(fill_value=0)
sparse3 = self.dense3.to_sparse(fill_value=0)
res = pd.concat([sparse, sparse3], sort=True)
exp = (pd.concat([self.dense1, self.dense3], sort=True)
.to_sparse(fill_value=0))
exp._default_fill_value = np.nan
tm.assert_sp_frame_equal(res, exp, check_kind=False,
consolidate_block_indices=True)
res = pd.concat([sparse3, sparse], sort=True)
exp = (pd.concat([self.dense3, self.dense1], sort=True)
.to_sparse(fill_value=0))
exp._default_fill_value = np.nan
tm.assert_sp_frame_equal(res, exp, check_kind=False,
consolidate_block_indices=True)
# different fill values
sparse = self.dense1.to_sparse()
sparse3 = self.dense3.to_sparse(fill_value=0)
# each columns keeps its fill_value, thus compare in dense
res = pd.concat([sparse, sparse3], sort=True)
exp = pd.concat([self.dense1, self.dense3], sort=True)
assert isinstance(res, pd.SparseDataFrame)
tm.assert_frame_equal(res.to_dense(), exp)
res = pd.concat([sparse3, sparse], sort=True)
exp = pd.concat([self.dense3, self.dense1], sort=True)
assert isinstance(res, pd.SparseDataFrame)
tm.assert_frame_equal(res.to_dense(), exp)
def test_concat_series(self):
# fill_value = np.nan
sparse = self.dense1.to_sparse()
sparse2 = self.dense2.to_sparse()
for col in ['A', 'D']:
res = pd.concat([sparse, sparse2[col]])
exp = pd.concat([self.dense1, self.dense2[col]]).to_sparse()
tm.assert_sp_frame_equal(res, exp, check_kind=False)
res = pd.concat([sparse2[col], sparse])
exp = pd.concat([self.dense2[col], self.dense1]).to_sparse()
tm.assert_sp_frame_equal(res, exp, check_kind=False)
# fill_value = 0
sparse = self.dense1.to_sparse(fill_value=0)
sparse2 = self.dense2.to_sparse(fill_value=0)
Loading ...