Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

agriconnect / pandas   python

Repository URL to install this package:

Version: 0.24.2 

/ tests / sparse / series / test_series.py

# pylint: disable=E1101,W0612

from datetime import datetime
import operator

import numpy as np
from numpy import nan
import pytest

from pandas._libs.sparse import BlockIndex, IntIndex
from pandas.compat import PY36, range
from pandas.errors import PerformanceWarning
import pandas.util._test_decorators as td

import pandas as pd
from pandas import (
    DataFrame, Series, SparseDtype, SparseSeries, bdate_range, compat, isna)
from pandas.core.reshape.util import cartesian_product
import pandas.core.sparse.frame as spf
from pandas.tests.series.test_api import SharedWithSparse
import pandas.util.testing as tm

from pandas.tseries.offsets import BDay


def _test_data1():
    # nan-based: the floats 0..19 with NaN written over the first two
    # entries, positions 5-9 and the last three entries, returned together
    # with a matching 0..19 integer index
    index = np.arange(20)
    arr = index.astype(float)
    for missing in (slice(None, 2), slice(5, 10), slice(-3, None)):
        arr[missing] = nan

    return arr, index


def _test_data2():
    # nan-based: the floats 0..14 with NaN written over positions 7-11 and
    # the final entry, returned together with a matching 0..14 integer index
    index = np.arange(15)
    arr = index.astype(float)
    missing = list(range(7, 12)) + [14]
    arr[missing] = nan
    return arr, index


def _test_data1_zero():
    # zero-based: the _test_data1 values with every NaN replaced by 0,
    # alongside the unchanged index
    values, index = _test_data1()
    return np.where(np.isnan(values), 0, values), index


def _test_data2_zero():
    # zero-based: the _test_data2 values with every NaN replaced by 0,
    # alongside the unchanged index
    values, index = _test_data2()
    return np.where(np.isnan(values), 0, values), index


class TestSparseSeries(SharedWithSparse):

    # Series type under test; presumably read by the tests inherited from
    # SharedWithSparse -- confirm against pandas.tests.series.test_api
    series_klass = SparseSeries
    # SharedWithSparse tests use a generic, series_klass-agnostic assertion;
    # here it is bound to the sparse-aware comparison helper
    _assert_series_equal = staticmethod(tm.assert_sp_series_equal)

    def setup_method(self, method):
        # Build the sparse fixtures shared by the tests: every dataset is
        # wrapped once per sparse-index kind ('block' and 'integer'), with
        # the default fill value for the nan-based data and fill_value=0
        # for the zero-based data.  ``method`` is not used here.
        nan_vals1, idx1 = _test_data1()
        nan_vals2, idx2 = _test_data2()
        zero_vals1, zidx1 = _test_data1_zero()
        zero_vals2, zidx2 = _test_data2_zero()

        # first dataset, default fill value
        self.bseries = SparseSeries(nan_vals1, index=idx1, kind='block',
                                    name='bseries')
        self.iseries = SparseSeries(nan_vals1, index=idx1, kind='integer',
                                    name='iseries')
        # ``ts`` is the very same object as ``bseries``, not a copy
        self.ts = self.bseries

        # same values again, labelled by business days
        dates = bdate_range('1/1/2011', periods=len(idx1))
        self.btseries = SparseSeries(nan_vals1, index=dates, kind='block')

        # second dataset, default fill value
        self.bseries2 = SparseSeries(nan_vals2, index=idx2, kind='block')
        self.iseries2 = SparseSeries(nan_vals2, index=idx2, kind='integer')

        # first dataset, zero fill value
        self.zbseries = SparseSeries(zero_vals1, index=zidx1, kind='block',
                                     fill_value=0, name='zbseries')
        self.ziseries = SparseSeries(zero_vals1, index=zidx1, kind='integer',
                                     fill_value=0)

        # second dataset, zero fill value
        self.zbseries2 = SparseSeries(zero_vals2, index=zidx2, kind='block',
                                      fill_value=0)
        self.ziseries2 = SparseSeries(zero_vals2, index=zidx2,
                                      kind='integer', fill_value=0)

    def test_constructor_dict_input(self):
        # gh-16905: building from a dict must match building from the dense
        # Series made out of that same dict
        data = {1: 1.}
        dense = pd.Series(data)

        # with an explicit index passed to the constructor
        labels = [0, 1, 2]
        from_series = SparseSeries(dense, index=labels)
        from_dict = SparseSeries(data, index=labels)
        tm.assert_sp_series_equal(from_dict, from_series)

        # without any index argument
        from_series = SparseSeries(dense)
        from_dict = SparseSeries(data)
        tm.assert_sp_series_equal(from_dict, from_series)

    def test_constructor_dict_order(self):
        # GH19018
        # the expected layout depends on the interpreter: dict insertion
        # order when PY36 is true, sorted order otherwise
        result = SparseSeries({'b': 1, 'a': 0, 'c': 2})

        values, labels = ([1, 0, 2], 'bac') if PY36 else ([0, 1, 2], 'abc')
        expected = SparseSeries(values, index=list(labels))

        tm.assert_sp_series_equal(result, expected)

    def test_constructor_dtype(self):
        # Each row: input data, constructor keywords, expected dtype and
        # whether the resulting fill_value must be NaN (otherwise it is 0).
        float_data = [np.nan, 1, 2, np.nan]
        int_data = [0, 1, 2, 4]
        cases = [
            (float_data, {}, SparseDtype(np.float64), True),
            (float_data, {'fill_value': 0},
             SparseDtype(np.float64, 0), False),
            (int_data, {'dtype': np.int64, 'fill_value': np.nan},
             SparseDtype(np.int64, np.nan), True),
            (int_data, {'dtype': np.int64},
             SparseDtype(np.int64, 0), False),
            (int_data, {'fill_value': 0, 'dtype': np.int64},
             SparseDtype(np.int64, 0), False),
        ]

        for data, kwargs, expected_dtype, nan_fill in cases:
            sp = SparseSeries(data, **kwargs)
            assert sp.dtype == expected_dtype
            if nan_fill:
                assert np.isnan(sp.fill_value)
            else:
                assert sp.fill_value == 0

    def test_iteration_and_str(self):
        # smoke test: walking the series and stringifying it must not raise
        for _ in self.bseries:
            pass
        str(self.bseries)

    def test_construct_DataFrame_with_sp_series(self):
        # it works!
        frame = DataFrame({'col': self.bseries})

        # printing & access -- results are discarded, these only need to run
        frame.iloc[:1]
        frame['col']
        frame.dtypes
        str(frame)

        # blocking: the column must be reported as a sparse float block
        tm.assert_series_equal(Series({'col': 'float64:sparse'}),
                               frame.ftypes)

    def test_constructor_preserve_attr(self):
        # dtype and fill_value of a SparseArray must carry over unchanged
        # when the array is wrapped in a SparseSeries
        int_sparse = SparseDtype(np.int64)

        sp_arr = pd.SparseArray([1, 0, 3, 0], dtype=np.int64, fill_value=0)
        assert sp_arr.dtype == int_sparse
        assert sp_arr.fill_value == 0

        wrapped = pd.SparseSeries(sp_arr, name='x')
        assert wrapped.dtype == int_sparse
        assert wrapped.fill_value == 0

    def test_series_density(self):
        # GH2803
        dense = Series(np.random.randn(10))
        # blank out the middle, leaving two values at each end
        dense[2:-2] = nan
        sparse = dense.to_sparse()

        # reading .density must not die, and must report 4 of 10 points
        expected = 4 / 10.0
        assert sparse.density == expected

    def test_sparse_to_dense(self):
        # to_dense() must give back the raw data each fixture was built
        # from, including the fixture's name (or no name for ziseries)
        nan_vals, _ = _test_data1()
        zero_vals, _ = _test_data1_zero()

        checks = [
            (self.bseries, Series(nan_vals, name='bseries')),
            (self.iseries, Series(nan_vals, name='iseries')),
            (self.zbseries, Series(zero_vals, name='zbseries')),
            (self.ziseries, Series(zero_vals)),
        ]
        for sparse, expected in checks:
            tm.assert_series_equal(sparse.to_dense(), expected)

    def test_to_dense_fill_value(self):
        # A dense -> sparse -> dense round trip must reproduce the dense
        # input no matter which fill value the sparse representation uses.
        #
        # Every input is checked with both the default fill value and
        # fill_value=0.  The mixed NaN/zero input used to run the
        # fill_value=0 check twice and never exercised the default fill
        # value; the loop below covers both variants for all inputs.
        inputs = [
            # NaN gaps only
            [1, np.nan, np.nan, 3, np.nan],
            # NaN and literal zeros mixed
            [1, np.nan, 0, 3, 0],
            # nothing but NaN
            [np.nan, np.nan, np.nan, np.nan, np.nan],
        ]

        for values in inputs:
            s = pd.Series(values)

            # default fill value
            res = SparseSeries(s).to_dense()
            tm.assert_series_equal(res, s)

            # explicit zero fill value
            res = SparseSeries(s, fill_value=0).to_dense()
            tm.assert_series_equal(res, s)

    def test_dense_to_sparse(self):
        # Densify a block fixture, convert it back with both index kinds and
        # compare against the block/integer fixtures built in setup_method.
        def _round_trip(block_ref, int_ref, **sparse_kwargs):
            dense = block_ref.to_dense()
            as_block = dense.to_sparse(kind='block', **sparse_kwargs)
            as_int = dense.to_sparse(kind='integer', **sparse_kwargs)

            tm.assert_sp_series_equal(as_block, block_ref)
            # the integer fixture is named differently (or not at all), so
            # names are skipped here and checked against the source instead
            tm.assert_sp_series_equal(as_int, int_ref, check_names=False)
            assert as_int.name == block_ref.name

            for converted in (as_block, as_int):
                assert len(dense) == len(converted)
                assert dense.shape == converted.shape

        _round_trip(self.bseries, self.iseries)

        # non-NaN fill value
        _round_trip(self.zbseries, self.ziseries, fill_value=0)

    def test_to_dense_preserve_name(self):
        # the fixture must carry a name for this check to mean anything
        expected_name = self.bseries.name
        assert expected_name is not None

        dense = self.bseries.to_dense()
        assert dense.name == expected_name

    def test_constructor(self):
        # test setup guys: fill value and sparse-index type of the fixtures
        fixtures = ((self.bseries, BlockIndex), (self.iseries, IntIndex))
        for sparse, index_type in fixtures:
            assert np.isnan(sparse.fill_value)
            assert isinstance(sparse.sp_index, index_type)

        assert self.zbseries.fill_value == 0
        zero_filled = self.bseries.to_dense().fillna(0).values
        tm.assert_numpy_array_equal(self.zbseries.values.values,
                                    zero_filled)

        # pass SparseSeries
        named_fixtures = ((self.bseries, 'bseries'),
                          (self.iseries, 'iseries'),
                          (self.zbseries, 'zbseries'))
        for sparse, name in named_fixtures:
            # use passed series name
            rebuilt = SparseSeries(sparse)
            tm.assert_sp_series_equal(rebuilt, sparse)
            assert sparse.name == name
            assert rebuilt.name == name

            # use passed name
            renamed = SparseSeries(sparse, name='x')
            tm.assert_sp_series_equal(renamed, sparse, check_names=False)
            assert renamed.name == 'x'

        # Sparse time series works
        dates = bdate_range('1/1/2000', periods=len(self.bseries))
        dated = SparseSeries(self.bseries, index=dates)
        assert isinstance(dated, SparseSeries)

        # pass Series
        from_dense = SparseSeries(self.bseries.to_dense())
        tm.assert_numpy_array_equal(self.bseries.sp_values,
                                    from_dense.sp_values)

        # pass dict?

        # don't copy the data by default: writing through sp_values must be
        # visible in the array that was handed to the constructor
        raw = np.ones(self.bseries.npoints)
        shared = SparseSeries(raw, sparse_index=self.bseries.sp_index)
        shared.sp_values[:5] = 97
        assert raw[0] == 97

        assert len(shared) == 20
        assert shared.shape == (20, )

        # but can make it copy! the source array keeps the earlier 97
        copied = SparseSeries(raw, sparse_index=self.bseries.sp_index,
                              copy=True)
        copied.sp_values[:5] = 100
        assert raw[0] == 97

        assert len(copied) == 20
        assert copied.shape == (20, )

    def test_constructor_scalar(self):
        # a scalar is spread over the given index; labels that only appear
        # after reindexing must come out as missing
        scalar = 5
        widened = SparseSeries(scalar, np.arange(100)).reindex(np.arange(200))
        assert (widened.loc[:99] == scalar).all()
        assert isna(widened.loc[100:]).all()

        # a NaN scalar still yields a series as long as the index
        all_nan = SparseSeries(np.nan, np.arange(100))
        assert len(all_nan) == 100
        assert all_nan.shape == (100, )

    def test_constructor_ndarray(self):
        """Placeholder: this test has no assertions yet."""

    def test_constructor_nonnan(self):
        # with fill_value=0 the NaN entries of the input must be kept as-is
        raw = [0, 0, 0, nan, nan]
        sparse = SparseSeries(raw, fill_value=0)

        expected = np.array(raw)
        tm.assert_numpy_array_equal(sparse.values.values, expected)

        assert len(sparse) == 5
        assert sparse.shape == (5, )

    def test_constructor_empty(self):
        # see gh-9272: constructing without arguments gives a series with
        # an empty index and a zero-length shape
        empty = SparseSeries()

        assert len(empty.index) == 0
        assert empty.shape == (0, )

    def test_copy_astype(self):
        cop = self.bseries.astype(np.float64)
        assert cop is not self.bseries
        assert cop.sp_index is self.bseries.sp_index
        assert cop.dtype == SparseDtype(np.float64)

        cop2 = self.iseries.copy()

        tm.assert_sp_series_equal(cop, self.bseries)
Loading ...