"""
Data structure for 1-dimensional cross-sectional and time series data
"""
from __future__ import division
from textwrap import dedent
import warnings
import numpy as np
from pandas._libs import iNaT, index as libindex, lib, tslibs
import pandas.compat as compat
from pandas.compat import PY36, OrderedDict, StringIO, u, zip
from pandas.compat.numpy import function as nv
from pandas.util._decorators import Appender, Substitution, deprecate
from pandas.util._validators import validate_bool_kwarg
from pandas.core.dtypes.common import (
_is_unorderable_exception, ensure_platform_int, is_bool,
is_categorical_dtype, is_datetime64_dtype, is_datetimelike, is_dict_like,
is_extension_array_dtype, is_extension_type, is_hashable, is_integer,
is_iterator, is_list_like, is_scalar, is_string_like, is_timedelta64_dtype)
from pandas.core.dtypes.generic import (
ABCDataFrame, ABCDatetimeArray, ABCDatetimeIndex, ABCSeries,
ABCSparseArray, ABCSparseSeries)
from pandas.core.dtypes.missing import (
isna, na_value_for_dtype, notna, remove_na_arraylike)
from pandas.core import algorithms, base, generic, nanops, ops
from pandas.core.accessor import CachedAccessor
from pandas.core.arrays import ExtensionArray, SparseArray
from pandas.core.arrays.categorical import Categorical, CategoricalAccessor
from pandas.core.arrays.sparse import SparseAccessor
import pandas.core.common as com
from pandas.core.config import get_option
from pandas.core.index import (
Float64Index, Index, InvalidIndexError, MultiIndex, ensure_index)
from pandas.core.indexes.accessors import CombinedDatetimelikeProperties
import pandas.core.indexes.base as ibase
from pandas.core.indexes.datetimes import DatetimeIndex
from pandas.core.indexes.period import PeriodIndex
from pandas.core.indexes.timedeltas import TimedeltaIndex
from pandas.core.indexing import check_bool_indexer, maybe_convert_indices
from pandas.core.internals import SingleBlockManager
from pandas.core.internals.construction import sanitize_array
from pandas.core.strings import StringMethods
from pandas.core.tools.datetimes import to_datetime
import pandas.io.formats.format as fmt
from pandas.io.formats.terminal import get_terminal_size
import pandas.plotting._core as gfx
# pylint: disable=E1101,E1103
# pylint: disable=W0703,W0622,W0613,W0201
__all__ = ['Series']
_shared_doc_kwargs = dict(
axes='index', klass='Series', axes_single_arg="{0 or 'index'}",
axis="""axis : {0 or 'index'}
Parameter needed for compatibility with DataFrame.""",
inplace="""inplace : boolean, default False
If True, performs operation inplace and returns None.""",
unique='np.ndarray', duplicated='Series',
optional_by='', optional_mapper='', optional_labels='', optional_axis='',
versionadded_to_excel='\n .. versionadded:: 0.20.0\n')
# see gh-16971
def remove_na(arr):
"""
Remove null values from array like structure.
.. deprecated:: 0.21.0
Use s[s.notnull()] instead.
"""
warnings.warn("remove_na is deprecated and is a private "
"function. Do not use.", FutureWarning, stacklevel=2)
return remove_na_arraylike(arr)
def _coerce_method(converter):
"""
Install the scalar coercion methods.
"""
def wrapper(self):
if len(self) == 1:
return converter(self.iloc[0])
raise TypeError("cannot convert the series to "
"{0}".format(str(converter)))
wrapper.__name__ = "__{name}__".format(name=converter.__name__)
return wrapper
# ----------------------------------------------------------------------
# Series class
class Series(base.IndexOpsMixin, generic.NDFrame):
"""
One-dimensional ndarray with axis labels (including time series).
Labels need not be unique but must be a hashable type. The object
supports both integer- and label-based indexing and provides a host of
methods for performing operations involving the index. Statistical
methods from ndarray have been overridden to automatically exclude
missing data (currently represented as NaN).
Operations between Series (+, -, /, *, **) align values based on their
associated index values-- they need not be the same length. The result
index will be the sorted union of the two indexes.
Parameters
----------
data : array-like, Iterable, dict, or scalar value
Contains data stored in Series.
.. versionchanged :: 0.23.0
If data is a dict, argument order is maintained for Python 3.6
and later.
index : array-like or Index (1d)
Values must be hashable and have the same length as `data`.
Non-unique index values are allowed. Will default to
RangeIndex (0, 1, 2, ..., n) if not provided. If both a dict and index
sequence are used, the index will override the keys found in the
dict.
dtype : str, numpy.dtype, or ExtensionDtype, optional
dtype for the output Series. If not specified, this will be
inferred from `data`.
See the :ref:`user guide <basics.dtypes>` for more usages.
copy : bool, default False
Copy input data.
"""
_metadata = ['name']
_accessors = {'dt', 'cat', 'str', 'sparse'}
# tolist is not actually deprecated, just suppressed in the __dir__
_deprecations = generic.NDFrame._deprecations | frozenset(
['asobject', 'reshape', 'get_value', 'set_value',
'from_csv', 'valid', 'tolist'])
# Override cache_readonly bc Series is mutable
hasnans = property(base.IndexOpsMixin.hasnans.func,
doc=base.IndexOpsMixin.hasnans.__doc__)
# ----------------------------------------------------------------------
# Constructors
def __init__(self, data=None, index=None, dtype=None, name=None,
copy=False, fastpath=False):
# we are called internally, so short-circuit
if fastpath:
# data is an ndarray, index is defined
if not isinstance(data, SingleBlockManager):
data = SingleBlockManager(data, index, fastpath=True)
if copy:
data = data.copy()
if index is None:
index = data.index
else:
if index is not None:
index = ensure_index(index)
if data is None:
data = {}
if dtype is not None:
dtype = self._validate_dtype(dtype)
if isinstance(data, MultiIndex):
raise NotImplementedError("initializing a Series from a "
"MultiIndex is not supported")
elif isinstance(data, Index):
if name is None:
name = data.name
if dtype is not None:
# astype copies
data = data.astype(dtype)
else:
# need to copy to avoid aliasing issues
data = data._values.copy()
if (isinstance(data, ABCDatetimeIndex) and
data.tz is not None):
# GH#24096 need copy to be deep for datetime64tz case
# TODO: See if we can avoid these copies
data = data._values.copy(deep=True)
copy = False
elif isinstance(data, np.ndarray):
pass
elif isinstance(data, (ABCSeries, ABCSparseSeries)):
if name is None:
name = data.name
if index is None:
index = data.index
else:
data = data.reindex(index, copy=copy)
data = data._data
elif isinstance(data, dict):
data, index = self._init_dict(data, index, dtype)
dtype = None
copy = False
elif isinstance(data, SingleBlockManager):
if index is None:
index = data.index
elif not data.index.equals(index) or copy:
# GH#19275 SingleBlockManager input should only be called
# internally
raise AssertionError('Cannot pass both SingleBlockManager '
'`data` argument and a different '
'`index` argument. `copy` must '
'be False.')
elif is_extension_array_dtype(data):
pass
elif isinstance(data, (set, frozenset)):
raise TypeError("{0!r} type is unordered"
"".format(data.__class__.__name__))
# If data is Iterable but not list-like, consume into list.
elif (isinstance(data, compat.Iterable)
and not isinstance(data, compat.Sized)):
data = list(data)
else:
# handle sparse passed here (and force conversion)
if isinstance(data, ABCSparseArray):
data = data.to_dense()
if index is None:
if not is_list_like(data):
data = [data]
index = ibase.default_index(len(data))
elif is_list_like(data):
# a scalar numpy array is list-like but doesn't
# have a proper length
try:
if len(index) != len(data):
raise ValueError(
'Length of passed values is {val}, '
'index implies {ind}'
.format(val=len(data), ind=len(index)))
except TypeError:
pass
# create/copy the manager
if isinstance(data, SingleBlockManager):
if dtype is not None:
data = data.astype(dtype=dtype, errors='ignore',
copy=copy)
elif copy:
data = data.copy()
else:
data = sanitize_array(data, index, dtype, copy,
raise_cast_failure=True)
data = SingleBlockManager(data, index, fastpath=True)
generic.NDFrame.__init__(self, data, fastpath=True)
self.name = name
self._set_axis(0, index, fastpath=True)
def _init_dict(self, data, index=None, dtype=None):
"""
Derive the "_data" and "index" attributes of a new Series from a
dictionary input.
Parameters
----------
data : dict or dict-like
Data used to populate the new Series
index : Index or index-like, default None
index for the new Series: if None, use dict keys
dtype : dtype, default None
dtype for the new Series: if None, infer from data
Returns
-------
_data : BlockManager for the new Series
index : index for the new Series
"""
# Looking for NaN in dict doesn't work ({np.nan : 1}[float('nan')]
# raises KeyError), so we iterate the entire dict, and align
if data:
keys, values = zip(*compat.iteritems(data))
values = list(values)
elif index is not None:
# fastpath for Series(data=None). Just use broadcasting a scalar
# instead of reindexing.
values = na_value_for_dtype(dtype)
keys = index
else:
keys, values = [], []
# Input is now list-like, so rely on "standard" construction:
s = Series(values, index=keys, dtype=dtype)
# Now we just make sure the order is respected, if any
if data and index is not None:
s = s.reindex(index, copy=False)
elif not PY36 and not isinstance(data, OrderedDict) and data:
# Need the `and data` to avoid sorting Series(None, index=[...])
# since that isn't really dict-like
try:
s = s.sort_index()
except TypeError:
pass
return s._data, s.index
@classmethod
def from_array(cls, arr, index=None, name=None, dtype=None, copy=False,
fastpath=False):
"""
Construct Series from array.
.. deprecated :: 0.23.0
Use pd.Series(..) constructor instead.
"""
warnings.warn("'from_array' is deprecated and will be removed in a "
"future version. Please use the pd.Series(..) "
"constructor instead.", FutureWarning, stacklevel=2)
if isinstance(arr, ABCSparseArray):
from pandas.core.sparse.series import SparseSeries
cls = SparseSeries
return cls(arr, index=index, name=name, dtype=dtype,
copy=copy, fastpath=fastpath)
# ----------------------------------------------------------------------
@property
def _constructor(self):
return Series
@property
def _constructor_expanddim(self):
from pandas.core.frame import DataFrame
return DataFrame
Loading ...