# Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages
#
# agriconnect / pandas   python
#
# Repository URL to install this package:
#
# / core / series.py

"""
Data structure for 1-dimensional cross-sectional and time series data
"""
from __future__ import division

from textwrap import dedent
import warnings

import numpy as np

from pandas._libs import iNaT, index as libindex, lib, tslibs
import pandas.compat as compat
from pandas.compat import PY36, OrderedDict, StringIO, u, zip
from pandas.compat.numpy import function as nv
from pandas.util._decorators import Appender, Substitution, deprecate
from pandas.util._validators import validate_bool_kwarg

from pandas.core.dtypes.common import (
    _is_unorderable_exception, ensure_platform_int, is_bool,
    is_categorical_dtype, is_datetime64_dtype, is_datetimelike, is_dict_like,
    is_extension_array_dtype, is_extension_type, is_hashable, is_integer,
    is_iterator, is_list_like, is_scalar, is_string_like, is_timedelta64_dtype)
from pandas.core.dtypes.generic import (
    ABCDataFrame, ABCDatetimeArray, ABCDatetimeIndex, ABCSeries,
    ABCSparseArray, ABCSparseSeries)
from pandas.core.dtypes.missing import (
    isna, na_value_for_dtype, notna, remove_na_arraylike)

from pandas.core import algorithms, base, generic, nanops, ops
from pandas.core.accessor import CachedAccessor
from pandas.core.arrays import ExtensionArray, SparseArray
from pandas.core.arrays.categorical import Categorical, CategoricalAccessor
from pandas.core.arrays.sparse import SparseAccessor
import pandas.core.common as com
from pandas.core.config import get_option
from pandas.core.index import (
    Float64Index, Index, InvalidIndexError, MultiIndex, ensure_index)
from pandas.core.indexes.accessors import CombinedDatetimelikeProperties
import pandas.core.indexes.base as ibase
from pandas.core.indexes.datetimes import DatetimeIndex
from pandas.core.indexes.period import PeriodIndex
from pandas.core.indexes.timedeltas import TimedeltaIndex
from pandas.core.indexing import check_bool_indexer, maybe_convert_indices
from pandas.core.internals import SingleBlockManager
from pandas.core.internals.construction import sanitize_array
from pandas.core.strings import StringMethods
from pandas.core.tools.datetimes import to_datetime

import pandas.io.formats.format as fmt
from pandas.io.formats.terminal import get_terminal_size
import pandas.plotting._core as gfx

# pylint: disable=E1101,E1103
# pylint: disable=W0703,W0622,W0613,W0201


__all__ = ['Series']

# Keyword values for docstring templating.
# NOTE(review): presumably consumed through the Appender/Substitution
# decorators imported above -- the call sites are outside this view.
_shared_doc_kwargs = {
    'axes': 'index',
    'klass': 'Series',
    'axes_single_arg': "{0 or 'index'}",
    'axis': """axis : {0 or 'index'}
        Parameter needed for compatibility with DataFrame.""",
    'inplace': """inplace : boolean, default False
        If True, performs operation inplace and returns None.""",
    'unique': 'np.ndarray',
    'duplicated': 'Series',
    'optional_by': '',
    'optional_mapper': '',
    'optional_labels': '',
    'optional_axis': '',
    'versionadded_to_excel': '\n    .. versionadded:: 0.20.0\n',
}


# see gh-16971
def remove_na(arr):
    """
    Drop the null entries from an array-like object.

    .. deprecated:: 0.21.0
        Use s[s.notnull()] instead.

    Parameters
    ----------
    arr : array-like
        Forwarded unchanged to ``remove_na_arraylike``.

    Returns
    -------
    Whatever ``remove_na_arraylike(arr)`` returns.
    """
    message = ("remove_na is deprecated and is a private "
               "function. Do not use.")
    # stacklevel=2 attributes the warning to the caller rather than to this
    # shim.
    warnings.warn(message, category=FutureWarning, stacklevel=2)
    return remove_na_arraylike(arr)


def _coerce_method(converter):
    """
    Build a scalar-coercion method around ``converter``.

    Parameters
    ----------
    converter : callable
        Conversion callable such as ``int`` or ``float``; it must expose a
        ``__name__`` attribute.

    Returns
    -------
    function
        A one-argument method named ``__<converter.__name__>__``. It applies
        ``converter`` to ``self.iloc[0]`` when ``len(self) == 1`` and raises
        TypeError for any other length.
    """

    def wrapper(self):
        # Coercion is only defined for exactly one element.
        if len(self) != 1:
            raise TypeError("cannot convert the series to "
                            "{0}".format(str(converter)))
        return converter(self.iloc[0])

    # Expose the function under the dunder name matching the converter,
    # e.g. ``int`` -> ``__int__``.
    wrapper.__name__ = "__{name}__".format(name=converter.__name__)
    return wrapper

# ----------------------------------------------------------------------
# Series class


class Series(base.IndexOpsMixin, generic.NDFrame):
    """
    One-dimensional ndarray with axis labels (including time series).

    Labels need not be unique but must be a hashable type. The object
    supports both integer- and label-based indexing and provides a host of
    methods for performing operations involving the index. Statistical
    methods from ndarray have been overridden to automatically exclude
    missing data (currently represented as NaN).

    Operations between Series (+, -, /, *, **) align values based on their
    associated index values-- they need not be the same length. The result
    index will be the sorted union of the two indexes.

    Parameters
    ----------
    data : array-like, Iterable, dict, or scalar value
        Contains data stored in Series.

        .. versionchanged:: 0.23.0
           If data is a dict, argument order is maintained for Python 3.6
           and later.

    index : array-like or Index (1d)
        Values must be hashable and have the same length as `data`.
        Non-unique index values are allowed. Will default to
        RangeIndex (0, 1, 2, ..., n) if not provided. If both a dict and index
        sequence are used, the index will override the keys found in the
        dict.
    dtype : str, numpy.dtype, or ExtensionDtype, optional
        dtype for the output Series. If not specified, this will be
        inferred from `data`.
        See the :ref:`user guide <basics.dtypes>` for more usages.
    copy : bool, default False
        Copy input data.
    """
    _metadata = ['name']
    _accessors = {'dt', 'cat', 'str', 'sparse'}
    # tolist is not actually deprecated, just suppressed in the __dir__
    _deprecations = generic.NDFrame._deprecations | frozenset(
        ['asobject', 'reshape', 'get_value', 'set_value',
         'from_csv', 'valid', 'tolist'])

    # Override cache_readonly bc Series is mutable
    hasnans = property(base.IndexOpsMixin.hasnans.func,
                       doc=base.IndexOpsMixin.hasnans.__doc__)

    # ----------------------------------------------------------------------
    # Constructors

    def __init__(self, data=None, index=None, dtype=None, name=None,
                 copy=False, fastpath=False):
        """
        Normalise ``data`` into a SingleBlockManager and initialise the Series.

        Parameters
        ----------
        data : object, default None
            Input values. ``None`` is treated as an empty dict. Index,
            ndarray, Series, dict, SingleBlockManager, extension-array,
            non-sized iterable and sparse-array inputs each have their own
            branch below; MultiIndex and ``set``/``frozenset`` are rejected.
        index : object, default None
            Passed through ``ensure_index`` on the non-fastpath route. When
            omitted it is taken from ``data`` where a branch provides one,
            otherwise a default index of ``len(data)`` is created.
        dtype : object, default None
            Validated with ``self._validate_dtype`` on the non-fastpath route
            and forwarded to the conversion helpers. Not used on the
            fastpath route.
        name : object, default None
            Stored on ``self.name``; taken from ``data.name`` for Index and
            Series inputs when left as None.
        copy : bool, default False
            Whether to copy the input data.
        fastpath : bool, default False
            Internal short-circuit that skips the validation and coercion
            performed on the regular route.

        Raises
        ------
        NotImplementedError
            If ``data`` is a MultiIndex.
        AssertionError
            If ``data`` is a SingleBlockManager and ``index`` is given but
            either differs from ``data.index`` or is combined with
            ``copy=True``.
        TypeError
            If ``data`` is a ``set`` or ``frozenset``.
        ValueError
            If ``data`` is list-like and its length differs from that of the
            index.
        """

        # we are called internally, so short-circuit
        if fastpath:

            # data is an ndarray, index is defined
            if not isinstance(data, SingleBlockManager):
                data = SingleBlockManager(data, index, fastpath=True)
            if copy:
                data = data.copy()
            if index is None:
                index = data.index

        else:

            if index is not None:
                index = ensure_index(index)

            # An empty dict routes ``Series()`` / ``Series(index=...)``
            # through the dict branch (``_init_dict``) below.
            if data is None:
                data = {}
            if dtype is not None:
                dtype = self._validate_dtype(dtype)

            if isinstance(data, MultiIndex):
                raise NotImplementedError("initializing a Series from a "
                                          "MultiIndex is not supported")
            elif isinstance(data, Index):
                if name is None:
                    name = data.name

                if dtype is not None:
                    # astype copies
                    data = data.astype(dtype)
                else:
                    # need to copy to avoid aliasing issues
                    data = data._values.copy()
                    # NOTE(review): ``data`` was rebound on the previous line,
                    # so this isinstance check inspects the copied values and
                    # not the Index that was passed in. Unless
                    # ``_values.copy()`` can itself return a DatetimeIndex,
                    # this branch looks unreachable -- confirm whether the
                    # check was meant to run against the original Index.
                    if (isinstance(data, ABCDatetimeIndex) and
                            data.tz is not None):
                        # GH#24096 need copy to be deep for datetime64tz case
                        # TODO: See if we can avoid these copies
                        data = data._values.copy(deep=True)
                # Both paths above already produced a copy, so skip the
                # later one.
                copy = False

            elif isinstance(data, np.ndarray):
                # Handled by sanitize_array further down.
                pass
            elif isinstance(data, (ABCSeries, ABCSparseSeries)):
                if name is None:
                    name = data.name
                if index is None:
                    index = data.index
                else:
                    data = data.reindex(index, copy=copy)
                # Continue with the underlying manager rather than the Series.
                data = data._data
            elif isinstance(data, dict):
                data, index = self._init_dict(data, index, dtype)
                # ``dtype`` was already handed to _init_dict; clear it and
                # ``copy`` so the manager branch below leaves the result
                # untouched.
                dtype = None
                copy = False
            elif isinstance(data, SingleBlockManager):
                if index is None:
                    index = data.index
                elif not data.index.equals(index) or copy:
                    # GH#19275 SingleBlockManager input should only be called
                    # internally
                    raise AssertionError('Cannot pass both SingleBlockManager '
                                         '`data` argument and a different '
                                         '`index` argument.  `copy` must '
                                         'be False.')

            elif is_extension_array_dtype(data):
                # Handled by sanitize_array further down.
                pass
            elif isinstance(data, (set, frozenset)):
                raise TypeError("{0!r} type is unordered"
                                "".format(data.__class__.__name__))
            # If data is Iterable but not list-like, consume into list.
            elif (isinstance(data, compat.Iterable)
                  and not isinstance(data, compat.Sized)):
                data = list(data)
            else:

                # handle sparse passed here (and force conversion)
                if isinstance(data, ABCSparseArray):
                    data = data.to_dense()

            if index is None:
                # Wrap a non-list-like value so it has a length of one.
                if not is_list_like(data):
                    data = [data]
                index = ibase.default_index(len(data))
            elif is_list_like(data):

                # a scalar numpy array is list-like but doesn't
                # have a proper length
                try:
                    if len(index) != len(data):
                        raise ValueError(
                            'Length of passed values is {val}, '
                            'index implies {ind}'
                            .format(val=len(data), ind=len(index)))
                except TypeError:
                    # len() is unavailable for such inputs; skip the check.
                    pass

            # create/copy the manager
            if isinstance(data, SingleBlockManager):
                if dtype is not None:
                    # errors='ignore' is passed through to the manager's
                    # astype.
                    data = data.astype(dtype=dtype, errors='ignore',
                                       copy=copy)
                elif copy:
                    data = data.copy()
            else:
                data = sanitize_array(data, index, dtype, copy,
                                      raise_cast_failure=True)

                data = SingleBlockManager(data, index, fastpath=True)

        generic.NDFrame.__init__(self, data, fastpath=True)

        self.name = name
        self._set_axis(0, index, fastpath=True)

    def _init_dict(self, data, index=None, dtype=None):
        """
        Build the manager and index of a new Series from dictionary input.

        Parameters
        ----------
        data : dict or dict-like
            Mapping whose keys become labels and whose values become the
            Series values.
        index : Index or index-like, default None
            Labels for the result; when None the dict keys are used.
        dtype : dtype, default None
            dtype for the result; when None it is inferred from the values.

        Returns
        -------
        _data : BlockManager for the new Series
        index : index for the new Series
        """
        # NaN keys cannot be looked up reliably ({np.nan : 1}[float('nan')]
        # raises KeyError), so unpack every item and let reindexing align.
        if data:
            keys, values = zip(*compat.iteritems(data))
            values = list(values)
        elif index is None:
            keys, values = [], []
        else:
            # fastpath for Series(data=None): broadcast one missing-value
            # scalar over the requested index instead of reindexing.
            values = na_value_for_dtype(dtype)
            keys = index

        # Everything is list-like (or a scalar plus an index) by now, so the
        # regular constructor does the heavy lifting.
        result = Series(values, index=keys, dtype=dtype)

        # Ordering is only adjusted for non-empty input; an empty ``data``
        # (e.g. Series(None, index=[...])) is not really dict-like.
        if data:
            if index is not None:
                # An explicit index dictates the order and the labels.
                result = result.reindex(index, copy=False)
            elif not (PY36 or isinstance(data, OrderedDict)):
                # Without PY36 and for a non-OrderedDict input, sort by
                # label; sorting is skipped when it raises TypeError.
                try:
                    result = result.sort_index()
                except TypeError:
                    pass
        return result._data, result.index

    @classmethod
    def from_array(cls, arr, index=None, name=None, dtype=None, copy=False,
                   fastpath=False):
        """
        Construct Series from array.

        .. deprecated:: 0.23.0
            Use pd.Series(..) constructor instead.

        All arguments are forwarded to the constructor of ``cls``, or of
        ``SparseSeries`` when ``arr`` is a sparse array.
        """
        message = ("'from_array' is deprecated and will be removed in a "
                   "future version. Please use the pd.Series(..) "
                   "constructor instead.")
        # stacklevel=2 attributes the warning to the caller's line.
        warnings.warn(message, FutureWarning, stacklevel=2)

        klass = cls
        if isinstance(arr, ABCSparseArray):
            # Sparse input is always built as a SparseSeries, whatever
            # ``cls`` was.
            from pandas.core.sparse.series import SparseSeries
            klass = SparseSeries
        return klass(arr, index=index, name=name, dtype=dtype,
                     copy=copy, fastpath=fastpath)

    # ----------------------------------------------------------------------

    @property
    def _constructor(self):
        """
        Return the ``Series`` class.

        NOTE(review): presumably the hook the NDFrame machinery uses to build
        results of the same dimensionality -- confirm against callers.
        """
        return Series

    @property
    def _constructor_expanddim(self):
        """
        Return the ``DataFrame`` class.

        NOTE(review): presumably the hook used when an operation adds a
        dimension to a Series -- confirm against callers.
        """
        # Imported at call time rather than at module level; presumably this
        # avoids a circular import with pandas.core.frame -- TODO confirm.
        from pandas.core.frame import DataFrame
        return DataFrame
# Loading ...