from operator import le, lt
import textwrap
import numpy as np
from pandas._libs.interval import (
Interval, IntervalMixin, intervals_to_interval_bounds)
from pandas.compat import add_metaclass
from pandas.compat.numpy import function as nv
from pandas.util._decorators import Appender
from pandas.util._doctools import _WritableDoc
from pandas.core.dtypes.cast import maybe_convert_platform
from pandas.core.dtypes.common import (
is_categorical_dtype, is_datetime64_any_dtype, is_float_dtype,
is_integer_dtype, is_interval, is_interval_dtype, is_scalar,
is_string_dtype, is_timedelta64_dtype, pandas_dtype)
from pandas.core.dtypes.dtypes import IntervalDtype
from pandas.core.dtypes.generic import (
ABCDatetimeIndex, ABCInterval, ABCIntervalIndex, ABCPeriodIndex, ABCSeries)
from pandas.core.dtypes.missing import isna, notna
from pandas.core.arrays.base import (
ExtensionArray, _extension_array_shared_docs)
from pandas.core.arrays.categorical import Categorical
import pandas.core.common as com
from pandas.core.config import get_option
from pandas.core.indexes.base import Index, ensure_index
# The four spellings of ``closed`` that the docstrings in this module name.
_VALID_CLOSED = {'both', 'left', 'neither', 'right'}

# Registry of docstring templates, filled in below one key at a time.
_interval_shared_docs = {}

# %-substitutions applied to the shared templates for IntervalArray.
_shared_docs_kwargs = {
    'klass': 'IntervalArray',
    'qualname': 'arrays.IntervalArray',
    'name': '',
}
# Class-level docstring template. The %(...)s placeholders (summary,
# versionadded, klass, name, extra_attributes, extra_methods, examples) are
# filled in with %-formatting where the template is attached to a class.
# The ``Notes`` underline is kept at the same length as its title, as
# numpydoc expects for section headers.
_interval_shared_docs['class'] = """
%(summary)s
.. versionadded:: %(versionadded)s
.. warning::
The indexing behaviors are provisional and may change in
a future version of pandas.
Parameters
----------
data : array-like (1-dimensional)
Array-like containing Interval objects from which to build the
%(klass)s.
closed : {'left', 'right', 'both', 'neither'}, default 'right'
Whether the intervals are closed on the left-side, right-side, both or
neither.
dtype : dtype or None, default None
If None, dtype will be inferred.
.. versionadded:: 0.23.0
copy : bool, default False
Copy the input data.
%(name)s\
verify_integrity : bool, default True
Verify that the %(klass)s is valid.
Attributes
----------
left
right
closed
mid
length
is_non_overlapping_monotonic
%(extra_attributes)s\
Methods
-------
from_arrays
from_tuples
from_breaks
overlaps
set_closed
to_tuples
%(extra_methods)s\
See Also
--------
Index : The base pandas Index type.
Interval : A bounded slice-like interval; the elements of an %(klass)s.
interval_range : Function to create a fixed frequency IntervalIndex.
cut : Bin values into discrete Intervals.
qcut : Bin values into equal-sized Intervals based on rank or sample quantiles.
Notes
-----
See the `user guide
<http://pandas.pydata.org/pandas-docs/stable/advanced.html#intervalindex>`_
for more.
%(examples)s\
"""
@Appender(_interval_shared_docs['class'] % dict(
klass="IntervalArray",
summary="Pandas array for interval data that are closed on the same side.",
versionadded="0.24.0",
name='',
extra_attributes='',
extra_methods='',
examples=textwrap.dedent("""\
Examples
--------
A new ``IntervalArray`` can be constructed directly from an array-like of
``Interval`` objects:
>>> pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)])
IntervalArray([(0, 1], (1, 5]],
closed='right',
dtype='interval[int64]')
It may also be constructed using one of the constructor
methods: :meth:`IntervalArray.from_arrays`,
:meth:`IntervalArray.from_breaks`, and :meth:`IntervalArray.from_tuples`.
"""),
))
@add_metaclass(_WritableDoc)
class IntervalArray(IntervalMixin, ExtensionArray):
# Class-level default dtype: an IntervalDtype built with no arguments.
dtype = IntervalDtype()
# Interval arrays are one-dimensional.
ndim = 1
# Missing values are permitted in the array.
can_hold_na = True
# NaN doubles as both the missing-value marker and the fill value.
_na_value = _fill_value = np.nan
def __new__(cls, data, closed=None, dtype=None, copy=False,
            verify_integrity=True):
    # Split ``data`` into left/right bounds plus a ``closed`` side, then
    # hand everything to ``_simple_new`` for coercion and validation.

    # An interval-dtype Series is unwrapped to its underlying values.
    if isinstance(data, ABCSeries) and is_interval_dtype(data):
        data = data.values

    if not isinstance(data, (cls, ABCIntervalIndex)):
        # don't allow scalars
        if is_scalar(data):
            template = ("{}(...) must be called with a collection of some "
                        "kind, {} was passed")
            raise TypeError(template.format(cls.__name__, data))

        # might need to convert empty or purely na data
        data = maybe_convert_platform_interval(data)

        # ``closed`` is only validated against the data when the caller
        # did not supply one; an explicit value wins over the inferred one.
        left, right, infer_closed = intervals_to_interval_bounds(
            data, validate_closed=closed is None)
        closed = closed or infer_closed
    else:
        # Already interval-shaped: reuse its bounds, and fall back to its
        # ``closed`` when none was requested.
        left, right = data.left, data.right
        closed = closed or data.closed

    return cls._simple_new(left, right, closed, copy=copy, dtype=dtype,
                           verify_integrity=verify_integrity)
@classmethod
def _simple_new(cls, left, right, closed=None,
                copy=False, dtype=None, verify_integrity=True):
    """
    Build an instance directly from left and right bounds.

    Parameters
    ----------
    left, right : array-like
        Bounds of the intervals; each is passed through ``ensure_index``.
    closed : str, optional
        Closed side of the intervals. Any falsy value means 'right'.
    copy : bool, default False
        Forwarded to ``ensure_index`` for both bounds.
    dtype : dtype, optional
        Must resolve to an interval dtype. When it carries a subtype,
        both bounds are cast to that subtype.
    verify_integrity : bool, default True
        Whether to call ``_validate`` on the result before returning it.

    Raises
    ------
    TypeError
        If `dtype` is not an interval dtype, or the bounds have a
        categorical or string dtype.
    ValueError
        If the bounds are of different index types, are PeriodIndex, or
        are datetime indexes whose time zones differ.
    """
    instance = IntervalMixin.__new__(cls)

    if not closed:
        closed = 'right'

    left = ensure_index(left, copy=copy)
    right = ensure_index(right, copy=copy)

    if dtype is not None:
        # GH 19262: dtype must be an IntervalDtype to override inferred
        dtype = pandas_dtype(dtype)
        if not is_interval_dtype(dtype):
            raise TypeError(
                'dtype must be an IntervalDtype, got {dtype}'.format(
                    dtype=dtype))
        if dtype.subtype is not None:
            left = left.astype(dtype.subtype)
            right = right.astype(dtype.subtype)

    # coerce dtypes to match if needed: an integer side is upcast to the
    # float dtype of the other side
    if is_float_dtype(left) and is_integer_dtype(right):
        right = right.astype(left.dtype)
    elif is_float_dtype(right) and is_integer_dtype(left):
        left = left.astype(right.dtype)

    # Guard clauses: the first failing check raises.
    if type(left) is not type(right):
        template = ('must not have differing left [{ltype}] and right '
                    '[{rtype}] types')
        raise ValueError(template.format(ltype=type(left).__name__,
                                         rtype=type(right).__name__))

    if is_categorical_dtype(left.dtype) or is_string_dtype(left.dtype):
        # GH 19016
        raise TypeError('category, object, and string subtypes are not '
                        'supported for IntervalArray')

    if isinstance(left, ABCPeriodIndex):
        raise ValueError(
            'Period dtypes are not supported, use a PeriodIndex instead')

    if (isinstance(left, ABCDatetimeIndex) and
            str(left.tz) != str(right.tz)):
        template = ("left and right must have the same time zone, got "
                    "'{left_tz}' and '{right_tz}'")
        raise ValueError(template.format(left_tz=left.tz,
                                         right_tz=right.tz))

    instance._left = left
    instance._right = right
    instance._closed = closed

    if verify_integrity:
        instance._validate()
    return instance
@classmethod
def _from_sequence(cls, scalars, dtype=None, copy=False):
    """
    Construct a new array from a sequence of scalars.

    Delegates to the regular constructor, forwarding `dtype` and `copy`.
    """
    array = cls(scalars, dtype=dtype, copy=copy)
    return array
@classmethod
def _from_factorized(cls, values, original):
    """
    Rebuild an array from factorized `values`.

    The result is closed on the same side as `original`.
    """
    if not len(values):
        # An empty array returns object-dtype here. We can't create
        # a new IA from an (empty) object-dtype array, so turn it into the
        # correct dtype.
        subtype = original.dtype.subtype
        values = values.astype(subtype)
    return cls(values, closed=original.closed)
# Docstring template for ``from_breaks``. The %(klass)s and %(qualname)s
# placeholders are filled with %-formatting where the template is attached
# to the method.
_interval_shared_docs['from_breaks'] = """
Construct an %(klass)s from an array of splits.
Parameters
----------
breaks : array-like (1-dimensional)
Left and right bounds for each interval.
closed : {'left', 'right', 'both', 'neither'}, default 'right'
Whether the intervals are closed on the left-side, right-side, both
or neither.
copy : boolean, default False
copy the data
dtype : dtype or None, default None
If None, dtype will be inferred
.. versionadded:: 0.23.0
See Also
--------
interval_range : Function to create a fixed frequency IntervalIndex.
%(klass)s.from_arrays : Construct from a left and right array.
%(klass)s.from_tuples : Construct from a sequence of tuples.
Examples
--------
>>> pd.%(qualname)s.from_breaks([0, 1, 2, 3])
%(klass)s([(0, 1], (1, 2], (2, 3]],
closed='right',
dtype='interval[int64]')
"""
@classmethod
@Appender(_interval_shared_docs['from_breaks'] % _shared_docs_kwargs)
def from_breaks(cls, breaks, closed='right', copy=False, dtype=None):
    # The docstring is supplied by the Appender decorator from the shared
    # 'from_breaks' template, so none is written here.
    breaks = maybe_convert_platform_interval(breaks)

    # Consecutive breaks pair up into intervals: every element but the
    # last is a left bound, every element but the first a right bound.
    lower, upper = breaks[:-1], breaks[1:]
    return cls.from_arrays(lower, upper, closed, copy=copy, dtype=dtype)
# Docstring template for ``from_arrays``. The %(klass)s placeholder is
# filled with %-formatting where the template is attached to the method.
_interval_shared_docs['from_arrays'] = """
Construct from two arrays defining the left and right bounds.
Parameters
----------
left : array-like (1-dimensional)
Left bounds for each interval.
right : array-like (1-dimensional)
Right bounds for each interval.
closed : {'left', 'right', 'both', 'neither'}, default 'right'
Whether the intervals are closed on the left-side, right-side, both
or neither.
copy : boolean, default False
Copy the data.
dtype : dtype, optional
If None, dtype will be inferred.
.. versionadded:: 0.23.0
Returns
-------
%(klass)s
Raises
------
ValueError
When a value is missing in only one of `left` or `right`.
When a value in `left` is greater than the corresponding value
in `right`.
See Also
--------
interval_range : Function to create a fixed frequency IntervalIndex.
%(klass)s.from_breaks : Construct an %(klass)s from an array of
splits.
%(klass)s.from_tuples : Construct an %(klass)s from an
array-like of tuples.
Notes
-----
Each element of `left` must be less than or equal to the `right`
element at the same position. If an element is missing, it must be
missing in both `left` and `right`. A TypeError is raised when
using an unsupported type for `left` or `right`. At the moment,
'category', 'object', and 'string' subtypes are not supported.
Examples
--------
>>> %(klass)s.from_arrays([0, 1, 2], [1, 2, 3])
%(klass)s([(0, 1], (1, 2], (2, 3]],
closed='right',
dtype='interval[int64]')
"""
@classmethod
@Appender(_interval_shared_docs['from_arrays'] % _shared_docs_kwargs)
def from_arrays(cls, left, right, closed='right', copy=False, dtype=None):
    # The docstring is supplied by the Appender decorator from the shared
    # 'from_arrays' template, so none is written here.
    left_bounds = maybe_convert_platform_interval(left)
    right_bounds = maybe_convert_platform_interval(right)

    # Integrity verification is always on for this public constructor.
    return cls._simple_new(left_bounds, right_bounds, closed, copy=copy,
                           dtype=dtype, verify_integrity=True)
_interval_shared_docs['from_intervals'] = """
Construct an %(klass)s from a 1d array of Interval objects
.. deprecated:: 0.23.0
Parameters
----------
data : array-like (1-dimensional)
Array of Interval objects. All intervals must be closed on the same
sides.
copy : boolean, default False
by-default copy the data, this is compat only and ignored
dtype : dtype or None, default None
If None, dtype will be inferred
.. versionadded:: 0.23.0
Loading ...