# -*- coding: utf-8 -*-
from datetime import timedelta
import operator
import numpy as np
from pandas._libs.tslibs import (
NaT, frequencies as libfrequencies, iNaT, period as libperiod)
from pandas._libs.tslibs.fields import isleapyear_arr
from pandas._libs.tslibs.period import (
DIFFERENT_FREQ, IncompatibleFrequency, Period, get_period_field_arr,
period_asfreq_arr)
from pandas._libs.tslibs.timedeltas import Timedelta, delta_to_nanoseconds
import pandas.compat as compat
from pandas.util._decorators import Appender, cache_readonly
from pandas.core.dtypes.common import (
_TD_DTYPE, ensure_object, is_datetime64_dtype, is_float_dtype,
is_list_like, is_period_dtype, pandas_dtype)
from pandas.core.dtypes.dtypes import PeriodDtype
from pandas.core.dtypes.generic import (
ABCDataFrame, ABCIndexClass, ABCPeriodIndex, ABCSeries)
from pandas.core.dtypes.missing import isna, notna
import pandas.core.algorithms as algos
from pandas.core.arrays import datetimelike as dtl
import pandas.core.common as com
from pandas.tseries import frequencies
from pandas.tseries.offsets import DateOffset, Tick, _delta_to_tick
def _field_accessor(name, alias, docstring=None):
def f(self):
base, mult = libfrequencies.get_freq_code(self.freq)
result = get_period_field_arr(alias, self.asi8, base)
return result
f.__name__ = name
f.__doc__ = docstring
return property(f)
def _period_array_cmp(cls, op):
"""
Wrap comparison operations to convert Period-like to PeriodDtype
"""
opname = '__{name}__'.format(name=op.__name__)
nat_result = True if opname == '__ne__' else False
def wrapper(self, other):
op = getattr(self.asi8, opname)
if isinstance(other, (ABCDataFrame, ABCSeries, ABCIndexClass)):
return NotImplemented
if is_list_like(other) and len(other) != len(self):
raise ValueError("Lengths must match")
if isinstance(other, Period):
self._check_compatible_with(other)
result = op(other.ordinal)
elif isinstance(other, cls):
self._check_compatible_with(other)
result = op(other.asi8)
mask = self._isnan | other._isnan
if mask.any():
result[mask] = nat_result
return result
elif other is NaT:
result = np.empty(len(self.asi8), dtype=bool)
result.fill(nat_result)
else:
other = Period(other, freq=self.freq)
result = op(other.ordinal)
if self._hasnans:
result[self._isnan] = nat_result
return result
return compat.set_function_name(wrapper, opname, cls)
class PeriodArray(dtl.DatetimeLikeArrayMixin, dtl.DatelikeOps):
"""
Pandas ExtensionArray for storing Period data.
Users should use :func:`period_array` to create new instances.
Parameters
----------
values : Union[PeriodArray, Series[period], ndarary[int], PeriodIndex]
The data to store. These should be arrays that can be directly
converted to ordinals without inference or copy (PeriodArray,
ndarray[int64]), or a box around such an array (Series[period],
PeriodIndex).
freq : str or DateOffset
The `freq` to use for the array. Mostly applicable when `values`
is an ndarray of integers, when `freq` is required. When `values`
is a PeriodArray (or box around), it's checked that ``values.freq``
matches `freq`.
copy : bool, default False
Whether to copy the ordinals before storing.
See Also
--------
period_array : Create a new PeriodArray.
pandas.PeriodIndex : Immutable Index for period data.
Notes
-----
There are two components to a PeriodArray
- ordinals : integer ndarray
- freq : pd.tseries.offsets.Offset
The values are physically stored as a 1-D ndarray of integers. These are
called "ordinals" and represent some kind of offset from a base.
The `freq` indicates the span covered by each element of the array.
All elements in the PeriodArray have the same `freq`.
"""
# array priority higher than numpy scalars
__array_priority__ = 1000
_attributes = ["freq"]
_typ = "periodarray" # ABCPeriodArray
_scalar_type = Period
# Names others delegate to us
_other_ops = []
_bool_ops = ['is_leap_year']
_object_ops = ['start_time', 'end_time', 'freq']
_field_ops = ['year', 'month', 'day', 'hour', 'minute', 'second',
'weekofyear', 'weekday', 'week', 'dayofweek',
'dayofyear', 'quarter', 'qyear',
'days_in_month', 'daysinmonth']
_datetimelike_ops = _field_ops + _object_ops + _bool_ops
_datetimelike_methods = ['strftime', 'to_timestamp', 'asfreq']
# --------------------------------------------------------------------
# Constructors
def __init__(self, values, freq=None, dtype=None, copy=False):
freq = validate_dtype_freq(dtype, freq)
if freq is not None:
freq = Period._maybe_convert_freq(freq)
if isinstance(values, ABCSeries):
values = values._values
if not isinstance(values, type(self)):
raise TypeError("Incorrect dtype")
elif isinstance(values, ABCPeriodIndex):
values = values._values
if isinstance(values, type(self)):
if freq is not None and freq != values.freq:
msg = DIFFERENT_FREQ.format(cls=type(self).__name__,
own_freq=values.freq.freqstr,
other_freq=freq.freqstr)
raise IncompatibleFrequency(msg)
values, freq = values._data, values.freq
values = np.array(values, dtype='int64', copy=copy)
self._data = values
if freq is None:
raise ValueError('freq is not specified and cannot be inferred')
self._dtype = PeriodDtype(freq)
@classmethod
def _simple_new(cls, values, freq=None, **kwargs):
# alias for PeriodArray.__init__
return cls(values, freq=freq, **kwargs)
@classmethod
def _from_sequence(cls, scalars, dtype=None, copy=False):
# type: (Sequence[Optional[Period]], PeriodDtype, bool) -> PeriodArray
if dtype:
freq = dtype.freq
else:
freq = None
if isinstance(scalars, cls):
validate_dtype_freq(scalars.dtype, freq)
if copy:
scalars = scalars.copy()
return scalars
periods = np.asarray(scalars, dtype=object)
if copy:
periods = periods.copy()
freq = freq or libperiod.extract_freq(periods)
ordinals = libperiod.extract_ordinals(periods, freq)
return cls(ordinals, freq=freq)
@classmethod
def _from_datetime64(cls, data, freq, tz=None):
"""
Construct a PeriodArray from a datetime64 array
Parameters
----------
data : ndarray[datetime64[ns], datetime64[ns, tz]]
freq : str or Tick
tz : tzinfo, optional
Returns
-------
PeriodArray[freq]
"""
data, freq = dt64arr_to_periodarr(data, freq, tz)
return cls(data, freq=freq)
@classmethod
def _generate_range(cls, start, end, periods, freq, fields):
periods = dtl.validate_periods(periods)
if freq is not None:
freq = Period._maybe_convert_freq(freq)
field_count = len(fields)
if start is not None or end is not None:
if field_count > 0:
raise ValueError('Can either instantiate from fields '
'or endpoints, but not both')
subarr, freq = _get_ordinal_range(start, end, periods, freq)
elif field_count > 0:
subarr, freq = _range_from_fields(freq=freq, **fields)
else:
raise ValueError('Not enough parameters to construct '
'Period range')
return subarr, freq
# -----------------------------------------------------------------
# DatetimeLike Interface
def _unbox_scalar(self, value):
# type: (Union[Period, NaTType]) -> int
if value is NaT:
return value.value
elif isinstance(value, self._scalar_type):
if not isna(value):
self._check_compatible_with(value)
return value.ordinal
else:
raise ValueError("'value' should be a Period. Got '{val}' instead."
.format(val=value))
def _scalar_from_string(self, value):
# type: (str) -> Period
return Period(value, freq=self.freq)
def _check_compatible_with(self, other):
if other is NaT:
return
if self.freqstr != other.freqstr:
_raise_on_incompatible(self, other)
# --------------------------------------------------------------------
# Data / Attributes
@cache_readonly
def dtype(self):
return self._dtype
@property
def freq(self):
"""
Return the frequency object for this PeriodArray.
"""
return self.dtype.freq
def __array__(self, dtype=None):
# overriding DatetimelikeArray
return np.array(list(self), dtype=object)
# --------------------------------------------------------------------
# Vectorized analogues of Period properties
year = _field_accessor('year', 0, "The year of the period")
month = _field_accessor('month', 3, "The month as January=1, December=12")
day = _field_accessor('day', 4, "The days of the period")
hour = _field_accessor('hour', 5, "The hour of the period")
minute = _field_accessor('minute', 6, "The minute of the period")
second = _field_accessor('second', 7, "The second of the period")
weekofyear = _field_accessor('week', 8, "The week ordinal of the year")
week = weekofyear
dayofweek = _field_accessor('dayofweek', 10,
"The day of the week with Monday=0, Sunday=6")
weekday = dayofweek
dayofyear = day_of_year = _field_accessor('dayofyear', 9,
"The ordinal day of the year")
quarter = _field_accessor('quarter', 2, "The quarter of the date")
qyear = _field_accessor('qyear', 1)
days_in_month = _field_accessor('days_in_month', 11,
"The number of days in the month")
daysinmonth = days_in_month
@property
def is_leap_year(self):
"""
Logical indicating if the date belongs to a leap year
"""
return isleapyear_arr(np.asarray(self.year))
@property
def start_time(self):
return self.to_timestamp(how='start')
@property
def end_time(self):
return self.to_timestamp(how='end')
def to_timestamp(self, freq=None, how='start'):
"""
Cast to DatetimeArray/Index.
Parameters
----------
freq : string or DateOffset, optional
Target frequency. The default is 'D' for week or longer,
'S' otherwise
how : {'s', 'e', 'start', 'end'}
Returns
-------
DatetimeArray/Index
"""
from pandas.core.arrays import DatetimeArray
how = libperiod._validate_end_alias(how)
end = how == 'E'
if end:
if freq == 'B':
# roll forward to ensure we land on B date
adjust = Timedelta(1, 'D') - Timedelta(1, 'ns')
Loading ...