Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

agriconnect / pandas   python

Repository URL to install this package:

Version: 0.24.2 

/ core / arrays / period.py

# -*- coding: utf-8 -*-
from datetime import timedelta
import operator

import numpy as np

from pandas._libs.tslibs import (
    NaT, frequencies as libfrequencies, iNaT, period as libperiod)
from pandas._libs.tslibs.fields import isleapyear_arr
from pandas._libs.tslibs.period import (
    DIFFERENT_FREQ, IncompatibleFrequency, Period, get_period_field_arr,
    period_asfreq_arr)
from pandas._libs.tslibs.timedeltas import Timedelta, delta_to_nanoseconds
import pandas.compat as compat
from pandas.util._decorators import Appender, cache_readonly

from pandas.core.dtypes.common import (
    _TD_DTYPE, ensure_object, is_datetime64_dtype, is_float_dtype,
    is_list_like, is_period_dtype, pandas_dtype)
from pandas.core.dtypes.dtypes import PeriodDtype
from pandas.core.dtypes.generic import (
    ABCDataFrame, ABCIndexClass, ABCPeriodIndex, ABCSeries)
from pandas.core.dtypes.missing import isna, notna

import pandas.core.algorithms as algos
from pandas.core.arrays import datetimelike as dtl
import pandas.core.common as com

from pandas.tseries import frequencies
from pandas.tseries.offsets import DateOffset, Tick, _delta_to_tick


def _field_accessor(name, alias, docstring=None):
    """
    Build a read-only property that extracts one field from the ordinals.

    Parameters
    ----------
    name : str
        Assigned to the getter's ``__name__``.
    alias : int
        Field code forwarded to ``get_period_field_arr``.
    docstring : str, optional
        Assigned to the getter's ``__doc__``.

    Returns
    -------
    property
    """
    def getter(self):
        # Only the base frequency code is used; the multiple is discarded.
        freq_base, _ = libfrequencies.get_freq_code(self.freq)
        return get_period_field_arr(alias, self.asi8, freq_base)

    getter.__name__ = name
    getter.__doc__ = docstring
    return property(getter)


def _period_array_cmp(cls, op):
    """
    Wrap comparison operations to convert Period-like to PeriodDtype

    Parameters
    ----------
    cls : type
        Array class the comparison method is built for; ``other`` values
        that are instances of it are compared element-wise.
    op : callable
        Operator function; only its ``__name__`` is used, to look up the
        matching dunder method on the ``asi8`` ndarray.

    Returns
    -------
    function
        ``wrapper(self, other)`` named after the dunder method.
    """
    opname = '__{name}__'.format(name=op.__name__)
    # Positions involving NaT compare False, except for ``!=``.
    nat_result = opname == '__ne__'

    def wrapper(self, other):
        compare = getattr(self.asi8, opname)

        # Let the container classes handle the comparison themselves.
        if isinstance(other, (ABCDataFrame, ABCSeries, ABCIndexClass)):
            return NotImplemented

        if is_list_like(other) and len(other) != len(self):
            raise ValueError("Lengths must match")

        if not isinstance(other, Period) and isinstance(other, cls):
            # Array-vs-array: mask positions missing on either side.
            self._check_compatible_with(other)
            result = compare(other.asi8)
            either_missing = self._isnan | other._isnan
            if either_missing.any():
                result[either_missing] = nat_result
            return result

        # Scalar paths: build the raw result, then mask our own NaT slots.
        if isinstance(other, Period):
            self._check_compatible_with(other)
            result = compare(other.ordinal)
        elif other is NaT:
            result = np.full(len(self.asi8), nat_result, dtype=bool)
        else:
            scalar = Period(other, freq=self.freq)
            result = compare(scalar.ordinal)

        if self._hasnans:
            result[self._isnan] = nat_result
        return result

    return compat.set_function_name(wrapper, opname, cls)


class PeriodArray(dtl.DatetimeLikeArrayMixin, dtl.DatelikeOps):
    """
    Pandas ExtensionArray for storing Period data.

    Users should use :func:`period_array` to create new instances.

    Parameters
    ----------
    values : Union[PeriodArray, Series[period], ndarray[int], PeriodIndex]
        The data to store. These should be arrays that can be directly
        converted to ordinals without inference or copy (PeriodArray,
        ndarray[int64]), or a box around such an array (Series[period],
        PeriodIndex).
    freq : str or DateOffset
        The `freq` to use for the array. Mostly applicable when `values`
        is an ndarray of integers, when `freq` is required. When `values`
        is a PeriodArray (or box around), it's checked that ``values.freq``
        matches `freq`.
    dtype : PeriodDtype, optional
        Passed together with `freq` to ``validate_dtype_freq``; the value
        that call returns is used as the frequency.
    copy : bool, default False
        Whether to copy the ordinals before storing.

    See Also
    --------
    period_array : Create a new PeriodArray.
    pandas.PeriodIndex : Immutable Index for period data.

    Notes
    -----
    There are two components to a PeriodArray

    - ordinals : integer ndarray
    - freq : pd.tseries.offsets.Offset

    The values are physically stored as a 1-D ndarray of integers. These are
    called "ordinals" and represent some kind of offset from a base.

    The `freq` indicates the span covered by each element of the array.
    All elements in the PeriodArray have the same `freq`.
    """
    # array priority higher than numpy scalars
    __array_priority__ = 1000
    _attributes = ["freq"]
    _typ = "periodarray"  # ABCPeriodArray
    # Scalar class checked with isinstance() in ``_unbox_scalar``.
    _scalar_type = Period

    # Names others delegate to us
    _other_ops = []
    _bool_ops = ['is_leap_year']
    _object_ops = ['start_time', 'end_time', 'freq']
    _field_ops = ['year', 'month', 'day', 'hour', 'minute', 'second',
                  'weekofyear', 'weekday', 'week', 'dayofweek',
                  'dayofyear', 'quarter', 'qyear',
                  'days_in_month', 'daysinmonth']
    # Union of the field, object and boolean names listed above.
    _datetimelike_ops = _field_ops + _object_ops + _bool_ops
    _datetimelike_methods = ['strftime', 'to_timestamp', 'asfreq']

    # --------------------------------------------------------------------
    # Constructors

    def __init__(self, values, freq=None, dtype=None, copy=False):
        """
        Store integer ordinals together with a PeriodDtype built from `freq`.

        Parameters
        ----------
        values : PeriodArray, Series, PeriodIndex or integer array-like
            Series and PeriodIndex inputs are unwrapped via ``._values``.
        freq : str or DateOffset, optional
        dtype : optional
            Passed with `freq` to ``validate_dtype_freq``.
        copy : bool, default False
            Forwarded to ``np.array`` when converting to int64.

        Raises
        ------
        TypeError
            If a Series is passed whose ``_values`` is not of this type.
        IncompatibleFrequency
            If `freq` is given and differs from the frequency of a
            PeriodArray input.
        ValueError
            If no frequency is available after the steps above.
        """
        freq = validate_dtype_freq(dtype, freq)
        if freq is not None:
            freq = Period._maybe_convert_freq(freq)

        own_type = type(self)

        # Unwrap the boxes (Series / PeriodIndex) down to what they hold.
        if isinstance(values, ABCSeries):
            values = values._values
            if not isinstance(values, own_type):
                raise TypeError("Incorrect dtype")
        elif isinstance(values, ABCPeriodIndex):
            values = values._values

        if isinstance(values, own_type):
            # An explicit freq has to agree with the one already attached.
            if freq is not None:
                if freq != values.freq:
                    raise IncompatibleFrequency(
                        DIFFERENT_FREQ.format(cls=own_type.__name__,
                                              own_freq=values.freq.freqstr,
                                              other_freq=freq.freqstr))
            freq = values.freq
            values = values._data

        self._data = np.array(values, dtype='int64', copy=copy)
        if freq is None:
            raise ValueError('freq is not specified and cannot be inferred')
        self._dtype = PeriodDtype(freq)

    @classmethod
    def _simple_new(cls, values, freq=None, **kwargs):
        """
        Alias for ``PeriodArray.__init__``.

        All arguments are handed to the constructor unchanged.
        """
        instance = cls(values, freq=freq, **kwargs)
        return instance

    @classmethod
    def _from_sequence(cls, scalars, dtype=None, copy=False):
        # type: (Sequence[Optional[Period]], PeriodDtype, bool) -> PeriodArray
        """
        Build an array from a sequence of Period scalars.

        An input that is already an instance of `cls` is returned as-is (or
        copied when `copy` is true) after its dtype is checked against the
        requested frequency. Anything else is converted to an object
        ndarray, and the frequency is extracted from the data when `dtype`
        does not supply one.
        """
        freq = dtype.freq if dtype else None

        if isinstance(scalars, cls):
            validate_dtype_freq(scalars.dtype, freq)
            return scalars.copy() if copy else scalars

        periods = np.asarray(scalars, dtype=object)
        if copy:
            periods = periods.copy()

        if not freq:
            freq = libperiod.extract_freq(periods)
        ordinals = libperiod.extract_ordinals(periods, freq)
        return cls(ordinals, freq=freq)

    @classmethod
    def _from_datetime64(cls, data, freq, tz=None):
        """
        Construct a PeriodArray from a datetime64 array

        The conversion itself is done by ``dt64arr_to_periodarr``; the
        values and frequency it returns are passed to the constructor.

        Parameters
        ----------
        data : ndarray[datetime64[ns], datetime64[ns, tz]]
        freq : str or Tick
        tz : tzinfo, optional

        Returns
        -------
        PeriodArray[freq]
        """
        ordinals, out_freq = dt64arr_to_periodarr(data, freq, tz)
        return cls(ordinals, freq=out_freq)

    @classmethod
    def _generate_range(cls, start, end, periods, freq, fields):
        """
        Produce ordinals and a frequency from endpoints or from fields.

        Parameters
        ----------
        start, end : optional
            Range endpoints; mutually exclusive with `fields`.
        periods : validated by ``dtl.validate_periods``
        freq : optional
            Converted with ``Period._maybe_convert_freq`` when not None.
        fields : dict
            Keyword arguments for ``_range_from_fields``.

        Returns
        -------
        tuple
            ``(ordinals, freq)``

        Raises
        ------
        ValueError
            If both endpoints and fields are given, or neither.
        """
        periods = dtl.validate_periods(periods)
        if freq is not None:
            freq = Period._maybe_convert_freq(freq)

        has_fields = len(fields) > 0
        has_endpoint = start is not None or end is not None

        if has_endpoint and has_fields:
            raise ValueError('Can either instantiate from fields '
                             'or endpoints, but not both')
        if not has_endpoint and not has_fields:
            raise ValueError('Not enough parameters to construct '
                             'Period range')

        if has_endpoint:
            ordinals, freq = _get_ordinal_range(start, end, periods, freq)
        else:
            ordinals, freq = _range_from_fields(freq=freq, **fields)
        return ordinals, freq

    # -----------------------------------------------------------------
    # DatetimeLike Interface

    def _unbox_scalar(self, value):
        # type: (Union[Period, NaTType]) -> int
        """
        Return the integer stored for `value`.

        NaT yields ``NaT.value``; an instance of ``_scalar_type`` yields its
        ordinal after a compatibility check (skipped for missing values).
        Anything else raises ValueError.
        """
        if value is NaT:
            return value.value

        if not isinstance(value, self._scalar_type):
            raise ValueError("'value' should be a Period. Got '{val}' instead."
                             .format(val=value))

        if not isna(value):
            self._check_compatible_with(value)
        return value.ordinal

    def _scalar_from_string(self, value):
        # type: (str) -> Period
        """Construct a Period from `value` using this array's frequency."""
        parsed = Period(value, freq=self.freq)
        return parsed

    def _check_compatible_with(self, other):
        """
        Check that `other` has the same ``freqstr`` as this array.

        NaT is always accepted. On a mismatch the work is delegated to
        ``_raise_on_incompatible``.
        """
        if other is not NaT and self.freqstr != other.freqstr:
            _raise_on_incompatible(self, other)

    # --------------------------------------------------------------------
    # Data / Attributes

    @cache_readonly
    def dtype(self):
        """Return the dtype object stored on ``_dtype`` by the constructor."""
        period_dtype = self._dtype
        return period_dtype

    @property
    def freq(self):
        """
        Return the frequency object for this PeriodArray.

        The value is read from ``self.dtype.freq``.
        """
        period_dtype = self.dtype
        return period_dtype.freq

    def __array__(self, dtype=None):
        """
        Return an object-dtype ndarray of the elements yielded by iteration.

        The `dtype` argument is accepted but not used.
        """
        # overriding DatetimelikeArray
        boxed = list(self)
        return np.array(boxed, dtype=object)

    # --------------------------------------------------------------------
    # Vectorized analogues of Period properties

    # Each attribute below is a read-only property built by
    # ``_field_accessor(name, alias, docstring)``; the integer ``alias`` is
    # the field code that the getter passes to ``get_period_field_arr``.
    year = _field_accessor('year', 0, "The year of the period")
    month = _field_accessor('month', 3, "The month as January=1, December=12")
    day = _field_accessor('day', 4, "The days of the period")
    hour = _field_accessor('hour', 5, "The hour of the period")
    minute = _field_accessor('minute', 6, "The minute of the period")
    second = _field_accessor('second', 7, "The second of the period")
    # The getter's __name__ is 'week' even though it is bound as weekofyear.
    weekofyear = _field_accessor('week', 8, "The week ordinal of the year")
    week = weekofyear  # same property object under a second name
    dayofweek = _field_accessor('dayofweek', 10,
                                "The day of the week with Monday=0, Sunday=6")
    weekday = dayofweek  # same property object under a second name
    dayofyear = day_of_year = _field_accessor('dayofyear', 9,
                                              "The ordinal day of the year")
    quarter = _field_accessor('quarter', 2, "The quarter of the date")
    # No docstring is supplied for qyear, so its __doc__ is None.
    qyear = _field_accessor('qyear', 1)
    days_in_month = _field_accessor('days_in_month', 11,
                                    "The number of days in the month")
    daysinmonth = days_in_month  # same property object under a second name

    @property
    def is_leap_year(self):
        """
        Logical indicating if the date belongs to a leap year

        Computed by applying ``isleapyear_arr`` to the ``year`` field.
        """
        years = np.asarray(self.year)
        return isleapyear_arr(years)

    @property
    def start_time(self):
        """Return ``self.to_timestamp(how='start')``."""
        anchor = 'start'
        return self.to_timestamp(how=anchor)

    @property
    def end_time(self):
        """Return ``self.to_timestamp(how='end')``."""
        anchor = 'end'
        return self.to_timestamp(how=anchor)

    def to_timestamp(self, freq=None, how='start'):
        """
        Cast to DatetimeArray/Index.

        Parameters
        ----------
        freq : string or DateOffset, optional
            Target frequency. The default is 'D' for week or longer,
            'S' otherwise
        how : {'s', 'e', 'start', 'end'}

        Returns
        -------
        DatetimeArray/Index
        """
        from pandas.core.arrays import DatetimeArray

        how = libperiod._validate_end_alias(how)

        end = how == 'E'
        if end:
            if freq == 'B':
                # roll forward to ensure we land on B date
                adjust = Timedelta(1, 'D') - Timedelta(1, 'ns')
Loading ...