Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

agriconnect / pandas   python

Repository URL to install this package:

/ core / resample.py

import copy
from datetime import timedelta
from textwrap import dedent
import warnings

import numpy as np

from pandas._libs import lib
from pandas._libs.tslibs import NaT, Timestamp
from pandas._libs.tslibs.frequencies import is_subperiod, is_superperiod
from pandas._libs.tslibs.period import IncompatibleFrequency
import pandas.compat as compat
from pandas.compat.numpy import function as nv
from pandas.errors import AbstractMethodError
from pandas.util._decorators import Appender, Substitution

from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries

import pandas as pd
import pandas.core.algorithms as algos
from pandas.core.generic import _shared_docs
from pandas.core.groupby.base import GroupByMixin
from pandas.core.groupby.generic import PanelGroupBy, SeriesGroupBy
from pandas.core.groupby.groupby import (
    GroupBy, _GroupBy, _pipe_template, groupby)
from pandas.core.groupby.grouper import Grouper
from pandas.core.groupby.ops import BinGrouper
from pandas.core.indexes.datetimes import DatetimeIndex, date_range
from pandas.core.indexes.period import PeriodIndex
from pandas.core.indexes.timedeltas import TimedeltaIndex, timedelta_range

from pandas.tseries.frequencies import to_offset
from pandas.tseries.offsets import DateOffset, Day, Nano, Tick

# module-level mapping, created empty
_shared_docs_kwargs = {}


class Resampler(_GroupBy):

    """
    Class for resampling datetimelike data, a groupby-like operation.
    See aggregate, transform, and apply functions on this object.

    It's easiest to use obj.resample(...) to use Resampler.

    Parameters
    ----------
    obj : pandas object
    groupby : a TimeGrouper object
    axis : int, default 0
    kind : str or None
        'period', 'timestamp' to override default index treatment

    Returns
    -------
    a Resampler of the appropriate type

    Notes
    -----
    After resampling, see aggregate, apply, and transform functions.
    """

    # Names of grouper settings exposed on the resampler: ``__getattr__``
    # forwards lookups of these names to ``self.groupby`` and
    # ``__unicode__`` lists the ones whose value is not None in the repr.
    _attributes = ['freq', 'axis', 'closed', 'label', 'convention',
                   'loffset', 'base', 'kind']

    def __init__(self, obj, groupby=None, axis=0, kind=None, **kwargs):
        """
        Initialise the resampler state and bind ``obj`` to the grouper.

        Parameters
        ----------
        obj : pandas object
            Object to resample. It is passed through ``_convert_obj`` and
            the result is handed to ``groupby._set_grouper``.
        groupby : a TimeGrouper object or None
            When None, no grouper is set up.
        axis : int, default 0
        kind : str or None
        **kwargs
            Accepted but not used by this constructor.
        """
        # the binner / grouper pair is created lazily by ``_set_binner``
        self.binner = None
        self.grouper = None

        # fixed groupby-style settings
        self.keys = None
        self.sort = True
        self.squeeze = False
        self.group_keys = True
        self.as_index = True
        self.exclusions = set()

        # caller supplied settings; these must be in place before
        # ``_convert_obj`` runs below
        self.groupby = groupby
        self.axis = axis
        self.kind = kind

        if groupby is not None:
            converted = self._convert_obj(obj)
            groupby._set_grouper(converted, sort=True)

    def __unicode__(self):
        """
        Provide a nice str repr of our resampler object.

        Only the entries of ``_attributes`` whose value on the grouper is
        not None are included.
        """
        grouper = self.groupby
        shown = []
        for name in self._attributes:
            value = getattr(grouper, name, None)
            if value is not None:
                shown.append("{k}={v}".format(k=name, v=value))
        return "{klass} [{attrs}]".format(klass=self.__class__.__name__,
                                          attrs=', '.join(shown))

    def __getattr__(self, attr):
        """
        Resolve attributes that normal lookup did not find.

        Names outside ``_internal_names_set`` are first looked up among the
        grouper settings in ``_attributes`` and then as a key of ``self.obj``
        (returned via ``self[attr]``). Everything else goes through the
        default ``object.__getattribute__``.
        """
        if attr not in self._internal_names_set:
            if attr in self._attributes:
                # forwarded to the grouper
                return getattr(self.groupby, attr)
            if attr in self.obj:
                # selection by name on the wrapped object
                return self[attr]

        return object.__getattribute__(self, attr)

    def __iter__(self):
        """
        Iterate over the resampled groups.

        The binner is set up first, then iteration is delegated to the
        parent class.

        Returns
        -------
        Generator yielding sequence of (name, subsetted object)
        for each group

        See Also
        --------
        GroupBy.__iter__
        """
        self._set_binner()
        group_iterator = super(Resampler, self).__iter__()
        return group_iterator

    @property
    def obj(self):
        """
        The ``obj`` attribute of the underlying grouper (``self.groupby``).
        """
        return self.groupby.obj

    @property
    def ax(self):
        """
        The ``ax`` attribute of the underlying grouper (``self.groupby``).
        """
        # NOTE(review): presumably the index being resampled; confirm
        # against the grouper implementation
        return self.groupby.ax

    @property
    def _typ(self):
        """
        Masquerade for compat as a Series or a DataFrame.

        Gives 'series' when the selected object is a ``pd.Series`` and
        'dataframe' for anything else.
        """
        is_series = isinstance(self._selected_obj, pd.Series)
        return 'series' if is_series else 'dataframe'

    @property
    def _from_selection(self):
        """
        Is the resampling from a DataFrame column or MultiIndex level.

        True only when a grouper is present and it has a ``key`` or a
        ``level`` set.
        """
        # upsampling and PeriodIndex resampling do not work
        # with selection, this state used to catch and raise an error
        grouper = self.groupby
        if grouper is None:
            return False
        return grouper.key is not None or grouper.level is not None

    def _convert_obj(self, obj):
        """
        Prepare the object for resampling.

        This base implementation only consolidates the object.

        Parameters
        ----------
        obj : the object to be resampled

        Returns
        -------
        obj : the result of ``obj._consolidate()``
        """
        return obj._consolidate()

    def _get_binner_for_time(self):
        """
        Hook that produces the time bins; this base version only raises.

        ``_get_binner`` unpacks the result as
        ``(binner, bins, binlabels)``.

        Raises
        ------
        AbstractMethodError
            Always, in this base implementation.
        """
        raise AbstractMethodError(self)

    def _set_binner(self):
        """
        Create the binner and grouper on first use.

        The pair returned by ``_get_binner`` is stored on ``self.binner`` and
        ``self.grouper``; once ``self.binner`` is set, later calls are no-ops.
        """
        if self.binner is not None:
            # already computed
            return
        self.binner, self.grouper = self._get_binner()

    def _get_binner(self):
        """
        Build the ``(binner, BinGrouper)`` pair for this resampler.

        Assumes ``self.groupby._set_grouper(obj)`` has already been called
        (``__init__`` does this when a grouper is supplied).
        """
        time_bins = self._get_binner_for_time()
        binner, bins, binlabels = time_bins
        indexer = self.groupby.indexer
        return binner, BinGrouper(bins, binlabels, indexer=indexer)

    def _assure_grouper(self):
        """
        Make sure that the binner and grouper have been created.

        Simply delegates to ``_set_binner``.
        """
        self._set_binner()

    # The docstring of ``pipe`` is generated by the decorators below:
    # ``Appender`` adds ``_pipe_template`` and ``Substitution`` is given the
    # ``klass``, ``versionadded`` and ``examples`` values. No literal
    # docstring is written in the body.
    @Substitution(klass='Resampler',
                  versionadded='.. versionadded:: 0.23.0',
                  examples="""
    >>> df = pd.DataFrame({'A': [1, 2, 3, 4]},
    ...                   index=pd.date_range('2012-08-02', periods=4))
    >>> df
                A
    2012-08-02  1
    2012-08-03  2
    2012-08-04  3
    2012-08-05  4

    To get the difference between each 2-day period's maximum and minimum
    value in one pass, you can do

    >>> df.resample('2D').pipe(lambda x: x.max() - x.min())
                A
    2012-08-02  1
    2012-08-04  1
    """)
    @Appender(_pipe_template)
    def pipe(self, func, *args, **kwargs):
        # plain delegation to the parent class implementation
        return super(Resampler, self).pipe(func, *args, **kwargs)

    # "See Also" section for the ``aggregate`` docstring; passed as
    # ``see_also`` to the ``Substitution`` decorator on ``aggregate``.
    _agg_see_also_doc = dedent("""
    See Also
    --------
    pandas.DataFrame.groupby.aggregate
    pandas.DataFrame.resample.transform
    pandas.DataFrame.aggregate
    """)

    # "Examples" section for the ``aggregate`` docstring; passed as
    # ``examples`` to the ``Substitution`` decorator on ``aggregate``.
    # Written as well-formed doctests: continuation lines carry ``...`` and
    # output is only shown after an expression that actually echoes a value.
    _agg_examples_doc = dedent("""
    Examples
    --------
    >>> s = pd.Series([1, 2, 3, 4, 5],
    ...               index=pd.date_range('20130101', periods=5, freq='s'))
    >>> s
    2013-01-01 00:00:00    1
    2013-01-01 00:00:01    2
    2013-01-01 00:00:02    3
    2013-01-01 00:00:03    4
    2013-01-01 00:00:04    5
    Freq: S, dtype: int64

    >>> r = s.resample('2s')
    >>> r
    DatetimeIndexResampler [freq=<2 * Seconds>, axis=0, closed=left,
                            label=left, convention=start, base=0]

    >>> r.agg(np.sum)
    2013-01-01 00:00:00    3
    2013-01-01 00:00:02    7
    2013-01-01 00:00:04    5
    Freq: 2S, dtype: int64

    >>> r.agg(['sum', 'mean', 'max'])
                         sum  mean  max
    2013-01-01 00:00:00    3   1.5    2
    2013-01-01 00:00:02    7   3.5    4
    2013-01-01 00:00:04    5   5.0    5

    >>> r.agg({'result': lambda x: x.mean() / x.std(),
    ...        'total': np.sum})
                         total    result
    2013-01-01 00:00:00      3  2.121320
    2013-01-01 00:00:02      7  4.949747
    2013-01-01 00:00:04      5       NaN
    """)

    @Substitution(see_also=_agg_see_also_doc,
                  examples=_agg_examples_doc,
                  versionadded='',
                  klass='DataFrame',
                  axis='')
    @Appender(_shared_docs['aggregate'])
    def aggregate(self, func, *args, **kwargs):
        # NOTE: the docstring comes from the decorators above, so no
        # literal docstring is written here.
        self._set_binner()

        # only the first element of the returned pair is used
        result, _ = self._aggregate(func, *args, **kwargs)
        if result is None:
            # nothing produced by ``_aggregate``: run ``func`` through a
            # groupby aggregation instead; a ``None`` grouper makes
            # ``_groupby_and_aggregate`` fall back to ``self.grouper``
            result = self._groupby_and_aggregate(func, None, *args, **kwargs)

        return self._apply_loffset(result)

    # ``agg`` and ``apply`` are aliases bound to the same function as
    # ``aggregate``
    agg = aggregate
    apply = aggregate

    def transform(self, arg, *args, **kwargs):
        """
        Call function producing a like-indexed Series on each group and return
        a Series with the transformed values.

        The selected object is grouped by this resampler's grouper and the
        call is forwarded to the resulting groupby's ``transform``.

        Parameters
        ----------
        arg : function
            To apply to each group. Should return a Series with the same index
        *args, **kwargs
            Passed through to the groupby ``transform`` call.

        Returns
        -------
        transformed : Series

        Examples
        --------
        >>> resampled.transform(lambda x: (x - x.mean()) / x.std())
        """
        grouped = self._selected_obj.groupby(self.groupby)
        return grouped.transform(arg, *args, **kwargs)

    def _downsample(self, f):
        """
        Downsampling hook; this base version only raises.

        Parameters
        ----------
        f : object
            NOTE(review): presumably the aggregation to apply; confirm
            against the overriding implementations.

        Raises
        ------
        AbstractMethodError
            Always, in this base implementation.
        """
        raise AbstractMethodError(self)

    def _upsample(self, f, limit=None, fill_value=None):
        """
        Upsampling hook; this base version only raises.

        Parameters
        ----------
        f : object
            NOTE(review): presumably the fill method to apply; confirm
            against the overriding implementations.
        limit : default None
        fill_value : default None

        Raises
        ------
        AbstractMethodError
            Always, in this base implementation.
        """
        raise AbstractMethodError(self)

    def _gotitem(self, key, ndim, subset=None):
        """
        Return a sliced groupby object for the given selection.

        A groupby is built over ``subset`` (or ``self.obj`` when no subset is
        given) with this resampler's grouper. If selecting ``key`` from it
        raises ``KeyError``, the unsliced groupby is returned instead.

        Parameters
        ----------
        key : string / list of selections
        ndim : 1,2
            requested ndim of result (not used in this implementation)
        subset : object, default None
            subset to act on
        """
        self._set_binner()
        target = self.obj if subset is None else subset
        grouped = groupby(target, by=None, grouper=self.grouper,
                          axis=self.axis)

        # try the key selection
        try:
            selected = grouped[key]
        except KeyError:
            return grouped
        else:
            return selected

    def _groupby_and_aggregate(self, how, grouper=None, *args, **kwargs):
        """
        Re-evaluate the obj with a groupby aggregation.
        """

        if grouper is None:
            self._set_binner()
            grouper = self.grouper

        obj = self._selected_obj

        try:
            grouped = groupby(obj, by=None, grouper=grouper, axis=self.axis)
Loading ...