import copy
from datetime import timedelta
from textwrap import dedent
import warnings
import numpy as np
from pandas._libs import lib
from pandas._libs.tslibs import NaT, Timestamp
from pandas._libs.tslibs.frequencies import is_subperiod, is_superperiod
from pandas._libs.tslibs.period import IncompatibleFrequency
import pandas.compat as compat
from pandas.compat.numpy import function as nv
from pandas.errors import AbstractMethodError
from pandas.util._decorators import Appender, Substitution
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
import pandas as pd
import pandas.core.algorithms as algos
from pandas.core.generic import _shared_docs
from pandas.core.groupby.base import GroupByMixin
from pandas.core.groupby.generic import PanelGroupBy, SeriesGroupBy
from pandas.core.groupby.groupby import (
GroupBy, _GroupBy, _pipe_template, groupby)
from pandas.core.groupby.grouper import Grouper
from pandas.core.groupby.ops import BinGrouper
from pandas.core.indexes.datetimes import DatetimeIndex, date_range
from pandas.core.indexes.period import PeriodIndex
from pandas.core.indexes.timedeltas import TimedeltaIndex, timedelta_range
from pandas.tseries.frequencies import to_offset
from pandas.tseries.offsets import DateOffset, Day, Nano, Tick
_shared_docs_kwargs = dict()
class Resampler(_GroupBy):
"""
Class for resampling datetimelike data, a groupby-like operation.
See aggregate, transform, and apply functions on this object.
It's easiest to use obj.resample(...) to use Resampler.
Parameters
----------
obj : pandas object
groupby : a TimeGrouper object
axis : int, default 0
kind : str or None
'period', 'timestamp' to override default index treatement
Returns
-------
a Resampler of the appropriate type
Notes
-----
After resampling, see aggregate, apply, and transform functions.
"""
# to the groupby descriptor
_attributes = ['freq', 'axis', 'closed', 'label', 'convention',
'loffset', 'base', 'kind']
def __init__(self, obj, groupby=None, axis=0, kind=None, **kwargs):
self.groupby = groupby
self.keys = None
self.sort = True
self.axis = axis
self.kind = kind
self.squeeze = False
self.group_keys = True
self.as_index = True
self.exclusions = set()
self.binner = None
self.grouper = None
if self.groupby is not None:
self.groupby._set_grouper(self._convert_obj(obj), sort=True)
def __unicode__(self):
"""
Provide a nice str repr of our rolling object.
"""
attrs = ["{k}={v}".format(k=k, v=getattr(self.groupby, k))
for k in self._attributes if
getattr(self.groupby, k, None) is not None]
return "{klass} [{attrs}]".format(klass=self.__class__.__name__,
attrs=', '.join(attrs))
def __getattr__(self, attr):
if attr in self._internal_names_set:
return object.__getattribute__(self, attr)
if attr in self._attributes:
return getattr(self.groupby, attr)
if attr in self.obj:
return self[attr]
return object.__getattribute__(self, attr)
def __iter__(self):
"""
Resampler iterator.
Returns
-------
Generator yielding sequence of (name, subsetted object)
for each group
See Also
--------
GroupBy.__iter__
"""
self._set_binner()
return super(Resampler, self).__iter__()
@property
def obj(self):
return self.groupby.obj
@property
def ax(self):
return self.groupby.ax
@property
def _typ(self):
"""
Masquerade for compat as a Series or a DataFrame.
"""
if isinstance(self._selected_obj, pd.Series):
return 'series'
return 'dataframe'
@property
def _from_selection(self):
"""
Is the resampling from a DataFrame column or MultiIndex level.
"""
# upsampling and PeriodIndex resampling do not work
# with selection, this state used to catch and raise an error
return (self.groupby is not None and
(self.groupby.key is not None or
self.groupby.level is not None))
def _convert_obj(self, obj):
"""
Provide any conversions for the object in order to correctly handle.
Parameters
----------
obj : the object to be resampled
Returns
-------
obj : converted object
"""
obj = obj._consolidate()
return obj
def _get_binner_for_time(self):
raise AbstractMethodError(self)
def _set_binner(self):
"""
Setup our binners.
Cache these as we are an immutable object
"""
if self.binner is None:
self.binner, self.grouper = self._get_binner()
def _get_binner(self):
"""
Create the BinGrouper, assume that self.set_grouper(obj)
has already been called.
"""
binner, bins, binlabels = self._get_binner_for_time()
bin_grouper = BinGrouper(bins, binlabels, indexer=self.groupby.indexer)
return binner, bin_grouper
def _assure_grouper(self):
"""
Make sure that we are creating our binner & grouper.
"""
self._set_binner()
@Substitution(klass='Resampler',
versionadded='.. versionadded:: 0.23.0',
examples="""
>>> df = pd.DataFrame({'A': [1, 2, 3, 4]},
... index=pd.date_range('2012-08-02', periods=4))
>>> df
A
2012-08-02 1
2012-08-03 2
2012-08-04 3
2012-08-05 4
To get the difference between each 2-day period's maximum and minimum
value in one pass, you can do
>>> df.resample('2D').pipe(lambda x: x.max() - x.min())
A
2012-08-02 1
2012-08-04 1
""")
@Appender(_pipe_template)
def pipe(self, func, *args, **kwargs):
return super(Resampler, self).pipe(func, *args, **kwargs)
_agg_see_also_doc = dedent("""
See Also
--------
pandas.DataFrame.groupby.aggregate
pandas.DataFrame.resample.transform
pandas.DataFrame.aggregate
""")
_agg_examples_doc = dedent("""
Examples
--------
>>> s = pd.Series([1,2,3,4,5],
index=pd.date_range('20130101', periods=5,freq='s'))
2013-01-01 00:00:00 1
2013-01-01 00:00:01 2
2013-01-01 00:00:02 3
2013-01-01 00:00:03 4
2013-01-01 00:00:04 5
Freq: S, dtype: int64
>>> r = s.resample('2s')
DatetimeIndexResampler [freq=<2 * Seconds>, axis=0, closed=left,
label=left, convention=start, base=0]
>>> r.agg(np.sum)
2013-01-01 00:00:00 3
2013-01-01 00:00:02 7
2013-01-01 00:00:04 5
Freq: 2S, dtype: int64
>>> r.agg(['sum','mean','max'])
sum mean max
2013-01-01 00:00:00 3 1.5 2
2013-01-01 00:00:02 7 3.5 4
2013-01-01 00:00:04 5 5.0 5
>>> r.agg({'result' : lambda x: x.mean() / x.std(),
'total' : np.sum})
total result
2013-01-01 00:00:00 3 2.121320
2013-01-01 00:00:02 7 4.949747
2013-01-01 00:00:04 5 NaN
""")
@Substitution(see_also=_agg_see_also_doc,
examples=_agg_examples_doc,
versionadded='',
klass='DataFrame',
axis='')
@Appender(_shared_docs['aggregate'])
def aggregate(self, func, *args, **kwargs):
self._set_binner()
result, how = self._aggregate(func, *args, **kwargs)
if result is None:
how = func
grouper = None
result = self._groupby_and_aggregate(how,
grouper,
*args,
**kwargs)
result = self._apply_loffset(result)
return result
agg = aggregate
apply = aggregate
def transform(self, arg, *args, **kwargs):
"""
Call function producing a like-indexed Series on each group and return
a Series with the transformed values.
Parameters
----------
func : function
To apply to each group. Should return a Series with the same index
Returns
-------
transformed : Series
Examples
--------
>>> resampled.transform(lambda x: (x - x.mean()) / x.std())
"""
return self._selected_obj.groupby(self.groupby).transform(
arg, *args, **kwargs)
def _downsample(self, f):
raise AbstractMethodError(self)
def _upsample(self, f, limit=None, fill_value=None):
raise AbstractMethodError(self)
def _gotitem(self, key, ndim, subset=None):
"""
Sub-classes to define. Return a sliced object.
Parameters
----------
key : string / list of selections
ndim : 1,2
requested ndim of result
subset : object, default None
subset to act on
"""
self._set_binner()
grouper = self.grouper
if subset is None:
subset = self.obj
grouped = groupby(subset, by=None, grouper=grouper, axis=self.axis)
# try the key selection
try:
return grouped[key]
except KeyError:
return grouped
def _groupby_and_aggregate(self, how, grouper=None, *args, **kwargs):
"""
Re-evaluate the obj with a groupby aggregation.
"""
if grouper is None:
self._set_binner()
grouper = self.grouper
obj = self._selected_obj
try:
grouped = groupby(obj, by=None, grouper=grouper, axis=self.axis)
Loading ...