"""
Arithmetic operations for PandasObjects
This is not a public API.
"""
# necessary to enforce truediv in Python 2.X
from __future__ import division
import datetime
import operator
import textwrap
import warnings
import numpy as np
from pandas._libs import algos as libalgos, lib, ops as libops
import pandas.compat as compat
from pandas.compat import bind_method
from pandas.errors import NullFrequencyError
from pandas.util._decorators import Appender
from pandas.core.dtypes.cast import (
construct_1d_object_array_from_listlike, find_common_type,
maybe_upcast_putmask)
from pandas.core.dtypes.common import (
ensure_object, is_bool_dtype, is_categorical_dtype, is_datetime64_dtype,
is_datetime64tz_dtype, is_datetimelike_v_numeric, is_extension_array_dtype,
is_integer_dtype, is_list_like, is_object_dtype, is_period_dtype,
is_scalar, is_timedelta64_dtype, needs_i8_conversion)
from pandas.core.dtypes.generic import (
ABCDataFrame, ABCIndex, ABCIndexClass, ABCPanel, ABCSeries, ABCSparseArray,
ABCSparseSeries)
from pandas.core.dtypes.missing import isna, notna
import pandas as pd
import pandas.core.common as com
import pandas.core.missing as missing
# -----------------------------------------------------------------------------
# Ops Wrapping Utilities
def get_op_result_name(left, right):
    """
    Pick the name that the result of a binary operation should carry.

    Parameters
    ----------
    left : {Series, Index}
    right : object

    Returns
    -------
    name : object
        Usually a string.  When `right` is a Series or Index the name is
        whatever `_maybe_match_name` decides for the pair; otherwise it is
        simply ``left.name``.
    """
    # `left` is always a pd.Series when called from within ops
    if not isinstance(right, (ABCSeries, pd.Index)):
        # `right` is not a Series/Index, so only `left` is consulted
        return left.name
    return _maybe_match_name(left, right)
def _maybe_match_name(a, b):
"""
Try to find a name to attach to the result of an operation between
a and b. If only one of these has a `name` attribute, return that
name. Otherwise return a consensus name if they match of None if
they have different names.
Parameters
----------
a : object
b : object
Returns
-------
name : str or None
See Also
--------
pandas.core.common.consensus_name_attr
"""
a_has = hasattr(a, 'name')
b_has = hasattr(b, 'name')
if a_has and b_has:
if a.name == b.name:
return a.name
else:
# TODO: what if they both have np.nan for their names?
return None
elif a_has:
return a.name
elif b_has:
return b.name
return None
def maybe_upcast_for_op(obj):
    """
    Convert selected non-pandas timedelta-like inputs to their pandas
    counterparts so arithmetic and comparison ops behave uniformly.

    Parameters
    ----------
    obj: object

    Returns
    -------
    out : object
        A ``pd.Timedelta`` for a plain ``datetime.timedelta`` or a non-NaT
        ``np.timedelta64``, a ``pd.TimedeltaIndex`` for a timedelta64-dtype
        ndarray, and `obj` itself in every other case.

    Notes
    -----
    Be careful to call this *after* determining the `name` attribute to be
    attached to the result of the arithmetic operation.
    """
    # Exact type match only: subclasses of datetime.timedelta are left alone.
    if type(obj) is datetime.timedelta:
        # GH#22390 cast up to Timedelta to rely on Timedelta
        # implementation; otherwise operation against numeric-dtype
        # raises TypeError
        return pd.Timedelta(obj)

    if isinstance(obj, np.timedelta64):
        # Non-nanosecond timedelta64 needs to be cast to nanoseconds, or
        # else we get undesired behavior like
        #   np.timedelta64(3, 'D') / 2 == np.timedelta64(1, 'D')
        # timedelta64("NaT") is deliberately passed through untouched:
        # casting it would return NaT, which would incorrectly be treated
        # as a datetime-NaT.
        return obj if isna(obj) else pd.Timedelta(obj)

    if isinstance(obj, np.ndarray) and is_timedelta64_dtype(obj):
        # GH#22390 Unfortunately we need to special-case right-hand
        # timedelta64 dtypes because numpy casts integer dtypes to
        # timedelta64 when operating with timedelta64
        return pd.TimedeltaIndex(obj)

    return obj
# -----------------------------------------------------------------------------
# Reversed Operations not available in the stdlib operator module.
# Defining these instead of using lambdas allows us to reference them by name.
def radd(left, right):
    """Reflected addition: return ``right + left``."""
    return operator.add(right, left)
def rsub(left, right):
    """Reflected subtraction: return ``right - left``."""
    return operator.sub(right, left)
def rmul(left, right):
    """Reflected multiplication: return ``right * left``."""
    return operator.mul(right, left)
def rdiv(left, right):
    """
    Reflected division: return ``right / left``.

    The module enables ``from __future__ import division``, so this is
    true division.
    """
    return operator.truediv(right, left)
def rtruediv(left, right):
    """Reflected true division: return ``right / left``."""
    return operator.truediv(right, left)
def rfloordiv(left, right):
    """Reflected floor division: return ``right // left``."""
    return operator.floordiv(right, left)
def rmod(left, right):
    """
    Reflected modulo: return ``right % left``.

    Raises
    ------
    TypeError
        If `right` is a string.  For strings ``%`` is the formatting
        operator rather than modulo, so the operation is rejected.
    """
    if not isinstance(right, compat.string_types):
        # not a string, so `%` really is the modulo operation
        return right % left
    raise TypeError("{typ} cannot perform the operation mod".format(
        typ=type(left).__name__))
def rdivmod(left, right):
    """Reflected divmod: return ``divmod(right, left)``."""
    swapped = (right, left)
    return divmod(*swapped)
def rpow(left, right):
    """Reflected exponentiation: return ``right ** left``."""
    return operator.pow(right, left)
def rand_(left, right):
    """Reflected ``&``: return ``right & left``."""
    return right & left
def ror_(left, right):
    """Reflected ``|``: return ``right | left``."""
    return right | left
def rxor(left, right):
    """Reflected ``^``: return ``right ^ left``."""
    return right ^ left
# -----------------------------------------------------------------------------
def make_invalid_op(name):
    """
    Build a binary method whose only behavior is to raise TypeError.

    Parameters
    ----------
    name : str
        Operation name; used in the error message and assigned to the
        returned function's ``__name__``.

    Returns
    -------
    invalid_op : function
    """
    template = ("cannot perform {name} with this index type: "
                "{typ}")

    def invalid_op(self, other=None):
        # Report the concrete class of the object the op was attempted on.
        typ = type(self).__name__
        raise TypeError(template.format(name=name, typ=typ))

    invalid_op.__name__ = name
    return invalid_op
def _gen_eval_kwargs(name):
"""
Find the keyword arguments to pass to numexpr for the given operation.
Parameters
----------
name : str
Returns
-------
eval_kwargs : dict
Examples
--------
>>> _gen_eval_kwargs("__add__")
{}
>>> _gen_eval_kwargs("rtruediv")
{'reversed': True, 'truediv': True}
"""
kwargs = {}
# Series and Panel appear to only pass __add__, __radd__, ...
# but DataFrame gets both these dunder names _and_ non-dunder names
# add, radd, ...
name = name.replace('__', '')
if name.startswith('r'):
if name not in ['radd', 'rand', 'ror', 'rxor']:
# Exclude commutative operations
kwargs['reversed'] = True
if name in ['truediv', 'rtruediv']:
kwargs['truediv'] = True
if name in ['ne']:
kwargs['masker'] = True
return kwargs
def _gen_fill_zeros(name):
"""
Find the appropriate fill value to use when filling in undefined values
in the results of the given operation caused by operating on
(generally dividing by) zero.
Parameters
----------
name : str
Returns
-------
fill_value : {None, np.nan, np.inf}
"""
name = name.strip('__')
if 'div' in name:
# truediv, floordiv, div, and reversed variants
fill_value = np.inf
elif 'mod' in name:
# mod, rmod
fill_value = np.nan
else:
fill_value = None
return fill_value
def _get_frame_op_default_axis(name):
"""
Only DataFrame cares about default_axis, specifically:
special methods have default_axis=None and flex methods
have default_axis='columns'.
Parameters
----------
name : str
Returns
-------
default_axis: str or None
"""
if name.replace('__r', '__') in ['__and__', '__or__', '__xor__']:
# bool methods
return 'columns'
elif name.startswith('__'):
# __add__, __mul__, ...
return None
else:
# add, mul, ...
return 'columns'
def _get_opstr(op, cls):
"""
Find the operation string, if any, to pass to numexpr for this
operation.
Parameters
----------
op : binary operator
cls : class
Returns
-------
op_str : string or None
"""
# numexpr is available for non-sparse classes
subtyp = getattr(cls, '_subtyp', '')
use_numexpr = 'sparse' not in subtyp
if not use_numexpr:
# if we're not using numexpr, then don't pass a str_rep
return None
return {operator.add: '+',
radd: '+',
operator.mul: '*',
rmul: '*',
operator.sub: '-',
rsub: '-',
operator.truediv: '/',
rtruediv: '/',
operator.floordiv: '//',
rfloordiv: '//',
operator.mod: None, # TODO: Why None for mod but '%' for rmod?
rmod: '%',
Loading ...