Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

agriconnect / pandas   python

Repository URL to install this package:

/ core / ops.py

"""
Arithmetic operations for PandasObjects

This is not a public API.
"""
# necessary to enforce truediv in Python 2.X
from __future__ import division

import datetime
import operator
import textwrap
import warnings

import numpy as np

from pandas._libs import algos as libalgos, lib, ops as libops
import pandas.compat as compat
from pandas.compat import bind_method
from pandas.errors import NullFrequencyError
from pandas.util._decorators import Appender

from pandas.core.dtypes.cast import (
    construct_1d_object_array_from_listlike, find_common_type,
    maybe_upcast_putmask)
from pandas.core.dtypes.common import (
    ensure_object, is_bool_dtype, is_categorical_dtype, is_datetime64_dtype,
    is_datetime64tz_dtype, is_datetimelike_v_numeric, is_extension_array_dtype,
    is_integer_dtype, is_list_like, is_object_dtype, is_period_dtype,
    is_scalar, is_timedelta64_dtype, needs_i8_conversion)
from pandas.core.dtypes.generic import (
    ABCDataFrame, ABCIndex, ABCIndexClass, ABCPanel, ABCSeries, ABCSparseArray,
    ABCSparseSeries)
from pandas.core.dtypes.missing import isna, notna

import pandas as pd
import pandas.core.common as com
import pandas.core.missing as missing

# -----------------------------------------------------------------------------
# Ops Wrapping Utilities


def get_op_result_name(left, right):
    """
    Find the appropriate name to pin to an operation result.  This result
    should always be either an Index or a Series.

    Parameters
    ----------
    left : {Series, Index}
    right : object

    Returns
    -------
    name : object
        Usually a string
    """
    # `left` is always a pd.Series when called from within ops
    if isinstance(right, (ABCSeries, pd.Index)):
        name = _maybe_match_name(left, right)
    else:
        name = left.name
    return name


def _maybe_match_name(a, b):
    """
    Try to find a name to attach to the result of an operation between
    a and b.  If only one of these has a `name` attribute, return that
    name.  Otherwise return a consensus name if they match of None if
    they have different names.

    Parameters
    ----------
    a : object
    b : object

    Returns
    -------
    name : str or None

    See Also
    --------
    pandas.core.common.consensus_name_attr
    """
    a_has = hasattr(a, 'name')
    b_has = hasattr(b, 'name')
    if a_has and b_has:
        if a.name == b.name:
            return a.name
        else:
            # TODO: what if they both have np.nan for their names?
            return None
    elif a_has:
        return a.name
    elif b_has:
        return b.name
    return None


def maybe_upcast_for_op(obj):
    """
    Cast non-pandas objects to pandas types to unify behavior of arithmetic
    and comparison operations.

    Parameters
    ----------
    obj: object

    Returns
    -------
    out : object

    Notes
    -----
    Be careful to call this *after* determining the `name` attribute to be
    attached to the result of the arithmetic operation.
    """
    if type(obj) is datetime.timedelta:
        # GH#22390  cast up to Timedelta to rely on Timedelta
        # implementation; otherwise operation against numeric-dtype
        # raises TypeError
        return pd.Timedelta(obj)
    elif isinstance(obj, np.timedelta64) and not isna(obj):
        # In particular non-nanosecond timedelta64 needs to be cast to
        #  nanoseconds, or else we get undesired behavior like
        #  np.timedelta64(3, 'D') / 2 == np.timedelta64(1, 'D')
        # The isna check is to avoid casting timedelta64("NaT"), which would
        #  return NaT and incorrectly be treated as a datetime-NaT.
        return pd.Timedelta(obj)
    elif isinstance(obj, np.ndarray) and is_timedelta64_dtype(obj):
        # GH#22390 Unfortunately we need to special-case right-hand
        # timedelta64 dtypes because numpy casts integer dtypes to
        # timedelta64 when operating with timedelta64
        return pd.TimedeltaIndex(obj)
    return obj


# -----------------------------------------------------------------------------
# Reversed Operations not available in the stdlib operator module.
# Defining these instead of using lambdas allows us to reference them by name.

def radd(left, right):
    return right + left


def rsub(left, right):
    return right - left


def rmul(left, right):
    return right * left


def rdiv(left, right):
    return right / left


def rtruediv(left, right):
    return right / left


def rfloordiv(left, right):
    return right // left


def rmod(left, right):
    # check if right is a string as % is the string
    # formatting operation; this is a TypeError
    # otherwise perform the op
    if isinstance(right, compat.string_types):
        raise TypeError("{typ} cannot perform the operation mod".format(
            typ=type(left).__name__))

    return right % left


def rdivmod(left, right):
    return divmod(right, left)


def rpow(left, right):
    return right ** left


def rand_(left, right):
    return operator.and_(right, left)


def ror_(left, right):
    return operator.or_(right, left)


def rxor(left, right):
    return operator.xor(right, left)


# -----------------------------------------------------------------------------

def make_invalid_op(name):
    """
    Return a binary method that always raises a TypeError.

    Parameters
    ----------
    name : str

    Returns
    -------
    invalid_op : function
    """
    def invalid_op(self, other=None):
        raise TypeError("cannot perform {name} with this index type: "
                        "{typ}".format(name=name, typ=type(self).__name__))

    invalid_op.__name__ = name
    return invalid_op


def _gen_eval_kwargs(name):
    """
    Find the keyword arguments to pass to numexpr for the given operation.

    Parameters
    ----------
    name : str

    Returns
    -------
    eval_kwargs : dict

    Examples
    --------
    >>> _gen_eval_kwargs("__add__")
    {}

    >>> _gen_eval_kwargs("rtruediv")
    {'reversed': True, 'truediv': True}
    """
    kwargs = {}

    # Series and Panel appear to only pass __add__, __radd__, ...
    # but DataFrame gets both these dunder names _and_ non-dunder names
    # add, radd, ...
    name = name.replace('__', '')

    if name.startswith('r'):
        if name not in ['radd', 'rand', 'ror', 'rxor']:
            # Exclude commutative operations
            kwargs['reversed'] = True

    if name in ['truediv', 'rtruediv']:
        kwargs['truediv'] = True

    if name in ['ne']:
        kwargs['masker'] = True

    return kwargs


def _gen_fill_zeros(name):
    """
    Find the appropriate fill value to use when filling in undefined values
    in the results of the given operation caused by operating on
    (generally dividing by) zero.

    Parameters
    ----------
    name : str

    Returns
    -------
    fill_value : {None, np.nan, np.inf}
    """
    name = name.strip('__')
    if 'div' in name:
        # truediv, floordiv, div, and reversed variants
        fill_value = np.inf
    elif 'mod' in name:
        # mod, rmod
        fill_value = np.nan
    else:
        fill_value = None
    return fill_value


def _get_frame_op_default_axis(name):
    """
    Only DataFrame cares about default_axis, specifically:
    special methods have default_axis=None and flex methods
    have default_axis='columns'.

    Parameters
    ----------
    name : str

    Returns
    -------
    default_axis: str or None
    """
    if name.replace('__r', '__') in ['__and__', '__or__', '__xor__']:
        # bool methods
        return 'columns'
    elif name.startswith('__'):
        # __add__, __mul__, ...
        return None
    else:
        # add, mul, ...
        return 'columns'


def _get_opstr(op, cls):
    """
    Find the operation string, if any, to pass to numexpr for this
    operation.

    Parameters
    ----------
    op : binary operator
    cls : class

    Returns
    -------
    op_str : string or None
    """
    # numexpr is available for non-sparse classes
    subtyp = getattr(cls, '_subtyp', '')
    use_numexpr = 'sparse' not in subtyp

    if not use_numexpr:
        # if we're not using numexpr, then don't pass a str_rep
        return None

    return {operator.add: '+',
            radd: '+',
            operator.mul: '*',
            rmul: '*',
            operator.sub: '-',
            rsub: '-',
            operator.truediv: '/',
            rtruediv: '/',
            operator.floordiv: '//',
            rfloordiv: '//',
            operator.mod: None,  # TODO: Why None for mod but '%' for rmod?
            rmod: '%',
Loading ...