Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

alkaline-ml / numpy   python

Repository URL to install this package:

Version: 1.19.1 

/ core / defchararray.py

"""
This module contains a set of functions for vectorized string
operations and methods.

.. note::
   The `chararray` class exists for backwards compatibility with
   Numarray, it is not recommended for new development. Starting from numpy
   1.4, if one needs arrays of strings, it is recommended to use arrays of
   `dtype` `object_`, `string_` or `unicode_`, and use the free functions
   in the `numpy.char` module for fast vectorized string operations.

Some methods will only be available if the corresponding string method is
available in your version of Python.

The preferred alias for `defchararray` is `numpy.char`.

"""
import functools
import sys
from .numerictypes import (
    string_, unicode_, integer, int_, object_, bool_, character)
from .numeric import ndarray, compare_chararrays
from .numeric import array as narray
from numpy.core.multiarray import _vec_string
from numpy.core.overrides import set_module
from numpy.core import overrides
from numpy.compat import asbytes
import numpy

__all__ = [
    'equal', 'not_equal', 'greater_equal', 'less_equal',
    'greater', 'less', 'str_len', 'add', 'multiply', 'mod', 'capitalize',
    'center', 'count', 'decode', 'encode', 'endswith', 'expandtabs',
    'find', 'index', 'isalnum', 'isalpha', 'isdigit', 'islower', 'isspace',
    'istitle', 'isupper', 'join', 'ljust', 'lower', 'lstrip', 'partition',
    'replace', 'rfind', 'rindex', 'rjust', 'rpartition', 'rsplit',
    'rstrip', 'split', 'splitlines', 'startswith', 'strip', 'swapcase',
    'title', 'translate', 'upper', 'zfill', 'isnumeric', 'isdecimal',
    'array', 'asarray'
    ]


_globalvar = 0

array_function_dispatch = functools.partial(
    overrides.array_function_dispatch, module='numpy.char')


def _use_unicode(*args):
    """
    Helper function for determining the output type of some string
    operations.

    For an operation on two ndarrays, if at least one is unicode, the
    result should be unicode.
    """
    for x in args:
        if (isinstance(x, str) or
                issubclass(numpy.asarray(x).dtype.type, unicode_)):
            return unicode_
    return string_

def _to_string_or_unicode_array(result):
    """
    Helper function to cast a result back into a string or unicode array
    if an object array must be used as an intermediary.
    """
    return numpy.asarray(result.tolist())

def _clean_args(*args):
    """
    Helper function for delegating arguments to Python string
    functions.

    Many of the Python string operations that have optional arguments
    do not use 'None' to indicate a default value.  In these cases,
    we need to remove all None arguments, and those following them.
    """
    newargs = []
    for chk in args:
        if chk is None:
            break
        newargs.append(chk)
    return newargs

def _get_num_chars(a):
    """
    Helper function that returns the number of characters per field in
    a string or unicode array.  This is to abstract out the fact that
    for a unicode array this is itemsize / 4.
    """
    if issubclass(a.dtype.type, unicode_):
        return a.itemsize // 4
    return a.itemsize


def _binary_op_dispatcher(x1, x2):
    return (x1, x2)


@array_function_dispatch(_binary_op_dispatcher)
def equal(x1, x2):
    """
    Return (x1 == x2) element-wise.

    Unlike `numpy.equal`, this comparison is performed by first
    stripping whitespace characters from the end of the string.  This
    behavior is provided for backward-compatibility with numarray.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray or bool
        Output array of bools, or a single bool if x1 and x2 are scalars.

    See Also
    --------
    not_equal, greater_equal, less_equal, greater, less
    """
    return compare_chararrays(x1, x2, '==', True)


@array_function_dispatch(_binary_op_dispatcher)
def not_equal(x1, x2):
    """
    Return (x1 != x2) element-wise.

    Unlike `numpy.not_equal`, this comparison is performed by first
    stripping whitespace characters from the end of the string.  This
    behavior is provided for backward-compatibility with numarray.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray or bool
        Output array of bools, or a single bool if x1 and x2 are scalars.

    See Also
    --------
    equal, greater_equal, less_equal, greater, less
    """
    return compare_chararrays(x1, x2, '!=', True)


@array_function_dispatch(_binary_op_dispatcher)
def greater_equal(x1, x2):
    """
    Return (x1 >= x2) element-wise.

    Unlike `numpy.greater_equal`, this comparison is performed by
    first stripping whitespace characters from the end of the string.
    This behavior is provided for backward-compatibility with
    numarray.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray or bool
        Output array of bools, or a single bool if x1 and x2 are scalars.

    See Also
    --------
    equal, not_equal, less_equal, greater, less
    """
    return compare_chararrays(x1, x2, '>=', True)


@array_function_dispatch(_binary_op_dispatcher)
def less_equal(x1, x2):
    """
    Return (x1 <= x2) element-wise.

    Unlike `numpy.less_equal`, this comparison is performed by first
    stripping whitespace characters from the end of the string.  This
    behavior is provided for backward-compatibility with numarray.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray or bool
        Output array of bools, or a single bool if x1 and x2 are scalars.

    See Also
    --------
    equal, not_equal, greater_equal, greater, less
    """
    return compare_chararrays(x1, x2, '<=', True)


@array_function_dispatch(_binary_op_dispatcher)
def greater(x1, x2):
    """
    Return (x1 > x2) element-wise.

    Unlike `numpy.greater`, this comparison is performed by first
    stripping whitespace characters from the end of the string.  This
    behavior is provided for backward-compatibility with numarray.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray or bool
        Output array of bools, or a single bool if x1 and x2 are scalars.

    See Also
    --------
    equal, not_equal, greater_equal, less_equal, less
    """
    return compare_chararrays(x1, x2, '>', True)


@array_function_dispatch(_binary_op_dispatcher)
def less(x1, x2):
    """
    Return (x1 < x2) element-wise.

    Unlike `numpy.greater`, this comparison is performed by first
    stripping whitespace characters from the end of the string.  This
    behavior is provided for backward-compatibility with numarray.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray or bool
        Output array of bools, or a single bool if x1 and x2 are scalars.

    See Also
    --------
    equal, not_equal, greater_equal, less_equal, greater
    """
    return compare_chararrays(x1, x2, '<', True)


def _unary_op_dispatcher(a):
    return (a,)


@array_function_dispatch(_unary_op_dispatcher)
def str_len(a):
    """
    Return len(a) element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of integers

    See also
    --------
    builtins.len
    """
    # Note: __len__, etc. currently return ints, which are not C-integers.
    # Generally intp would be expected for lengths, although int is sufficient
    # due to the dtype itemsize limitation.
    return _vec_string(a, int_, '__len__')


@array_function_dispatch(_binary_op_dispatcher)
def add(x1, x2):
    """
    Return element-wise string concatenation for two arrays of str or unicode.

    Arrays `x1` and `x2` must have the same shape.

    Parameters
    ----------
    x1 : array_like of str or unicode
        Input array.
    x2 : array_like of str or unicode
        Input array.

    Returns
    -------
    add : ndarray
        Output array of `string_` or `unicode_`, depending on input types
        of the same shape as `x1` and `x2`.

    """
    arr1 = numpy.asarray(x1)
    arr2 = numpy.asarray(x2)
    out_size = _get_num_chars(arr1) + _get_num_chars(arr2)
    dtype = _use_unicode(arr1, arr2)
    return _vec_string(arr1, (dtype, out_size), '__add__', (arr2,))


def _multiply_dispatcher(a, i):
    return (a,)


@array_function_dispatch(_multiply_dispatcher)
def multiply(a, i):
    """
    Return (a * i), that is string multiple concatenation,
    element-wise.

    Values in `i` of less than 0 are treated as 0 (which yields an
    empty string).

    Parameters
    ----------
    a : array_like of str or unicode

    i : array_like of ints

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input types

    """
    a_arr = numpy.asarray(a)
    i_arr = numpy.asarray(i)
    if not issubclass(i_arr.dtype.type, integer):
        raise ValueError("Can only multiply by integers")
    out_size = _get_num_chars(a_arr) * max(int(i_arr.max()), 0)
    return _vec_string(
        a_arr, (a_arr.dtype.type, out_size), '__mul__', (i_arr,))
Loading ...