"""
This module contains a set of functions for vectorized string
operations and methods.
.. note::
The `chararray` class exists for backwards compatibility with
Numarray; it is not recommended for new development. Starting from numpy
1.4, if one needs arrays of strings, it is recommended to use arrays of
`dtype` `object_`, `string_` or `unicode_`, and use the free functions
in the `numpy.char` module for fast vectorized string operations.
Some methods will only be available if the corresponding string method is
available in your version of Python.
The preferred alias for `defchararray` is `numpy.char`.
"""
from __future__ import division, absolute_import, print_function
import functools
import sys
from .numerictypes import string_, unicode_, integer, object_, bool_, character
from .numeric import ndarray, compare_chararrays
from .numeric import array as narray
from numpy.core.multiarray import _vec_string
from numpy.core.overrides import set_module
from numpy.core import overrides
from numpy.compat import asbytes, long
import numpy
# Names exported by ``from <this module> import *``.
__all__ = [
    # The array class mentioned in the module docstring.
    'chararray',
    # Element-wise comparison functions.
    'equal', 'not_equal', 'greater_equal', 'less_equal', 'greater', 'less',
    # Remaining free functions, followed by the last two exported names.
    'str_len', 'add', 'multiply', 'mod',
    'capitalize', 'center', 'count', 'decode', 'encode', 'endswith',
    'expandtabs', 'find', 'index',
    'isalnum', 'isalpha', 'isdigit', 'islower', 'isspace', 'istitle',
    'isupper',
    'join', 'ljust', 'lower', 'lstrip', 'partition', 'replace',
    'rfind', 'rindex', 'rjust', 'rpartition', 'rsplit', 'rstrip',
    'split', 'splitlines', 'startswith', 'strip', 'swapcase',
    'title', 'translate', 'upper', 'zfill',
    'isnumeric', 'isdecimal',
    'array', 'asarray',
]
# Module-level integer, initialised to zero.
# NOTE(review): whatever reads or updates it is not visible in this part
# of the file.
_globalvar = 0

# Version-neutral aliases for the text and byte string types.
if sys.version_info[0] < 3:
    # Python 2: ``unicode`` is the text type and ``str`` holds bytes.
    _unicode = unicode
    _bytes = str
else:
    # Python 3: ``str`` is the text type and ``bytes`` holds bytes.
    _unicode = str
    _bytes = bytes

# Private reference to the builtin ``len``.
# NOTE(review): presumably kept because the name ``len`` is rebound
# further down the module -- confirm.
_len = len
# Decorator factory applied to the public functions of this module:
# ``overrides.array_function_dispatch`` with ``module`` pre-bound to
# 'numpy.char'.
array_function_dispatch = functools.partial(
    overrides.array_function_dispatch,
    module='numpy.char',
)
def _use_unicode(*args):
    """
    Pick the output string type for an operation on several operands.

    The result is `unicode_` as soon as one operand is unicode, and
    `string_` otherwise.

    Parameters
    ----------
    *args : str, unicode or array_like
        The operands taking part in the operation.

    Returns
    -------
    out : type
        `unicode_` if any operand is a text string or converts (via
        `numpy.asarray`) to an array whose dtype type subclasses
        `unicode_`; `string_` in every other case, including when no
        operands are given.
    """
    def _is_unicode(operand):
        # Cheap scalar check first; only fall back to an array
        # conversion when the operand is not a plain text string.
        if isinstance(operand, _unicode):
            return True
        return issubclass(numpy.asarray(operand).dtype.type, unicode_)

    # ``any`` stops at the first unicode operand, so later operands are
    # never converted.
    if any(_is_unicode(operand) for operand in args):
        return unicode_
    return string_
def _to_string_or_unicode_array(result):
    """
    Convert an intermediate object array back to a string/unicode array.

    The array is unpacked with ``tolist()`` and the resulting Python
    list is handed to `numpy.asarray`, which selects the dtype from the
    list contents.

    Parameters
    ----------
    result : ndarray
        Array used as an intermediary; it must provide ``tolist()``.

    Returns
    -------
    out : ndarray
        Array built from the list form of `result`.
    """
    as_python_list = result.tolist()
    return numpy.asarray(as_python_list)
def _clean_args(*args):
    """
    Trim an argument list at its first ``None``.

    Many Python string methods with optional arguments do not accept
    ``None`` as a stand-in for the default value.  Before delegating to
    such a method, the first ``None`` argument and everything after it
    is dropped.

    Parameters
    ----------
    *args : object
        Candidate positional arguments, in call order.

    Returns
    -------
    out : list
        The arguments that come before the first ``None``; all of them
        when no argument is ``None``.
    """
    for position, value in enumerate(args):
        if value is None:
            # Keep only what came before the first None.
            return list(args[:position])
    return list(args)
def _get_num_chars(a):
    """
    Return the number of characters per field of a string array.

    This hides the fact that a unicode array spends four bytes of
    ``itemsize`` on every character.

    Parameters
    ----------
    a : ndarray
        String or unicode array.

    Returns
    -------
    out : int
        ``a.itemsize // 4`` when the dtype type of `a` subclasses
        `unicode_`, plain ``a.itemsize`` otherwise.
    """
    if not issubclass(a.dtype.type, unicode_):
        return a.itemsize
    return a.itemsize // 4
def _binary_op_dispatcher(x1, x2):
    """
    Dispatcher handed to `array_function_dispatch` by the two-operand
    functions of this module; returns both operands as a tuple.
    """
    operands = x1, x2
    return operands
@array_function_dispatch(_binary_op_dispatcher)
def equal(x1, x2):
    """
    Test two string arrays for equality, element by element.

    Whitespace characters at the end of each string are stripped before
    the comparison is made, which is where this function differs from
    `numpy.equal`.  The stripping is kept for backward-compatibility
    with numarray.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray or bool
        Array of bools holding ``x1 == x2`` per element, or a single
        bool if `x1` and `x2` are scalars.

    See Also
    --------
    not_equal, greater_equal, less_equal, greater, less
    """
    cmp_symbol = '=='
    return compare_chararrays(x1, x2, cmp_symbol, True)
@array_function_dispatch(_binary_op_dispatcher)
def not_equal(x1, x2):
    """
    Test two string arrays for inequality, element by element.

    Whitespace characters at the end of each string are stripped before
    the comparison is made, which is where this function differs from
    `numpy.not_equal`.  The stripping is kept for
    backward-compatibility with numarray.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray or bool
        Array of bools holding ``x1 != x2`` per element, or a single
        bool if `x1` and `x2` are scalars.

    See Also
    --------
    equal, greater_equal, less_equal, greater, less
    """
    cmp_symbol = '!='
    return compare_chararrays(x1, x2, cmp_symbol, True)
@array_function_dispatch(_binary_op_dispatcher)
def greater_equal(x1, x2):
    """
    Test element-wise whether `x1` is greater than or equal to `x2`.

    Whitespace characters at the end of each string are stripped before
    the comparison is made, which is where this function differs from
    `numpy.greater_equal`.  The stripping is kept for
    backward-compatibility with numarray.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray or bool
        Array of bools holding ``x1 >= x2`` per element, or a single
        bool if `x1` and `x2` are scalars.

    See Also
    --------
    equal, not_equal, less_equal, greater, less
    """
    cmp_symbol = '>='
    return compare_chararrays(x1, x2, cmp_symbol, True)
@array_function_dispatch(_binary_op_dispatcher)
def less_equal(x1, x2):
    """
    Test element-wise whether `x1` is less than or equal to `x2`.

    Whitespace characters at the end of each string are stripped before
    the comparison is made, which is where this function differs from
    `numpy.less_equal`.  The stripping is kept for
    backward-compatibility with numarray.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray or bool
        Array of bools holding ``x1 <= x2`` per element, or a single
        bool if `x1` and `x2` are scalars.

    See Also
    --------
    equal, not_equal, greater_equal, greater, less
    """
    cmp_symbol = '<='
    return compare_chararrays(x1, x2, cmp_symbol, True)
@array_function_dispatch(_binary_op_dispatcher)
def greater(x1, x2):
    """
    Test element-wise whether `x1` is strictly greater than `x2`.

    Whitespace characters at the end of each string are stripped before
    the comparison is made, which is where this function differs from
    `numpy.greater`.  The stripping is kept for backward-compatibility
    with numarray.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray or bool
        Array of bools holding ``x1 > x2`` per element, or a single
        bool if `x1` and `x2` are scalars.

    See Also
    --------
    equal, not_equal, greater_equal, less_equal, less
    """
    cmp_symbol = '>'
    return compare_chararrays(x1, x2, cmp_symbol, True)
@array_function_dispatch(_binary_op_dispatcher)
def less(x1, x2):
    """
    Return (x1 < x2) element-wise.

    Unlike `numpy.less`, this comparison is performed by first
    stripping whitespace characters from the end of the string.  This
    behavior is provided for backward-compatibility with numarray.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray or bool
        Output array of bools, or a single bool if x1 and x2 are scalars.

    See Also
    --------
    equal, not_equal, greater_equal, less_equal, greater
    """
    # The docstring used to contrast this function with `numpy.greater`
    # (copied from `greater`); the matching plain-numpy function for a
    # '<' comparison is `numpy.less`.
    return compare_chararrays(x1, x2, '<', True)
def _unary_op_dispatcher(a):
    """
    Dispatcher handed to `array_function_dispatch` by the one-operand
    functions of this module; returns the operand in a 1-tuple.
    """
    operands = (a,)
    return operands
@array_function_dispatch(_unary_op_dispatcher)
def str_len(a):
    """
    Compute the length of every string in `a`.

    The ``__len__`` method is applied to each element through
    `_vec_string`.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of integers, one ``len(a)`` value per element.

    See Also
    --------
    len : The builtin applied to a single string.
    """
    method_name = '__len__'
    return _vec_string(a, integer, method_name)
@array_function_dispatch(_binary_op_dispatcher)
def add(x1, x2):
    """
    Concatenate two arrays of str or unicode, element by element.

    Arrays `x1` and `x2` must have the same shape.

    Parameters
    ----------
    x1 : array_like of str or unicode
        Input array supplying the left-hand part of each result.
    x2 : array_like of str or unicode
        Input array supplying the right-hand part of each result.

    Returns
    -------
    add : ndarray
        Output array of `string_` or `unicode_`, depending on input
        types, of the same shape as `x1` and `x2`.
    """
    left = numpy.asarray(x1)
    right = numpy.asarray(x2)
    # Each output field is sized to hold the widest string of either
    # input, back to back.
    width = _get_num_chars(left) + _get_num_chars(right)
    # unicode_ if either input is unicode, string_ otherwise.
    kind = _use_unicode(left, right)
    out_spec = (kind, width)
    return _vec_string(left, out_spec, '__add__', (right,))
def _multiply_dispatcher(a, i):
    """
    Dispatcher handed to `array_function_dispatch` by `multiply`.

    Only `a` is returned (in a 1-tuple); `i` is accepted so that the
    signature matches `multiply`, but it is not part of the result.
    """
    operands = (a,)
    return operands
@array_function_dispatch(_multiply_dispatcher)
def multiply(a, i):
"""
Return (a * i), that is string multiple concatenation,
element-wise.
Values in `i` of less than 0 are treated as 0 (which yields an
empty string).
Parameters
----------
a : array_like of str or unicode
i : array_like of ints
Returns
-------
out : ndarray
Output array of str or unicode, depending on input types
"""
a_arr = numpy.asarray(a)
i_arr = numpy.asarray(i)
Loading ...