from collections.abc import Mapping
import numpy as np
import pandas as pd
def _right_squeeze(arr, stop_dim=0):
    """
    Remove trailing singleton dimensions

    Parameters
    ----------
    arr : ndarray
        Input array
    stop_dim : int
        Dimension where checking should stop so that shape[i] is not checked
        for i < stop_dim

    Returns
    -------
    squeezed : ndarray
        Array with all trailing dimensions of length 1 removed. A trailing
        dimension of length 0 is removed only when another retained
        dimension also has length 0, since otherwise the number of elements
        would change. Dimensions with index < stop_dim are always retained.
    """
    last = arr.ndim
    for s in reversed(arr.shape):
        if s > 1:
            break
        last -= 1
    last = max(last, stop_dim)
    if arr.size == 0:
        # An empty array must keep at least one zero-length axis; dropping
        # all of them would make reshape fail with a size mismatch.
        while 0 not in arr.shape[:last]:
            last += 1
    return arr.reshape(arr.shape[:last])
def array_like(obj, name, dtype=np.double, ndim=1, maxdim=None,
               shape=None, order='C', contiguous=False, optional=False):
    """
    Convert array-like to a ndarray and check conditions

    Parameters
    ----------
    obj : array_like
        An array, any object exposing the array interface, an object whose
        __array__ method returns an array, or any (nested) sequence.
    name : str
        Name of the variable to use in exceptions
    dtype : {None, numpy.dtype, str}
        Required dtype. Default is double. If None, does not change the dtype
        of obj (if present) or uses NumPy to automatically detect the dtype
    ndim : {int, None}
        Required number of dimensions of obj. If None, no check is performed.
        If the number of dimensions of obj is less than ndim, additional axes
        are inserted on the right. If it is larger, trailing singleton
        dimensions are squeezed. Ignored when ``maxdim`` is not None.
        See examples.
    maxdim : {int, None}
        Maximum allowed dimension. Use ``maxdim`` instead of ``ndim`` when
        inputs are allowed to have ndim 1, 2, ..., or maxdim.
    shape : {tuple[int], None}
        Required shape obj. If None, no check is performed. Partially
        restricted shapes can be checked using None. Only the leading
        ``min(len(shape), arr.ndim)`` dimensions are compared. See examples.
    order : {'C', 'F'}
        Order of the array
    contiguous : bool
        Ensure that the array's data is contiguous with order ``order``
    optional : bool
        Flag indicating whether None is allowed

    Returns
    -------
    ndarray
        The converted input, or None if ``optional`` is True and obj is None.

    Raises
    ------
    ValueError
        If the ``maxdim``, ``ndim`` or ``shape`` constraint is not satisfied.

    Examples
    --------
    Convert a list or pandas series to an array

    >>> import pandas as pd
    >>> x = [0, 1, 2, 3]
    >>> a = array_like(x, 'x', ndim=1)
    >>> a.shape
    (4,)

    >>> a = array_like(pd.Series(x), 'x', ndim=1)
    >>> a.shape
    (4,)

    Squeezes singleton dimensions when required

    >>> x = np.array(x).reshape((4, 1))
    >>> a = array_like(x, 'x', ndim=1)
    >>> a.shape
    (4,)

    Right-appends when required size is larger than actual

    >>> x = [0, 1, 2, 3]
    >>> a = array_like(x, 'x', ndim=2)
    >>> a.shape
    (4, 1)

    Check only the first and last dimension of the input

    >>> x = np.arange(4*10*4).reshape((4, 10, 4))
    >>> y = array_like(x, 'x', ndim=3, shape=(4, None, 4))

    Check only the first two dimensions

    >>> z = array_like(x, 'x', ndim=3, shape=(4, 10))

    Raises ValueError if constraints are not satisfied

    >>> z = array_like(x, 'x', ndim=2)
    Traceback (most recent call last):
     ...
    ValueError: x is required to have ndim 2 but has ndim 3

    >>> z = array_like(x, 'x', shape=(10, 4, 4))
    Traceback (most recent call last):
     ...
    ValueError: x is required to have shape (10, 4, 4) but has shape (4, 10, 4)

    >>> z = array_like(x, 'x', shape=(None, 4, 4))
    Traceback (most recent call last):
     ...
    ValueError: x is required to have shape (*, 4, 4) but has shape (4, 10, 4)
    """
    if optional and obj is None:
        return None
    arr = np.asarray(obj, dtype=dtype, order=order)
    if maxdim is not None:
        # maxdim takes precedence: ndim is not consulted in this branch
        if arr.ndim > maxdim:
            msg = '{0} must have ndim <= {1}'.format(name, maxdim)
            raise ValueError(msg)
    elif ndim is not None:
        if arr.ndim > ndim:
            arr = _right_squeeze(arr, stop_dim=ndim)
        elif arr.ndim < ndim:
            arr = np.reshape(arr, arr.shape + (1,) * (ndim - arr.ndim))
        if arr.ndim != ndim:
            msg = '{0} is required to have ndim {1} but has ndim {2}'
            raise ValueError(msg.format(name, ndim, arr.ndim))
    if shape is not None:
        for actual, req in zip(arr.shape, shape):
            if req is not None and actual != req:
                # Show every unrestricted dimension as '*', including a
                # trailing one, which 'None, ' would not match.
                req_shape = str(shape).replace('None', '*')
                msg = '{0} is required to have shape {1} but has shape {2}'
                raise ValueError(msg.format(name, req_shape, arr.shape))
    if contiguous:
        # Honor the requested memory layout; ascontiguousarray always
        # produces C order and would undo order='F'.
        if order in ('F', 'f'):
            arr = np.asfortranarray(arr, dtype=dtype)
        else:
            arr = np.ascontiguousarray(arr, dtype=dtype)
    return arr
class PandasWrapper(object):
    """
    Wrap array_like using the index from the original input, if pandas

    Parameters
    ----------
    pandas_obj : {Series, DataFrame}
        Object to extract the index from for wrapping

    Notes
    -----
    Raises if ``pandas_obj`` is a pandas type but obj (plus any trimmed
    observations) and ``pandas_obj`` have different numbers of elements in
    axis 0. Also raises if the ndim of obj is not 1 or 2. If ``pandas_obj``
    is not a Series or DataFrame, ``wrap`` returns a plain ndarray.
    """

    def __init__(self, pandas_obj):
        self._pandas_obj = pandas_obj
        self._is_pandas = isinstance(pandas_obj, (pd.Series, pd.DataFrame))

    def wrap(self, obj, columns=None, append=None, trim_start=0, trim_end=0):
        """
        Parameters
        ----------
        obj : {array_like}
            The value to wrap like to a pandas Series or DataFrame.
        columns : {str, list[str]}
            Column names or series name, if obj is 1d.
        append : str
            String to append to the columns to create a new column name.
            Unnamed columns are named ``append``.
        trim_start : int
            The number of observations to drop from the start of the index, so
            that the index applied is index[trim_start:].
        trim_end : int
            The number of observations to drop from the end of the index , so
            that the index applied is index[:nobs - trim_end].

        Returns
        -------
        array_like
            A pandas Series or DataFrame, depending on the shape of obj.

        Raises
        ------
        ValueError
            If the original object is pandas and obj is not 1 or 2-d, or
            the number of rows (after trimming) does not match.
        """
        obj = np.asarray(obj)
        if not self._is_pandas:
            return obj

        # Validate ndim first so a 0-d input raises the documented
        # ValueError rather than an IndexError from obj.shape[0].
        if obj.ndim not in (1, 2):
            raise ValueError('Can only wrap 1 or 2-d array_like')

        if obj.shape[0] + trim_start + trim_end != self._pandas_obj.shape[0]:
            raise ValueError('obj must have the same number of elements in '
                             'axis 0 as orig')

        index = self._pandas_obj.index
        index = index[trim_start:index.shape[0] - trim_end]

        if obj.ndim == 1:
            if columns is None:
                name = getattr(self._pandas_obj, 'name', None)
            elif isinstance(columns, str):
                name = columns
            else:
                name = columns[0]
            if append is not None:
                # str() so non-string names (e.g. integer labels) work,
                # matching the handling of column labels in the 2-d case.
                name = append if name is None else str(name) + '_' + append
            return pd.Series(obj, name=name, index=index)

        if columns is None:
            columns = getattr(self._pandas_obj, 'columns', None)
        if append is not None:
            if columns is None:
                # The original is a Series, so there are no column labels;
                # treat every column as unnamed.
                columns = [append] * obj.shape[1]
            else:
                columns = [append if c is None else str(c) + '_' + append
                           for c in columns]
        return pd.DataFrame(obj, columns=columns, index=index)
def bool_like(value, name, optional=False, strict=False):
    """
    Convert to bool or raise if not bool_like

    Parameters
    ----------
    value : object
        Value to verify
    name : str
        Variable name for exceptions
    optional : bool
        Flag indicating whether None is allowed
    strict : bool
        If True, then only allow bool. If False, allow types that support
        casting to bool.

    Returns
    -------
    converted : bool
        value converted to a bool, or None if ``optional`` is True and
        value is None.

    Raises
    ------
    TypeError
        If ``strict`` is True and value is not a bool, or if value cannot
        be cast to bool. In the latter case the underlying exception is
        attached as the cause.
    """
    if optional and value is None:
        return value
    extra_text = ' or None' if optional else ''
    if strict:
        if isinstance(value, bool):
            return value
        raise TypeError('{0} must be a bool{1}'.format(name, extra_text))

    # Collapse size-1 containers (e.g. a 1-element array) to a scalar so
    # that bool() can evaluate them.
    if hasattr(value, 'squeeze') and callable(value.squeeze):
        value = value.squeeze()
    try:
        return bool(value)
    except Exception as err:
        # Chain explicitly so the original failure is kept as the cause.
        raise TypeError('{0} must be a bool (or bool-compatible)'
                        '{1}'.format(name, extra_text)) from err
def int_like(value, name, optional=False, strict=False):
    """
    Convert to int or raise if not int_like

    Parameters
    ----------
    value : object
        Value to verify
    name : str
        Variable name for exceptions
    optional : bool
        Flag indicating whether None is allowed
    strict : bool
        If True, then only allow int or np.integer that are not bool. If False,
        allow types that support integer division by 1 and conversion to int.
        Bool and timedelta64 values, including NumPy bool scalars and
        size-1 arrays of those dtypes, are rejected in both modes.

    Returns
    -------
    converted : int
        value converted to a int, or None if ``optional`` is True and value
        is None.

    Raises
    ------
    TypeError
        If value is not integer-like under the selected mode.
    """
    if optional and value is None:
        return None
    is_bool_timedelta = isinstance(value, (bool, np.timedelta64))

    if hasattr(value, 'squeeze') and callable(value.squeeze):
        value = value.squeeze()

    # The isinstance check above misses NumPy bool scalars and size-1
    # bool/timedelta64 arrays; inspect the dtype kind so they are rejected
    # consistently with their Python-scalar counterparts.
    kind = getattr(getattr(value, 'dtype', None), 'kind', None)
    is_bool_timedelta = is_bool_timedelta or kind in ('b', 'm')

    if isinstance(value, (int, np.integer)) and not is_bool_timedelta:
        return int(value)
    elif not strict and not is_bool_timedelta:
        try:
            if value == (value // 1):
                return int(value)
        except Exception:
            # Best effort: anything that cannot be floor-divided, compared
            # or converted is reported as not integer-like below.
            pass
    extra_text = ' or None' if optional else ''
    raise TypeError('{0} must be integer_like (int or np.integer, but not bool'
                    ' or timedelta64){1}'.format(name, extra_text))
def float_like(value, name, optional=False, strict=False):
"""
Convert to float or raise if not float_like
Parameters
----------
value : object
Value to verify
name : str
Variable name for exceptions
optional : bool
Flag indicating whether None is allowed
strict : bool
If True, then only allow int, np.integer, float or np.inexact that are
not bool or complex. If False, allow complex types with 0 imag part or
any other type that is float like in the sense that it support
multiplication by 1.0 and conversion to float.
Returns
-------
converted : float
value converted to a float
"""
if optional and value is None:
return None
is_bool = isinstance(value, bool)
is_complex = isinstance(value, (complex, np.complexfloating))
if hasattr(value, 'squeeze') and callable(value.squeeze):
value = value.squeeze()
if (isinstance(value, (int, np.integer, float, np.inexact)) and
not (is_bool or is_complex)):
return float(value)
elif not strict and is_complex:
imag = np.imag(value)
if imag == 0:
return float(np.real(value))
elif not strict and not is_bool:
try:
return float(value / 1.0)
except Exception:
pass
extra_text = ' or None' if optional else ''
Loading ...