"""
Collection of utilities to manipulate structured arrays.
Most of these functions were initially implemented by John Hunter for
matplotlib. They have been rewritten and extended for convenience.
"""
from __future__ import division, absolute_import, print_function
import sys
import itertools
import numpy as np
import numpy.ma as ma
from numpy import ndarray, recarray
from numpy.ma import MaskedArray
from numpy.ma.mrecords import MaskedRecords
from numpy.core.overrides import array_function_dispatch
from numpy.lib._iotools import _is_string_like
from numpy.compat import basestring
from numpy.testing import suppress_warnings
if sys.version_info[0] < 3:
from future_builtins import zip
_check_fill_value = np.ma.core._check_fill_value
__all__ = [
'append_fields', 'drop_fields', 'find_duplicates',
'get_fieldstructure', 'join_by', 'merge_arrays',
'rec_append_fields', 'rec_drop_fields', 'rec_join',
'recursive_fill_fields', 'rename_fields', 'stack_arrays',
]
def _recursive_fill_fields_dispatcher(input, output):
return (input, output)
@array_function_dispatch(_recursive_fill_fields_dispatcher)
def recursive_fill_fields(input, output):
"""
Fills fields from output with fields from input,
with support for nested structures.
Parameters
----------
input : ndarray
Input array.
output : ndarray
Output array.
Notes
-----
* `output` should be at least the same size as `input`
Examples
--------
>>> from numpy.lib import recfunctions as rfn
>>> a = np.array([(1, 10.), (2, 20.)], dtype=[('A', int), ('B', float)])
>>> b = np.zeros((3,), dtype=a.dtype)
>>> rfn.recursive_fill_fields(a, b)
array([(1, 10.0), (2, 20.0), (0, 0.0)],
dtype=[('A', '<i4'), ('B', '<f8')])
"""
newdtype = output.dtype
for field in newdtype.names:
try:
current = input[field]
except ValueError:
continue
if current.dtype.names:
recursive_fill_fields(current, output[field])
else:
output[field][:len(current)] = current
return output
def get_fieldspec(dtype):
"""
Produce a list of name/dtype pairs corresponding to the dtype fields
Similar to dtype.descr, but the second item of each tuple is a dtype, not a
string. As a result, this handles subarray dtypes
Can be passed to the dtype constructor to reconstruct the dtype, noting that
this (deliberately) discards field offsets.
Examples
--------
>>> dt = np.dtype([(('a', 'A'), int), ('b', float, 3)])
>>> dt.descr
[(('a', 'A'), '<i4'), ('b', '<f8', (3,))]
>>> get_fieldspec(dt)
[(('a', 'A'), dtype('int32')), ('b', dtype(('<f8', (3,))))]
"""
if dtype.names is None:
# .descr returns a nameless field, so we should too
return [('', dtype)]
else:
fields = ((name, dtype.fields[name]) for name in dtype.names)
# keep any titles, if present
return [
(name if len(f) == 2 else (f[2], name), f[0])
for name, f in fields
]
def get_names(adtype):
"""
Returns the field names of the input datatype as a tuple.
Parameters
----------
adtype : dtype
Input datatype
Examples
--------
>>> from numpy.lib import recfunctions as rfn
>>> rfn.get_names(np.empty((1,), dtype=int)) is None
True
>>> rfn.get_names(np.empty((1,), dtype=[('A',int), ('B', float)]))
('A', 'B')
>>> adtype = np.dtype([('a', int), ('b', [('ba', int), ('bb', int)])])
>>> rfn.get_names(adtype)
('a', ('b', ('ba', 'bb')))
"""
listnames = []
names = adtype.names
for name in names:
current = adtype[name]
if current.names:
listnames.append((name, tuple(get_names(current))))
else:
listnames.append(name)
return tuple(listnames) or None
def get_names_flat(adtype):
"""
Returns the field names of the input datatype as a tuple. Nested structure
are flattend beforehand.
Parameters
----------
adtype : dtype
Input datatype
Examples
--------
>>> from numpy.lib import recfunctions as rfn
>>> rfn.get_names_flat(np.empty((1,), dtype=int)) is None
True
>>> rfn.get_names_flat(np.empty((1,), dtype=[('A',int), ('B', float)]))
('A', 'B')
>>> adtype = np.dtype([('a', int), ('b', [('ba', int), ('bb', int)])])
>>> rfn.get_names_flat(adtype)
('a', 'b', 'ba', 'bb')
"""
listnames = []
names = adtype.names
for name in names:
listnames.append(name)
current = adtype[name]
if current.names:
listnames.extend(get_names_flat(current))
return tuple(listnames) or None
def flatten_descr(ndtype):
"""
Flatten a structured data-type description.
Examples
--------
>>> from numpy.lib import recfunctions as rfn
>>> ndtype = np.dtype([('a', '<i4'), ('b', [('ba', '<f8'), ('bb', '<i4')])])
>>> rfn.flatten_descr(ndtype)
(('a', dtype('int32')), ('ba', dtype('float64')), ('bb', dtype('int32')))
"""
names = ndtype.names
if names is None:
return (('', ndtype),)
else:
descr = []
for field in names:
(typ, _) = ndtype.fields[field]
if typ.names:
descr.extend(flatten_descr(typ))
else:
descr.append((field, typ))
return tuple(descr)
def _zip_dtype_dispatcher(seqarrays, flatten=None):
return seqarrays
@array_function_dispatch(_zip_dtype_dispatcher)
def zip_dtype(seqarrays, flatten=False):
newdtype = []
if flatten:
for a in seqarrays:
newdtype.extend(flatten_descr(a.dtype))
else:
for a in seqarrays:
current = a.dtype
if current.names and len(current.names) <= 1:
# special case - dtypes of 0 or 1 field are flattened
newdtype.extend(get_fieldspec(current))
else:
newdtype.append(('', current))
return np.dtype(newdtype)
@array_function_dispatch(_zip_dtype_dispatcher)
def zip_descr(seqarrays, flatten=False):
"""
Combine the dtype description of a series of arrays.
Parameters
----------
seqarrays : sequence of arrays
Sequence of arrays
flatten : {boolean}, optional
Whether to collapse nested descriptions.
"""
return zip_dtype(seqarrays, flatten=flatten).descr
def get_fieldstructure(adtype, lastname=None, parents=None,):
"""
Returns a dictionary with fields indexing lists of their parent fields.
This function is used to simplify access to fields nested in other fields.
Parameters
----------
adtype : np.dtype
Input datatype
lastname : optional
Last processed field name (used internally during recursion).
parents : dictionary
Dictionary of parent fields (used interbally during recursion).
Examples
--------
>>> from numpy.lib import recfunctions as rfn
>>> ndtype = np.dtype([('A', int),
... ('B', [('BA', int),
... ('BB', [('BBA', int), ('BBB', int)])])])
>>> rfn.get_fieldstructure(ndtype)
... # XXX: possible regression, order of BBA and BBB is swapped
{'A': [], 'B': [], 'BA': ['B'], 'BB': ['B'], 'BBA': ['B', 'BB'], 'BBB': ['B', 'BB']}
"""
if parents is None:
parents = {}
names = adtype.names
for name in names:
current = adtype[name]
if current.names:
if lastname:
parents[name] = [lastname, ]
else:
parents[name] = []
parents.update(get_fieldstructure(current, name, parents))
else:
lastparent = [_ for _ in (parents.get(lastname, []) or [])]
if lastparent:
lastparent.append(lastname)
elif lastname:
lastparent = [lastname, ]
parents[name] = lastparent or []
return parents or None
def _izip_fields_flat(iterable):
"""
Returns an iterator of concatenated fields from a sequence of arrays,
collapsing any nested structure.
"""
for element in iterable:
if isinstance(element, np.void):
for f in _izip_fields_flat(tuple(element)):
yield f
else:
yield element
def _izip_fields(iterable):
"""
Returns an iterator of concatenated fields from a sequence of arrays.
"""
for element in iterable:
if (hasattr(element, '__iter__') and
not isinstance(element, basestring)):
for f in _izip_fields(element):
yield f
elif isinstance(element, np.void) and len(tuple(element)) == 1:
for f in _izip_fields(element):
yield f
else:
yield element
def _izip_records_dispatcher(seqarrays, fill_value=None, flatten=None):
return seqarrays
@array_function_dispatch(_izip_records_dispatcher)
def izip_records(seqarrays, fill_value=None, flatten=True):
"""
Returns an iterator of concatenated items from a sequence of arrays.
Parameters
----------
seqarrays : sequence of arrays
Sequence of arrays.
fill_value : {None, integer}
Value used to pad shorter iterables.
flatten : {True, False},
Whether to
"""
# Should we flatten the items, or just use a nested approach
if flatten:
zipfunc = _izip_fields_flat
else:
zipfunc = _izip_fields
if sys.version_info[0] >= 3:
zip_longest = itertools.zip_longest
else:
zip_longest = itertools.izip_longest
for tup in zip_longest(*seqarrays, fillvalue=fill_value):
yield tuple(zipfunc(tup))
Loading ...