"""
Record Arrays
=============
Record arrays expose the fields of structured arrays as properties.
Most commonly, ndarrays contain elements of a single type, e.g. floats,
integers, bools etc. However, it is possible for elements to be combinations
of these using structured types, such as::
>>> a = np.array([(1, 2.0), (1, 2.0)], dtype=[('x', int), ('y', float)])
>>> a
array([(1, 2.0), (1, 2.0)],
dtype=[('x', '<i4'), ('y', '<f8')])
Here, each element consists of two fields: x (and int), and y (a float).
This is known as a structured array. The different fields are analogous
to columns in a spread-sheet. The different fields can be accessed as
one would a dictionary::
>>> a['x']
array([1, 1])
>>> a['y']
array([ 2., 2.])
Record arrays allow us to access fields as properties::
>>> ar = np.rec.array(a)
>>> ar.x
array([1, 1])
>>> ar.y
array([ 2., 2.])
"""
from __future__ import division, absolute_import, print_function
import sys
import os
import warnings
from . import numeric as sb
from . import numerictypes as nt
from numpy.compat import isfileobj, bytes, long, unicode, os_fspath
from numpy.core.overrides import set_module
from .arrayprint import get_printoptions
# All of the functions allow formats to be a dtype
__all__ = ['record', 'recarray', 'format_parser']
ndarray = sb.ndarray
_byteorderconv = {'b':'>',
'l':'<',
'n':'=',
'B':'>',
'L':'<',
'N':'=',
'S':'s',
's':'s',
'>':'>',
'<':'<',
'=':'=',
'|':'|',
'I':'|',
'i':'|'}
# formats regular expression
# allows multidimension spec with a tuple syntax in front
# of the letter code '(2,3)f4' and ' ( 2 , 3 ) f4 '
# are equally allowed
numfmt = nt.typeDict
def find_duplicate(list):
"""Find duplication in a list, return a list of duplicated elements"""
dup = []
for i in range(len(list)):
if (list[i] in list[i + 1:]):
if (list[i] not in dup):
dup.append(list[i])
return dup
@set_module('numpy')
class format_parser(object):
"""
Class to convert formats, names, titles description to a dtype.
After constructing the format_parser object, the dtype attribute is
the converted data-type:
``dtype = format_parser(formats, names, titles).dtype``
Attributes
----------
dtype : dtype
The converted data-type.
Parameters
----------
formats : str or list of str
The format description, either specified as a string with
comma-separated format descriptions in the form ``'f8, i4, a5'``, or
a list of format description strings in the form
``['f8', 'i4', 'a5']``.
names : str or list/tuple of str
The field names, either specified as a comma-separated string in the
form ``'col1, col2, col3'``, or as a list or tuple of strings in the
form ``['col1', 'col2', 'col3']``.
An empty list can be used, in that case default field names
('f0', 'f1', ...) are used.
titles : sequence
Sequence of title strings. An empty list can be used to leave titles
out.
aligned : bool, optional
If True, align the fields by padding as the C-compiler would.
Default is False.
byteorder : str, optional
If specified, all the fields will be changed to the
provided byte-order. Otherwise, the default byte-order is
used. For all available string specifiers, see `dtype.newbyteorder`.
See Also
--------
dtype, typename, sctype2char
Examples
--------
>>> np.format_parser(['f8', 'i4', 'a5'], ['col1', 'col2', 'col3'],
... ['T1', 'T2', 'T3']).dtype
dtype([(('T1', 'col1'), '<f8'), (('T2', 'col2'), '<i4'),
(('T3', 'col3'), '|S5')])
`names` and/or `titles` can be empty lists. If `titles` is an empty list,
titles will simply not appear. If `names` is empty, default field names
will be used.
>>> np.format_parser(['f8', 'i4', 'a5'], ['col1', 'col2', 'col3'],
... []).dtype
dtype([('col1', '<f8'), ('col2', '<i4'), ('col3', '|S5')])
>>> np.format_parser(['f8', 'i4', 'a5'], [], []).dtype
dtype([('f0', '<f8'), ('f1', '<i4'), ('f2', '|S5')])
"""
def __init__(self, formats, names, titles, aligned=False, byteorder=None):
self._parseFormats(formats, aligned)
self._setfieldnames(names, titles)
self._createdescr(byteorder)
self.dtype = self._descr
def _parseFormats(self, formats, aligned=0):
""" Parse the field formats """
if formats is None:
raise ValueError("Need formats argument")
if isinstance(formats, list):
if len(formats) < 2:
formats.append('')
formats = ','.join(formats)
dtype = sb.dtype(formats, aligned)
fields = dtype.fields
if fields is None:
dtype = sb.dtype([('f1', dtype)], aligned)
fields = dtype.fields
keys = dtype.names
self._f_formats = [fields[key][0] for key in keys]
self._offsets = [fields[key][1] for key in keys]
self._nfields = len(keys)
def _setfieldnames(self, names, titles):
"""convert input field names into a list and assign to the _names
attribute """
if (names):
if (type(names) in [list, tuple]):
pass
elif isinstance(names, (str, unicode)):
names = names.split(',')
else:
raise NameError("illegal input names %s" % repr(names))
self._names = [n.strip() for n in names[:self._nfields]]
else:
self._names = []
# if the names are not specified, they will be assigned as
# "f0, f1, f2,..."
# if not enough names are specified, they will be assigned as "f[n],
# f[n+1],..." etc. where n is the number of specified names..."
self._names += ['f%d' % i for i in range(len(self._names),
self._nfields)]
# check for redundant names
_dup = find_duplicate(self._names)
if _dup:
raise ValueError("Duplicate field names: %s" % _dup)
if (titles):
self._titles = [n.strip() for n in titles[:self._nfields]]
else:
self._titles = []
titles = []
if (self._nfields > len(titles)):
self._titles += [None] * (self._nfields - len(titles))
def _createdescr(self, byteorder):
descr = sb.dtype({'names':self._names,
'formats':self._f_formats,
'offsets':self._offsets,
'titles':self._titles})
if (byteorder is not None):
byteorder = _byteorderconv[byteorder[0]]
descr = descr.newbyteorder(byteorder)
self._descr = descr
class record(nt.void):
"""A data-type scalar that allows field access as attribute lookup.
"""
# manually set name and module so that this class's type shows up
# as numpy.record when printed
__name__ = 'record'
__module__ = 'numpy'
def __repr__(self):
if get_printoptions()['legacy'] == '1.13':
return self.__str__()
return super(record, self).__repr__()
def __str__(self):
if get_printoptions()['legacy'] == '1.13':
return str(self.item())
return super(record, self).__str__()
def __getattribute__(self, attr):
if attr in ['setfield', 'getfield', 'dtype']:
return nt.void.__getattribute__(self, attr)
try:
return nt.void.__getattribute__(self, attr)
except AttributeError:
pass
fielddict = nt.void.__getattribute__(self, 'dtype').fields
res = fielddict.get(attr, None)
if res:
obj = self.getfield(*res[:2])
# if it has fields return a record,
# otherwise return the object
try:
dt = obj.dtype
except AttributeError:
#happens if field is Object type
return obj
if dt.fields:
return obj.view((self.__class__, obj.dtype.fields))
return obj
else:
raise AttributeError("'record' object has no "
"attribute '%s'" % attr)
def __setattr__(self, attr, val):
if attr in ['setfield', 'getfield', 'dtype']:
raise AttributeError("Cannot set '%s' attribute" % attr)
fielddict = nt.void.__getattribute__(self, 'dtype').fields
res = fielddict.get(attr, None)
if res:
return self.setfield(val, *res[:2])
else:
if getattr(self, attr, None):
return nt.void.__setattr__(self, attr, val)
else:
raise AttributeError("'record' object has no "
"attribute '%s'" % attr)
def __getitem__(self, indx):
obj = nt.void.__getitem__(self, indx)
# copy behavior of record.__getattribute__,
if isinstance(obj, nt.void) and obj.dtype.fields:
return obj.view((self.__class__, obj.dtype.fields))
else:
# return a single element
return obj
def pprint(self):
"""Pretty-print all fields."""
# pretty-print all fields
names = self.dtype.names
maxlen = max(len(name) for name in names)
fmt = '%% %ds: %%s' % maxlen
rows = [fmt % (name, getattr(self, name)) for name in names]
return "\n".join(rows)
# The recarray is almost identical to a standard array (which supports
# named fields already) The biggest difference is that it can use
# attribute-lookup to find the fields and it is constructed using
# a record.
# If byteorder is given it forces a particular byteorder on all
# the fields (and any subfields)
class recarray(ndarray):
"""Construct an ndarray that allows field access using attributes.
Arrays may have a data-types containing fields, analogous
to columns in a spread sheet. An example is ``[(x, int), (y, float)]``,
where each entry in the array is a pair of ``(int, float)``. Normally,
these attributes are accessed using dictionary lookups such as ``arr['x']``
and ``arr['y']``. Record arrays allow the fields to be accessed as members
of the array, using ``arr.x`` and ``arr.y``.
Parameters
----------
shape : tuple
Shape of output array.
dtype : data-type, optional
The desired data-type. By default, the data-type is determined
from `formats`, `names`, `titles`, `aligned` and `byteorder`.
formats : list of data-types, optional
A list containing the data-types for the different columns, e.g.
``['i4', 'f8', 'i4']``. `formats` does *not* support the new
convention of using types directly, i.e. ``(int, float, int)``.
Note that `formats` must be a list, not a tuple.
Given that `formats` is somewhat limited, we recommend specifying
`dtype` instead.
names : tuple of str, optional
The name of each column, e.g. ``('x', 'y', 'z')``.
buf : buffer, optional
By default, a new array is created of the given shape and data-type.
If `buf` is specified and is an object exposing the buffer interface,
the array will use the memory from the existing buffer. In this case,
the `offset` and `strides` keywords are available.
Other Parameters
----------------
titles : tuple of str, optional
Aliases for column names. For example, if `names` were
``('x', 'y', 'z')`` and `titles` is
``('x_coordinate', 'y_coordinate', 'z_coordinate')``, then
``arr['x']`` is equivalent to both ``arr.x`` and ``arr.x_coordinate``.
byteorder : {'<', '>', '='}, optional
Byte-order for all fields.
Loading ...