"""A collection of functions designed to help I/O with ascii files.
"""
from __future__ import division, absolute_import, print_function
__docformat__ = "restructuredtext en"
import sys
import numpy as np
import numpy.core.numeric as nx
from numpy.compat import asbytes, asunicode, bytes, basestring
if sys.version_info[0] >= 3:
from builtins import bool, int, float, complex, object, str
unicode = str
else:
from __builtin__ import bool, int, float, complex, object, unicode, str
def _decode_line(line, encoding=None):
"""Decode bytes from binary input streams.
Defaults to decoding from 'latin1'. That differs from the behavior of
np.compat.asunicode that decodes from 'ascii'.
Parameters
----------
line : str or bytes
Line to be decoded.
Returns
-------
decoded_line : unicode
Unicode in Python 2, a str (unicode) in Python 3.
"""
if type(line) is bytes:
if encoding is None:
line = line.decode('latin1')
else:
line = line.decode(encoding)
return line
def _is_string_like(obj):
"""
Check whether obj behaves like a string.
"""
try:
obj + ''
except (TypeError, ValueError):
return False
return True
def _is_bytes_like(obj):
"""
Check whether obj behaves like a bytes object.
"""
try:
obj + b''
except (TypeError, ValueError):
return False
return True
def _to_filehandle(fname, flag='r', return_opened=False):
"""
Returns the filehandle corresponding to a string or a file.
If the string ends in '.gz', the file is automatically unzipped.
Parameters
----------
fname : string, filehandle
Name of the file whose filehandle must be returned.
flag : string, optional
Flag indicating the status of the file ('r' for read, 'w' for write).
return_opened : boolean, optional
Whether to return the opening status of the file.
"""
if _is_string_like(fname):
if fname.endswith('.gz'):
import gzip
fhd = gzip.open(fname, flag)
elif fname.endswith('.bz2'):
import bz2
fhd = bz2.BZ2File(fname)
else:
fhd = file(fname, flag)
opened = True
elif hasattr(fname, 'seek'):
fhd = fname
opened = False
else:
raise ValueError('fname must be a string or file handle')
if return_opened:
return fhd, opened
return fhd
def has_nested_fields(ndtype):
"""
Returns whether one or several fields of a dtype are nested.
Parameters
----------
ndtype : dtype
Data-type of a structured array.
Raises
------
AttributeError
If `ndtype` does not have a `names` attribute.
Examples
--------
>>> dt = np.dtype([('name', 'S4'), ('x', float), ('y', float)])
>>> np.lib._iotools.has_nested_fields(dt)
False
"""
for name in ndtype.names or ():
if ndtype[name].names:
return True
return False
def flatten_dtype(ndtype, flatten_base=False):
"""
Unpack a structured data-type by collapsing nested fields and/or fields
with a shape.
Note that the field names are lost.
Parameters
----------
ndtype : dtype
The datatype to collapse
flatten_base : bool, optional
If True, transform a field with a shape into several fields. Default is
False.
Examples
--------
>>> dt = np.dtype([('name', 'S4'), ('x', float), ('y', float),
... ('block', int, (2, 3))])
>>> np.lib._iotools.flatten_dtype(dt)
[dtype('|S4'), dtype('float64'), dtype('float64'), dtype('int32')]
>>> np.lib._iotools.flatten_dtype(dt, flatten_base=True)
[dtype('|S4'), dtype('float64'), dtype('float64'), dtype('int32'),
dtype('int32'), dtype('int32'), dtype('int32'), dtype('int32'),
dtype('int32')]
"""
names = ndtype.names
if names is None:
if flatten_base:
return [ndtype.base] * int(np.prod(ndtype.shape))
return [ndtype.base]
else:
types = []
for field in names:
info = ndtype.fields[field]
flat_dt = flatten_dtype(info[0], flatten_base)
types.extend(flat_dt)
return types
class LineSplitter(object):
"""
Object to split a string at a given delimiter or at given places.
Parameters
----------
delimiter : str, int, or sequence of ints, optional
If a string, character used to delimit consecutive fields.
If an integer or a sequence of integers, width(s) of each field.
comments : str, optional
Character used to mark the beginning of a comment. Default is '#'.
autostrip : bool, optional
Whether to strip each individual field. Default is True.
"""
def autostrip(self, method):
"""
Wrapper to strip each member of the output of `method`.
Parameters
----------
method : function
Function that takes a single argument and returns a sequence of
strings.
Returns
-------
wrapped : function
The result of wrapping `method`. `wrapped` takes a single input
argument and returns a list of strings that are stripped of
white-space.
"""
return lambda input: [_.strip() for _ in method(input)]
#
def __init__(self, delimiter=None, comments='#', autostrip=True, encoding=None):
delimiter = _decode_line(delimiter)
comments = _decode_line(comments)
self.comments = comments
# Delimiter is a character
if (delimiter is None) or isinstance(delimiter, basestring):
delimiter = delimiter or None
_handyman = self._delimited_splitter
# Delimiter is a list of field widths
elif hasattr(delimiter, '__iter__'):
_handyman = self._variablewidth_splitter
idx = np.cumsum([0] + list(delimiter))
delimiter = [slice(i, j) for (i, j) in zip(idx[:-1], idx[1:])]
# Delimiter is a single integer
elif int(delimiter):
(_handyman, delimiter) = (
self._fixedwidth_splitter, int(delimiter))
else:
(_handyman, delimiter) = (self._delimited_splitter, None)
self.delimiter = delimiter
if autostrip:
self._handyman = self.autostrip(_handyman)
else:
self._handyman = _handyman
self.encoding = encoding
#
def _delimited_splitter(self, line):
"""Chop off comments, strip, and split at delimiter. """
if self.comments is not None:
line = line.split(self.comments)[0]
line = line.strip(" \r\n")
if not line:
return []
return line.split(self.delimiter)
#
def _fixedwidth_splitter(self, line):
if self.comments is not None:
line = line.split(self.comments)[0]
line = line.strip("\r\n")
if not line:
return []
fixed = self.delimiter
slices = [slice(i, i + fixed) for i in range(0, len(line), fixed)]
return [line[s] for s in slices]
#
def _variablewidth_splitter(self, line):
if self.comments is not None:
line = line.split(self.comments)[0]
if not line:
return []
slices = self.delimiter
return [line[s] for s in slices]
#
def __call__(self, line):
return self._handyman(_decode_line(line, self.encoding))
class NameValidator(object):
"""
Object to validate a list of strings to use as field names.
The strings are stripped of any non alphanumeric character, and spaces
are replaced by '_'. During instantiation, the user can define a list
of names to exclude, as well as a list of invalid characters. Names in
the exclusion list are appended a '_' character.
Once an instance has been created, it can be called with a list of
names, and a list of valid names will be created. The `__call__`
method accepts an optional keyword "default" that sets the default name
in case of ambiguity. By default this is 'f', so that names will
default to `f0`, `f1`, etc.
Parameters
----------
excludelist : sequence, optional
A list of names to exclude. This list is appended to the default
list ['return', 'file', 'print']. Excluded names are appended an
underscore: for example, `file` becomes `file_` if supplied.
deletechars : str, optional
A string combining invalid characters that must be deleted from the
names.
case_sensitive : {True, False, 'upper', 'lower'}, optional
* If True, field names are case-sensitive.
* If False or 'upper', field names are converted to upper case.
* If 'lower', field names are converted to lower case.
The default value is True.
replace_space : '_', optional
Character(s) used in replacement of white spaces.
Notes
-----
Calling an instance of `NameValidator` is the same as calling its
method `validate`.
Examples
--------
>>> validator = np.lib._iotools.NameValidator()
>>> validator(['file', 'field2', 'with space', 'CaSe'])
['file_', 'field2', 'with_space', 'CaSe']
>>> validator = np.lib._iotools.NameValidator(excludelist=['excl'],
deletechars='q',
case_sensitive='False')
>>> validator(['excl', 'field2', 'no_q', 'with space', 'CaSe'])
['excl_', 'field2', 'no_', 'with_space', 'case']
"""
#
defaultexcludelist = ['return', 'file', 'print']
defaultdeletechars = set(r"""~!@#$%^&*()-=+~\|]}[{';: /?.>,<""")
#
def __init__(self, excludelist=None, deletechars=None,
case_sensitive=None, replace_space='_'):
# Process the exclusion list ..
if excludelist is None:
excludelist = []
excludelist.extend(self.defaultexcludelist)
self.excludelist = excludelist
# Process the list of characters to delete
if deletechars is None:
delete = self.defaultdeletechars
else:
delete = set(deletechars)
delete.add('"')
self.deletechars = delete
# Process the case option .....
if (case_sensitive is None) or (case_sensitive is True):
self.case_converter = lambda x: x
elif (case_sensitive is False) or case_sensitive.startswith('u'):
self.case_converter = lambda x: x.upper()
elif case_sensitive.startswith('l'):
Loading ...