"""Base class for sparse matrices"""
from __future__ import division, print_function, absolute_import
import numpy as np
from scipy._lib.six import xrange
from scipy._lib._numpy_compat import broadcast_to
from .sputils import (isdense, isscalarlike, isintlike,
get_sum_dtype, validateaxis, check_reshape_kwargs,
check_shape, asmatrix)
# Public names exported by a star-import of this module.
# NOTE(review): SparseFormatWarning is defined in this module but is not
# listed here -- confirm whether that omission is intentional.
__all__ = ['spmatrix', 'isspmatrix', 'issparse',
'SparseWarning', 'SparseEfficiencyWarning']
class SparseWarning(Warning):
    """Root warning category for the sparse warning classes in this module."""
class SparseFormatWarning(SparseWarning):
    """Warning category derived from `SparseWarning`.

    Presumably intended for issues tied to a matrix storage format; it is
    not raised anywhere in the visible part of this module.
    """
class SparseEfficiencyWarning(SparseWarning):
    """Warning category derived from `SparseWarning`.

    Presumably intended for operations that are inefficient on a given
    sparse layout; it is not raised anywhere in the visible part of this
    module.
    """
# The formats that we might potentially understand.
_formats = {'csc': [0, "Compressed Sparse Column"],
'csr': [1, "Compressed Sparse Row"],
'dok': [2, "Dictionary Of Keys"],
'lil': [3, "LInked List"],
'dod': [4, "Dictionary of Dictionaries"],
'sss': [5, "Symmetric Sparse Skyline"],
'coo': [6, "COOrdinate"],
'lba': [7, "Linpack BAnded"],
'egd': [8, "Ellpack-itpack Generalized Diagonal"],
'dia': [9, "DIAgonal"],
'bsr': [10, "Block Sparse Row"],
'msr': [11, "Modified compressed Sparse Row"],
'bsc': [12, "Block Sparse Column"],
'msc': [13, "Modified compressed Sparse Column"],
'ssk': [14, "Symmetric SKyline"],
'nsk': [15, "Nonsymmetric SKyline"],
'jad': [16, "JAgged Diagonal"],
'uss': [17, "Unsymmetric Sparse Skyline"],
'vbr': [18, "Variable Block Row"],
'und': [19, "Undefined"]
}
# These univariate ufuncs preserve zeros.
_ufuncs_with_fixed_point_at_zero = frozenset([
np.sin, np.tan, np.arcsin, np.arctan, np.sinh, np.tanh, np.arcsinh,
np.arctanh, np.rint, np.sign, np.expm1, np.log1p, np.deg2rad,
np.rad2deg, np.floor, np.ceil, np.trunc, np.sqrt])
# Default value of the ``maxprint`` argument of ``spmatrix.__init__``.
MAXPRINT = 50
class spmatrix(object):
""" This class provides a base class for all sparse matrices. It
cannot be instantiated. Most of the work is provided by subclasses.
"""
__array_priority__ = 10.1
ndim = 2
def __init__(self, maxprint=MAXPRINT):
    """Initialise the attributes common to all sparse matrices.

    Parameters
    ----------
    maxprint : int, optional
        Stored on the instance as ``maxprint``; returned by `getmaxprint`.

    Raises
    ------
    ValueError
        If the instance's class is named ``'spmatrix'``, i.e. the base
        class itself is being instantiated.
    """
    self._shape = None
    # The check is by class *name*, so every concrete subclass passes.
    is_base_class = self.__class__.__name__ == 'spmatrix'
    if is_base_class:
        raise ValueError("This class is not intended"
                         " to be instantiated directly.")
    self.maxprint = maxprint
def set_shape(self, shape):
    """Reshape this matrix in place. See `reshape`."""
    # In-place semantics: ask `reshape` not to copy.
    reshaped = self.reshape(shape, copy=False)
    # The instance state is swapped wholesale below, so the replacement
    # must be stored in the same format as the current object.
    same_format = reshaped.asformat(self.format)
    self.__dict__ = same_format.__dict__
def get_shape(self):
"""Get shape of a matrix."""
return self._shape
# `shape` attribute: reads go through get_shape, assignments through
# set_shape (which reshapes the matrix in place).
shape = property(fget=get_shape, fset=set_shape)
def reshape(self, *args, **kwargs):
    """reshape(self, shape, order='C', copy=False)

    Return this matrix with a new shape, leaving its data unchanged.

    Parameters
    ----------
    shape : length-2 tuple of ints
        The requested shape; it should be compatible with the current one.
    order : {'C', 'F'}, optional
        Index order used to read and write the elements. 'C' is C-like
        order (entire first row, then second row, ...); 'F' is
        Fortran-like order (entire first column, then second column, ...).
    copy : bool, optional
        Whether attributes of self should be copied whenever possible.
        How much is actually copied depends on the sparse format in use.

    Returns
    -------
    reshaped_matrix : sparse matrix
        A sparse matrix of the given `shape`; its format is not
        necessarily the format of the current object.

    See Also
    --------
    numpy.matrix.reshape : NumPy's implementation of 'reshape' for
                           matrices
    """
    shape = check_shape(args, self.shape)
    order, copy = check_reshape_kwargs(kwargs)

    # General case: go through COO and let its reshape do the work.
    if shape != self.shape:
        as_coo = self.tocoo(copy=copy)
        return as_coo.reshape(shape, order=order, copy=False)

    # Shape already matches: no actual reshape is needed.
    return self.copy() if copy else self
def resize(self, shape):
"""Resize the matrix in-place to dimensions given by ``shape``
Any elements that lie within the new shape will remain at the same
indices, while non-zero elements lying outside the new shape are
removed.
Parameters
----------
shape : (int, int)
number of rows and columns in the new matrix
Notes
-----
The semantics are not identical to `numpy.ndarray.resize` or
`numpy.resize`. Here, the same data will be maintained at each index
before and after reshape, if that index is within the new bounds. In
numpy, resizing maintains contiguity of the array, moving elements
around in the logical matrix but not within a flattened representation.
We give no guarantees about whether the underlying data attributes
(arrays, etc.) will be modified in place or replaced with new objects.
"""
# As an inplace operation, this requires implementation in each format.
raise NotImplementedError(
'{}.resize is not implemented'.format(type(self).__name__))
def astype(self, dtype, casting='unsafe', copy=True):
"""Cast the matrix elements to a specified type.
Parameters
----------
dtype : string or numpy dtype
Typecode or data-type to which to cast the data.
casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
Controls what kind of data casting may occur.
Defaults to 'unsafe' for backwards compatibility.
'no' means the data types should not be cast at all.
'equiv' means only byte-order changes are allowed.
'safe' means only casts which can preserve values are allowed.
'same_kind' means only safe casts or casts within a kind,
like float64 to float32, are allowed.
'unsafe' means any data conversions may be done.
copy : bool, optional
If `copy` is `False`, the result might share some memory with this
matrix. If `copy` is `True`, it is guaranteed that the result and
this matrix do not share any memory.
"""
dtype = np.dtype(dtype)
if self.dtype != dtype:
return self.tocsr().astype(
dtype, casting=casting, copy=copy).asformat(self.format)
elif copy:
return self.copy()
else:
return self
def asfptype(self):
"""Upcast matrix to a floating point format (if necessary)"""
fp_types = ['f', 'd', 'F', 'D']
if self.dtype.char in fp_types:
return self
else:
for fp_type in fp_types:
if self.dtype <= np.dtype(fp_type):
return self.astype(fp_type)
raise TypeError('cannot upcast [%s] to a floating '
'point format' % self.dtype.name)
def __iter__(self):
    """Yield ``self[r, :]`` for each row index ``r``, in increasing order."""
    row_count = self.shape[0]
    for row_index in xrange(row_count):
        yield self[row_index, :]
def getmaxprint(self):
"""Maximum number of elements to display when printed."""
return self.maxprint
def count_nonzero(self):
"""Number of non-zero entries, equivalent to
np.count_nonzero(a.toarray())
Unlike getnnz() and the nnz property, which return the number of stored
entries (the length of the data attribute), this method counts the
actual number of non-zero entries in data.
"""
raise NotImplementedError("count_nonzero not implemented for %s." %
self.__class__.__name__)
def getnnz(self, axis=None):
"""Number of stored values, including explicit zeros.
Parameters
----------
axis : None, 0, or 1
Select between the number of values across the whole matrix, in
each column, or in each row.
See also
--------
count_nonzero : Number of non-zero entries
"""
raise NotImplementedError("getnnz not implemented for %s." %
self.__class__.__name__)
@property
def nnz(self):
"""Number of stored values, including explicit zeros.
See also
--------
count_nonzero : Number of non-zero entries
"""
return self.getnnz()
def getformat(self):
"""Format of a matrix representation as a string."""
return getattr(self, 'format', 'und')
def __repr__(self):
    """Return a two-line summary: dimensions, element type, number of
    stored elements and the descriptive name of the storage format."""
    format_name = _formats[self.getformat()][1]
    template = ("<%dx%d sparse matrix of type '%s'\n"
                "\twith %d stored elements in %s format>")
    fields = self.shape + (self.dtype.type, self.nnz, format_name)
    return template % fields
def __str__(self):
maxprint = self.getmaxprint()
A = self.tocoo()
# helper function, outputs "(i,j) v"
def tostr(row, col, data):
triples = zip(list(zip(row, col)), data)
return '\n'.join([(' %s\t%s' % t) for t in triples])
if self.nnz > maxprint:
half = maxprint // 2
out = tostr(A.row[:half], A.col[:half], A.data[:half])
out += "\n :\t:\n"
half = maxprint - maxprint//2
out += tostr(A.row[-half:], A.col[-half:], A.data[-half:])
else:
out = tostr(A.row, A.col, A.data)
return out
def __bool__(self): # Simple -- other ideas?
if self.shape == (1, 1):
return self.nnz != 0
else:
raise ValueError("The truth value of an array with more than one "
"element is ambiguous. Use a.any() or a.all().")
__nonzero__ = __bool__
# What should len(sparse) return? For consistency with dense matrices,
# perhaps it should be the number of rows? But for some uses the number of
# non-zeros is more important. For now, raise an exception!
def __len__(self):
raise TypeError("sparse matrix length is ambiguous; use getnnz()"
" or shape[0]")
def asformat(self, format, copy=False):
"""Return this matrix in the passed format.
Parameters
----------
format : {str, None}
The desired matrix format ("csr", "csc", "lil", "dok", "array", ...)
or None for no conversion.
copy : bool, optional
If True, the result is guaranteed to not share data with self.
Returns
-------
A : This matrix in the passed format.
"""
if format is None or format == self.format:
if copy:
return self.copy()
else:
return self
else:
try:
convert_method = getattr(self, 'to' + format)
except AttributeError:
raise ValueError('Format {} is unknown.'.format(format))
# Forward the copy kwarg, if it's accepted.
try:
return convert_method(copy=copy)
except TypeError:
return convert_method()
###################################################################
# NOTE: All arithmetic operations use csr_matrix by default.
# Therefore a new sparse matrix format just needs to define a
# .tocsr() method to provide arithmetic support. Any of these
# methods can be overridden for efficiency.
####################################################################
def multiply(self, other):
"""Point-wise multiplication by another matrix
"""
return self.tocsr().multiply(other)
def maximum(self, other):
"""Element-wise maximum between this and another matrix."""
return self.tocsr().maximum(other)
Loading ...