Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

arrow-nightlies / pyarrow   python

Repository URL to install this package:

/ tests / test_sparse_tensor.py

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import pytest
import sys
import weakref

import numpy as np
import pyarrow as pa

try:
    from scipy.sparse import csr_matrix, coo_matrix
except ImportError:
    coo_matrix = None
    csr_matrix = None

try:
    import sparse
except ImportError:
    sparse = None


# (NumPy dtype string, Arrow data type) pairs used to parametrize the
# dtype-dependent tests in this module.
tensor_type_pairs = [
    # signed integers
    ('i1', pa.int8()), ('i2', pa.int16()),
    ('i4', pa.int32()), ('i8', pa.int64()),
    # unsigned integers
    ('u1', pa.uint8()), ('u2', pa.uint16()),
    ('u4', pa.uint32()), ('u8', pa.uint64()),
    # floating point
    ('f2', pa.float16()), ('f4', pa.float32()), ('f8', pa.float64()),
]


@pytest.mark.parametrize('sparse_tensor_type', [
    pa.SparseCSRMatrix,
    pa.SparseCSCMatrix,
    pa.SparseCOOTensor,
    pa.SparseCSFTensor,
])
def test_sparse_tensor_attrs(sparse_tensor_type):
    """Check basic attributes of a sparse tensor built from a dense array.

    The tensor is created with ``from_dense_numpy`` from a 4x6 array holding
    six non-zero entries, and its ndim, size, shape, mutability, dimension
    names and non-zero count are verified.  Finally the tensor must be
    weak-referenceable, and the weak reference must be dead once the only
    local name bound to it is deleted.
    """
    names = ('x', 'y')
    dense = np.array([
        [8, 0, 2, 0, 0, 0],
        [0, 0, 0, 0, 0, 5],
        [3, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 4, 6],
    ])
    tensor = sparse_tensor_type.from_dense_numpy(dense, names)

    assert tensor.ndim == 2
    assert tensor.size == 24
    assert tensor.shape == dense.shape
    assert tensor.is_mutable
    assert tensor.dim_name(0) == names[0]
    assert tensor.dim_names == names
    assert tensor.non_zero_length == 6

    # `tensor` is the only strong reference held by this test, so removing
    # it is expected to let the weak reference expire.
    ref = weakref.ref(tensor)
    assert ref() is not None
    del tensor
    assert ref() is None


def test_sparse_coo_tensor_base_object():
    """Check reference counting and results of SparseCOOTensor.to_numpy().

    A SparseCOOTensor is built from a dense 4x6 array.  The test verifies
    that calling ``to_numpy()`` raises the tensor's reference count by
    exactly two, then rebinds the local name to None before comparing the
    returned arrays with the expected values and coordinates.
    """
    # Expected non-zero values as a column and their (row, column)
    # coordinates, one coordinate pair per row after the transpose.
    expected_data = np.array([[8, 2, 5, 3, 4, 6]]).T
    expected_coords = np.array([
        [0, 0, 1, 2, 3, 3],
        [0, 2, 5, 0, 4, 5],
    ]).T
    array = np.array([
        [8, 0, 2, 0, 0, 0],
        [0, 0, 0, 0, 0, 5],
        [3, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 4, 6],
    ])
    sparse_tensor = pa.SparseCOOTensor.from_dense_numpy(array)
    # Baseline reference count, taken before any NumPy views exist.
    n = sys.getrefcount(sparse_tensor)
    result_data, result_coords = sparse_tensor.to_numpy()
    assert sparse_tensor.has_canonical_format
    # NOTE(review): +2 presumably because each of the two returned arrays
    # keeps the tensor alive as its base object -- confirm.
    assert sys.getrefcount(sparse_tensor) == n + 2

    # Drop this test's own reference before inspecting the results.
    sparse_tensor = None
    assert np.array_equal(expected_data, result_data)
    assert np.array_equal(expected_coords, result_coords)
    assert result_coords.flags.c_contiguous  # row-major


def test_sparse_csr_matrix_base_object():
    """Check reference counting and results of SparseCSRMatrix.to_numpy().

    A SparseCSRMatrix is built from a dense 4x6 array.  The test verifies
    that calling ``to_numpy()`` raises the matrix's reference count by
    exactly three, then rebinds the local name to None before comparing
    the returned data, indptr and indices arrays with the expected ones.
    """
    # Expected non-zero values (as a column), row pointers and column
    # indices for the dense array below.
    data = np.array([[8, 2, 5, 3, 4, 6]]).T
    indptr = np.array([0, 2, 3, 4, 6])
    indices = np.array([0, 2, 5, 0, 4, 5])
    array = np.array([
        [8, 0, 2, 0, 0, 0],
        [0, 0, 0, 0, 0, 5],
        [3, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 4, 6],
    ])
    sparse_tensor = pa.SparseCSRMatrix.from_dense_numpy(array)
    # Baseline reference count, taken before any NumPy views exist.
    n = sys.getrefcount(sparse_tensor)
    result_data, result_indptr, result_indices = sparse_tensor.to_numpy()
    # NOTE(review): +3 presumably because each of the three returned arrays
    # keeps the matrix alive as its base object -- confirm.
    assert sys.getrefcount(sparse_tensor) == n + 3

    # Drop this test's own reference before inspecting the results.
    sparse_tensor = None
    assert np.array_equal(data, result_data)
    assert np.array_equal(indptr, result_indptr)
    assert np.array_equal(indices, result_indices)


def test_sparse_csf_tensor_base_object():
    """Check reference counting and results of SparseCSFTensor.to_numpy().

    A SparseCSFTensor is built from a dense 4x6 array.  The test verifies
    that calling ``to_numpy()`` raises the tensor's reference count by
    exactly four, then rebinds the local name to None before comparing the
    returned data, the first indptr array and the first two indices arrays
    with the expected ones.
    """
    # Expected non-zero values (as a column), plus the expected indptr and
    # indices arrays, which to_numpy() returns as indexable sequences.
    data = np.array([[8, 2, 5, 3, 4, 6]]).T
    indptr = [np.array([0, 2, 3, 4, 6])]
    indices = [
        np.array([0, 1, 2, 3]),
        np.array([0, 2, 5, 0, 4, 5])
    ]
    array = np.array([
        [8, 0, 2, 0, 0, 0],
        [0, 0, 0, 0, 0, 5],
        [3, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 4, 6],
    ])
    sparse_tensor = pa.SparseCSFTensor.from_dense_numpy(array)
    # Baseline reference count, taken before any NumPy views exist.
    n = sys.getrefcount(sparse_tensor)
    result_data, result_indptr, result_indices = sparse_tensor.to_numpy()
    # NOTE(review): +4 presumably one reference per returned array
    # (1 data + 1 indptr + 2 indices) -- confirm.
    assert sys.getrefcount(sparse_tensor) == n + 4

    # Drop this test's own reference before inspecting the results.
    sparse_tensor = None
    assert np.array_equal(data, result_data)
    assert np.array_equal(indptr[0], result_indptr[0])
    assert np.array_equal(indices[0], result_indices[0])
    assert np.array_equal(indices[1], result_indices[1])


@pytest.mark.parametrize('sparse_tensor_type', [
    pa.SparseCSRMatrix,
    pa.SparseCSCMatrix,
    pa.SparseCOOTensor,
    pa.SparseCSFTensor,
])
def test_sparse_tensor_equals(sparse_tensor_type):
    """Check ``equals``, ``==`` and ``!=`` on sparse tensors.

    Two tensors built from the same values (one from a strided view, one
    from a contiguous copy) must compare equal; after one element of the
    source data is changed, the tensors must compare unequal.
    """
    def check(lhs, rhs, expect_equal):
        # All three comparison spellings must agree with the expectation.
        assert bool(lhs.equals(rhs)) is expect_equal
        assert bool(lhs == rhs) is expect_equal
        assert bool(lhs != rhs) is not expect_equal

    # Every other column of a random 10x6 array: a non-contiguous view.
    values = np.random.randn(10, 6)[:, ::2]
    from_view = sparse_tensor_type.from_dense_numpy(values)
    from_copy = sparse_tensor_type.from_dense_numpy(
        np.ascontiguousarray(values))
    check(from_view, from_copy, True)

    # Modify a single element in a private copy and rebuild the second
    # tensor from it.
    values = values.copy()
    values[9, 0] = 1.0
    from_copy = sparse_tensor_type.from_dense_numpy(
        np.ascontiguousarray(values))
    check(from_view, from_copy, False)


@pytest.mark.parametrize('dtype_str,arrow_type', tensor_type_pairs)
def test_sparse_coo_tensor_from_dense(dtype_str, arrow_type):
    """Build a SparseCOOTensor from dense input and check its contents.

    The same dense 4x6 data is converted once from a NumPy array and once
    from a ``pa.Tensor``; in both cases the Arrow type, the non-zero values
    and the coordinates returned by ``to_numpy()`` are verified.
    """
    np_dtype = np.dtype(dtype_str)
    dense = np.array([
        [8, 0, 2, 0, 0, 0],
        [0, 0, 0, 0, 0, 5],
        [3, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 4, 6],
    ]).astype(np_dtype)
    want_values = np.array([[8, 2, 5, 3, 4, 6]]).T.astype(np_dtype)
    want_coords = np.array([
        [0, 0, 1, 2, 3, 3],
        [0, 2, 5, 0, 4, 5],
    ]).T
    dense_tensor = pa.Tensor.from_numpy(dense)

    def check(coo):
        # repr() is called only to make sure it does not raise.
        repr(coo)
        got_values, got_coords = coo.to_numpy()
        assert coo.type == arrow_type
        assert np.array_equal(want_values, got_values)
        assert np.array_equal(want_coords, got_coords)

    # From a NumPy array, then from a Tensor.
    check(pa.SparseCOOTensor.from_dense_numpy(dense))
    check(pa.SparseCOOTensor.from_tensor(dense_tensor))


@pytest.mark.parametrize('dtype_str,arrow_type', tensor_type_pairs)
def test_sparse_csr_matrix_from_dense(dtype_str, arrow_type):
    """Build a SparseCSRMatrix from dense input and check its contents.

    The same dense 4x6 data is converted once from a NumPy array and once
    from a ``pa.Tensor``; in both cases the Arrow type and the data, indptr
    and indices arrays returned by ``to_numpy()`` are verified.
    """
    np_dtype = np.dtype(dtype_str)
    dense = np.array([
        [8, 0, 2, 0, 0, 0],
        [0, 0, 0, 0, 0, 5],
        [3, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 4, 6],
    ]).astype(np_dtype)
    want_values = np.array([[8, 2, 5, 3, 4, 6]]).T.astype(np_dtype)
    want_indptr = np.array([0, 2, 3, 4, 6])
    want_indices = np.array([0, 2, 5, 0, 4, 5])
    dense_tensor = pa.Tensor.from_numpy(dense)

    def check(csr):
        # repr() is called only to make sure it does not raise.
        repr(csr)
        got_values, got_indptr, got_indices = csr.to_numpy()
        assert csr.type == arrow_type
        assert np.array_equal(want_values, got_values)
        assert np.array_equal(want_indptr, got_indptr)
        assert np.array_equal(want_indices, got_indices)

    # From a NumPy array, then from a Tensor.
    check(pa.SparseCSRMatrix.from_dense_numpy(dense))
    check(pa.SparseCSRMatrix.from_tensor(dense_tensor))


@pytest.mark.parametrize('dtype_str,arrow_type', tensor_type_pairs)
def test_sparse_csf_tensor_from_dense_numpy(dtype_str, arrow_type):
    """Build a SparseCSFTensor from a dense NumPy array and check it.

    Verifies the Arrow type, the non-zero values, the first indptr array
    and the first two indices arrays returned by ``to_numpy()``.
    """
    np_dtype = np.dtype(dtype_str)
    dense = np.array([
        [8, 0, 2, 0, 0, 0],
        [0, 0, 0, 0, 0, 5],
        [3, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 4, 6],
    ]).astype(np_dtype)
    want_values = np.array([[8, 2, 5, 3, 4, 6]]).T.astype(np_dtype)
    want_indptr = [np.array([0, 2, 3, 4, 6])]
    want_indices = [
        np.array([0, 1, 2, 3]),
        np.array([0, 2, 5, 0, 4, 5]),
    ]

    csf = pa.SparseCSFTensor.from_dense_numpy(dense)
    # repr() is called only to make sure it does not raise.
    repr(csf)
    got_values, got_indptr, got_indices = csf.to_numpy()
    assert csf.type == arrow_type
    assert np.array_equal(want_values, got_values)
    assert np.array_equal(want_indptr[0], got_indptr[0])
    for level in range(2):
        assert np.array_equal(want_indices[level], got_indices[level])


@pytest.mark.parametrize('dtype_str,arrow_type', tensor_type_pairs)
def test_sparse_csf_tensor_from_dense_tensor(dtype_str, arrow_type):
    """Build a SparseCSFTensor from a dense ``pa.Tensor`` and check it.

    Verifies the Arrow type, the non-zero values, the first indptr array
    and the first two indices arrays returned by ``to_numpy()``.
    """
    np_dtype = np.dtype(dtype_str)
    dense = np.array([
        [8, 0, 2, 0, 0, 0],
        [0, 0, 0, 0, 0, 5],
        [3, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 4, 6],
    ]).astype(np_dtype)
    want_values = np.array([[8, 2, 5, 3, 4, 6]]).T.astype(np_dtype)
    want_indptr = [np.array([0, 2, 3, 4, 6])]
    want_indices = [
        np.array([0, 1, 2, 3]),
        np.array([0, 2, 5, 0, 4, 5]),
    ]
    dense_tensor = pa.Tensor.from_numpy(dense)

    csf = pa.SparseCSFTensor.from_tensor(dense_tensor)
    # repr() is called only to make sure it does not raise.
    repr(csf)
    got_values, got_indptr, got_indices = csf.to_numpy()
    assert csf.type == arrow_type
    assert np.array_equal(want_values, got_values)
    assert np.array_equal(want_indptr[0], got_indptr[0])
    for level in range(2):
        assert np.array_equal(want_indices[level], got_indices[level])


@pytest.mark.parametrize('dtype_str,arrow_type', tensor_type_pairs)
def test_sparse_coo_tensor_numpy_roundtrip(dtype_str, arrow_type):
    """Round-trip COO components through SparseCOOTensor.

    A tensor is created with ``from_numpy`` from explicit values,
    coordinates, shape and dimension names; ``to_numpy()`` must hand back
    the same values and coordinates, and the Arrow type and dimension
    names must match what was supplied.
    """
    np_dtype = np.dtype(dtype_str)
    axis_names = ('x', 'y')
    dense_shape = (4, 6)
    values = np.array([[1, 2, 3, 4, 5, 6]]).T.astype(np_dtype)
    positions = np.array([
        [0, 0, 2, 3, 1, 3],
        [0, 2, 0, 4, 5, 5],
    ]).T

    coo = pa.SparseCOOTensor.from_numpy(
        values, positions, dense_shape, axis_names)
    # repr() is called only to make sure it does not raise.
    repr(coo)
    got_values, got_positions = coo.to_numpy()
    assert coo.type == arrow_type
    assert np.array_equal(values, got_values)
    assert np.array_equal(positions, got_positions)
    assert coo.dim_names == axis_names


@pytest.mark.parametrize('dtype_str,arrow_type', tensor_type_pairs)
def test_sparse_csr_matrix_numpy_roundtrip(dtype_str, arrow_type):
    """Round-trip CSR components through SparseCSRMatrix.

    A matrix is created with ``from_numpy`` from explicit data, indptr,
    indices, shape and dimension names; ``to_numpy()`` must hand back the
    same three arrays, and the Arrow type and dimension names must match
    what was supplied.
    """
    np_dtype = np.dtype(dtype_str)
    axis_names = ('x', 'y')
    dense_shape = (4, 6)
    values = np.array([[8, 2, 5, 3, 4, 6]]).T.astype(np_dtype)
    row_ptr = np.array([0, 2, 3, 4, 6])
    col_idx = np.array([0, 2, 5, 0, 4, 5])

    csr = pa.SparseCSRMatrix.from_numpy(
        values, row_ptr, col_idx, dense_shape, axis_names)
    # repr() is called only to make sure it does not raise.
    repr(csr)
    got_values, got_row_ptr, got_col_idx = csr.to_numpy()
    assert csr.type == arrow_type
    assert np.array_equal(values, got_values)
    assert np.array_equal(row_ptr, got_row_ptr)
    assert np.array_equal(col_idx, got_col_idx)
    assert csr.dim_names == axis_names


@pytest.mark.parametrize('dtype_str,arrow_type', tensor_type_pairs)
def test_sparse_csf_tensor_numpy_roundtrip(dtype_str, arrow_type):
    dtype = np.dtype(dtype_str)
Loading ...