Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

arrow-nightlies / pyarrow   python

Repository URL to install this package:

/ tensor.pxi

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Avoid name clash with `pa.struct` function
import struct as _struct


cdef class Tensor(_Weakrefable):
    """
    An n-dimensional array a.k.a Tensor.

    Instances cannot be created through the constructor; use one of the
    ``pyarrow.Tensor.from_*`` functions such as ``Tensor.from_numpy``.
    The object also exports its data through the Python buffer protocol.

    Examples
    --------
    >>> import pyarrow as pa
    >>> import numpy as np
    >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32)
    >>> pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"])
    <pyarrow.Tensor>
    type: int32
    shape: (2, 3)
    strides: (12, 4)
    """

    def __init__(self):
        raise TypeError("Do not call Tensor's constructor directly, use one "
                        "of the `pyarrow.Tensor.from_*` functions instead.")

    cdef void init(self, const shared_ptr[CTensor]& sp_tensor):
        # Bind this wrapper to a C++ tensor: store the shared_ptr, a raw
        # pointer taken from it for member access, and the wrapped data type.
        self.sp_tensor = sp_tensor
        self.tp = sp_tensor.get()
        self.type = pyarrow_wrap_data_type(self.tp.type())
        # Shape and strides are packed once as Py_ssize_t arrays and kept on
        # the instance: __getbuffer__ hands out pointers into these bytes
        # objects instead of allocating per request.
        self._ssize_t_shape = self._make_shape_or_strides_buffer(self.shape)
        self._ssize_t_strides = self._make_shape_or_strides_buffer(self.strides)

    def _make_shape_or_strides_buffer(self, values):
        """
        Make a bytes object holding an array of `values` cast to `Py_ssize_t`.

        Parameters
        ----------
        values : sequence of int
            The values to pack (a shape or a strides tuple).

        Returns
        -------
        bytes
            ``len(values)`` native ``ssize_t`` items (struct format ``n``),
            packed back to back. Empty when `values` is empty.
        """
        return _struct.pack(f"{len(values)}n", *values)

    def __repr__(self):
        return """<pyarrow.Tensor>
type: {0.type}
shape: {0.shape}
strides: {0.strides}""".format(self)

    @staticmethod
    def from_numpy(obj, dim_names=None):
        """
        Create a Tensor from a numpy array.

        Parameters
        ----------
        obj : numpy.ndarray
            The source numpy array
        dim_names : list, optional
            Names of each dimension of the Tensor.

        Returns
        -------
        pyarrow.Tensor

        Examples
        --------
        >>> import pyarrow as pa
        >>> import numpy as np
        >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32)
        >>> pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"])
        <pyarrow.Tensor>
        type: int32
        shape: (2, 3)
        strides: (12, 4)
        """
        cdef:
            vector[c_string] c_dim_names
            shared_ptr[CTensor] ctensor

        # Leaving c_dim_names empty means "no dimension names".
        if dim_names is not None:
            for name in dim_names:
                c_dim_names.push_back(tobytes(name))

        check_status(NdarrayToTensor(c_default_memory_pool(), obj,
                                     c_dim_names, &ctensor))
        return pyarrow_wrap_tensor(ctensor)

    def to_numpy(self):
        """
        Convert arrow::Tensor to numpy.ndarray with zero copy

        Returns
        -------
        numpy.ndarray

        Examples
        --------
        >>> import pyarrow as pa
        >>> import numpy as np
        >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32)
        >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"])
        >>> tensor.to_numpy()
        array([[  2,   2,   4],
               [  4,   5, 100]], dtype=int32)
        """
        cdef PyObject* out

        # `self` is handed to the converter together with the C++ tensor.
        check_status(TensorToNdarray(self.sp_tensor, self, &out))
        return PyObject_to_object(out)

    def equals(self, Tensor other):
        """
        Return true if the tensors contain exactly equal data.

        Parameters
        ----------
        other : Tensor
            The other tensor to compare for equality.

        Returns
        -------
        bool

        Examples
        --------
        >>> import pyarrow as pa
        >>> import numpy as np
        >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32)
        >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"])
        >>> y = np.array([[2, 2, 4], [4, 5, 10]], np.int32)
        >>> tensor2 = pa.Tensor.from_numpy(y, dim_names=["a","b"])
        >>> tensor.equals(tensor)
        True
        >>> tensor.equals(tensor2)
        False
        """
        return self.tp.Equals(deref(other.tp))

    def __eq__(self, other):
        # Only Tensor-to-Tensor comparison is defined here; for anything
        # else let Python try the reflected operation.
        if not isinstance(other, Tensor):
            return NotImplemented
        return self.equals(other)

    def dim_name(self, i):
        """
        Returns the name of the i-th tensor dimension.

        Parameters
        ----------
        i : int
            The physical index of the tensor dimension.

        Returns
        -------
        str

        Examples
        --------
        >>> import pyarrow as pa
        >>> import numpy as np
        >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32)
        >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"])
        >>> tensor.dim_name(0)
        'dim1'
        >>> tensor.dim_name(1)
        'dim2'
        """
        return frombytes(self.tp.dim_name(i))

    @property
    def dim_names(self):
        """
        Names of this tensor dimensions.

        Examples
        --------
        >>> import pyarrow as pa
        >>> import numpy as np
        >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32)
        >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"])
        >>> tensor.dim_names
        ['dim1', 'dim2']
        """
        return [frombytes(x) for x in tuple(self.tp.dim_names())]

    @property
    def is_mutable(self):
        """
        Is this tensor mutable or immutable.

        Examples
        --------
        >>> import pyarrow as pa
        >>> import numpy as np
        >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32)
        >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"])
        >>> tensor.is_mutable
        True
        """
        return self.tp.is_mutable()

    @property
    def is_contiguous(self):
        """
        Is this tensor contiguous in memory.

        Examples
        --------
        >>> import pyarrow as pa
        >>> import numpy as np
        >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32)
        >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"])
        >>> tensor.is_contiguous
        True
        """
        return self.tp.is_contiguous()

    @property
    def ndim(self):
        """
        The dimension (n) of this tensor.

        Examples
        --------
        >>> import pyarrow as pa
        >>> import numpy as np
        >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32)
        >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"])
        >>> tensor.ndim
        2
        """
        return self.tp.ndim()

    @property
    def size(self):
        """
        The size of this tensor.

        Examples
        --------
        >>> import pyarrow as pa
        >>> import numpy as np
        >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32)
        >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"])
        >>> tensor.size
        6
        """
        return self.tp.size()

    @property
    def shape(self):
        """
        The shape of this tensor.

        Examples
        --------
        >>> import pyarrow as pa
        >>> import numpy as np
        >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32)
        >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"])
        >>> tensor.shape
        (2, 3)
        """
        # Cython knows how to convert a vector[T] to a Python list;
        # the list is then exposed as a tuple.
        return tuple(self.tp.shape())

    @property
    def strides(self):
        """
        Strides of this tensor.

        Examples
        --------
        >>> import pyarrow as pa
        >>> import numpy as np
        >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32)
        >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"])
        >>> tensor.strides
        (12, 4)
        """
        return tuple(self.tp.strides())

    def __getbuffer__(self, cp.Py_buffer* buffer, int flags):
        # Export the tensor's data through the Python buffer protocol.
        #
        # Raises BufferError when a writable buffer is requested for an
        # immutable tensor, and NotImplementedError when the data type has
        # no PEP 3118 format.
        #
        # A consumer that sets PyBUF_WRITABLE will write through the
        # pointer it receives, so the request has to be refused rather than
        # answered with a buffer that is merely flagged read-only.
        if (flags & cp.PyBUF_WRITABLE) and not self.tp.is_mutable():
            raise BufferError("Writable buffer requested but Arrow "
                              "tensor was not mutable")
        # Validate the format before filling in any field of `buffer`.
        pep3118_format = self.type.pep3118_format
        if pep3118_format is None:
            raise NotImplementedError("type %s not supported for buffer "
                                      "protocol" % (self.type,))
        buffer.buf = <char *> self.tp.data().get().data()
        # NOTE(review): buffer.format borrows the memory of the object
        # returned by `self.type.pep3118_format`; presumably that object is
        # kept alive by the data type -- confirm.
        buffer.format = pep3118_format
        buffer.itemsize = self.type.bit_width // 8
        buffer.internal = NULL
        buffer.len = self.tp.size() * buffer.itemsize
        buffer.ndim = self.tp.ndim()
        buffer.obj = self
        if self.tp.is_mutable():
            buffer.readonly = 0
        else:
            buffer.readonly = 1
        # shape/strides point into the bytes objects built in init(); those
        # are attributes of `self`, which is also stored in buffer.obj.
        buffer.shape = <Py_ssize_t *> cp.PyBytes_AsString(self._ssize_t_shape)
        buffer.strides = <Py_ssize_t *> cp.PyBytes_AsString(self._ssize_t_strides)
        buffer.suboffsets = NULL


# Alias for a raw pointer to CSparseCOOIndex.
# NOTE(review): presumably introduced so the pointer type can be named in
# casts/template arguments further down; its use sites are not visible
# from this part of the file -- confirm.
ctypedef CSparseCOOIndex* _CSparseCOOIndexPtr


cdef class SparseCOOTensor(_Weakrefable):
    """
    A sparse COO tensor.
    """

    def __init__(self):
        # Direct construction is rejected; the message points callers to
        # the from_* functions instead.
        message = (
            "Do not call SparseCOOTensor's constructor directly, use one of "
            "the `pyarrow.SparseCOOTensor.from_*` functions instead."
        )
        raise TypeError(message)

    cdef void init(self, const shared_ptr[CSparseCOOTensor]& sp_sparse_tensor):
        # Bind this Python wrapper to the given C++ sparse tensor.
        # Store the shared_ptr itself alongside the raw pointer taken from it.
        self.sp_sparse_tensor = sp_sparse_tensor
        # Raw pointer obtained from the shared_ptr, used for member access.
        self.stp = sp_sparse_tensor.get()
        # Wrap the tensor's data type so it is available as `self.type`.
        self.type = pyarrow_wrap_data_type(self.stp.type())

    def __repr__(self):
        # Three-line summary: class tag, element type, then shape.
        summary_lines = (
            "<pyarrow.SparseCOOTensor>",
            f"type: {self.type}",
            f"shape: {self.shape}",
        )
        return "\n".join(summary_lines)

    @classmethod
    def from_dense_numpy(cls, obj, dim_names=None):
        """
        Build a SparseCOOTensor from a dense numpy.ndarray.

        The array is first wrapped as a dense Tensor with
        ``Tensor.from_numpy``; that tensor is then passed to
        ``cls.from_tensor``.

        Parameters
        ----------
        obj : numpy.ndarray
            Data used to populate the rows.
        dim_names : list[str], optional
            Names of the dimensions.

        Returns
        -------
        pyarrow.SparseCOOTensor
        """
        dense_tensor = Tensor.from_numpy(obj, dim_names=dim_names)
        return cls.from_tensor(dense_tensor)

    @staticmethod
    def from_numpy(data, coords, shape, dim_names=None):
        """
        Create arrow::SparseCOOTensor from numpy.ndarrays
Loading ...