# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Avoid name clash with `pa.struct` function
import struct as _struct
cdef class Tensor(_Weakrefable):
    """
    A n-dimensional array a.k.a Tensor.

    Examples
    --------
    >>> import pyarrow as pa
    >>> import numpy as np
    >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32)
    >>> pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"])
    <pyarrow.Tensor>
    type: int32
    shape: (2, 3)
    strides: (12, 4)
    """

    def __init__(self):
        raise TypeError("Do not call Tensor's constructor directly, use one "
                        "of the `pyarrow.Tensor.from_*` functions instead.")

    cdef void init(self, const shared_ptr[CTensor]& sp_tensor):
        # Store the shared_ptr and the raw pointer obtained from it; the
        # other methods access the C++ tensor through `self.tp`.
        self.sp_tensor = sp_tensor
        self.tp = sp_tensor.get()
        self.type = pyarrow_wrap_data_type(self.tp.type())
        # Pre-pack shape and strides as Py_ssize_t arrays; __getbuffer__
        # hands out pointers into these bytes objects.
        self._ssize_t_shape = self._make_shape_or_strides_buffer(self.shape)
        self._ssize_t_strides = self._make_shape_or_strides_buffer(self.strides)

    def _make_shape_or_strides_buffer(self, values):
        """
        Make a bytes object holding an array of `values` cast to `Py_ssize_t`.
        """
        # "n" is the struct format code for a native ssize_t.
        return _struct.pack(f"{len(values)}n", *values)

    def __repr__(self):
        return ("<pyarrow.Tensor>\n"
                "type: {0.type}\n"
                "shape: {0.shape}\n"
                "strides: {0.strides}").format(self)

    @staticmethod
    def from_numpy(obj, dim_names=None):
        """
        Create a Tensor from a numpy array.

        Parameters
        ----------
        obj : numpy.ndarray
            The source numpy array
        dim_names : list, optional
            Names of each dimension of the Tensor.

        Returns
        -------
        pyarrow.Tensor

        Examples
        --------
        >>> import pyarrow as pa
        >>> import numpy as np
        >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32)
        >>> pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"])
        <pyarrow.Tensor>
        type: int32
        shape: (2, 3)
        strides: (12, 4)
        """
        cdef:
            vector[c_string] c_dim_names
            shared_ptr[CTensor] ctensor

        # When no names are given, an empty vector is passed through.
        if dim_names is not None:
            for x in dim_names:
                c_dim_names.push_back(tobytes(x))

        check_status(NdarrayToTensor(c_default_memory_pool(), obj,
                                     c_dim_names, &ctensor))
        return pyarrow_wrap_tensor(ctensor)

    def to_numpy(self):
        """
        Convert arrow::Tensor to numpy.ndarray with zero copy

        Returns
        -------
        numpy.ndarray

        Examples
        --------
        >>> import pyarrow as pa
        >>> import numpy as np
        >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32)
        >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"])
        >>> tensor.to_numpy()
        array([[  2,   2,   4],
               [  4,   5, 100]], dtype=int32)
        """
        cdef PyObject* out

        # `self` is passed along with the shared_ptr to the conversion.
        check_status(TensorToNdarray(self.sp_tensor, self, &out))
        return PyObject_to_object(out)

    def equals(self, Tensor other not None):
        """
        Return true if the tensors contain exactly equal data.

        Parameters
        ----------
        other : Tensor
            The other tensor to compare for equality.

        Returns
        -------
        bool

        Raises
        ------
        TypeError
            If `other` is None or is not a Tensor.

        Examples
        --------
        >>> import pyarrow as pa
        >>> import numpy as np
        >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32)
        >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"])
        >>> y = np.array([[2, 2, 4], [4, 5, 10]], np.int32)
        >>> tensor2 = pa.Tensor.from_numpy(y, dim_names=["a","b"])
        >>> tensor.equals(tensor)
        True
        >>> tensor.equals(tensor2)
        False
        """
        # `not None` matters: Cython accepts None for typed extension-type
        # arguments by default, and reading `other.tp` on None would be an
        # invalid memory access rather than a Python exception.
        return self.tp.Equals(deref(other.tp))

    def __eq__(self, other):
        # Only compare against other Tensors; let Python try the reflected
        # operation for anything else.
        if isinstance(other, Tensor):
            return self.equals(other)
        else:
            return NotImplemented

    def dim_name(self, i):
        """
        Returns the name of the i-th tensor dimension.

        Parameters
        ----------
        i : int
            The physical index of the tensor dimension.

        Returns
        -------
        str

        Examples
        --------
        >>> import pyarrow as pa
        >>> import numpy as np
        >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32)
        >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"])
        >>> tensor.dim_name(0)
        'dim1'
        >>> tensor.dim_name(1)
        'dim2'
        """
        # NOTE(review): `i` is forwarded to the C++ accessor without a range
        # check here -- confirm how out-of-range indices are handled there.
        return frombytes(self.tp.dim_name(i))

    @property
    def dim_names(self):
        """
        Names of this tensor dimensions.

        Examples
        --------
        >>> import pyarrow as pa
        >>> import numpy as np
        >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32)
        >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"])
        >>> tensor.dim_names
        ['dim1', 'dim2']
        """
        return [frombytes(x) for x in tuple(self.tp.dim_names())]

    @property
    def is_mutable(self):
        """
        Is this tensor mutable or immutable.

        Examples
        --------
        >>> import pyarrow as pa
        >>> import numpy as np
        >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32)
        >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"])
        >>> tensor.is_mutable
        True
        """
        return self.tp.is_mutable()

    @property
    def is_contiguous(self):
        """
        Is this tensor contiguous in memory.

        Examples
        --------
        >>> import pyarrow as pa
        >>> import numpy as np
        >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32)
        >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"])
        >>> tensor.is_contiguous
        True
        """
        return self.tp.is_contiguous()

    @property
    def ndim(self):
        """
        The dimension (n) of this tensor.

        Examples
        --------
        >>> import pyarrow as pa
        >>> import numpy as np
        >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32)
        >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"])
        >>> tensor.ndim
        2
        """
        return self.tp.ndim()

    @property
    def size(self):
        """
        The size of this tensor.

        Examples
        --------
        >>> import pyarrow as pa
        >>> import numpy as np
        >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32)
        >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"])
        >>> tensor.size
        6
        """
        return self.tp.size()

    @property
    def shape(self):
        """
        The shape of this tensor.

        Examples
        --------
        >>> import pyarrow as pa
        >>> import numpy as np
        >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32)
        >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"])
        >>> tensor.shape
        (2, 3)
        """
        # Cython knows how to convert a vector[T] to a Python list
        return tuple(self.tp.shape())

    @property
    def strides(self):
        """
        Strides of this tensor.

        Examples
        --------
        >>> import pyarrow as pa
        >>> import numpy as np
        >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32)
        >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"])
        >>> tensor.strides
        (12, 4)
        """
        return tuple(self.tp.strides())

    def __getbuffer__(self, cp.Py_buffer* buffer, int flags):
        # Check that the element type has a PEP 3118 format before writing
        # anything into `buffer`, so a failed export leaves it untouched.
        pep3118_format = self.type.pep3118_format
        if pep3118_format is None:
            raise NotImplementedError("type %s not supported for buffer "
                                      "protocol" % (self.type,))

        buffer.buf = <char *> self.tp.data().get().data()
        # NOTE(review): `buffer.format` borrows the storage of the bytes
        # object; this assumes `pep3118_format` returns a long-lived
        # object -- confirm.
        buffer.format = pep3118_format
        buffer.itemsize = self.type.bit_width // 8
        buffer.internal = NULL
        buffer.len = self.tp.size() * buffer.itemsize
        buffer.ndim = self.tp.ndim()
        buffer.obj = self
        buffer.readonly = 0 if self.tp.is_mutable() else 1
        # Shape and strides point into the bytes objects packed in init().
        buffer.shape = <Py_ssize_t *> cp.PyBytes_AsString(self._ssize_t_shape)
        buffer.strides = <Py_ssize_t *> cp.PyBytes_AsString(self._ssize_t_strides)
        buffer.suboffsets = NULL
# Named alias for a pointer to CSparseCOOIndex.
# NOTE(review): presumably declared so the pointer type can be spelled in
# casts further down in this file -- confirm against its uses.
ctypedef CSparseCOOIndex* _CSparseCOOIndexPtr
cdef class SparseCOOTensor(_Weakrefable):
"""
A sparse COO tensor.
"""
def __init__(self):
    # Direct construction is rejected; the message points callers at the
    # `from_*` factory functions.
    message = ("Do not call SparseCOOTensor's constructor directly, use one of "
               "the `pyarrow.SparseCOOTensor.from_*` functions instead.")
    raise TypeError(message)
cdef void init(self, const shared_ptr[CSparseCOOTensor]& sp_sparse_tensor):
    # Store the shared_ptr first; the raw pointer below is taken from the
    # stored copy and refers to the same C++ object.
    self.sp_sparse_tensor = sp_sparse_tensor
    self.stp = self.sp_sparse_tensor.get()
    # Wrap the C++ data type as a Python-level object.
    self.type = pyarrow_wrap_data_type(self.stp.type())
def __repr__(self):
    # Header line followed by the element type and the shape, one per line.
    template = "<pyarrow.SparseCOOTensor>\ntype: {0.type}\nshape: {0.shape}"
    return template.format(self)
@classmethod
def from_dense_numpy(cls, obj, dim_names=None):
    """
    Build an arrow::SparseCOOTensor from a dense numpy.ndarray.

    The array is first wrapped with ``Tensor.from_numpy`` and the result
    is then handed to ``cls.from_tensor``.

    Parameters
    ----------
    obj : numpy.ndarray
        The dense source array.
    dim_names : list[str], optional
        Names of the dimensions.

    Returns
    -------
    pyarrow.SparseCOOTensor
    """
    dense_tensor = Tensor.from_numpy(obj, dim_names=dim_names)
    return cls.from_tensor(dense_tensor)
@staticmethod
def from_numpy(data, coords, shape, dim_names=None):
"""
Create arrow::SparseCOOTensor from numpy.ndarrays
Loading ...