# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# cython: language_level = 3
from libc.stdint cimport uintptr_t, uint8_t, int64_t
from cpython.pycapsule cimport PyCapsule_GetPointer
from cpython.unicode cimport PyUnicode_AsUTF8AndSize
from cpython cimport (
Py_buffer,
PyBuffer_Release,
PyBUF_ANY_CONTIGUOUS,
PyBUF_FORMAT,
PyBytes_FromStringAndSize,
PyObject_GetBuffer,
PyUnicode_FromStringAndSize,
)
from nanoarrow_c cimport (
ArrowArray,
ArrowArrayAppendBytes,
ArrowArrayAppendNull,
ArrowArrayAppendString,
ArrowArrayBuffer,
ArrowArrayFinishBuilding,
ArrowArrayInitFromSchema,
ArrowArrayInitFromType,
ArrowArrayMove,
ArrowArrayRelease,
ArrowArrayStartAppending,
ArrowArrayView,
ArrowArrayViewComputeNullCount,
ArrowArrayViewInitFromSchema,
ArrowArrayViewIsNull,
ArrowArrayViewGetBytesUnsafe,
ArrowArrayViewGetBufferDataType,
ArrowArrayViewGetBufferElementSizeBits,
ArrowArrayViewGetBufferType,
ArrowArrayViewGetBufferView,
ArrowArrayViewGetNumBuffers,
ArrowArrayViewGetStringUnsafe,
ArrowArrayViewSetArray,
ArrowArrayViewSetArrayMinimal,
ArrowBitCountSet,
ArrowBuffer,
ArrowBufferMove,
ArrowBufferType,
ArrowBufferView,
ArrowSchemaInitFromType,
ArrowStringView,
ArrowType,
ArrowTypeString,
ArrowValidationLevel,
NANOARROW_BUFFER_TYPE_DATA,
NANOARROW_BUFFER_TYPE_DATA_OFFSET,
NANOARROW_BUFFER_TYPE_VARIADIC_DATA,
NANOARROW_BUFFER_TYPE_VARIADIC_SIZE,
NANOARROW_BUFFER_TYPE_TYPE_ID,
NANOARROW_BUFFER_TYPE_UNION_OFFSET,
NANOARROW_BUFFER_TYPE_VALIDITY,
NANOARROW_VALIDATION_LEVEL_DEFAULT,
NANOARROW_VALIDATION_LEVEL_FULL,
NANOARROW_VALIDATION_LEVEL_MINIMAL,
NANOARROW_VALIDATION_LEVEL_NONE,
NANOARROW_OK,
)
from nanoarrow_device_c cimport (
ARROW_DEVICE_CPU,
ArrowDeviceType,
ArrowDeviceArray,
ArrowDeviceArrayInit,
)
from nanoarrow._device cimport Device, CSharedSyncEvent
from nanoarrow._buffer cimport CBuffer, CBufferView
from nanoarrow._schema cimport CSchema, CLayout
from nanoarrow._utils cimport (
alloc_c_array,
alloc_c_device_array,
alloc_c_array_view,
c_array_shallow_copy,
c_device_array_shallow_copy,
Error
)
from typing import Iterable, Tuple, Union
from nanoarrow import _repr_utils
from nanoarrow._device import DEVICE_CPU, DeviceType
cdef class CArrayView:
    """Low-level ArrowArrayView wrapper

    This object is a literal wrapper around an ArrowArrayView. It provides field accessors
    that return Python objects and handles the structure lifecycle (i.e., initialized
    ArrowArrayView structures are always released).

    See `nanoarrow.c_array_view()` for construction and usage examples.
    """

    def __cinit__(self, object base, uintptr_t addr):
        # ``base`` is stored so that it stays referenced for as long as this
        # wrapper exists; ``addr`` is the address of the ArrowArrayView struct.
        self._base = base
        self._ptr = <ArrowArrayView*>addr
        # Until _set_array() is called the view is associated with the CPU device.
        self._event = CSharedSyncEvent(DEVICE_CPU)

    def _set_array(self, CArray array, Device device=DEVICE_CPU):
        """Point this view at ``array`` and return ``self``.

        CPU arrays go through ArrowArrayViewSetArray(); arrays on any other
        device go through ArrowArrayViewSetArrayMinimal(). A non-OK return code
        is raised via ``Error.raise_message_not_ok()``.
        """
        cdef Error error = Error()
        cdef int code

        if device is DEVICE_CPU:
            code = ArrowArrayViewSetArray(self._ptr, array._ptr, &error.c_error)
        else:
            code = ArrowArrayViewSetArrayMinimal(self._ptr, array._ptr, &error.c_error)

        error.raise_message_not_ok("ArrowArrayViewSetArray()", code)

        # Remember the array's base object: buffer() hands it to every
        # CBufferView it creates.
        self._array_base = array._base
        self._event = CSharedSyncEvent(device, <uintptr_t>array._sync_event)

        return self

    @property
    def storage_type_id(self):
        """The ``storage_type`` field of the underlying ArrowArrayView."""
        return self._ptr.storage_type

    @property
    def storage_type(self):
        """The storage type name from ArrowTypeString(), or None if it returns NULL."""
        cdef const char* type_str = ArrowTypeString(self._ptr.storage_type)
        if type_str != NULL:
            return type_str.decode('UTF-8')
        return None

    @property
    def layout(self):
        """A CLayout wrapping this view's ``layout`` field (this object is its base)."""
        return CLayout(self, <uintptr_t>&self._ptr.layout)

    def __len__(self):
        return self._ptr.length

    @property
    def length(self):
        """Same value as ``len(self)``."""
        return len(self)

    @property
    def offset(self):
        """The ``offset`` field of the underlying ArrowArrayView."""
        return self._ptr.offset

    @property
    def null_count(self):
        """The null count, computed and cached on first access when possible.

        A stored value other than -1 is returned as-is. Otherwise the count is
        set to 0 when the first buffer is not a validity buffer or its data
        pointer is NULL, and computed with ArrowArrayViewComputeNullCount() for
        CPU arrays. For non-CPU arrays with a validity buffer the stored value
        is left untouched, so -1 is returned.
        """
        if self._ptr.null_count != -1:
            return self._ptr.null_count

        cdef ArrowBufferType buffer_type = self._ptr.layout.buffer_type[0]
        cdef const uint8_t* validity_bits = self._ptr.buffer_views[0].data.as_uint8

        if buffer_type != NANOARROW_BUFFER_TYPE_VALIDITY:
            self._ptr.null_count = 0
        elif validity_bits == NULL:
            self._ptr.null_count = 0
        elif self._event.device is DEVICE_CPU:
            self._ptr.null_count = ArrowArrayViewComputeNullCount(self._ptr)

        return self._ptr.null_count

    @property
    def n_children(self):
        """The ``n_children`` field of the underlying ArrowArrayView."""
        return self._ptr.n_children

    def child(self, int64_t i):
        """Return a CArrayView for child ``i``.

        Raises IndexError when ``i`` is outside ``[0, n_children)``.
        """
        if i < 0 or i >= self._ptr.n_children:
            raise IndexError(f"{i} out of range [0, {self._ptr.n_children})")

        cdef CArrayView child = CArrayView(
            self._base,
            <uintptr_t>self._ptr.children[i]
        )

        child._event = self._event
        # buffer() passes _array_base to each CBufferView as its base object;
        # without this the child's buffer views would be created with a None
        # base and would not reference the array set on the parent view.
        child._array_base = self._array_base

        return child

    @property
    def children(self):
        """Iterate over the child views in order."""
        for i in range(self.n_children):
            yield self.child(i)

    @property
    def n_buffers(self):
        """The number of buffers reported by ArrowArrayViewGetNumBuffers()."""
        return ArrowArrayViewGetNumBuffers(self._ptr)

    def _buffer_info(self, int64_t i):
        """Return ``(buffer type, data type, element size in bits, address, size in bytes)``.

        Raises IndexError when ``i`` is outside ``[0, n_buffers)``.
        """
        if i < 0 or i >= self.n_buffers:
            # Half-open interval, matching the check above and child().
            raise IndexError(f"{i} out of range [0, {self.n_buffers})")

        cdef ArrowBufferView view = ArrowArrayViewGetBufferView(self._ptr, i)

        return (
            ArrowArrayViewGetBufferType(self._ptr, i),
            ArrowArrayViewGetBufferDataType(self._ptr, i),
            ArrowArrayViewGetBufferElementSizeBits(self._ptr, i),
            <uintptr_t>view.data.data,
            view.size_bytes
        )

    def buffer_type(self, int64_t i):
        """Return the type of buffer ``i`` as a string (``"none"`` if unrecognized)."""
        buffer_type = self._buffer_info(i)[0]
        if buffer_type == NANOARROW_BUFFER_TYPE_VALIDITY:
            return "validity"
        elif buffer_type == NANOARROW_BUFFER_TYPE_TYPE_ID:
            return "type_id"
        elif buffer_type == NANOARROW_BUFFER_TYPE_UNION_OFFSET:
            return "union_offset"
        elif buffer_type == NANOARROW_BUFFER_TYPE_DATA_OFFSET:
            return "data_offset"
        elif buffer_type == NANOARROW_BUFFER_TYPE_DATA:
            return "data"
        elif buffer_type == NANOARROW_BUFFER_TYPE_VARIADIC_DATA:
            return "variadic_data"
        elif buffer_type == NANOARROW_BUFFER_TYPE_VARIADIC_SIZE:
            return "variadic_size"
        else:
            return "none"

    def buffer(self, int64_t i):
        """Return buffer ``i`` as a CBufferView.

        Raises IndexError for an out-of-range ``i`` (via _buffer_info()) and
        RuntimeError when the buffer's size is negative.
        """
        _, data_type, element_size_bits, addr, size = self._buffer_info(i)

        # Check the buffer size here because the error later is cryptic.
        # Buffer sizes are set to -1 when they are "unknown", so because of errors
        # in nanoarrow/C or because the array is on a non-CPU device, that -1 value
        # could leak its way here.
        if size < 0:
            raise RuntimeError(f"ArrowArrayView buffer {i} has size_bytes < 0")

        return CBufferView(
            self._array_base,
            addr,
            size,
            data_type,
            element_size_bits,
            self._event
        )

    @property
    def buffers(self):
        """Iterate over the buffers of this view as CBufferView objects."""
        for i in range(self.n_buffers):
            yield self.buffer(i)

    @property
    def dictionary(self):
        """A CArrayView of the dictionary, or None if the dictionary pointer is NULL."""
        if self._ptr.dictionary == NULL:
            return None

        cdef CArrayView dictionary = CArrayView(
            self,
            <uintptr_t>self._ptr.dictionary
        )
        dictionary._event = self._event
        # As in child(): give the dictionary view the same base object that
        # buffer() attaches to the CBufferView instances it creates.
        dictionary._array_base = self._array_base

        return dictionary

    def _iter_bytes(self, int64_t offset, int64_t length) -> bytes | None:
        """Yield each element as ``bytes``, or None where the element is null.

        NOTE(review): the loop is ``range(offset, length)``, so ``length`` acts
        as an exclusive end index rather than an element count. Confirm that
        this is what callers expect.
        """
        cdef ArrowBufferView item_view
        for i in range(offset, length):
            if ArrowArrayViewIsNull(self._ptr, i):
                yield None
            else:
                item_view = ArrowArrayViewGetBytesUnsafe(self._ptr, i)
                yield PyBytes_FromStringAndSize(item_view.data.as_char, item_view.size_bytes)

    def _iter_str(self, int64_t offset, int64_t length) -> str | None:
        """Yield each element as ``str``, or None where the element is null.

        NOTE(review): the loop is ``range(offset, length)``, so ``length`` acts
        as an exclusive end index rather than an element count. Confirm that
        this is what callers expect.
        """
        cdef ArrowStringView item_view
        for i in range(offset, length):
            if ArrowArrayViewIsNull(self._ptr, i):
                yield None
            else:
                item_view = ArrowArrayViewGetStringUnsafe(self._ptr, i)
                yield PyUnicode_FromStringAndSize(item_view.data, item_view.size_bytes)

    def __repr__(self):
        return _repr_utils.array_view_repr(self)

    @staticmethod
    def from_schema(CSchema schema):
        """Allocate an ArrowArrayView and initialize it from ``schema``.

        A non-OK return code from ArrowArrayViewInitFromSchema() is raised via
        ``Error.raise_message_not_ok()``.
        """
        cdef ArrowArrayView* c_array_view
        base = alloc_c_array_view(&c_array_view)

        cdef Error error = Error()
        cdef int code = ArrowArrayViewInitFromSchema(c_array_view,
                                                     schema._ptr, &error.c_error)
        error.raise_message_not_ok("ArrowArrayViewInitFromSchema()", code)

        return CArrayView(base, <uintptr_t>c_array_view)

    @staticmethod
    def from_array(CArray array, Device device=DEVICE_CPU):
        """Create a view from the array's schema and point it at ``array``."""
        out = CArrayView.from_schema(array._schema)
        return out._set_array(array, device)
cdef class CArray:
"""Low-level ArrowArray wrapper
This object is a literal wrapper around a read-only ArrowArray. It provides field accessors
that return Python objects and handles the C Data interface lifecycle (i.e., initialized
ArrowArray structures are always released).
See `nanoarrow.c_array()` for construction and usage examples.
"""
@staticmethod
def allocate(CSchema schema) -> CArray:
    """Allocate a released ArrowArray

    The ArrowArray structure is obtained from ``alloc_c_array()``. The object
    that call returns becomes the base of the new CArray, and ``schema`` is
    stored on it unchanged.
    """
    cdef ArrowArray* c_array
    holder = alloc_c_array(&c_array)
    return CArray(holder, <uintptr_t>c_array, schema)
def __cinit__(self, object base, uintptr_t addr, CSchema schema):
    # Every new wrapper starts out as a CPU array with device id -1 and no
    # sync event.
    self._sync_event = NULL
    self._device_id = -1
    self._device_type = ARROW_DEVICE_CPU

    # ``addr`` is the address of the wrapped ArrowArray struct. ``base`` is
    # stored next to it (presumably the object that keeps that memory alive;
    # confirm against alloc_c_array()).
    self._schema = schema
    self._ptr = <ArrowArray*>addr
    self._base = base
cdef _set_device(self, ArrowDeviceType device_type, int64_t device_id, void* sync_event):
self._device_type = device_type
Loading ...