# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# cython: language_level = 3
from libc.stdint cimport uint8_t, int64_t
from libc.string cimport memcpy
from cpython.pycapsule cimport PyCapsule_New, PyCapsule_GetPointer, PyCapsule_IsValid
from cpython cimport (
Py_buffer,
PyObject_CheckBuffer,
PyBuffer_Release,
PyObject_GetBuffer,
PyBUF_FORMAT,
PyBUF_ANY_CONTIGUOUS
)
from cpython.ref cimport Py_INCREF, Py_DECREF
from nanoarrow_c cimport (
ArrowArray,
ArrowArrayAllocateChildren,
ArrowArrayAllocateDictionary,
ArrowArrayBuffer,
ArrowArrayInitFromType,
ArrowArrayMove,
ArrowArrayRelease,
ArrowArrayStream,
ArrowArrayStreamRelease,
ArrowArrayView,
ArrowArrayViewInitFromType,
ArrowArrayViewReset,
ArrowBuffer,
ArrowBufferAllocator,
ArrowBufferDeallocator,
ArrowBufferDeallocatorCallback,
ArrowBufferInit,
ArrowBufferReset,
ArrowFree,
ArrowMalloc,
ArrowNanoarrowVersion,
ArrowSchema,
ArrowSchemaRelease,
NANOARROW_OK,
NANOARROW_TYPE_UNINITIALIZED
)
from nanoarrow_device_c cimport (
ArrowDeviceArray
)
def c_version() -> str:
    """Return the nanoarrow C library version string

    Returns
    -------
    str
        The value returned by ``ArrowNanoarrowVersion()``, decoded as UTF-8.
    """
    version = ArrowNanoarrowVersion()
    return version.decode("UTF-8")
# CPython utilities that are helpful in Python and not available in all
# implementations of ctypes (e.g., early Python versions, pypy)
def obj_is_capsule(obj, str name) -> bool:
    """Check whether an object is a PyCapsule with a given name

    Provided because this check is not reliably available in all
    versions of PyPy's ctypes implementation.

    Parameters
    ----------
    obj : any
        An object to check
    name : str
        The PyCapsule "name" (e.g., "arrow_array")

    Returns
    -------
    bool
        True if ``PyCapsule_IsValid()`` reports 1 for ``obj`` and the
        encoded ``name``; False otherwise.
    """
    # Keep the encoded name alive in a local for the duration of the C call
    capsule_name = name.encode()
    return PyCapsule_IsValid(obj, capsule_name) == 1
def obj_is_buffer(obj):
    """Check whether an object implements the Python buffer protocol

    Provided because this check is not reliably available in all
    versions of PyPy's ctypes implementation.

    Parameters
    ----------
    obj : any
        An object to check

    Returns
    -------
    bool
        True if ``PyObject_CheckBuffer()`` reports 1 for ``obj``.
    """
    supports_buffer = PyObject_CheckBuffer(obj)
    return supports_buffer == 1
class NanoarrowException(RuntimeError):
    """An error resulting from a call to the nanoarrow C library

    Calls into the nanoarrow C library and/or the Arrow C Stream interface
    callbacks report failure with an errno error code and, sometimes, a
    message with extra detail. This RuntimeError subclass formats a suitable
    message and keeps the individual components of the original error
    available as attributes.

    Parameters
    ----------
    what : str
        A string describing the context in which the exception was generated.
        This is usually the name of a nanoarrow C library function.
    code : int
        An errno code (e.g., EINVAL) returned by a nanoarrow C library function.
    message : str
        An optional message (e.g., generated by inspecting an ArrowError).
        If not provided, a message will be generated based on ``code`` and ``what``.
    """

    def __init__(self, what, code, message=""):
        # Keep the raw components so callers can inspect them individually
        self.what = what
        self.code = code
        self.message = message

        # The detail suffix is only appended when a message was supplied
        summary = f"{what} failed ({code})"
        text = summary if message == "" else f"{summary}: {message}"
        super().__init__(text)
cdef class Error:
    """Memory holder for an ArrowError

    ArrowError is the C struct that is optionally passed to nanoarrow functions
    when a detailed error message might be returned. This class holds the
    struct as its ``c_error`` member and provides helpers for raising
    exceptions based on the contained message.
    """

    def __cinit__(self):
        # Begin with an empty (NUL-terminated) message
        self.c_error.message[0] = 0

    cdef raise_message(self, what, code):
        """Raise a :class:`NanoarrowException` whose message is taken from
        the wrapped ArrowError
        """
        detail = self.c_error.message.decode("UTF-8")
        raise NanoarrowException(what, code, detail)

    cdef raise_message_not_ok(self, what, code):
        """Call :meth:`raise_message` if code is not NANOARROW_OK"""
        if code != NANOARROW_OK:
            self.raise_message(what, code)

    @staticmethod
    cdef raise_error(what, code):
        """Raise a :class:`NanoarrowException` with an empty message"""
        raise NanoarrowException(what, code, message="")

    @staticmethod
    cdef raise_error_not_ok(what, code):
        """Call :meth:`raise_error` if code is not NANOARROW_OK"""
        if code != NANOARROW_OK:
            Error.raise_error(what, code)
cdef void pycapsule_schema_deleter(object schema_capsule) noexcept:
    """PyCapsule destructor for an 'arrow_schema' capsule

    Invokes the ArrowSchema's release callback when that callback is non-null,
    then frees the memory holding the pointed-to ``struct ArrowSchema``.
    """
    cdef ArrowSchema* c_schema
    c_schema = <ArrowSchema*>PyCapsule_GetPointer(schema_capsule, 'arrow_schema')

    # A NULL release callback means there is nothing to release
    cdef bint needs_release = c_schema.release != NULL
    if needs_release:
        ArrowSchemaRelease(c_schema)

    ArrowFree(c_schema)
cdef object alloc_c_schema(ArrowSchema** c_schema):
    """Allocate an ArrowSchema and wrap it in a PyCapsule

    On success, ``c_schema[0]`` points to the newly allocated struct (with a
    NULL release callback) and the returned 'arrow_schema' capsule is
    responsible for releasing and freeing it via
    ``pycapsule_schema_deleter``.

    Raises
    ------
    MemoryError
        If the ``struct ArrowSchema`` could not be allocated.
    """
    c_schema[0] = <ArrowSchema*> ArrowMalloc(sizeof(ArrowSchema))
    # Fail cleanly instead of dereferencing a NULL pointer below
    if c_schema[0] == NULL:
        raise MemoryError("Failed to allocate ArrowSchema")

    # Ensure the capsule destructor doesn't call a random release pointer
    c_schema[0].release = NULL
    return PyCapsule_New(c_schema[0], 'arrow_schema', &pycapsule_schema_deleter)
cdef void pycapsule_array_deleter(object array_capsule) noexcept:
    """PyCapsule destructor for an 'arrow_array' capsule

    Invokes the ArrowArray's release callback when that callback is non-null,
    then frees the memory holding the pointed-to ``struct ArrowArray``.
    """
    cdef ArrowArray* c_array
    c_array = <ArrowArray*>PyCapsule_GetPointer(array_capsule, 'arrow_array')

    # A used/moved capsule has a NULL release callback: skip the release
    cdef bint needs_release = c_array.release != NULL
    if needs_release:
        ArrowArrayRelease(c_array)

    ArrowFree(c_array)
cdef object alloc_c_array(ArrowArray** c_array):
    """Allocate an ArrowArray and wrap it in a PyCapsule

    On success, ``c_array[0]`` points to the newly allocated struct (with a
    NULL release callback) and the returned 'arrow_array' capsule is
    responsible for releasing and freeing it via
    ``pycapsule_array_deleter``.

    Raises
    ------
    MemoryError
        If the ``struct ArrowArray`` could not be allocated.
    """
    c_array[0] = <ArrowArray*> ArrowMalloc(sizeof(ArrowArray))
    # Fail cleanly instead of dereferencing a NULL pointer below
    if c_array[0] == NULL:
        raise MemoryError("Failed to allocate ArrowArray")

    # Ensure the capsule destructor doesn't call a random release pointer
    c_array[0].release = NULL
    return PyCapsule_New(c_array[0], 'arrow_array', &pycapsule_array_deleter)
cdef void pycapsule_array_stream_deleter(object stream_capsule) noexcept:
    """PyCapsule destructor for an 'arrow_array_stream' capsule

    Invokes the ArrowArrayStream's release callback when that callback is
    non-null, then frees the memory holding the pointed-to
    ``struct ArrowArrayStream``.
    """
    cdef ArrowArrayStream* c_stream
    c_stream = <ArrowArrayStream*>PyCapsule_GetPointer(
        stream_capsule, 'arrow_array_stream'
    )

    # A used/moved capsule has a NULL release callback: skip the release
    cdef bint needs_release = c_stream.release != NULL
    if needs_release:
        ArrowArrayStreamRelease(c_stream)

    ArrowFree(c_stream)
cdef object alloc_c_array_stream(ArrowArrayStream** c_stream):
    """Allocate an ArrowArrayStream and wrap it in a PyCapsule

    On success, ``c_stream[0]`` points to the newly allocated struct (with a
    NULL release callback) and the returned 'arrow_array_stream' capsule is
    responsible for releasing and freeing it via
    ``pycapsule_array_stream_deleter``.

    Raises
    ------
    MemoryError
        If the ``struct ArrowArrayStream`` could not be allocated.
    """
    c_stream[0] = <ArrowArrayStream*> ArrowMalloc(sizeof(ArrowArrayStream))
    # Fail cleanly instead of dereferencing a NULL pointer below
    if c_stream[0] == NULL:
        raise MemoryError("Failed to allocate ArrowArrayStream")

    # Ensure the capsule destructor doesn't call a random release pointer
    c_stream[0].release = NULL
    return PyCapsule_New(c_stream[0], 'arrow_array_stream', &pycapsule_array_stream_deleter)
cdef void pycapsule_device_array_deleter(object device_array_capsule) noexcept:
    """PyCapsule destructor for an 'arrow_device_array' capsule

    Invokes the release callback of the ``array`` member when that callback is
    non-null, then frees the memory holding the pointed-to
    ``struct ArrowDeviceArray``.
    """
    cdef ArrowDeviceArray* c_device_array
    cdef ArrowArray* c_array

    c_device_array = <ArrowDeviceArray*>PyCapsule_GetPointer(
        device_array_capsule, 'arrow_device_array'
    )
    c_array = &c_device_array.array

    # A used/moved capsule has a NULL release callback: skip the release
    if c_array.release != NULL:
        c_array.release(c_array)

    ArrowFree(c_device_array)
cdef object alloc_c_device_array(ArrowDeviceArray** c_device_array):
    """Allocate an ArrowDeviceArray and wrap it in a PyCapsule

    On success, ``c_device_array[0]`` points to the newly allocated struct
    (whose ``array`` member has a NULL release callback) and the returned
    'arrow_device_array' capsule is responsible for releasing and freeing it
    via ``pycapsule_device_array_deleter``.

    Raises
    ------
    MemoryError
        If the ``struct ArrowDeviceArray`` could not be allocated.
    """
    c_device_array[0] = <ArrowDeviceArray*> ArrowMalloc(sizeof(ArrowDeviceArray))
    # Fail cleanly instead of dereferencing a NULL pointer below
    if c_device_array[0] == NULL:
        raise MemoryError("Failed to allocate ArrowDeviceArray")

    # Ensure the capsule destructor doesn't call a random release pointer
    c_device_array[0].array.release = NULL
    return PyCapsule_New(c_device_array[0], 'arrow_device_array', &pycapsule_device_array_deleter)
cdef void pycapsule_array_view_deleter(object array_capsule) noexcept:
    """PyCapsule destructor for a 'nanoarrow_array_view' capsule

    Resets the pointed-to ``struct ArrowArrayView`` with
    ``ArrowArrayViewReset()`` and then frees the memory associated with
    the pointer.
    """
    cdef ArrowArrayView* c_array_view
    c_array_view = <ArrowArrayView*>PyCapsule_GetPointer(
        array_capsule, 'nanoarrow_array_view'
    )

    ArrowArrayViewReset(c_array_view)
    ArrowFree(c_array_view)
cdef object alloc_c_array_view(ArrowArrayView** c_array_view):
    """Allocate an ArrowArrayView and wrap it in a PyCapsule

    On success, ``c_array_view[0]`` points to the newly allocated struct,
    initialized with ``NANOARROW_TYPE_UNINITIALIZED``, and the returned
    'nanoarrow_array_view' capsule is responsible for resetting and freeing
    it via ``pycapsule_array_view_deleter``.

    Raises
    ------
    MemoryError
        If the ``struct ArrowArrayView`` could not be allocated.
    """
    c_array_view[0] = <ArrowArrayView*> ArrowMalloc(sizeof(ArrowArrayView))
    # Fail cleanly instead of initializing through a NULL pointer below
    if c_array_view[0] == NULL:
        raise MemoryError("Failed to allocate ArrowArrayView")

    # Initialize so that the capsule destructor can always reset the view
    ArrowArrayViewInitFromType(c_array_view[0], NANOARROW_TYPE_UNINITIALIZED)
    return PyCapsule_New(c_array_view[0], 'nanoarrow_array_view', &pycapsule_array_view_deleter)
# Provide a way to validate that we release all references we create.
# This module-level counter is incremented by c_pyobject_buffer() (alongside
# Py_INCREF) and decremented by c_deallocate_pyobject_buffer() (alongside
# Py_DECREF); its current value is exposed by get_pyobject_buffer_count().
cdef int64_t pyobject_buffer_count = 0
def get_pyobject_buffer_count():
    """Return the number of outstanding borrowed ArrowBuffers

    The value is the count of ``Py_INCREF`` calls made when borrowing an
    ArrowBuffer from a Python object for which the matching ``Py_DECREF``
    has not yet been called. It is used to test shallow copy behaviour
    for leaked PyObject references.
    """
    # Read-only access to the module-level counter: no ``global`` required
    return pyobject_buffer_count
cdef void c_deallocate_pyobject_buffer(ArrowBufferAllocator* allocator, uint8_t* ptr, int64_t size) noexcept with gil:
    """ArrowBufferDeallocatorCallback for an ArrowBuffer borrowed from a PyObject

    Drops the reference held on the Python object stored in the allocator's
    ``private_data`` and decrements the outstanding borrowed-buffer count.
    ``ptr`` and ``size`` are not used.
    """
    cdef void* borrowed_from = allocator.private_data
    global pyobject_buffer_count

    Py_DECREF(<object>borrowed_from)
    pyobject_buffer_count = pyobject_buffer_count - 1
cdef void c_pyobject_buffer(object base, const void* buf, int64_t size_bytes, ArrowBuffer* out):
    """Populate an ArrowBuffer that borrows its memory from ``base``

    ``out`` is set to point at ``buf`` (``size_bytes`` bytes) with an
    allocator created by ``ArrowBufferDeallocator()`` whose callback is
    ``c_deallocate_pyobject_buffer`` and whose private data is ``base``.
    ``Py_INCREF`` is invoked on ``base`` here; the callback invokes the
    matching ``Py_DECREF`` when the buffer is destroyed using
    ``ArrowBufferReset()``. The net incref/decref count can be checked with
    :func:`get_pyobject_buffer_count`.
    """
    global pyobject_buffer_count

    # Take (and count) the reference that the deallocator callback gives back
    Py_INCREF(base)
    pyobject_buffer_count = pyobject_buffer_count + 1

    # Point the buffer at the borrowed memory without copying it
    out.allocator = ArrowBufferDeallocator(
        <ArrowBufferDeallocatorCallback>c_deallocate_pyobject_buffer,
        <void*>base
    )
    out.size_bytes = size_bytes
    out.data = <uint8_t*>buf
cdef void c_array_shallow_copy(object base, const ArrowArray* src, ArrowArray* dst):
"""Make the shallowest possible (safe) copy of an ArrowArray
Once a CArray exists at the Python level, nanoarrow makes it very difficult
to perform an operation that might render the pointed-to ArrowArray invalid.
Performing a deep copy (i.e., copying buffer content) would be unexpected and
prohibitively expensive, and performing a truly shallow copy (i.e., adding
an ArrowArray implementation that simply PyINCREF/PyDECREFs the original array)
is not safe because the Arrow C Data interface specification allows children
to be "move"d. Even though nanoarrow's Python bindings do not do this unless
explicitly requested, when passed to some other library they are free to do so.
This implementation of a shallow copy creates a recursive copy of the original
array, including any children and dictionary (if present). It uses the
Loading ...