# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations
import enum
import pyarrow as pa
class DlpackDeviceType(enum.IntEnum):
    """
    Device type codes, expressed as integers that match DLPack.

    Because this is an ``IntEnum``, every member compares equal to (and can
    be used as) its plain integer code. The numbering is not contiguous:
    codes 5 and 6 have no member in this enum.
    """

    # Codes 1 through 4.
    CPU = 1
    CUDA = 2
    CPU_PINNED = 3
    OPENCL = 4

    # Codes 7 through 10 (5 and 6 are intentionally absent here).
    VULKAN = 7
    METAL = 8
    VPI = 9
    ROCM = 10
class _PyArrowBuffer:
    """
    Wrapper that exposes a ``pa.Buffer`` through a small buffer interface.

    Data in the buffer is guaranteed to be contiguous in memory.

    A buffer has no dtype attribute of its own; it can be thought of as
    simply a block of memory. However, if the column the buffer is attached
    to has a dtype that DLPack supports and ``__dlpack__`` is implemented,
    then that dtype information is carried by the value ``__dlpack__``
    returns.

    Keeping the two apart is useful to support both (a) data exchange via
    DLPack on a buffer and (b) dtypes like variable-length strings, which do
    not have a fixed number of bytes per element.
    """

    def __init__(self, x: pa.Buffer, allow_copy: bool = True) -> None:
        """
        Wrap the PyArrow buffer ``x``.

        ``allow_copy`` is accepted but is neither stored nor consulted by
        this class.
        """
        self._x = x

    @property
    def bufsize(self) -> int:
        """
        Size of the wrapped buffer in bytes.
        """
        wrapped = self._x
        return wrapped.size

    @property
    def ptr(self) -> int:
        """
        Integer address of the first byte of the wrapped buffer.
        """
        wrapped = self._x
        return wrapped.address

    def __dlpack__(self):
        """
        Produce a DLPack capsule (see the array API standard).

        DLPack export is not implemented for this buffer, so calling this
        always raises.

        Raises:
            - TypeError : if the buffer contains unsupported dtypes.
            - NotImplementedError : if DLPack support is not implemented
              (currently: always).

        The hook is useful for connecting to array libraries; supporting it
        is optional because it is not completely trivial to implement for a
        Python-only library.
        """
        raise NotImplementedError("__dlpack__")

    def __dlpack_device__(self) -> tuple[DlpackDeviceType, int | None]:
        """
        Report the device type and device ID where the buffer's data lives.

        Device types use codes matching DLPack. Only buffers that report
        ``is_cpu`` are handled; they yield ``(DlpackDeviceType.CPU, None)``.

        Raises:
            - NotImplementedError : if the wrapped buffer is not on the CPU.

        Note: must be implemented even if ``__dlpack__`` is not.
        """
        # Guard clause: anything that is not a CPU buffer is unsupported.
        if not self._x.is_cpu:
            raise NotImplementedError("__dlpack_device__")
        return (DlpackDeviceType.CPU, None)

    def __repr__(self) -> str:
        """
        Return ``PyArrowBuffer({...})`` showing size, pointer and device name.
        """
        # Entries are evaluated in this order: bufsize, ptr, then device.
        summary = {
            "bufsize": self.bufsize,
            "ptr": self.ptr,
            "device": self.__dlpack_device__()[0].name,
        }
        return f"PyArrowBuffer({summary})"