Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

arrow-nightlies / nanoarrow   python

Repository URL to install this package:

Version: 0.7.0.dev132 

/ src / nanoarrow / c_buffer.py

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from nanoarrow._buffer import CBuffer, CBufferBuilder
from nanoarrow._utils import obj_is_buffer
from nanoarrow.c_schema import c_schema_view

from nanoarrow import _types


def c_buffer(obj, schema=None) -> CBuffer:
    """Create an owning, read-only ArrowBuffer wrapper from a Python object

    Three kinds of input are understood, checked in this order:

    - Objects implementing the Python buffer protocol are wrapped in
      nanoarrow's owning buffer structure, the ArrowBuffer, so that they
      can be used to construct arrays. Ownership of the underlying memory
      is handled by the buffer protocol itself (i.e.,
      ``PyObject_GetBuffer()`` and ``PyBuffer_Release()``).
    - Objects exposing ``__dlpack__`` are wrapped via the DLPack protocol.
    - Any other iterable results in a newly allocated buffer populated
      with the contents of obj according to ``schema``, which is required
      in this case. The ``struct`` module is currently used to encode
      values from obj into binary form.

    Unlike with :func:`c_array`, ``schema`` is explicitly honoured (or an
    error will be raised). A :class:`CBuffer` passed without a ``schema``
    is returned unchanged.

    Parameters
    ----------

    obj : buffer-like, tensor, or iterable
        A Python object that supports the Python buffer or DLPack protocols
        (this includes bytes, memoryview, bytearray, built-in types as well
        as numpy arrays), or an iterable of values to be encoded according
        to ``schema``.
    schema :  schema-like, optional
        The data type of the desired buffer as sanitized by
        :func:`c_schema`. Only values that make sense as buffer types are
        allowed (e.g., integer types, floating-point types, interval types,
        decimal types, binary, string, fixed-size binary).

    Raises
    ------

    NotImplementedError
        If ``schema`` is specified for a buffer-protocol or DLPack object.
    TypeError
        If obj is not a buffer, a tensor, or an iterable.

    Examples
    --------

    >>> import nanoarrow as na
    >>> na.c_buffer(b"1234")
    nanoarrow.c_buffer.CBuffer(uint8[4 b] 49 50 51 52)
    >>> na.c_buffer([1, 2, 3], na.int32())
    nanoarrow.c_buffer.CBuffer(int32[12 b] 1 2 3)
    """
    schema_requested = schema is not None

    # Nothing to convert when we already have a CBuffer and the caller did
    # not ask for a particular type.
    if not schema_requested and isinstance(obj, CBuffer):
        return obj

    if obj_is_buffer(obj):
        # Wrapping existing memory: reinterpreting it is not supported
        if schema_requested:
            raise NotImplementedError(
                "c_buffer() with schema for pybuffer is not implemented"
            )
        return CBuffer.from_pybuffer(obj)
    elif _obj_is_tensor(obj):
        if schema_requested:
            raise NotImplementedError(
                "c_buffer() with schema for DLPack is not implemented"
            )
        return CBuffer.from_dlpack(obj)
    elif _obj_is_iterable(obj):
        # The element count is only of interest to other callers
        built, _n_values = _c_buffer_from_iterable(obj, schema)
        return built

    raise TypeError(
        f"Can't convert object of type {type(obj).__name__} to nanoarrow.c_buffer"
    )


def _c_buffer_from_iterable(obj, schema=None) -> CBuffer:
    import array

    # array.typecodes is not available in all PyPy versions.
    # Rather than guess, just don't use the array constructor if
    # this attribute is not available.
    if hasattr(array, "typecodes"):
        array_typecodes = array.typecodes
    else:
        array_typecodes = []

    if schema is None:
        raise ValueError("CBuffer from iterable requires schema")

    schema_view = c_schema_view(schema)
    if (
        schema_view.extension_name is not None
        or schema_view.storage_type_id != schema_view.type_id
    ):
        raise ValueError(
            f"Can't create buffer from iterable for type {schema_view.type}"
        )

    builder = CBufferBuilder()

    if schema_view.storage_type_id == _types.FIXED_SIZE_BINARY:
        builder.set_data_type(_types.BINARY, schema_view.fixed_size * 8)
    else:
        builder.set_data_type(schema_view.storage_type_id)

    # If we are using a typecode supported by the array module, it has much
    # faster implementations of safely building buffers from iterables
    if builder.format in array_typecodes and schema_view.storage_type_id != _types.BOOL:
        buf = array.array(builder.format, obj)
        return CBuffer.from_pybuffer(buf), len(buf)

    n_values = builder.write_elements(obj)
    return builder.finish(), n_values


def _obj_is_iterable(obj):
    return hasattr(obj, "__iter__")


def _obj_is_tensor(obj):
    return hasattr(obj, "__dlpack__")