Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

arrow-nightlies / nanoarrow   python

Repository URL to install this package:

/ tests / test_c_buffer.py

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import struct
import sys
from datetime import date, datetime, timezone

import pytest
from nanoarrow._buffer import CBuffer, CBufferBuilder

import nanoarrow as na


def test_buffer_invalid():
    """A default-constructed CBuffer refuses access and reprs as invalid."""
    buf = CBuffer()
    not_valid = "CBuffer is not valid"

    # Address lookup, size query and buffer-protocol export must all refuse
    with pytest.raises(RuntimeError, match=not_valid):
        buf._addr()
    with pytest.raises(RuntimeError, match=not_valid):
        buf.size_bytes
    with pytest.raises(RuntimeError, match=not_valid):
        memoryview(buf)

    assert repr(buf) == "nanoarrow.c_buffer.CBuffer(<invalid>)"


def test_c_buffer_constructor():
    """na.c_buffer() returns CBuffer inputs unchanged and wraps bytes objects."""
    # An existing CBuffer must come back as the very same object
    passthrough = CBuffer()
    assert na.c_buffer(passthrough) is passthrough

    # A bytes object is wrapped in a CBuffer exposing the same content
    wrapped = na.c_buffer(b"1234")
    assert isinstance(wrapped, CBuffer)
    assert bytes(wrapped) == b"1234"


def test_c_buffer_unsupported_format():
    """Foreign-endian struct formats and SPARSE_UNION are rejected."""
    buf = CBuffer.empty()

    # Select the byte-order prefix that does NOT match this platform
    swapped_format = ">i" if sys.byteorder == "little" else "<i"
    with pytest.raises(
        ValueError, match=f"Can't convert format '{swapped_format}' to Arrow type"
    ):
        buf._set_format(swapped_format)

    with pytest.raises(ValueError, match=r"Unsupported Arrow type_id"):
        buf._set_data_type(na.Type.SPARSE_UNION.value)


def test_c_buffer_empty():
    """An empty CBuffer has a zero address, zero size and empty content."""
    empty = CBuffer.empty()

    assert empty._addr() == 0
    assert empty.size_bytes == 0
    assert bytes(empty) == b""

    assert repr(empty) == "nanoarrow.c_buffer.CBuffer(binary[0 b] b'')"

    # Export it via the Python buffer protocol wrapped in a new CBuffer
    # NOTE(review): test_c_buffer_constructor asserts that na.c_buffer()
    # returns a CBuffer argument as-is, so this may not actually go through
    # the buffer protocol -- confirm whether a real export was intended.
    empty_roundtrip = na.c_buffer(empty)

    # Each property is checked once (the size check used to be duplicated)
    assert empty_roundtrip._addr() == 0
    assert empty_roundtrip.size_bytes == 0


def test_c_buffer_pybuffer():
    """Wrapping a bytes object keeps its size and content; repr shows uint8."""
    alphabet = b"abcdefghijklmnopqrstuvwxyz"
    wrapped = na.c_buffer(alphabet)

    assert wrapped.size_bytes == len(alphabet)
    assert bytes(wrapped) == b"abcdefghijklmnopqrstuvwxyz"

    # Only the prefix is pinned: type, size, then the first element values
    expected_prefix = "nanoarrow.c_buffer.CBuffer(uint8[26 b] 97 98"
    assert repr(wrapped).startswith(expected_prefix)


def test_c_buffer_unsupported_type():
    """Passing None to na.c_buffer() raises a TypeError naming NoneType."""
    expected_message = "Can't convert object of type NoneType"
    with pytest.raises(TypeError, match=expected_message):
        na.c_buffer(None, na.int32())


def test_c_buffer_missing_requested_schema():
    """A plain list passed without a schema raises ValueError."""
    expected_message = "CBuffer from iterable requires schema"
    with pytest.raises(ValueError, match=expected_message):
        na.c_buffer([1, 2, 3])


def test_c_buffer_pybuffer_with_schema():
    """Supplying a schema together with a bytes object is not implemented."""
    expected_message = "schema for pybuffer is not implemented"
    with pytest.raises(NotImplementedError, match=expected_message):
        na.c_buffer(b"1234", na.int32())


def test_c_buffer_integer():
    """Each integer struct format yields a buffer whose elements read back."""
    items = [0, 1, 2]

    for fmt in ("b", "B", "h", "H", "i", "I", "l", "L", "q", "Q", "n", "N"):
        # Pack every value separately with the native layout for this format
        raw = b"".join(struct.pack(fmt, item) for item in items)
        buf = na.c_buffer(raw)._set_format(fmt)
        assert buf.size_bytes == len(raw)

        # Sequence protocol: length, indexing and iteration
        assert len(buf) == 3
        assert buf[0] == 0
        assert buf[1] == 1
        assert buf[2] == 2
        assert list(buf) == [0, 1, 2]

        # Explicit element API must agree with the sequence protocol
        assert list(buf.elements()) == [0, 1, 2]
        assert buf.n_elements == len(buf)
        assert [buf.element(i) for i in range(buf.n_elements)] == list(buf)

def test_numpy_c_buffer_numeric():
    """NumPy arrays convert to CBuffer and back while sharing one address."""
    np = pytest.importorskip("numpy")

    scalar_types = (
        np.int8,
        np.uint8,
        np.int16,
        np.uint16,
        np.int32,
        np.uint32,
        np.int64,
        np.uint64,
        np.float16,
        np.float32,
        np.float64,
    )
    # Default-constructed scalars are handed to np.array() as the dtype
    dtypes = [scalar_type() for scalar_type in scalar_types]
    dtypes.append("|S1")

    for dtype in dtypes:
        source = np.array([0, 1, 2], dtype)
        buf = na.c_buffer(source)
        assert list(buf) == list(source)
        assert list(buf.elements()) == list(source)

        # Back to NumPy without copying, then compare element-wise
        restored = np.array(buf, copy=False)
        np.testing.assert_array_equal(restored, source)

        # Re-wrapping the restored array must point at the same memory
        rewrapped = na.c_buffer(restored)
        assert rewrapped._addr() == buf._addr()


def test_c_buffer_float():
    """Half, single and double struct formats read back their float values."""
    items = [0.0, 1.0, 2.0]

    for fmt in ("e", "f", "d"):
        # Pack every value separately with the native layout for this format
        raw = b"".join(struct.pack(fmt, item) for item in items)
        buf = na.c_buffer(raw)._set_format(fmt)
        assert buf.size_bytes == len(raw)

        assert len(buf) == 3
        assert buf[0] == 0.0
        assert buf[1] == 1.0
        assert buf[2] == 2.0
        assert list(buf) == [0.0, 1.0, 2.0]


def test_c_buffer_string():
    """The "c" format yields one single-byte bytes object per input byte."""
    raw = b"abcdefg"
    buf = na.c_buffer(raw)._set_format("c")
    assert buf.size_bytes == len(raw)

    # One element per byte, each a length-1 bytes object
    expected = [letter.encode("UTF-8") for letter in "abcdefg"]
    assert len(buf) == len(raw)
    assert list(buf) == expected


def test_c_buffer_fixed_size_binary():
    """The "4s" format splits the buffer into 4-byte bytes elements."""
    chunks = [b"abcd", b"efgh", b"ijkl"]
    raw = b"".join(chunks)
    buf = na.c_buffer(raw)._set_format("4s")
    assert buf.size_bytes == len(raw)

    assert len(buf) == 3
    assert buf[0] == b"abcd"
    assert buf[1] == b"efgh"
    assert buf[2] == b"ijkl"
    assert list(buf) == chunks


def test_c_buffer_builder():
    """CBufferBuilder tracks size and capacity and rejects bad advances."""
    builder = CBufferBuilder()
    assert builder.size_bytes == 0
    assert builder.capacity_bytes == 0
    assert repr(builder) == "nanoarrow.c_buffer.CBufferBuilder(0/0)"

    # Reserving grows the capacity only; nothing has been written yet
    builder.reserve_bytes(123)
    assert builder.size_bytes == 0
    assert builder.capacity_bytes == 123

    # Each write grows the size by the payload length; capacity is unchanged
    for payload, size_after in ((b"abcde", 5), (b"fghij", 10)):
        builder.write(payload)
        assert builder.size_bytes == size_after
        assert builder.capacity_bytes == 123

    # Presumably advance() is relative to size_bytes (10 here), so these
    # offsets would land at -1 and 124, outside [0, 123] -- confirm.
    for bad_offset in (-11, 114):
        with pytest.raises(IndexError):
            builder.advance(bad_offset)


def test_c_buffer_builder_buffer_protocol():
    """A live memoryview locks the builder; after release it can be finished.

    While a memoryview of the builder is open, a second export and a call to
    finish() must both raise BufferError. Once the view is released, the byte
    written through it is committed with advance() and returned by finish().
    """
    import platform

    builder = CBufferBuilder()
    builder.reserve_bytes(1)

    with memoryview(builder) as mv:
        assert len(mv) == 1

        with pytest.raises(BufferError, match="CBufferBuilder is locked"):
            memoryview(builder)

        # Only the call is needed here: finish() is expected to raise, so a
        # comparison of its result could never be evaluated.
        with pytest.raises(BufferError, match="CBufferBuilder is locked"):
            builder.finish()

        # Write through the view at the builder's current size
        mv[builder.size_bytes] = ord("k")

    if platform.python_implementation() == "PyPy":
        pytest.skip("memoryview() release is not guaranteed on PyPy")

    # Account for the byte written through the view, then check the result
    builder.advance(1)
    assert bytes(builder.finish()) == b"k"


def test_c_buffer_from_iterable():
    """An int32 iterable builds a 12-byte buffer; unsuitable types error."""
    buf = na.c_buffer([1, 2, 3], na.int32())
    assert list(buf) == [1, 2, 3]
    assert buf.data_type == "int32"
    assert buf.size_bytes == 12
    assert buf.itemsize == 4
    assert buf.element_size_bits == 32

    # A struct type has no meaningful buffer representation
    with pytest.raises(ValueError, match="Unsupported Arrow type_id"):
        na.c_buffer([], na.struct([]))

    # Dictionary and extension types have a storage type that differs from
    # their top-level type, so buffer creation is refused for both.
    storage_mismatch = "Can't create buffer"
    with pytest.raises(ValueError, match=storage_mismatch):
        na.c_buffer([1, 2, 3], na.dictionary(na.int32(), na.string()))

    with pytest.raises(ValueError, match=storage_mismatch):
        na.c_buffer([1, 2, 3], na.extension_type(na.int32(), "arrow.test"))


def test_c_buffer_from_fixed_size_binary_iterable():
    """fixed_size_binary(4) items build a "binary" buffer of 4-byte items."""
    chunks = [b"abcd", b"efgh", b"ijkl"]
    buf = na.c_buffer(chunks, na.fixed_size_binary(4))

    assert buf.data_type == "binary"
    assert buf.element_size_bits == 32
    assert buf.itemsize == 4

    # Content is the plain concatenation and splits back into the inputs
    assert bytes(buf) == b"".join(chunks)
    assert list(buf) == chunks


def test_c_buffer_from_day_time_iterable():
    """interval_day_time buffers store and return 2-tuples in 8-byte items."""
    pairs = [(1, 2), (3, 4), (5, 6)]
    buf = na.c_buffer(pairs, na.interval_day_time())

    assert buf.data_type == "interval_day_time"
    assert buf.element_size_bits == 64
    assert buf.itemsize == 8
    assert list(buf) == [(1, 2), (3, 4), (5, 6)]


def test_c_buffer_from_month_day_nano_iterable():
    """interval_month_day_nano buffers store 3-tuples in 16-byte items."""
    triples = [(1, 2, 3), (4, 5, 6)]
    buf = na.c_buffer(triples, na.interval_month_day_nano())

    assert buf.data_type == "interval_month_day_nano"
    assert buf.element_size_bits == 128
    assert buf.itemsize == 16
    assert list(buf) == [(1, 2, 3), (4, 5, 6)]


def test_c_buffer_from_decimal128_iterable():
    """decimal128 buffers accept and return raw 16-byte items."""
    bytes64 = bytes(range(64))
    # Four consecutive 16-byte slices of the 64-byte pattern
    words = [bytes64[start : start + 16] for start in range(0, 64, 16)]

    buf = na.c_buffer(words, na.decimal128(10, 3))
    assert buf.data_type == "decimal128"
    assert buf.element_size_bits == 128
    assert buf.itemsize == 16
    assert list(buf) == [bytes64[start : start + 16] for start in range(0, 64, 16)]


def test_c_buffer_from_decimal256_iterable():
    """decimal256 buffers accept and return raw 32-byte items."""
    bytes64 = bytes(range(64))
    # Two consecutive 32-byte halves of the 64-byte pattern
    lower_half, upper_half = bytes64[:32], bytes64[32:]

    buf = na.c_buffer([lower_half, upper_half], na.decimal256(10, 3))
    assert buf.data_type == "decimal256"
    assert buf.element_size_bits == 256
    assert buf.itemsize == 32
    assert list(buf) == [bytes64[0:32], bytes64[32:64]]


def test_c_buffer_bitmap_from_iterable():
    # Check something less than one byte
    buffer = na.c_buffer([True, False, False, True], na.bool_())
    assert "10010000" in repr(buffer)
    assert buffer.size_bytes == 1
    assert buffer.data_type == "bool"
    assert buffer.itemsize == 1
    assert buffer.element_size_bits == 1
    assert list(buffer.elements()) == [
        True,
        False,
        False,
        True,
        False,
        False,
        False,
        False,
    ]
    assert [buffer.element(i) for i in range(buffer.n_elements)] == list(
        buffer.elements()
    )

    # Check something exactly one byte
    buffer = na.c_buffer([True, False, False, True] * 2, na.bool_())
Loading ...