Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

arrow-nightlies / nanoarrow   python

Repository URL to install this package:

/ tests / test_ipc.py

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import io
import os
import pathlib
import tempfile

import pytest
from nanoarrow._utils import NanoarrowException

import nanoarrow as na
from nanoarrow.ipc import InputStream, StreamWriter


def test_ipc_stream_example():
    with InputStream.example() as input:
        assert input._is_valid() is True
        assert "BytesIO object" in repr(input)

        stream = na.c_array_stream(input)
        assert input._is_valid() is False
        assert stream.is_valid() is True
        assert repr(input) == "<nanoarrow.ipc.InputStream <invalid>>"
        with pytest.raises(RuntimeError, match="no longer valid"):
            stream = na.c_array_stream(input)

        with stream:
            schema = stream.get_schema()
            assert schema.format == "+s"
            assert schema.child(0).format == "i"
            batches = list(stream)
            assert stream.is_valid() is True

        assert stream.is_valid() is False
        assert len(batches) == 1
        batch = na.c_array(batches[0]).view()
        assert list(batch.child(0).buffer(1)) == [1, 2, 3]


def test_ipc_stream_from_readable():
    with io.BytesIO(InputStream.example_bytes()) as f:
        with InputStream.from_readable(f) as input:
            assert input._is_valid() is True
            assert "BytesIO object" in repr(input)

            with na.c_array_stream(input) as stream:
                batches = list(stream)
                assert len(batches) == 1
                assert len(batches[0]) == 3


def test_ipc_stream_from_path():
    with tempfile.TemporaryDirectory() as td:
        path = os.path.join(td, "test.arrows")
        with open(path, "wb") as f:
            f.write(InputStream.example_bytes())

        with InputStream.from_path(path) as input:
            assert repr(path) in repr(input)
            with na.c_array_stream(input) as stream:
                batches = list(stream)
                assert len(batches) == 1
                assert len(batches[0]) == 3


def test_ipc_stream_from_url():
    with tempfile.TemporaryDirectory() as td:
        path = os.path.join(td, "test.arrows")
        with open(path, "wb") as f:
            f.write(InputStream.example_bytes())

        uri = pathlib.Path(path).as_uri()
        with InputStream.from_url(uri) as input:
            with na.c_array_stream(input) as stream:
                batches = list(stream)
                assert len(batches) == 1
                assert len(batches[0]) == 3


def test_ipc_stream_python_exception_on_read():
    class ExtraordinarilyInconvenientFile:
        def readinto(self, *args, **kwargs):
            raise RuntimeError("I error for all read requests")

    input = InputStream.from_readable(ExtraordinarilyInconvenientFile())
    with pytest.raises(
        NanoarrowException, match="RuntimeError: I error for all read requests"
    ):
        na.c_array_stream(input)


def test_ipc_stream_error_on_read():
    with io.BytesIO(InputStream.example_bytes()[:100]) as f:
        with InputStream.from_readable(f) as input:

            with pytest.raises(
                NanoarrowException,
                match="Expected >= 280 bytes of remaining data",
            ):
                na.c_array_stream(input)


def test_writer_from_writable():
    array = na.c_array_from_buffers(
        na.struct({"some_col": na.int32()}),
        length=3,
        buffers=[],
        children=[na.c_array([1, 2, 3], na.int32())],
    )

    out = io.BytesIO()
    with StreamWriter.from_writable(out) as writer:
        writer.write_array(array)

    with na.ArrayStream.from_readable(out.getvalue()) as stream:
        assert stream.read_all().to_pylist() == na.Array(array).to_pylist()


def test_writer_from_path():
    array = na.c_array_from_buffers(
        na.struct({"some_col": na.int32()}),
        length=3,
        buffers=[],
        children=[na.c_array([1, 2, 3], na.int32())],
    )

    with tempfile.TemporaryDirectory() as td:
        path = os.path.join(td, "test.arrows")

        with StreamWriter.from_path(path) as writer:
            writer.write_array(array)

        with na.ArrayStream.from_path(path) as stream:
            assert stream.read_all().to_pylist() == na.Array(array).to_pylist()


def test_writer_write_stream_schema():
    array = na.c_array_from_buffers(
        na.struct({"some_col": na.int32()}),
        length=3,
        buffers=[],
        children=[na.c_array([1, 2, 3], na.int32())],
    )
    zero_chunk_array = na.Array.from_chunks([], array.schema)

    out = io.BytesIO()
    with StreamWriter.from_writable(out) as writer:
        writer.write_stream(zero_chunk_array)
        schema_bytes = out.getvalue()

    with StreamWriter.from_writable(out) as writer:
        out.truncate(0)
        out.seek(0)
        writer.write_stream(zero_chunk_array)
        writer.write_stream(zero_chunk_array, write_schema=True)
        two_schema_bytes = out.getvalue()

    assert (schema_bytes + schema_bytes) == two_schema_bytes

    with StreamWriter.from_writable(out) as writer:
        out.truncate(0)
        out.seek(0)
        writer.write_array(array, write_schema=False)
        array_bytes = out.getvalue()

    with StreamWriter.from_writable(out) as writer:
        out.truncate(0)
        out.seek(0)
        writer.write_array(array, write_schema=True)
        both_bytes = out.getvalue()

    assert (schema_bytes + array_bytes) == both_bytes


def test_writer_serialize_stream():
    array = na.c_array_from_buffers(
        na.struct({"some_col": na.int32()}),
        length=3,
        buffers=[],
        children=[na.c_array([1, 2, 3], na.int32())],
    )

    out = io.BytesIO()
    with StreamWriter.from_writable(out) as writer:
        writer.write_array(array)

        # Check that we can't serialize after we've already written to stream
        with pytest.raises(ValueError, match="Can't serialize_stream"):
            writer.serialize_stream(array)

        schema_and_array_bytes = out.getvalue()

    end_of_stream_bytes = b"\xff\xff\xff\xff\x00\x00\x00\x00"

    writer = StreamWriter.from_writable(out)
    out.truncate(0)
    out.seek(0)
    writer.serialize_stream(array)
    assert writer._is_valid() is False

    serialized_bytes = out.getvalue()
    assert (schema_and_array_bytes + end_of_stream_bytes) == serialized_bytes


def test_writer_python_exception_on_write():
    class ExtraordinarilyInconvenientFile:
        def write(self, *args, **kwargs):
            raise RuntimeError("I error for all write requests")

    with pytest.raises(NanoarrowException, match="I error for all write requests"):
        with StreamWriter.from_writable(ExtraordinarilyInconvenientFile()) as writer:
            writer.write(na.c_array([], na.struct([na.int32()])))


def test_writer_error_on_write():
    with pytest.raises(NanoarrowException):
        with StreamWriter.from_writable(io.BytesIO()) as writer:
            writer.write_stream(na.c_array([], na.int32()))