src/nanoarrow/_repr_utils.py · arrow-nightlies/nanoarrow

arrow-nightlies / nanoarrow python

Repository URL to install this package:
Version: 0.7.0.dev132

/ src / nanoarrow / _repr_utils.py

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# The functions here are imported by the package's Cython code. They are
# defined in this pure-Python module rather than in Cython so that they are
# easier to iterate on (no rebuild is needed after editing when working with
# an editable installation).


def make_class_label(obj, module=None):
    """Return a dotted ``module.ClassName`` label for *obj*.

    Args:
        obj: Any object; its class name forms the last component of the label.
        module: Module path to display. When ``None``, the module recorded on
            the object's class is used instead.

    Returns:
        The string ``"<module>.<class name>"``.
    """
    cls = obj.__class__
    shown_module = cls.__module__ if module is None else module
    return f"{shown_module}.{cls.__name__}"


def c_schema_to_string(obj, max_char_width=80):
    """Render *obj* via its ``_to_string()`` method, shortened to a width.

    Args:
        obj: Object exposing ``_to_string(recursive=..., max_chars=...)``.
        max_char_width: Maximum length of the result. ``0`` is forwarded
            unchanged to ``_to_string`` and the result is returned as-is
            (NOTE(review): presumably ``max_chars=0`` means "no limit" --
            confirm against ``_to_string``). Any other value is raised to at
            least 10.

    Returns:
        The schema string; when it exceeds the width it is cut and terminated
        with ``"..."`` so that it is exactly ``max_char_width`` long.
    """
    if max_char_width == 0:
        return obj._to_string(recursive=True, max_chars=max_char_width)

    width = max(max_char_width, 10)
    # Ask for one character more than fits so that overflow can be detected.
    text = obj._to_string(recursive=True, max_chars=width + 1)
    if len(text) <= width:
        return text
    return text[: (width - 3)] + "..."


def metadata_repr(obj, indent=0, max_char_width=80):
    """Format key/value metadata as one bullet line per entry.

    Args:
        obj: Mapping-like object providing ``items()``.
        indent: Number of spaces placed before each bullet.
        max_char_width: Each line is hard-cut to this many characters
            (no ellipsis is added).

    Returns:
        Newline-joined lines of the form ``- <repr(key)>: <repr(value)>``;
        an empty string when there are no entries.
    """
    pad = " " * indent
    return "\n".join(
        f"{pad}- {key!r}: {value!r}"[:max_char_width] for key, value in obj.items()
    )


def schema_repr(schema, indent=0):
    """Build a multi-line description of a schema wrapper.

    Args:
        schema: Object exposing ``_addr()``, ``is_valid()``, ``_to_string()``,
            ``format``, ``name``, ``flags``, ``metadata``, ``dictionary``,
            ``n_children`` and ``children``.
        indent: Number of spaces placed before every line after the first.

    Returns:
        A one-line ``<NULL>`` / ``<released>`` marker when the schema has a
        zero address or is not valid; otherwise a header line followed by the
        fields, the metadata, the dictionary (rendered recursively) and each
        child (rendered recursively).
    """
    pad = " " * indent
    label = make_class_label(schema, module="nanoarrow.c_schema")

    if schema._addr() == 0:
        return f"<{label} <NULL>>"
    if not schema.is_valid():
        return f"<{label} <released>>"

    out = [f"<{label} {schema._to_string()}>"]
    for field in ("format", "name", "flags"):
        out.append(f"{pad}- {field}: {getattr(schema, field)!r}")

    metadata = schema.metadata
    if metadata is None:
        out.append(f"{pad}- metadata: NULL")
    else:
        out.append(f"{pad}- metadata:")
        out.append(metadata_repr(metadata, indent + 2))

    dictionary = schema.dictionary
    if dictionary:
        nested = schema_repr(dictionary, indent=indent + 2)
        out.append(f"{pad}- dictionary: {nested}")
    else:
        out.append(f"{pad}- dictionary: NULL")

    out.append(f"{pad}- children[{schema.n_children}]:")
    for child in schema.children:
        # Children are nested two levels deeper than the current bullets.
        nested = schema_repr(child, indent=indent + 4)
        out.append(f"{pad}  {child.name!r}: {nested}")

    return "\n".join(out)


def array_repr(array, indent=0, max_char_width=80):
    """Build a multi-line description of an array wrapper.

    Args:
        array: Object exposing ``_addr()``, ``is_valid()``, ``schema``,
            ``length``, ``offset``, ``null_count``, ``buffers``,
            ``dictionary``, ``n_children`` and ``children``.
        indent: Number of spaces placed before every line after the first.
        max_char_width: Target line width, raised to at least 20. It bounds
            the schema summary in the header line and is forwarded to the
            nested dictionary and child representations.

    Returns:
        A one-line ``<NULL>`` / ``<released>`` marker when the array has a
        zero address or is not valid; otherwise a header line followed by the
        fields, the dictionary and each child, joined with newlines.
    """
    if max_char_width < 20:
        max_char_width = 20

    indent_str = " " * indent
    class_label = make_class_label(array, module="nanoarrow.c_array")
    if array._addr() == 0:
        return f"<{class_label} <NULL>>"
    elif not array.is_valid():
        return f"<{class_label} <released>>"

    # The header shares its line with the indent and fixed decoration, so the
    # schema summary only gets what is left of the width.
    schema_string = array.schema._to_string(
        max_chars=max_char_width - indent - 23, recursive=True
    )
    lines = [f"<{class_label} {schema_string}>"]
    for attr in ("length", "offset", "null_count", "buffers"):
        lines.append(f"{indent_str}- {attr}: {getattr(array, attr)!r}")

    dictionary = array.dictionary
    if dictionary:
        # Forward the caller's width so nested output honours it too.
        dictionary_repr = array_repr(
            dictionary, indent=indent + 2, max_char_width=max_char_width
        )
        lines.append(f"{indent_str}- dictionary: {dictionary_repr}")
    else:
        lines.append(f"{indent_str}- dictionary: NULL")

    lines.append(f"{indent_str}- children[{array.n_children}]:")
    for child in array.children:
        child_repr = array_repr(
            child, indent=indent + 4, max_char_width=max_char_width
        )
        lines.append(f"{indent_str}  {child.schema.name!r}: {child_repr}")

    return "\n".join(lines)


def schema_view_repr(schema_view):
    """Build a multi-line description of a schema view.

    ``type`` and ``storage_type`` are always listed first. Every other public
    attribute (name not starting with ``_``) follows in sorted order, and
    attributes whose value is ``None`` are left out.

    Args:
        schema_view: Object whose public attributes are to be listed.

    Returns:
        The header line and one ``- name: repr(value)`` line per attribute,
        joined with newlines.
    """
    leading = ("type", "storage_type")
    label = make_class_label(schema_view, module="nanoarrow.c_schema")

    out = [f"<{label}>"]
    for name in leading:
        out.append(f"- {name}: {getattr(schema_view, name)!r}")

    for name in sorted(dir(schema_view)):
        is_public = not name.startswith("_")
        if is_public and name not in leading:
            value = getattr(schema_view, name)
            if value is not None:
                out.append(f"- {name}: {value!r}")

    return "\n".join(out)


def array_view_repr(array_view, max_char_width=80, indent=0):
    """Build a multi-line description of an array view.

    Args:
        array_view: Object exposing ``storage_type``, ``length``, ``offset``,
            ``null_count``, ``n_buffers``, ``buffers``, ``buffer_type(i)``,
            ``dictionary``, ``n_children`` and ``children``.
        max_char_width: Target line width used to size each buffer preview;
            forwarded unchanged to nested dictionary and child views.
        indent: Number of spaces placed before every line after the first.

    Returns:
        The header line, the fields, one line per buffer, the dictionary and
        each child, joined with newlines.
    """
    indent_str = " " * indent
    class_label = make_class_label(array_view, module="nanoarrow.c_array")

    lines = [f"<{class_label}>"]

    for attr in ("storage_type", "length", "offset", "null_count"):
        lines.append(f"{indent_str}- {attr}: {getattr(array_view, attr)!r}")

    lines.append(f"{indent_str}- buffers[{array_view.n_buffers}]:")
    for i, buffer in enumerate(array_view.buffers):
        line_prefix = f"{indent_str}  - {array_view.buffer_type(i)} <"
        # The preview may only use the columns left on this line after the
        # prefix and the closing ">". The width must not depend on how many
        # elements the buffer holds.
        preview_width = max_char_width - len(line_prefix) - 1
        lines.append(f"{line_prefix}{buffer_view_repr(buffer, preview_width)}>")

    dictionary = array_view.dictionary
    if dictionary:
        dictionary_repr = array_view_repr(
            dictionary, max_char_width=max_char_width, indent=indent + 2
        )
        lines.append(f"{indent_str}- dictionary: {dictionary_repr}")
    else:
        lines.append(f"{indent_str}- dictionary: NULL")

    lines.append(f"{indent_str}- children[{array_view.n_children}]:")
    for child in array_view.children:
        child_repr = array_view_repr(
            child, max_char_width=max_char_width, indent=indent + 4
        )
        lines.append(f"{indent_str}  - {child_repr}")

    return "\n".join(lines)


def buffer_view_repr(buffer_view, max_char_width=80):
    """Summarise a buffer view as ``<data type>[<size> b]`` plus details.

    Args:
        buffer_view: Object exposing ``data_type``, ``size_bytes`` and
            ``device`` (with ``device_type_id``, ``device_type`` and
            ``device_id``).
        max_char_width: Target width of the summary, raised to at least 20.

    Returns:
        When the device type id is 1, the prefix, a space and a content
        preview sized to the remaining width. For any other device, the prefix
        immediately followed by ``<device_type/device_id>``.
    """
    width = max(max_char_width, 20)
    header = f"{buffer_view.data_type}[{buffer_view.size_bytes} b]"

    device = buffer_view.device
    if device.device_type_id != 1:
        # Content is only previewed for device type id 1 (handled by the CPU
        # preview helper); other devices get an identifying tag instead.
        return header + f"<{device.device_type}/{device.device_id}>"

    preview = buffer_view_preview_cpu(buffer_view, width - len(header) - 2)
    return header + " " + preview


def buffer_view_preview_cpu(buffer_view, max_char_width):
    """Render a short preview of a buffer's contents.

    The rendering depends on ``buffer_view.element_size_bits``:

    * ``0``: the leading bytes (via ``memoryview``) shown as a ``bytes`` repr.
    * ``1``: each element formatted as 8 binary digits with the digit order
      reversed, concatenated without separators.
    * anything else: ``repr()`` of each element, separated by spaces.

    Args:
        buffer_view: Buffer-like object supporting ``len()`` and, depending on
            the element size, ``memoryview()`` or integer indexing.
        max_char_width: Number of characters available for the preview.

    Returns:
        The preview. If it is too long, or not every element was included,
        it is cut to ``max_char_width - 3`` characters and ``"..."`` is
        appended.
    """
    bits = buffer_view.element_size_bits

    if bits == 0:
        shown = max_char_width - 3
        text = repr(bytes(memoryview(buffer_view)[:shown]))
    else:
        # Budget 8 characters per element for bit-packed elements and 3 for
        # everything else.
        chars_per_element = 8 if bits == 1 else 3
        shown = min(max_char_width // chars_per_element, len(buffer_view))
        if bits == 1:
            text = "".join(
                format(buffer_view[i], "08b")[::-1] for i in range(shown)
            )
        else:
            text = " ".join(repr(buffer_view[i]) for i in range(shown))

    complete = len(text) <= max_char_width and shown >= len(buffer_view)
    if complete:
        return text
    return text[: (max_char_width - 3)] + "..."


def array_stream_repr(array_stream, max_char_width=80):
    """Build a short description of an array stream wrapper.

    Args:
        array_stream: Object exposing ``_addr()``, ``is_valid()`` and
            ``get_schema()``.
        max_char_width: Target line width; the schema summary is requested
            with 16 fewer characters to leave room for the line's label.

    Returns:
        A one-line ``<NULL>`` / ``<released>`` marker when the stream has a
        zero address or is not valid; otherwise the header line followed by a
        ``get_schema()`` line. If obtaining or formatting the schema raises,
        that line reports the error instead of propagating it.
    """
    label = make_class_label(array_stream, module="nanoarrow.c_array_stream")

    if array_stream._addr() == 0:
        return f"<{label} <NULL>>"
    if not array_stream.is_valid():
        return f"<{label} <released>>"

    # Building a repr is best-effort: a failing stream is reported inline.
    try:
        schema_text = array_stream.get_schema()._to_string(
            max_chars=max_char_width - 16, recursive=True
        )
        schema_line = f"- get_schema(): {schema_text}"
    except Exception as e:
        schema_line = f"- get_schema(): <error calling get_schema(): {e}>"

    return "\n".join([f"<{label}>", schema_line])


def device_array_repr(device_array):
    """Build a multi-line description of a device array wrapper.

    Args:
        device_array: Object exposing ``device_type`` (with a ``name``),
            ``device_type_id``, ``device_id`` and ``array``.

    Returns:
        The header line, the device type and id lines, and the wrapped
        array's representation indented by two spaces, joined with newlines.
    """
    label = make_class_label(device_array, module="nanoarrow.device")
    parts = [f"<{label}>"]
    parts.append(
        f"- device_type: {device_array.device_type.name} "
        f"<{device_array.device_type_id}>"
    )
    parts.append(f"- device_id: {device_array.device_id}")
    parts.append(f"- array: {array_repr(device_array.array, indent=2)}")
    return "\n".join(parts)


def device_repr(device):
    """Build a three-line description of a device wrapper.

    Args:
        device: Object exposing ``device_type`` (with a ``name``),
            ``device_type_id`` and ``device_id``.

    Returns:
        The header line followed by the device type and device id lines,
        joined with newlines.
    """
    label = make_class_label(device, module="nanoarrow.device")
    return "\n".join(
        (
            f"<{label}>",
            f"- device_type: {device.device_type.name} <{device.device_type_id}>",
            f"- device_id: {device.device_id}",
        )
    )


def array_inspect(array, indent=0, max_char_width=80):
    """Build a detailed multi-line description of an array and its buffers.

    Args:
        array: Object exposing ``_addr()``, ``is_valid()``, ``view()``,
            ``schema``, ``length``, ``offset``, ``null_count``,
            ``dictionary``, ``n_children`` and ``children``.
        indent: Number of spaces placed before every line after the first.
        max_char_width: Target line width, raised to at least 20. It bounds
            the schema summary and each buffer preview, and is forwarded to
            the nested dictionary and child output.

    Returns:
        A one-line ``<NULL>`` / ``<released>`` marker when the array has a
        zero address or is not valid; otherwise a header line followed by the
        fields, one line per buffer, the dictionary and each child, joined
        with newlines.
    """
    if max_char_width < 20:
        max_char_width = 20

    indent_str = " " * indent
    class_label = "ArrowArray"
    if array._addr() == 0:
        return f"<{class_label} <NULL>>"
    elif not array.is_valid():
        return f"<{class_label} <released>>"

    # Create the view only after the guards above: a NULL or released array
    # must be reported as such, not handed to view().
    array_view = array.view()

    schema_string = array.schema._to_string(
        max_chars=max_char_width - indent - 23, recursive=True
    )
    lines = [f"<{class_label} {schema_string}>"]
    for attr in ("length", "offset", "null_count"):
        lines.append(f"{indent_str}- {attr}: {getattr(array, attr)!r}")

    lines.append(f"{indent_str}- buffers[{array_view.n_buffers}]:")
    for i, buffer in enumerate(array_view.buffers):
        line_prefix = f"{indent_str}  - {array_view.buffer_type(i)} <"
        # The preview may only use the columns left on this line after the
        # prefix and the closing ">". The width must not depend on how many
        # elements the buffer holds.
        preview_width = max_char_width - len(line_prefix) - 1
        lines.append(f"{line_prefix}{buffer_view_repr(buffer, preview_width)}>")

    dictionary = array.dictionary
    if dictionary:
        # Forward the caller's width so nested output honours it too.
        dictionary_repr = array_inspect(
            dictionary, indent=indent + 2, max_char_width=max_char_width
        )
        lines.append(f"{indent_str}- dictionary: {dictionary_repr}")
    else:
        lines.append(f"{indent_str}- dictionary: NULL")

    lines.append(f"{indent_str}- children[{array.n_children}]:")
    for child in array.children:
        child_repr = array_inspect(
            child, indent=indent + 4, max_char_width=max_char_width
        )
        lines.append(f"{indent_str}  {child.schema.name!r}: {child_repr}")

    return "\n".join(lines)
arrow-nightlies / nanoarrow python

Version: 0.7.0.dev132

/ src / nanoarrow / _repr_utils.py

Products

About

Resources

Contact Gemfury