# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import pytest
from nanoarrow.c_schema import allocate_c_schema, c_schema_view
import nanoarrow as na
def test_c_schema_basic():
    """Exercise the basic CSchema accessors on a released and a struct schema."""
    # A freshly allocated schema reports itself as invalid/released
    released = allocate_c_schema()
    assert released.is_valid() is False
    assert released._to_string() == "[invalid: schema is released]"
    assert repr(released) == "<nanoarrow.c_schema.CSchema <released>>"

    struct_schema = na.c_schema(na.struct({"some_name": na.int32()}))

    # Top-level struct fields
    assert struct_schema.format == "+s"
    assert struct_schema.flags == 2
    assert struct_schema.metadata is None

    # Child access, both by count/iteration and by index
    assert struct_schema.n_children == 1
    assert len(list(struct_schema.children)) == 1
    only_child = struct_schema.child(0)
    assert only_child.format == "i"
    assert only_child.name == "some_name"
    assert only_child._to_string() == "int32"

    # The child's repr is embedded in the parent's repr
    assert "<nanoarrow.c_schema.CSchema int32>" in repr(struct_schema)
    assert struct_schema.dictionary is None

    # Indexing past the last child is an error
    with pytest.raises(IndexError):
        struct_schema.child(1)
def test_c_schema_dictionary():
    """Check the format and repr of a dictionary type imported from pyarrow."""
    pa = pytest.importorskip("pyarrow")

    pa_dictionary_type = pa.dictionary(pa.int32(), pa.utf8())
    schema = na.c_schema(pa_dictionary_type)

    # The top-level format is "i"; the schema under .dictionary has format "u"
    assert schema.format == "i"
    assert schema.dictionary.format == "u"
    assert "dictionary: <nanoarrow.c_schema.CSchema string" in repr(schema)
def test_schema_metadata():
    """Check that str metadata set via modify() reads back as bytes, in order."""
    source_meta = {"key1": "value1", "key2": "value2"}
    int32_schema = na.c_schema(na.int32())
    schema = int32_schema.modify(metadata=source_meta)

    assert len(schema.metadata) == 2

    # Materialize the key/value pairs into a plain dict before comparing
    round_tripped = dict(schema.metadata.items())
    assert list(round_tripped.keys()) == [b"key1", b"key2"]
    assert list(round_tripped.values()) == [b"value1", b"value2"]

    # Metadata also shows up in the schema's repr
    assert "b'key1': b'value1'" in repr(schema)
def test_c_schema_view():
    """Check c_schema_view() on a released schema and on a plain int32 schema."""
    # Viewing a released schema must fail
    released = allocate_c_schema()
    with pytest.raises(RuntimeError):
        c_schema_view(released)

    int32_schema = na.c_schema(na.int32())
    view = c_schema_view(int32_schema)

    assert "- type: 'int32'" in repr(view)
    assert view.type == "int32"
    assert view.storage_type == "int32"

    # None of the type-specific parameters apply to int32
    assert view.fixed_size is None
    assert view.decimal_bitwidth is None
    assert view.decimal_scale is None
    assert view.time_unit is None
    assert view.timezone is None
    assert view.union_type_ids is None

    # No extension information was attached
    assert view.extension_name is None
    assert view.extension_metadata is None
def test_c_schema_view_extra_params():
    """Check the type-specific parameters exposed by the schema view."""
    # Fixed-size binary
    binary_view = c_schema_view(na.fixed_size_binary(12))
    assert binary_view.fixed_size == 12

    # Decimal types: bitwidth differs, precision/scale are as requested
    decimal128_view = c_schema_view(na.decimal128(10, 3))
    assert decimal128_view.decimal_bitwidth == 128
    assert decimal128_view.decimal_precision == 10
    assert decimal128_view.decimal_scale == 3

    decimal256_view = c_schema_view(na.decimal256(10, 3))
    assert decimal256_view.decimal_bitwidth == 256
    assert decimal256_view.decimal_precision == 10
    assert decimal256_view.decimal_scale == 3

    # Temporal types
    duration_view = c_schema_view(na.duration("us"))
    assert duration_view.time_unit == "us"

    timestamp_view = c_schema_view(na.timestamp("us", "America/Halifax"))
    assert timestamp_view.type == "timestamp"
    assert timestamp_view.storage_type == "int64"
    assert timestamp_view.time_unit == "us"
    assert timestamp_view.timezone == "America/Halifax"

    # The fixed-size list is built with pyarrow, so this part needs it installed
    pa = pytest.importorskip("pyarrow")
    list_view = c_schema_view(pa.list_(pa.int32(), 12))
    assert list_view.fixed_size == 12
def test_c_schema_metadata():
    """Check the metadata accessors and the extension fields of the view."""
    extension_meta = {
        b"ARROW:extension:name": b"some_name",
        b"ARROW:extension:metadata": b"some_metadata",
    }
    schema = na.c_schema(na.int32()).modify(metadata=extension_meta)
    schema_meta = schema.metadata

    # Values appear in the repr of both the schema and its metadata
    assert "b'some_name'" in repr(schema)
    assert "b'some_name'" in repr(schema_meta)

    # Iteration, items(), keys() and values() all mirror the source dict
    assert list(schema_meta) == list(extension_meta)
    assert list(schema_meta.items()) == list(extension_meta.items())
    assert list(schema_meta.keys()) == list(extension_meta.keys())
    assert list(schema_meta.values()) == list(extension_meta.values())

    # The view exposes the name as str and the extension metadata as bytes
    view = c_schema_view(schema)
    assert view.extension_name == "some_name"
    assert view.extension_metadata == b"some_metadata"
def test_c_schema_equals():
    """Check CSchema.type_equals() for equal and unequal schema pairs."""
    int32 = na.c_schema(na.int32())
    struct = na.c_schema(na.struct({"col1": na.int32()}))
    dictionary = na.c_schema(na.dictionary(na.int32(), na.string()))
    ordered_dictionary = na.c_schema(
        na.dictionary(na.int32(), na.string(), dictionary_ordered=True)
    )

    # A schema is equal to itself (same underlying ArrowSchema)
    assert int32.type_equals(int32)

    # ...and to a deep copy of itself
    assert int32.type_equals(int32.__deepcopy__())
    assert struct.type_equals(struct.__deepcopy__())
    assert dictionary.type_equals(dictionary.__deepcopy__())

    # Different format strings
    assert int32.type_equals(struct) is False

    # Nullability only matters when explicitly requested
    non_nullable_int32 = int32.modify(flags=0)
    assert int32.type_equals(non_nullable_int32, check_nullability=True) is False
    assert int32.type_equals(non_nullable_int32) is True

    # Flags that carry type information (dictionary ordering) always matter
    assert dictionary.type_equals(ordered_dictionary) is False

    # Different number of children
    childless_struct = struct.modify(children=[])
    assert struct.type_equals(childless_struct) is False

    # Same number of children, but a child differs
    struct_with_other_child = struct.modify(children=[dictionary])
    assert struct.type_equals(struct_with_other_child) is False

    # Dictionary present on only one side, in either direction
    assert int32.type_equals(dictionary) is False
    assert dictionary.type_equals(int32) is False

    # Different dictionary index type
    int64_indexed = na.c_schema(na.dictionary(na.int64(), na.string()))
    assert dictionary.type_equals(int64_indexed) is False

    # Different dictionary value type
    struct_valued = dictionary.modify(dictionary=struct)
    assert dictionary.type_equals(struct_valued) is False
def test_c_schema_assert_type_equal():
    """Check the errors raised (or not raised) by assert_type_equal()."""
    from nanoarrow._schema import assert_type_equal

    int32 = na.c_schema(na.int32())
    string = na.c_schema(na.string())
    non_null_string = na.c_schema(na.string(False))

    # Identical schemas pass even with the nullability check enabled
    assert_type_equal(int32, int32, check_nullability=True)

    # A non-schema on either side is a TypeError
    with pytest.raises(TypeError):
        assert_type_equal(None, int32, check_nullability=False)
    with pytest.raises(TypeError):
        assert_type_equal(int32, None, check_nullability=False)

    # Mismatched types raise a ValueError matching the expected message
    expected_message = "Expected schema\n 'string'\nbut got\n 'int32'"
    with pytest.raises(ValueError, match=expected_message):
        assert_type_equal(int32, string, check_nullability=False)

    # Nullability differences are only reported when requested
    assert_type_equal(non_null_string, string, check_nullability=False)
    with pytest.raises(ValueError):
        assert_type_equal(non_null_string, string, check_nullability=True)
def test_c_schema_modify():
    """Check that CSchema.modify() returns an updated copy for each keyword.

    Each section changes exactly one property and, where applicable, asserts
    that the format of the result still matches the source schema.
    """
    schema = na.c_schema(na.null())

    # With no arguments, modify() yields a distinct object at a distinct address
    schema_clone = schema.modify()
    assert schema_clone is not schema
    assert schema._addr() != schema_clone._addr()

    schema_formatted = schema.modify(format="i")
    assert schema_formatted.format == "i"

    schema_named = schema.modify(name="something else")
    assert schema_named.name == "something else"
    assert schema_named.format == schema.format

    schema_flagged = schema.modify(flags=0)
    assert schema_flagged.flags == 0
    assert schema_flagged.format == schema.format

    schema_non_nullable = schema.modify(nullable=False)
    assert schema_non_nullable.flags == 0
    assert schema_non_nullable.format == schema.format

    meta = {"some key": "some value"}
    schema_metad = schema.modify(metadata=meta)
    assert list(schema_metad.metadata.items()) == [(b"some key", b"some value")]
    # Check the schema that was just built (previously this re-checked
    # schema_non_nullable, leaving schema_metad's format unverified)
    assert schema_metad.format == schema.format

    # Metadata read from another schema is accepted as input too
    schema_metad2 = schema.modify(metadata=schema_metad.metadata)
    assert list(schema_metad2.metadata.items()) == [(b"some key", b"some value")]

    # An empty mapping clears the metadata
    schema_no_metad = schema_metad.modify(metadata={})
    assert schema_no_metad.metadata is None
def test_c_schema_modify_children():
    """Check that modify() keeps or replaces children from a list or a dict."""
    schema = na.c_schema(na.struct({"col1": na.null()}))

    # No arguments: the existing child is carried over
    unchanged = schema.modify()
    assert unchanged.n_children == 1
    unchanged_child = unchanged.child(0)
    assert unchanged_child.name == "col1"
    assert unchanged_child.format == "n"

    # Children given as a list take their names from the child schemas
    renamed_int32 = na.c_schema(na.int32()).modify(name="new name")
    from_list = schema.modify(children=[renamed_int32])
    assert from_list.n_children == 1
    list_child = from_list.child(0)
    assert list_child.name == "new name"
    assert list_child.format == "i"

    # Children given as a dict take their names from the dict keys
    from_dict = schema.modify(children={"new name": na.c_schema(na.int32())})
    assert from_dict.n_children == 1
    dict_child = from_dict.child(0)
    assert dict_child.name == "new name"
    assert dict_child.format == "i"
def test_c_schema_modify_dictionary():
    """Check that modify() can attach, preserve and remove a dictionary."""
    schema = na.c_schema(na.int32())

    # Attach a string dictionary to the int32 schema
    schema_dictionary = schema.modify(dictionary=na.c_schema(na.string()))
    assert schema_dictionary.format == "i"
    assert schema_dictionary.dictionary.format == "u"

    # With no arguments, the existing dictionary is carried over
    schema_same_dictionary = schema_dictionary.modify()
    assert schema_same_dictionary.format == "i"
    assert schema_same_dictionary.dictionary.format == "u"

    # dictionary=False removes the dictionary. Assert on the modified schema:
    # checking only the source int32 schema (which never had a dictionary)
    # would pass even if the removal did nothing.
    schema_no_dictionary = schema_dictionary.modify(dictionary=False)
    assert schema_no_dictionary.format == "i"
    assert schema_no_dictionary.dictionary is None

    # The source schema was not given a dictionary along the way
    assert schema.dictionary is None