// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <string_view>
#include "arrow/array.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/type_traits.h"
#include "arrow/util/binary_view_util.h"
#include "arrow/util/bit_block_counter.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/functional.h"
namespace arrow {
namespace internal {
template <typename T, typename Enable = void>
struct ArraySpanInlineVisitor {};
// Numeric and primitive C-compatible types
template <typename T>
struct ArraySpanInlineVisitor<T, enable_if_has_c_type<T>> {
using c_type = typename T::c_type;
template <typename ValidFunc, typename NullFunc>
static Status VisitStatus(const ArraySpan& arr, ValidFunc&& valid_func,
NullFunc&& null_func) {
if constexpr (std::is_same_v<T, BooleanType>) {
int64_t offset = arr.offset;
const uint8_t* data = arr.buffers[1].data;
return VisitBitBlocks(
arr.buffers[0].data, offset, arr.length,
[&](int64_t i) { return valid_func(bit_util::GetBit(data, offset + i)); },
std::forward<NullFunc>(null_func));
} else {
const c_type* data = arr.GetValues<c_type>(1);
auto visit_valid = [&](int64_t i) { return valid_func(data[i]); };
return VisitBitBlocks(arr.buffers[0].data, arr.offset, arr.length,
std::move(visit_valid), std::forward<NullFunc>(null_func));
}
}
template <typename ValidFunc, typename NullFunc>
static void VisitVoid(const ArraySpan& arr, ValidFunc&& valid_func,
NullFunc&& null_func) {
if constexpr (std::is_same_v<T, BooleanType>) {
int64_t offset = arr.offset;
const uint8_t* data = arr.buffers[1].data;
VisitBitBlocksVoid(
arr.buffers[0].data, offset, arr.length,
[&](int64_t i) { valid_func(bit_util::GetBit(data, offset + i)); },
std::forward<NullFunc>(null_func));
} else {
const c_type* data = arr.GetValues<c_type>(1);
auto visit_valid = [&](int64_t i) { valid_func(data[i]); };
VisitBitBlocksVoid(arr.buffers[0].data, arr.offset, arr.length,
std::move(visit_valid), std::forward<NullFunc>(null_func));
}
}
};
// Binary, String...
template <typename T>
struct ArraySpanInlineVisitor<T, enable_if_base_binary<T>> {
using c_type = std::string_view;
template <typename ValidFunc, typename NullFunc>
static Status VisitStatus(const ArraySpan& arr, ValidFunc&& valid_func,
NullFunc&& null_func) {
using offset_type = typename T::offset_type;
constexpr char empty_value = 0;
if (arr.length == 0) {
return Status::OK();
}
const offset_type* offsets = arr.GetValues<offset_type>(1);
const char* data;
if (arr.buffers[2].data == NULLPTR) {
data = &empty_value;
} else {
// Do not apply the array offset to the values array; the value_offsets
// index the non-sliced values array.
data = arr.GetValues<char>(2, /*absolute_offset=*/0);
}
offset_type cur_offset = *offsets++;
return VisitBitBlocks(
arr.buffers[0].data, arr.offset, arr.length,
[&](int64_t i) {
ARROW_UNUSED(i);
auto value = std::string_view(data + cur_offset, *offsets - cur_offset);
cur_offset = *offsets++;
return valid_func(value);
},
[&]() {
cur_offset = *offsets++;
return null_func();
});
}
template <typename ValidFunc, typename NullFunc>
static void VisitVoid(const ArraySpan& arr, ValidFunc&& valid_func,
NullFunc&& null_func) {
using offset_type = typename T::offset_type;
constexpr uint8_t empty_value = 0;
if (arr.length == 0) {
return;
}
const offset_type* offsets = arr.GetValues<offset_type>(1);
const uint8_t* data;
if (arr.buffers[2].data == NULLPTR) {
data = &empty_value;
} else {
// Do not apply the array offset to the values array; the value_offsets
// index the non-sliced values array.
data = arr.GetValues<uint8_t>(2, /*absolute_offset=*/0);
}
VisitBitBlocksVoid(
arr.buffers[0].data, arr.offset, arr.length,
[&](int64_t i) {
auto value = std::string_view(reinterpret_cast<const char*>(data + offsets[i]),
offsets[i + 1] - offsets[i]);
valid_func(value);
},
std::forward<NullFunc>(null_func));
}
};
// BinaryView, StringView...
template <typename T>
struct ArraySpanInlineVisitor<T, enable_if_binary_view_like<T>> {
using c_type = std::string_view;
template <typename ValidFunc, typename NullFunc>
static Status VisitStatus(const ArraySpan& arr, ValidFunc&& valid_func,
NullFunc&& null_func) {
if (arr.length == 0) {
return Status::OK();
}
auto* s = arr.GetValues<BinaryViewType::c_type>(1);
auto* data_buffers = arr.GetVariadicBuffers().data();
return VisitBitBlocks(
arr.buffers[0].data, arr.offset, arr.length,
[&](int64_t index) {
return valid_func(util::FromBinaryView(s[index], data_buffers));
},
[&]() { return null_func(); });
}
template <typename ValidFunc, typename NullFunc>
static void VisitVoid(const ArraySpan& arr, ValidFunc&& valid_func,
NullFunc&& null_func) {
if (arr.length == 0) {
return;
}
auto* s = arr.GetValues<BinaryViewType::c_type>(1);
auto* data_buffers = arr.GetVariadicBuffers().data();
VisitBitBlocksVoid(
arr.buffers[0].data, arr.offset, arr.length,
[&](int64_t index) { valid_func(util::FromBinaryView(s[index], data_buffers)); },
std::forward<NullFunc>(null_func));
}
};
// FixedSizeBinary, Decimal128
template <typename T>
struct ArraySpanInlineVisitor<T, enable_if_fixed_size_binary<T>> {
using c_type = std::string_view;
template <typename ValidFunc, typename NullFunc>
static Status VisitStatus(const ArraySpan& arr, ValidFunc&& valid_func,
NullFunc&& null_func) {
const int32_t byte_width = arr.type->byte_width();
const char* data = arr.GetValues<char>(1,
/*absolute_offset=*/arr.offset * byte_width);
return VisitBitBlocks(
arr.buffers[0].data, arr.offset, arr.length,
[&](int64_t i) {
auto value = std::string_view(data, byte_width);
data += byte_width;
return valid_func(value);
},
[&]() {
data += byte_width;
return null_func();
});
}
template <typename ValidFunc, typename NullFunc>
static void VisitVoid(const ArraySpan& arr, ValidFunc&& valid_func,
NullFunc&& null_func) {
const int32_t byte_width = arr.type->byte_width();
const char* data = arr.GetValues<char>(1,
/*absolute_offset=*/arr.offset * byte_width);
VisitBitBlocksVoid(
arr.buffers[0].data, arr.offset, arr.length,
[&](int64_t i) {
valid_func(std::string_view(data, byte_width));
data += byte_width;
},
[&]() {
data += byte_width;
null_func();
});
}
};
} // namespace internal
template <typename T, typename ValidFunc, typename NullFunc>
typename internal::call_traits::enable_if_return<ValidFunc, Status>::type
VisitArraySpanInline(const ArraySpan& arr, ValidFunc&& valid_func, NullFunc&& null_func) {
return internal::ArraySpanInlineVisitor<T>::VisitStatus(
arr, std::forward<ValidFunc>(valid_func), std::forward<NullFunc>(null_func));
}
template <typename T, typename ValidFunc, typename NullFunc>
typename internal::call_traits::enable_if_return<ValidFunc, void>::type
VisitArraySpanInline(const ArraySpan& arr, ValidFunc&& valid_func, NullFunc&& null_func) {
return internal::ArraySpanInlineVisitor<T>::VisitVoid(
arr, std::forward<ValidFunc>(valid_func), std::forward<NullFunc>(null_func));
}
// Visit an array's data values, in order, without overhead.
//
// The Visit method's `visitor` argument should be an object with two public methods:
// - Status VisitNull()
// - Status VisitValue(<scalar>)
//
// The scalar value's type depends on the array data type:
// - the type's `c_type`, if any
// - for boolean arrays, a `bool`
// - for binary, string, large binary and string, binary and string view, and fixed-size
// binary arrays, a `std::string_view`
template <typename T>
struct ArraySpanVisitor {
using InlineVisitorType = internal::ArraySpanInlineVisitor<T>;
using c_type = typename InlineVisitorType::c_type;
template <typename Visitor>
static Status Visit(const ArraySpan& arr, Visitor* visitor) {
return InlineVisitorType::VisitStatus(
arr, [visitor](c_type v) { return visitor->VisitValue(v); },
[visitor]() { return visitor->VisitNull(); });
}
};
// Visit a null bitmap, in order, without overhead.
//
// The given `ValidFunc` should be a callable with either of these signatures:
// - void()
// - Status()
//
// The `NullFunc` should have the same return type as `ValidFunc`.
template <typename ValidFunc, typename NullFunc>
typename internal::call_traits::enable_if_return<ValidFunc, Status>::type
VisitNullBitmapInline(const uint8_t* valid_bits, int64_t valid_bits_offset,
int64_t num_values, int64_t null_count, ValidFunc&& valid_func,
NullFunc&& null_func) {
internal::OptionalBitBlockCounter bit_counter(null_count == 0 ? NULLPTR : valid_bits,
valid_bits_offset, num_values);
int64_t position = 0;
int64_t offset_position = valid_bits_offset;
while (position < num_values) {
internal::BitBlockCount block = bit_counter.NextBlock();
if (block.AllSet()) {
for (int64_t i = 0; i < block.length; ++i) {
ARROW_RETURN_NOT_OK(valid_func());
}
} else if (block.NoneSet()) {
for (int64_t i = 0; i < block.length; ++i) {
ARROW_RETURN_NOT_OK(null_func());
}
} else {
for (int64_t i = 0; i < block.length; ++i) {
ARROW_RETURN_NOT_OK(bit_util::GetBit(valid_bits, offset_position + i)
? valid_func()
: null_func());
}
}
position += block.length;
offset_position += block.length;
}
return Status::OK();
}
template <typename ValidFunc, typename NullFunc>
typename internal::call_traits::enable_if_return<ValidFunc, void>::type
VisitNullBitmapInline(const uint8_t* valid_bits, int64_t valid_bits_offset,
int64_t num_values, int64_t null_count, ValidFunc&& valid_func,
NullFunc&& null_func) {
internal::OptionalBitBlockCounter bit_counter(null_count == 0 ? NULLPTR : valid_bits,
valid_bits_offset, num_values);
int64_t position = 0;
int64_t offset_position = valid_bits_offset;
while (position < num_values) {
internal::BitBlockCount block = bit_counter.NextBlock();
if (block.AllSet()) {
for (int64_t i = 0; i < block.length; ++i) {
valid_func();
}
} else if (block.NoneSet()) {
for (int64_t i = 0; i < block.length; ++i) {
null_func();
}
} else {
for (int64_t i = 0; i < block.length; ++i) {
bit_util::GetBit(valid_bits, offset_position + i) ? valid_func() : null_func();
}
}
position += block.length;
offset_position += block.length;
}
}
} // namespace arrow