Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

arrow-nightlies / pyarrow   python

Repository URL to install this package:

Version: 19.0.0.dev259 

/ include / arrow / array / builder_nested.h

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <cassert>
#include <cstdint>
#include <limits>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "arrow/array/array_nested.h"
#include "arrow/array/builder_base.h"
#include "arrow/array/data.h"
#include "arrow/buffer.h"
#include "arrow/buffer_builder.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"

namespace arrow {

/// \addtogroup nested-builders
///
/// @{

// ----------------------------------------------------------------------
// VarLengthListLikeBuilder

/// \brief Shared base of the builders for variable-length list-like arrays
///
/// Holds the offsets buffer builder and the child values builder, and
/// implements the slot-appending logic common to its subclasses. A subclass
/// must provide the four-argument AppendValues() and may override the
/// UnsafeAppend*Dimensions() hooks to record extra per-slot data.
///
/// \tparam TYPE list-like type class; must provide `offset_type` and `type_id`
template <typename TYPE>
class VarLengthListLikeBuilder : public ArrayBuilder {
 public:
  using TypeClass = TYPE;
  using offset_type = typename TypeClass::offset_type;

  /// Use this constructor to incrementally build the value array along with offsets and
  /// null bitmap.
  ///
  /// \param pool memory pool passed to the base builder and the offsets builder
  /// \param value_builder builder that receives the list elements
  /// \param type list-like type; its first field is retained with its data type
  /// cleared, so that type() can re-derive the element type from value_builder
  /// \param alignment alignment passed to the base builder and the offsets builder
  VarLengthListLikeBuilder(MemoryPool* pool,
                           std::shared_ptr<ArrayBuilder> const& value_builder,
                           const std::shared_ptr<DataType>& type,
                           int64_t alignment = kDefaultBufferAlignment)
      : ArrayBuilder(pool, alignment),
        offsets_builder_(pool, alignment),
        value_builder_(value_builder),
        value_field_(type->field(0)->WithType(NULLPTR)) {}

  /// Same as above, but the list-like type is constructed from the current type
  /// of value_builder.
  VarLengthListLikeBuilder(MemoryPool* pool,
                           std::shared_ptr<ArrayBuilder> const& value_builder,
                           int64_t alignment = kDefaultBufferAlignment)
      : VarLengthListLikeBuilder(pool, value_builder,
                                 std::make_shared<TYPE>(value_builder->type()),
                                 alignment) {}

  ~VarLengthListLikeBuilder() override = default;

  /// \brief Resize the offsets buffer and the base builder to hold capacity slots
  ///
  /// \param capacity the requested number of list slots
  /// \return CapacityError if capacity exceeds maximum_elements(), otherwise
  /// the first error reported by the underlying resize calls
  Status Resize(int64_t capacity) override {
    if (ARROW_PREDICT_FALSE(capacity > maximum_elements())) {
      return Status::CapacityError(type_name(),
                                   " array cannot reserve space for more than ",
                                   maximum_elements(), " got ", capacity);
    }
    ARROW_RETURN_NOT_OK(CheckCapacity(capacity));

    // One more than requested for list offsets; list-views store exactly one
    // offset per slot.
    const int64_t offsets_capacity =
        is_list_view(TYPE::type_id) ? capacity : capacity + 1;
    ARROW_RETURN_NOT_OK(offsets_builder_.Resize(offsets_capacity));
    return ArrayBuilder::Resize(capacity);
  }

  /// \brief Reset the base builder, the offsets builder and the value builder
  void Reset() override {
    ArrayBuilder::Reset();
    offsets_builder_.Reset();
    value_builder_->Reset();
  }

  /// \brief Start a new variable-length list slot
  ///
  /// This function should be called before appending elements to the
  /// value builder. Elements appended to the value builder before this function
  /// is called for the first time, will not be members of any list value.
  ///
  /// After this function is called, list_length elements SHOULD be appended to
  /// the values builder. If this contract is violated, the behavior is defined by
  /// the concrete builder implementation and SHOULD NOT be relied upon unless
  /// the caller is specifically building a [Large]List or [Large]ListView array.
  ///
  /// For [Large]List arrays, the list slot length will be the number of elements
  /// appended to the values builder before the next call to Append* or Finish. For
  /// [Large]ListView arrays, the list slot length will be exactly list_length, but if
  /// Append* is called before at least list_length elements are appended to the values
  /// builder, the current list slot will share elements with the next list
  /// slots or an invalid [Large]ListView array will be generated because there
  /// aren't enough elements in the values builder to fill the list slots.
  ///
  /// If you're building a [Large]List and don't need to be compatible
  /// with [Large]ListView, then `BaseListBuilder::Append(bool is_valid)`
  /// is a simpler API.
  ///
  /// \pre if is_valid is false, list_length MUST be 0
  /// \param is_valid Whether the new list slot is valid
  /// \param list_length The number of elements in the list
  Status Append(bool is_valid, int64_t list_length) {
    ARROW_RETURN_NOT_OK(Reserve(1));
    assert(is_valid || list_length == 0);
    UnsafeAppendToBitmap(is_valid);
    // The slot starts at the current end of the values builder.
    UnsafeAppendDimensions(/*offset=*/value_builder_->length(), /*size=*/list_length);
    return Status::OK();
  }

  /// \brief Append a single null list slot
  Status AppendNull() final {
    // Append() a null list slot with list_length=0.
    //
    // When building [Large]List arrays, elements being appended to the values builder
    // before the next call to Append* or Finish will extend the list slot length, but
    // that is totally fine because list arrays admit non-empty null list slots.
    //
    // In the case of [Large]ListViews that's not a problem either because the
    // list slot length remains zero.
    return Append(false, 0);
  }

  /// \brief Append length null list slots
  Status AppendNulls(int64_t length) final {
    ARROW_RETURN_NOT_OK(Reserve(length));
    UnsafeAppendToBitmap(length, false);
    UnsafeAppendEmptyDimensions(/*num_values=*/length);
    return Status::OK();
  }

  /// \brief Append an empty list slot
  ///
  /// \post Another call to Append* or Finish should be made before appending to
  /// the values builder to ensure list slot remains empty
  Status AppendEmptyValue() final { return Append(true, 0); }

  /// \brief Append length empty (valid) list slots
  ///
  /// \post Another call to Append* or Finish should be made before appending to
  /// the values builder to ensure the last list slot remains empty
  Status AppendEmptyValues(int64_t length) final {
    ARROW_RETURN_NOT_OK(Reserve(length));
    UnsafeAppendToBitmap(length, true);
    UnsafeAppendEmptyDimensions(/*num_values=*/length);
    return Status::OK();
  }

  /// \brief Vector append
  ///
  /// For list-array builders, the sizes are inferred from the offsets.
  /// BaseListBuilder<T> provides an implementation that doesn't take sizes, but
  /// this virtual function allows dispatching calls to both list-array and
  /// list-view-array builders (which need the sizes)
  ///
  /// \param offsets The offsets of the variable-length lists
  /// \param sizes The sizes of the variable-length lists
  /// \param length The number of offsets, sizes, and validity bits to append
  /// \param valid_bytes If passed, valid_bytes is of equal length to values,
  /// and any zero byte will be considered as a null for that slot
  virtual Status AppendValues(const offset_type* offsets, const offset_type* sizes,
                              int64_t length, const uint8_t* valid_bytes) = 0;

  /// \brief Append length list slots of array, starting at slot offset
  ///
  /// Each slot is re-based onto the current end of this builder's values
  /// builder. For valid slots the corresponding child elements are copied into
  /// the values builder; null slots are appended with size 0 and copy nothing.
  ///
  /// \param array the source list-like array
  /// \param offset index of the first slot to copy
  /// \param length number of slots to copy
  Status AppendArraySlice(const ArraySpan& array, int64_t offset,
                          int64_t length) override {
    const offset_type* offsets = array.GetValues<offset_type>(1);
    // Only list-views carry a sizes buffer; lists derive sizes from offsets.
    [[maybe_unused]] const offset_type* sizes = NULLPTR;
    if constexpr (is_list_view(TYPE::type_id)) {
      sizes = array.GetValues<offset_type>(2);
    }
    static_assert(internal::may_have_validity_bitmap(TYPE::type_id));
    const uint8_t* validity = array.MayHaveNulls() ? array.buffers[0].data : NULLPTR;
    ARROW_RETURN_NOT_OK(Reserve(length));
    for (int64_t row = offset; row < offset + length; row++) {
      const bool is_valid = !validity || bit_util::GetBit(validity, array.offset + row);
      int64_t size = 0;
      if (is_valid) {
        if constexpr (is_list_view(TYPE::type_id)) {
          size = sizes[row];
        } else {
          size = offsets[row + 1] - offsets[row];
        }
      }
      UnsafeAppendToBitmap(is_valid);
      UnsafeAppendDimensions(/*offset=*/value_builder_->length(), size);
      if (is_valid) {
        ARROW_RETURN_NOT_OK(
            value_builder_->AppendArraySlice(array.child_data[0], offsets[row], size));
      }
    }
    return Status::OK();
  }

  /// \brief Check that the values builder can take new_elements more elements
  ///
  /// \param new_elements number of elements about to be added to the values builder
  /// \return CapacityError if the resulting number of elements would exceed
  /// maximum_elements(), OK otherwise
  Status ValidateOverflow(int64_t new_elements) const {
    const int64_t new_length = value_builder_->length() + new_elements;
    if (ARROW_PREDICT_FALSE(new_length > maximum_elements())) {
      // Report the resulting total, since that is the quantity compared
      // against the limit (the increment alone may well be 0).
      return Status::CapacityError(type_name(), " array cannot contain more than ",
                                   maximum_elements(), " elements, have ", new_length);
    }
    return Status::OK();
  }

  /// \brief The builder receiving the list elements
  ArrayBuilder* value_builder() const { return value_builder_.get(); }

  /// \brief One less than the largest value representable by offset_type
  // Cannot make this a static attribute because of linking issues
  static constexpr int64_t maximum_elements() {
    return std::numeric_limits<offset_type>::max() - 1;
  }

  /// \brief The type being built, using the value builder's current type for
  /// the value field
  std::shared_ptr<DataType> type() const override {
    return std::make_shared<TYPE>(value_field_->WithType(value_builder_->type()));
  }

 private:
  /// Name used as a prefix in error messages.
  static constexpr const char* type_name() {
    if constexpr (is_list_view(TYPE::type_id)) {
      return "ListView";
    } else {
      return "List";
    }
  }

 protected:
  /// \brief Append dimensions for num_values empty list slots.
  ///
  /// ListViewBuilder overrides this to also append the sizes.
  virtual void UnsafeAppendEmptyDimensions(int64_t num_values) {
    const int64_t offset = value_builder_->length();
    for (int64_t i = 0; i < num_values; ++i) {
      offsets_builder_.UnsafeAppend(static_cast<offset_type>(offset));
    }
  }

  /// \brief Append dimensions for a single list slot.
  ///
  /// ListViewBuilder overrides this to also append the size.
  virtual void UnsafeAppendDimensions(int64_t offset, int64_t ARROW_ARG_UNUSED(size)) {
    offsets_builder_.UnsafeAppend(static_cast<offset_type>(offset));
  }

  /// Builder of the offsets buffer.
  TypedBufferBuilder<offset_type> offsets_builder_;
  /// Builder of the child values.
  std::shared_ptr<ArrayBuilder> value_builder_;
  /// Value field of the list-like type, stored with its data type cleared.
  std::shared_ptr<Field> value_field_;
};

// ----------------------------------------------------------------------
// ListBuilder / LargeListBuilder

/// \brief Implementation shared by the list-array builders
///
/// Extends VarLengthListLikeBuilder with offsets-only appends and with the
/// FinishInternal() that writes the trailing offset of a list array.
template <typename TYPE>
class BaseListBuilder : public VarLengthListLikeBuilder<TYPE> {
 private:
  using BASE = VarLengthListLikeBuilder<TYPE>;

 public:
  using TypeClass = TYPE;
  using offset_type = typename BASE::offset_type;

  using BASE::BASE;

  using BASE::Append;

  ~BaseListBuilder() override = default;

  /// \brief Begin a new list slot
  ///
  /// Call this before appending the slot's elements to the value builder.
  ///
  /// \param is_valid whether the new slot is valid (non-null)
  Status Append(bool is_valid = true) {
    // A list array does not record list_length (the base
    // UnsafeAppendDimensions ignores its size argument), so any value that
    // satisfies the precondition will do.
    constexpr int64_t kIgnoredListLength = 0;
    return BASE::Append(is_valid, kIgnoredListLength);
  }

  /// \brief Append a batch of offsets
  ///
  /// \param offsets the offsets to append
  /// \param length the number of offsets (and validity bytes) to append
  /// \param valid_bytes if passed, it is of equal length to offsets, and any
  /// zero byte marks the corresponding slot as null
  Status AppendValues(const offset_type* offsets, int64_t length,
                      const uint8_t* valid_bytes = NULLPTR) {
    ARROW_RETURN_NOT_OK(this->Reserve(length));
    this->UnsafeAppendToBitmap(valid_bytes, length);
    this->offsets_builder_.UnsafeAppend(offsets, length);
    return Status::OK();
  }

  /// \brief Append a batch of offsets; sizes are only cross-checked
  ///
  /// In debug builds, each of the first length-1 sizes must equal the distance
  /// between consecutive offsets for every slot that is not null, otherwise
  /// Status::Invalid is returned. The sizes are never stored.
  ///
  /// CAUTION: the final size (`sizes[length - 1]`) is not checked and may be
  /// inconsistent with offsets passed in a later call.
  Status AppendValues(const offset_type* offsets, const offset_type* sizes,
                      int64_t length, const uint8_t* valid_bytes) final {
    // The offsets themselves are trusted. Checking sizes against them partially
    // rules out input that would be fine for a list-view but not for a list,
    // whose offsets must be non-decreasing.
#ifndef NDEBUG
    if (sizes != NULLPTR) {
      const int64_t num_checked = length - 1;
      for (int64_t slot = 0; slot < num_checked; ++slot) {
        if (ARROW_PREDICT_FALSE(offsets[slot] != offsets[slot + 1] - sizes[slot])) {
          const bool slot_is_valid = (valid_bytes == NULLPTR) || valid_bytes[slot];
          if (slot_is_valid) {
            return Status::Invalid(
                "BaseListBuilder: sizes are inconsistent with offsets provided");
          }
        }
      }
    }
#endif
    return AppendValues(offsets, length, valid_bytes);
  }

  /// \brief Same as the four-argument overload, with all slots valid
  Status AppendValues(const offset_type* offsets, const offset_type* sizes,
                      int64_t length) {
    return AppendValues(offsets, sizes, length, /*valid_bytes=*/NULLPTR);
  }

  /// \brief Append the current length of the value builder as the next offset
  Status AppendNextOffset() {
    ARROW_RETURN_NOT_OK(this->ValidateOverflow(0));
    const auto next_offset = static_cast<offset_type>(this->value_builder_->length());
    return this->offsets_builder_.Append(next_offset);
  }

  /// \brief Assemble the ArrayData for the list array and reset this builder
  ///
  /// \param[out] out receives the finished array data
  Status FinishInternal(std::shared_ptr<ArrayData>* out) override {
    // Close the last slot with the trailing offset.
    ARROW_RETURN_NOT_OK(AppendNextOffset());

    // Offset padding zeroed by BufferBuilder
    std::shared_ptr<Buffer> offsets_buffer;
    std::shared_ptr<Buffer> validity_buffer;
    ARROW_RETURN_NOT_OK(this->offsets_builder_.Finish(&offsets_buffer));
    ARROW_RETURN_NOT_OK(this->null_bitmap_builder_.Finish(&validity_buffer));

    const bool no_values = (this->value_builder_->length() == 0);
    if (no_values) {
      // Try to make sure we get a non-null values buffer (ARROW-2744)
      ARROW_RETURN_NOT_OK(this->value_builder_->Resize(0));
    }

    std::shared_ptr<ArrayData> values_data;
    ARROW_RETURN_NOT_OK(this->value_builder_->FinishInternal(&values_data));

    *out = ArrayData::Make(this->type(), this->length_,
                           {std::move(validity_buffer), std::move(offsets_buffer)},
                           {std::move(values_data)}, this->null_count_);
    this->Reset();
    return Status::OK();
  }
};

/// \class ListBuilder
/// \brief Builder class for variable-length list array value types
///
/// To use this class, call the Append function to start each distinct list
/// value and then append that list's elements to the child array builder, or
/// use the bulk API to append a sequence of offsets and null values.
///
/// A note on types.  Per arrow/type.h all types in the C++ implementation are
/// logical so even though this class always builds list array, this can
/// represent multiple different logical types.  If no logical type is provided
/// at construction time, the class defaults to List<T> where T is taken from the
/// value_builder/values that the object is constructed with.
class ARROW_EXPORT ListBuilder : public BaseListBuilder<ListType> {
 public:
  using BaseListBuilder::BaseListBuilder;

  /// \cond FALSE
  using ArrayBuilder::Finish;
  /// \endcond

  /// \brief Finish building and return the result as a ListArray
  ///
  /// \param[out] out receives the finished array
  /// \return the Status reported by FinishTyped
  Status Finish(std::shared_ptr<ListArray>* out) { return FinishTyped(out); }
};

/// \class LargeListBuilder
/// \brief Builder class for large variable-length list array value types
///
/// Like ListBuilder, but to create large list arrays (with 64-bit offsets).
class ARROW_EXPORT LargeListBuilder : public BaseListBuilder<LargeListType> {
 public:
  using BaseListBuilder::BaseListBuilder;

  /// \cond FALSE
  using ArrayBuilder::Finish;
  /// \endcond

  /// \brief Finish building and return the result as a LargeListArray
  ///
  /// \param[out] out receives the finished array
  /// \return the Status reported by FinishTyped
  Status Finish(std::shared_ptr<LargeListArray>* out) { return FinishTyped(out); }
};

// ----------------------------------------------------------------------
// ListViewBuilder / LargeListViewBuilder

/// \brief Implementation shared by the list-view array builders
///
/// Extends VarLengthListLikeBuilder with a sizes buffer that is kept in step
/// with the offsets buffer: every appended slot gets one offset and one size.
template <typename TYPE>
class BaseListViewBuilder : public VarLengthListLikeBuilder<TYPE> {
 private:
  using BASE = VarLengthListLikeBuilder<TYPE>;

 public:
  using TypeClass = TYPE;
  using offset_type = typename BASE::offset_type;

  using BASE::BASE;

  ~BaseListViewBuilder() override = default;

  /// \brief Resize the base builder, then the sizes buffer, to capacity slots
  Status Resize(int64_t capacity) override {
    ARROW_RETURN_NOT_OK(BASE::Resize(capacity));
    return sizes_builder_.Resize(capacity);
  }

  /// \brief Reset the base builder and the sizes buffer
  void Reset() override {
    BASE::Reset();
    sizes_builder_.Reset();
  }

  /// \brief Append a batch of offsets and sizes
  ///
  /// \param offsets the offsets of the list-views
  /// \param sizes the sizes of the list-views
  /// \param length the number of offsets, sizes and validity bytes to append
  /// \param valid_bytes if passed, it is of equal length to offsets, and any
  /// zero byte marks the corresponding slot as null
  Status AppendValues(const offset_type* offsets, const offset_type* sizes,
                      int64_t length, const uint8_t* valid_bytes) final {
    ARROW_RETURN_NOT_OK(this->Reserve(length));
    this->UnsafeAppendToBitmap(valid_bytes, length);
    this->offsets_builder_.UnsafeAppend(offsets, length);
    this->sizes_builder_.UnsafeAppend(sizes, length);
    return Status::OK();
  }

  /// \brief Same as the four-argument overload, with all slots valid
  Status AppendValues(const offset_type* offsets, const offset_type* sizes,
                      int64_t length) {
    return AppendValues(offsets, sizes, length, /*valid_bytes=*/NULLPTR);
  }

  /// \brief Assemble the ArrayData for the list-view array and reset this builder
  ///
  /// \param[out] out receives the finished array data
  Status FinishInternal(std::shared_ptr<ArrayData>* out) override {
    // Offset and sizes padding zeroed by BufferBuilder
    std::shared_ptr<Buffer> validity_buffer;
    std::shared_ptr<Buffer> offsets_buffer;
    std::shared_ptr<Buffer> sizes_buffer;
    ARROW_RETURN_NOT_OK(this->null_bitmap_builder_.Finish(&validity_buffer));
    ARROW_RETURN_NOT_OK(this->offsets_builder_.Finish(&offsets_buffer));
    ARROW_RETURN_NOT_OK(this->sizes_builder_.Finish(&sizes_buffer));

    const bool no_values = (this->value_builder_->length() == 0);
    if (no_values) {
      // Try to make sure we get a non-null values buffer (ARROW-2744)
      ARROW_RETURN_NOT_OK(this->value_builder_->Resize(0));
    }

    std::shared_ptr<ArrayData> values_data;
    ARROW_RETURN_NOT_OK(this->value_builder_->FinishInternal(&values_data));

    *out = ArrayData::Make(this->type(), this->length_,
                           {std::move(validity_buffer), std::move(offsets_buffer),
                            std::move(sizes_buffer)},
                           {std::move(values_data)}, this->null_count_);
    this->Reset();
    return Status::OK();
  }

 protected:
  /// Record num_values empty slots: a zero offset and a zero size for each.
  void UnsafeAppendEmptyDimensions(int64_t num_values) override {
    // The two buffers are independent, so they can be filled in one pass.
    for (int64_t slot = 0; slot < num_values; ++slot) {
      this->offsets_builder_.UnsafeAppend(0);
      this->sizes_builder_.UnsafeAppend(0);
    }
  }

  /// Record the offset and the size of a single slot.
  void UnsafeAppendDimensions(int64_t offset, int64_t size) override {
    const auto slot_offset = static_cast<offset_type>(offset);
    const auto slot_size = static_cast<offset_type>(size);
    this->offsets_builder_.UnsafeAppend(slot_offset);
    this->sizes_builder_.UnsafeAppend(slot_size);
  }

 private:
  /// Builder of the sizes buffer (one entry per slot).
  TypedBufferBuilder<offset_type> sizes_builder_;
};

/// \class ListViewBuilder
/// \brief Builder class for list-view arrays
///
/// Concrete instantiation of BaseListViewBuilder for ListViewType; all
/// appending logic is inherited from it.
class ARROW_EXPORT ListViewBuilder final : public BaseListViewBuilder<ListViewType> {
 public:
  using BaseListViewBuilder::BaseListViewBuilder;

  /// \cond FALSE
  using ArrayBuilder::Finish;
  /// \endcond

  /// \brief Finish building and return the result as a ListViewArray
  ///
  /// \param[out] out receives the finished array
  /// \return the Status reported by FinishTyped
  Status Finish(std::shared_ptr<ListViewArray>* out) { return FinishTyped(out); }
};

/// \class LargeListViewBuilder
/// \brief Builder class for large list-view arrays
///
/// Concrete instantiation of BaseListViewBuilder for LargeListViewType; all
/// appending logic is inherited from it.
class ARROW_EXPORT LargeListViewBuilder final
    : public BaseListViewBuilder<LargeListViewType> {
 public:
  using BaseListViewBuilder::BaseListViewBuilder;

  /// \cond FALSE
  using ArrayBuilder::Finish;
  /// \endcond

  /// \brief Finish building and return the result as a LargeListViewArray
  ///
  /// \param[out] out receives the finished array
  /// \return the Status reported by FinishTyped
  Status Finish(std::shared_ptr<LargeListViewArray>* out) { return FinishTyped(out); }
};

// ----------------------------------------------------------------------
// Map builder

/// \class MapBuilder
/// \brief Builder class for arrays of variable-size maps
///
/// To use this class, call Append to start each distinct map and only then
/// append that map's entries to the key and item array builders; alternatively
/// use the bulk API to append a sequence of offsets and null maps.
///
/// Key uniqueness and ordering are not validated.
class ARROW_EXPORT MapBuilder : public ArrayBuilder {
 public:
  /// Use this constructor to define the built array's type explicitly. If key_builder
  /// or item_builder has indeterminate type, this builder will also.
  MapBuilder(MemoryPool* pool, const std::shared_ptr<ArrayBuilder>& key_builder,
             const std::shared_ptr<ArrayBuilder>& item_builder,
             const std::shared_ptr<DataType>& type);

  /// Use this constructor to infer the built array's type. If key_builder or
  /// item_builder has indeterminate type, this builder will also.
  MapBuilder(MemoryPool* pool, const std::shared_ptr<ArrayBuilder>& key_builder,
             const std::shared_ptr<ArrayBuilder>& item_builder, bool keys_sorted = false);

  // NOTE(review): this overload receives a single builder together with an
  // explicit type; how the key builder is obtained is decided in the
  // out-of-line definition - confirm there.
  MapBuilder(MemoryPool* pool, const std::shared_ptr<ArrayBuilder>& item_builder,
             const std::shared_ptr<DataType>& type);

  Status Resize(int64_t capacity) override;
  void Reset() override;
  Status FinishInternal(std::shared_ptr<ArrayData>* out) override;

  /// \cond FALSE
  using ArrayBuilder::Finish;
  /// \endcond

  /// \brief Finish building and return the result as a MapArray
  Status Finish(std::shared_ptr<MapArray>* out) { return FinishTyped(out); }

  /// \brief Vector append
  ///
  /// If passed, valid_bytes is of equal length to values, and any zero byte
  /// will be considered as a null for that slot
  Status AppendValues(const int32_t* offsets, int64_t length,
                      const uint8_t* valid_bytes = NULLPTR);

  /// \brief Start a new variable-length map slot
  ///
  /// This function should be called before beginning to append elements to the
  /// key and item builders
  Status Append();

  Status AppendNull() final;

  Status AppendNulls(int64_t length) final;

  Status AppendEmptyValue() final;

  Status AppendEmptyValues(int64_t length) final;

  /// \brief Append length map slots of array, starting at slot offset
  ///
  /// A null slot is appended as a null map. For a valid slot a new map is
  /// started and the slot's keys and items are copied into the key and item
  /// builders.
  Status AppendArraySlice(const ArraySpan& array, int64_t offset,
                          int64_t length) override {
    const int32_t* offsets = array.GetValues<int32_t>(1);
    static_assert(internal::may_have_validity_bitmap(MapType::type_id));
    const uint8_t* validity = array.MayHaveNulls() ? array.buffers[0].data : NULLPTR;
    const int64_t end = offset + length;
    for (int64_t row = offset; row < end; ++row) {
      const bool is_null =
          validity != NULLPTR && !bit_util::GetBit(validity, array.offset + row);
      if (is_null) {
        ARROW_RETURN_NOT_OK(AppendNull());
        continue;
      }

      ARROW_RETURN_NOT_OK(Append());
      const int64_t slot_length = offsets[row + 1] - offsets[row];
      const ArraySpan& entries = array.child_data[0];
      // Add together the inner StructArray offset to the Map/List offset
      const int64_t key_value_offset = entries.offset + offsets[row];
      ARROW_RETURN_NOT_OK(key_builder_->AppendArraySlice(entries.child_data[0],
                                                         key_value_offset, slot_length));
      ARROW_RETURN_NOT_OK(item_builder_->AppendArraySlice(
          entries.child_data[1], key_value_offset, slot_length));
    }
    return Status::OK();
  }

  /// \brief Get builder to append keys.
  ///
  /// Appending a key with this builder should be followed by appending
  /// an item or null value with item_builder().
  ArrayBuilder* key_builder() const { return key_builder_.get(); }

  /// \brief Get builder to append items
  ///
  /// Appending an item with this builder should have been preceded
  /// by appending a key with key_builder().
  ArrayBuilder* item_builder() const { return item_builder_.get(); }

  /// \brief Get builder to add Map entries as struct values.
  ///
  /// This is used instead of key_builder()/item_builder() and allows
  /// the Map to be built as a list of struct values.
  ArrayBuilder* value_builder() const { return list_builder_->value_builder(); }

  /// \brief The map type being built, rebuilt from the stored field names and
  /// the current types of the key and item builders
  std::shared_ptr<DataType> type() const override {
    // Key and Item builder may update types, but they don't contain the field names,
    // so we need to reconstruct the type. (See ARROW-13735.)
    auto key_field = field(key_name_, key_builder_->type(), false);
    auto item_field = field(item_name_, item_builder_->type(), item_nullable_);
    auto entries_field = field(
        entries_name_, struct_({std::move(key_field), std::move(item_field)}), false);
    return std::make_shared<MapType>(std::move(entries_field), keys_sorted_);
  }

  /// \brief Forward the overflow check to the underlying list builder
  Status ValidateOverflow(int64_t new_elements) {
    return list_builder_->ValidateOverflow(new_elements);
  }

 protected:
  inline Status AdjustStructBuilderLength();

  bool keys_sorted_ = false;
  bool item_nullable_ = false;
  std::string entries_name_;
  std::string key_name_;
  std::string item_name_;
  std::shared_ptr<ListBuilder> list_builder_;
  std::shared_ptr<ArrayBuilder> key_builder_;
  std::shared_ptr<ArrayBuilder> item_builder_;
};

// ----------------------------------------------------------------------
// FixedSizeList builder

/// \class FixedSizeListBuilder
/// \brief Builder class for fixed-length list array value types
class ARROW_EXPORT FixedSizeListBuilder : public ArrayBuilder {
 public:
  using TypeClass = FixedSizeListType;

  /// Use this constructor to infer the built array's type. If value_builder has
  /// indeterminate type, this builder will also.
  FixedSizeListBuilder(MemoryPool* pool,
                       std::shared_ptr<ArrayBuilder> const& value_builder,
                       int32_t list_size);

  /// Use this constructor to define the built array's type explicitly. If value_builder
  /// has indeterminate type, this builder will also.
  FixedSizeListBuilder(MemoryPool* pool,
                       std::shared_ptr<ArrayBuilder> const& value_builder,
                       const std::shared_ptr<DataType>& type);

  Status Resize(int64_t capacity) override;
  void Reset() override;
  Status FinishInternal(std::shared_ptr<ArrayData>* out) override;

  /// \cond FALSE
  using ArrayBuilder::Finish;
  /// \endcond

  /// \brief Finish building and return the result as a FixedSizeListArray
  Status Finish(std::shared_ptr<FixedSizeListArray>* out) { return FinishTyped(out); }

  /// \brief Append a valid fixed length list.
  ///
  /// This function affects only the validity bitmap; the child values must be appended
  /// using the child array builder.
  Status Append();

  /// \brief Vector append
  ///
  /// If passed, valid_bytes will be read and any zero byte
  /// will cause the corresponding slot to be null
  ///
  /// This function affects only the validity bitmap; the child values must be appended
  /// using the child array builder. This includes appending nulls for null lists.
  /// XXX this restriction is confusing, should this method be omitted?
  Status AppendValues(int64_t length, const uint8_t* valid_bytes = NULLPTR);

  /// \brief Append a null fixed length list.
  ///
  /// The child array builder will have the appropriate number of nulls appended
  /// automatically.
  Status AppendNull() final;

  /// \brief Append length null fixed length lists.
  ///
  /// The child array builder will have the appropriate number of nulls appended
  /// automatically.
  Status AppendNulls(int64_t length) final;

  Status ValidateOverflow(int64_t new_elements);

  Status AppendEmptyValue() final;

  Status AppendEmptyValues(int64_t length) final;

  /// \brief Append length list slots of array, starting at slot offset
  ///
  /// A null slot is appended through AppendNull(). For a valid slot, its
  /// list_size_ child elements are copied into the value builder and then a
  /// valid list is appended.
  Status AppendArraySlice(const ArraySpan& array, int64_t offset, int64_t length) final {
    const uint8_t* validity = array.MayHaveNulls() ? array.buffers[0].data : NULLPTR;
    const int64_t end = offset + length;
    for (int64_t row = offset; row < end; ++row) {
      const bool is_null =
          validity != NULLPTR && !bit_util::GetBit(validity, array.offset + row);
      if (is_null) {
        ARROW_RETURN_NOT_OK(AppendNull());
        continue;
      }
      // Position of the slot's first element within the child array.
      const int64_t first_value = list_size_ * (array.offset + row);
      ARROW_RETURN_NOT_OK(
          value_builder_->AppendArraySlice(array.child_data[0], first_value, list_size_));
      ARROW_RETURN_NOT_OK(Append());
    }
    return Status::OK();
  }

  /// \brief The builder receiving the list elements
  ArrayBuilder* value_builder() const { return value_builder_.get(); }

  /// \brief The fixed-size list type being built, using the value builder's
  /// current type for the value field
  std::shared_ptr<DataType> type() const override {
    auto value_field = value_field_->WithType(value_builder_->type());
    return fixed_size_list(std::move(value_field), list_size_);
  }

  /// \brief One less than the largest value representable by the offset type
  // Cannot make this a static attribute because of linking issues
  static constexpr int64_t maximum_elements() {
    using offset_type = FixedSizeListType::offset_type;
    return std::numeric_limits<offset_type>::max() - 1;
  }

 protected:
  std::shared_ptr<Field> value_field_;
  const int32_t list_size_;
  std::shared_ptr<ArrayBuilder> value_builder_;
};

// ----------------------------------------------------------------------
// Struct

// ---------------------------------------------------------------------------------
// StructArray builder
/// \class StructBuilder
/// \brief Builder class for struct arrays
///
/// The Append, Resize and Reserve methods act on the StructBuilder only.
/// Make sure the corresponding methods of every child builder are called
/// consistently, so that all children stay as long as the struct itself.
class ARROW_EXPORT StructBuilder : public ArrayBuilder {
 public:
  /// If any of field_builders has indeterminate type, this builder will also
  StructBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool,
                std::vector<std::shared_ptr<ArrayBuilder>> field_builders);

  Status FinishInternal(std::shared_ptr<ArrayData>* out) override;

  /// \cond FALSE
  using ArrayBuilder::Finish;
  /// \endcond

  /// \brief Finish building and return the result as a StructArray
  Status Finish(std::shared_ptr<StructArray>* out) { return FinishTyped(out); }

  /// \brief Append length validity entries
  ///
  /// valid_bytes is of equal length to every child field, and any zero byte
  /// marks the corresponding struct slot as null. Only the validity bitmap is
  /// touched: callers must insert the data themselves by using the append or
  /// advance methods of each child builder.
  Status AppendValues(int64_t length, const uint8_t* valid_bytes) {
    ARROW_RETURN_NOT_OK(Reserve(length));
    UnsafeAppendToBitmap(valid_bytes, length);
    return Status::OK();
  }

  /// \brief Append one element to the struct
  ///
  /// Only the validity bitmap is touched; the Append method of every child
  /// builder must be called independently to keep the structure consistent.
  Status Append(bool is_valid = true) {
    ARROW_RETURN_NOT_OK(Reserve(1));
    UnsafeAppendToBitmap(is_valid);
    return Status::OK();
  }

  /// \brief Append a null value. Automatically appends an empty value to each child
  /// builder.
  Status AppendNull() final {
    for (const auto& child : children_) {
      ARROW_RETURN_NOT_OK(child->AppendEmptyValue());
    }
    return Append(/*is_valid=*/false);
  }

  /// \brief Append multiple null values. Automatically appends empty values to each
  /// child builder.
  Status AppendNulls(int64_t length) final {
    for (const auto& child : children_) {
      ARROW_RETURN_NOT_OK(child->AppendEmptyValues(length));
    }
    ARROW_RETURN_NOT_OK(Reserve(length));
    UnsafeAppendToBitmap(length, /*is_valid=*/false);
    return Status::OK();
  }

  /// \brief Append a valid element after appending an empty value to each
  /// child builder
  Status AppendEmptyValue() final {
    for (const auto& child : children_) {
      ARROW_RETURN_NOT_OK(child->AppendEmptyValue());
    }
    return Append(/*is_valid=*/true);
  }

  /// \brief Append length valid elements after appending as many empty values
  /// to each child builder
  Status AppendEmptyValues(int64_t length) final {
    for (const auto& child : children_) {
      ARROW_RETURN_NOT_OK(child->AppendEmptyValues(length));
    }
    ARROW_RETURN_NOT_OK(Reserve(length));
    UnsafeAppendToBitmap(length, /*is_valid=*/true);
    return Status::OK();
  }

  /// \brief Append length struct slots of array, starting at slot offset
  ///
  /// The matching range of every child of array is appended to the
  /// corresponding child builder first, then the validity bits are copied.
  Status AppendArraySlice(const ArraySpan& array, int64_t offset,
                          int64_t length) override {
    // The struct's own offset has to be applied to the children explicitly.
    const int64_t first_row = array.offset + offset;
    const size_t num_children = children_.size();
    for (size_t i = 0; i < num_children; ++i) {
      ARROW_RETURN_NOT_OK(
          children_[i]->AppendArraySlice(array.child_data[i], first_row, length));
    }
    const uint8_t* validity = array.MayHaveNulls() ? array.buffers[0].data : NULLPTR;
    ARROW_RETURN_NOT_OK(Reserve(length));
    UnsafeAppendToBitmap(validity, first_row, length);
    return Status::OK();
  }

  void Reset() override;

  /// \brief The builder of the i-th field
  ArrayBuilder* field_builder(int i) const { return children_[i].get(); }

  /// \brief The number of child builders
  int num_fields() const { return static_cast<int>(children_.size()); }

  std::shared_ptr<DataType> type() const override;

 private:
  std::shared_ptr<DataType> type_;
};

/// @}

}  // namespace arrow