Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

arrow-nightlies / pyarrow   python

Repository URL to install this package:

Version: 19.0.0.dev259 

/ include / arrow / array / array_nested.h

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

// Array accessor classes for List, LargeList, ListView, LargeListView, FixedSizeList,
// Map, Struct, and Union

#pragma once

#include <cstdint>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "arrow/array/array_base.h"
#include "arrow/array/data.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/type_fwd.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"

namespace arrow {

/// \addtogroup nested-arrays
///
/// @{

// ----------------------------------------------------------------------
// VarLengthListLikeArray

template <typename TYPE>
class VarLengthListLikeArray;

namespace internal {

// Private helper for [Large]List[View]Array::SetData.
// Unfortunately, trying to define VarLengthListLikeArray::SetData outside of this header
// doesn't play well with MSVC.
template <typename TYPE>
void SetListData(VarLengthListLikeArray<TYPE>* self,
                 const std::shared_ptr<ArrayData>& data,
                 Type::type expected_type_id = TYPE::type_id);

/// \brief A version of Flatten that keeps recursively flattening until an array of
/// non-list values is reached.
///
/// Array types considered to be lists by this function:
///  - list
///  - large_list
///  - list_view
///  - large_list_view
///  - fixed_size_list
///
/// \see ListArray::Flatten
ARROW_EXPORT Result<std::shared_ptr<Array>> FlattenLogicalListRecursively(
    const Array& in_array, MemoryPool* memory_pool);

}  // namespace internal

/// Base class for variable-sized list and list-view arrays, regardless of offset size.
template <typename TYPE>
class VarLengthListLikeArray : public Array {
 public:
  using TypeClass = TYPE;
  using offset_type = typename TypeClass::offset_type;

  const TypeClass* var_length_list_like_type() const { return this->list_type_; }

  /// \brief Return array object containing the list's values
  ///
  /// Note that this buffer does not account for any slice offset or length.
  const std::shared_ptr<Array>& values() const { return values_; }

  /// Note that this buffer does not account for any slice offset or length.
  const std::shared_ptr<Buffer>& value_offsets() const { return data_->buffers[1]; }

  const std::shared_ptr<DataType>& value_type() const { return list_type_->value_type(); }

  /// Return pointer to raw value offsets accounting for any slice offset
  const offset_type* raw_value_offsets() const { return raw_value_offsets_; }

  // The following functions will not perform boundschecking

  offset_type value_offset(int64_t i) const { return raw_value_offsets_[i]; }

  /// \brief Return the size of the value at a particular index
  ///
  /// Since non-empty null lists and list-views are possible, avoid calling this
  /// function when the list at slot i is null.
  ///
  /// \pre IsValid(i)
  virtual offset_type value_length(int64_t i) const = 0;

  /// \pre IsValid(i)
  std::shared_ptr<Array> value_slice(int64_t i) const {
    return values_->Slice(value_offset(i), value_length(i));
  }

  /// \brief Flatten all level recursively until reach a non-list type, and return
  /// a non-list type Array.
  ///
  /// \see internal::FlattenLogicalListRecursively
  Result<std::shared_ptr<Array>> FlattenRecursively(
      MemoryPool* memory_pool = default_memory_pool()) const {
    return internal::FlattenLogicalListRecursively(*this, memory_pool);
  }

 protected:
  friend void internal::SetListData<TYPE>(VarLengthListLikeArray<TYPE>* self,
                                          const std::shared_ptr<ArrayData>& data,
                                          Type::type expected_type_id);

  const TypeClass* list_type_ = NULLPTR;
  std::shared_ptr<Array> values_;
  const offset_type* raw_value_offsets_ = NULLPTR;
};

// ----------------------------------------------------------------------
// ListArray / LargeListArray

template <typename TYPE>
class BaseListArray : public VarLengthListLikeArray<TYPE> {
 public:
  using TypeClass = TYPE;
  using offset_type = typename TYPE::offset_type;

  const TypeClass* list_type() const { return this->var_length_list_like_type(); }

  /// \brief Return the size of the value at a particular index
  ///
  /// Since non-empty null lists are possible, avoid calling this
  /// function when the list at slot i is null.
  ///
  /// \pre IsValid(i)
  offset_type value_length(int64_t i) const final {
    return this->raw_value_offsets_[i + 1] - this->raw_value_offsets_[i];
  }
};

/// Concrete Array class for list data
class ARROW_EXPORT ListArray : public BaseListArray<ListType> {
 public:
  explicit ListArray(std::shared_ptr<ArrayData> data);

  ListArray(std::shared_ptr<DataType> type, int64_t length,
            std::shared_ptr<Buffer> value_offsets, std::shared_ptr<Array> values,
            std::shared_ptr<Buffer> null_bitmap = NULLPTR,
            int64_t null_count = kUnknownNullCount, int64_t offset = 0);

  /// \brief Construct ListArray from array of offsets and child value array
  ///
  /// This function does the bare minimum of validation of the offsets and
  /// input types, and will allocate a new offsets array if necessary (i.e. if
  /// the offsets contain any nulls). If the offsets do not have nulls, they
  /// are assumed to be well-formed.
  ///
  /// If a null_bitmap is not provided, the nulls will be inferred from the offsets'
  /// null bitmap. But if a null_bitmap is provided, the offsets array can't have nulls.
  ///
  /// And when a null_bitmap is provided, the offsets array cannot be a slice (i.e. an
  /// array with offset() > 0).
  ///
  /// \param[in] offsets Array containing n + 1 offsets encoding length and
  /// size. Must be of int32 type
  /// \param[in] values Array containing list values
  /// \param[in] pool MemoryPool in case new offsets array needs to be
  /// allocated because of null values
  /// \param[in] null_bitmap Optional validity bitmap
  /// \param[in] null_count Optional null count in null_bitmap
  static Result<std::shared_ptr<ListArray>> FromArrays(
      const Array& offsets, const Array& values, MemoryPool* pool = default_memory_pool(),
      std::shared_ptr<Buffer> null_bitmap = NULLPTR,
      int64_t null_count = kUnknownNullCount);

  static Result<std::shared_ptr<ListArray>> FromArrays(
      std::shared_ptr<DataType> type, const Array& offsets, const Array& values,
      MemoryPool* pool = default_memory_pool(),
      std::shared_ptr<Buffer> null_bitmap = NULLPTR,
      int64_t null_count = kUnknownNullCount);

  /// \brief Build a ListArray from a ListViewArray
  static Result<std::shared_ptr<ListArray>> FromListView(const ListViewArray& source,
                                                         MemoryPool* pool);

  /// \brief Return an Array that is a concatenation of the lists in this array.
  ///
  /// Note that it's different from `values()` in that it takes into
  /// consideration of this array's offsets as well as null elements backed
  /// by non-empty lists (they are skipped, thus copying may be needed).
  Result<std::shared_ptr<Array>> Flatten(
      MemoryPool* memory_pool = default_memory_pool()) const;

  /// \brief Return list offsets as an Int32Array
  ///
  /// The returned array will not have a validity bitmap, so you cannot expect
  /// to pass it to ListArray::FromArrays() and get back the same list array
  /// if the original one has nulls.
  std::shared_ptr<Array> offsets() const;

 protected:
  // This constructor defers SetData to a derived array class
  ListArray() = default;

  void SetData(const std::shared_ptr<ArrayData>& data);
};

/// Concrete Array class for large list data (with 64-bit offsets)
class ARROW_EXPORT LargeListArray : public BaseListArray<LargeListType> {
 public:
  explicit LargeListArray(const std::shared_ptr<ArrayData>& data);

  LargeListArray(const std::shared_ptr<DataType>& type, int64_t length,
                 const std::shared_ptr<Buffer>& value_offsets,
                 const std::shared_ptr<Array>& values,
                 const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
                 int64_t null_count = kUnknownNullCount, int64_t offset = 0);

  /// \brief Construct LargeListArray from array of offsets and child value array
  ///
  /// This function does the bare minimum of validation of the offsets and
  /// input types, and will allocate a new offsets array if necessary (i.e. if
  /// the offsets contain any nulls). If the offsets do not have nulls, they
  /// are assumed to be well-formed.
  ///
  /// If a null_bitmap is not provided, the nulls will be inferred from the offsets'
  /// null bitmap. But if a null_bitmap is provided, the offsets array can't have nulls.
  ///
  /// And when a null_bitmap is provided, the offsets array cannot be a slice (i.e. an
  /// array with offset() > 0).
  ///
  /// \param[in] offsets Array containing n + 1 offsets encoding length and
  /// size. Must be of int64 type
  /// \param[in] values Array containing list values
  /// \param[in] pool MemoryPool in case new offsets array needs to be
  /// allocated because of null values
  /// \param[in] null_bitmap Optional validity bitmap
  /// \param[in] null_count Optional null count in null_bitmap
  static Result<std::shared_ptr<LargeListArray>> FromArrays(
      const Array& offsets, const Array& values, MemoryPool* pool = default_memory_pool(),
      std::shared_ptr<Buffer> null_bitmap = NULLPTR,
      int64_t null_count = kUnknownNullCount);

  static Result<std::shared_ptr<LargeListArray>> FromArrays(
      std::shared_ptr<DataType> type, const Array& offsets, const Array& values,
      MemoryPool* pool = default_memory_pool(),
      std::shared_ptr<Buffer> null_bitmap = NULLPTR,
      int64_t null_count = kUnknownNullCount);

  /// \brief Build a LargeListArray from a LargeListViewArray
  static Result<std::shared_ptr<LargeListArray>> FromListView(
      const LargeListViewArray& source, MemoryPool* pool);

  /// \brief Return an Array that is a concatenation of the lists in this array.
  ///
  /// Note that it's different from `values()` in that it takes into
  /// consideration of this array's offsets as well as null elements backed
  /// by non-empty lists (they are skipped, thus copying may be needed).
  Result<std::shared_ptr<Array>> Flatten(
      MemoryPool* memory_pool = default_memory_pool()) const;

  /// \brief Return list offsets as an Int64Array
  std::shared_ptr<Array> offsets() const;

 protected:
  void SetData(const std::shared_ptr<ArrayData>& data);
};

// ----------------------------------------------------------------------
// ListViewArray / LargeListViewArray

template <typename TYPE>
class BaseListViewArray : public VarLengthListLikeArray<TYPE> {
 public:
  using TypeClass = TYPE;
  using offset_type = typename TYPE::offset_type;

  const TypeClass* list_view_type() const { return this->var_length_list_like_type(); }

  /// \brief Note that this buffer does not account for any slice offset or length.
  const std::shared_ptr<Buffer>& value_sizes() const { return this->data_->buffers[2]; }

  /// \brief Return pointer to raw value offsets accounting for any slice offset
  const offset_type* raw_value_sizes() const { return raw_value_sizes_; }

  /// \brief Return the size of the value at a particular index
  ///
  /// This should not be called if the list-view at slot i is null.
  /// The returned size in those cases could be any value from 0 to the
  /// length of the child values array.
  ///
  /// \pre IsValid(i)
  offset_type value_length(int64_t i) const final { return this->raw_value_sizes_[i]; }

 protected:
  const offset_type* raw_value_sizes_ = NULLPTR;
};

/// \brief Concrete Array class for list-view data
class ARROW_EXPORT ListViewArray : public BaseListViewArray<ListViewType> {
 public:
  explicit ListViewArray(std::shared_ptr<ArrayData> data);

  ListViewArray(std::shared_ptr<DataType> type, int64_t length,
                std::shared_ptr<Buffer> value_offsets,
                std::shared_ptr<Buffer> value_sizes, std::shared_ptr<Array> values,
                std::shared_ptr<Buffer> null_bitmap = NULLPTR,
                int64_t null_count = kUnknownNullCount, int64_t offset = 0);

  /// \brief Construct ListViewArray from array of offsets, sizes, and child
  /// value array
  ///
  /// Construct a ListViewArray using buffers from offsets and sizes arrays
  /// that project views into the child values array.
  ///
  /// This function does the bare minimum of validation of the offsets/sizes and
  /// input types. The offset and length of the offsets and sizes arrays must
  /// match and that will be checked, but their contents will be assumed to be
  /// well-formed.
  ///
  /// If a null_bitmap is not provided, the nulls will be inferred from the
  /// offsets's null bitmap. But if a null_bitmap is provided, the offsets array
  /// can't have nulls.
  ///
  /// And when a null_bitmap is provided, neither the offsets or sizes array can be a
  /// slice (i.e. an array with offset() > 0).
  ///
  /// \param[in] offsets An array of int32 offsets into the values array. NULL values are
  /// supported if the corresponding values in sizes is NULL or 0.
  /// \param[in] sizes An array containing the int32 sizes of every view. NULL values are
  /// taken to represent a NULL list-view in the array being created.
  /// \param[in] values Array containing list values
  /// \param[in] pool MemoryPool
  /// \param[in] null_bitmap Optional validity bitmap
  /// \param[in] null_count Optional null count in null_bitmap
  static Result<std::shared_ptr<ListViewArray>> FromArrays(
      const Array& offsets, const Array& sizes, const Array& values,
      MemoryPool* pool = default_memory_pool(),
      std::shared_ptr<Buffer> null_bitmap = NULLPTR,
      int64_t null_count = kUnknownNullCount);

  static Result<std::shared_ptr<ListViewArray>> FromArrays(
      std::shared_ptr<DataType> type, const Array& offsets, const Array& sizes,
      const Array& values, MemoryPool* pool = default_memory_pool(),
      std::shared_ptr<Buffer> null_bitmap = NULLPTR,
      int64_t null_count = kUnknownNullCount);

  /// \brief Build a ListViewArray from a ListArray
  static Result<std::shared_ptr<ListViewArray>> FromList(const ListArray& list_array,
                                                         MemoryPool* pool);

  /// \brief Return an Array that is a concatenation of the list-views in this array.
  ///
  /// Note that it's different from `values()` in that it takes into
  /// consideration this array's offsets (which can be in any order)
  /// and sizes. Nulls are skipped.
  ///
  /// This function invokes Concatenate() if list-views are non-contiguous. It
  /// will try to minimize the number of array slices passed to Concatenate() by
  /// maximizing the size of each slice (containing as many contiguous
  /// list-views as possible).
  Result<std::shared_ptr<Array>> Flatten(
      MemoryPool* memory_pool = default_memory_pool()) const;

  /// \brief Return list-view offsets as an Int32Array
  ///
  /// The returned array will not have a validity bitmap, so you cannot expect
  /// to pass it to ListArray::FromArrays() and get back the same list array
  /// if the original one has nulls.
  std::shared_ptr<Array> offsets() const;

  /// \brief Return list-view sizes as an Int32Array
  ///
  /// The returned array will not have a validity bitmap, so you cannot expect
  /// to pass it to ListViewArray::FromArrays() and get back the same list
  /// array if the original one has nulls.
  std::shared_ptr<Array> sizes() const;

 protected:
  // This constructor defers SetData to a derived array class
  ListViewArray() = default;

  void SetData(const std::shared_ptr<ArrayData>& data);
};

/// \brief Concrete Array class for large list-view data (with 64-bit offsets
/// and sizes)
class ARROW_EXPORT LargeListViewArray : public BaseListViewArray<LargeListViewType> {
 public:
  explicit LargeListViewArray(std::shared_ptr<ArrayData> data);

  LargeListViewArray(std::shared_ptr<DataType> type, int64_t length,
                     std::shared_ptr<Buffer> value_offsets,
                     std::shared_ptr<Buffer> value_sizes, std::shared_ptr<Array> values,
                     std::shared_ptr<Buffer> null_bitmap = NULLPTR,
                     int64_t null_count = kUnknownNullCount, int64_t offset = 0);

  /// \brief Construct LargeListViewArray from array of offsets, sizes, and child
  /// value array
  ///
  /// Construct an LargeListViewArray using buffers from offsets and sizes arrays
  /// that project views into the values array.
  ///
  /// This function does the bare minimum of validation of the offsets/sizes and
  /// input types. The offset and length of the offsets and sizes arrays must
  /// match and that will be checked, but their contents will be assumed to be
  /// well-formed.
  ///
  /// If a null_bitmap is not provided, the nulls will be inferred from the offsets' or
  /// sizes' null bitmap. Only one of these two is allowed to have a null bitmap. But if a
  /// null_bitmap is provided, the offsets array and the sizes array can't have nulls.
  ///
  /// And when a null_bitmap is provided, neither the offsets or sizes array can be a
  /// slice (i.e. an array with offset() > 0).
  ///
  /// \param[in] offsets An array of int64 offsets into the values array. NULL values are
  /// supported if the corresponding values in sizes is NULL or 0.
  /// \param[in] sizes An array containing the int64 sizes of every view. NULL values are
  /// taken to represent a NULL list-view in the array being created.
  /// \param[in] values Array containing list values
  /// \param[in] pool MemoryPool
  /// \param[in] null_bitmap Optional validity bitmap
  /// \param[in] null_count Optional null count in null_bitmap
  static Result<std::shared_ptr<LargeListViewArray>> FromArrays(
      const Array& offsets, const Array& sizes, const Array& values,
      MemoryPool* pool = default_memory_pool(),
      std::shared_ptr<Buffer> null_bitmap = NULLPTR,
      int64_t null_count = kUnknownNullCount);

  static Result<std::shared_ptr<LargeListViewArray>> FromArrays(
      std::shared_ptr<DataType> type, const Array& offsets, const Array& sizes,
      const Array& values, MemoryPool* pool = default_memory_pool(),
      std::shared_ptr<Buffer> null_bitmap = NULLPTR,
      int64_t null_count = kUnknownNullCount);

  /// \brief Build a LargeListViewArray from a LargeListArray
  static Result<std::shared_ptr<LargeListViewArray>> FromList(
      const LargeListArray& list_array, MemoryPool* pool);

  /// \brief Return an Array that is a concatenation of the large list-views in this
  /// array.
  ///
  /// Note that it's different from `values()` in that it takes into
  /// consideration this array's offsets (which can be in any order)
  /// and sizes. Nulls are skipped.
  Result<std::shared_ptr<Array>> Flatten(
      MemoryPool* memory_pool = default_memory_pool()) const;

  /// \brief Return list-view offsets as an Int64Array
  ///
  /// The returned array will not have a validity bitmap, so you cannot expect
  /// to pass it to LargeListArray::FromArrays() and get back the same list array
  /// if the original one has nulls.
  std::shared_ptr<Array> offsets() const;

  /// \brief Return list-view sizes as an Int64Array
  ///
  /// The returned array will not have a validity bitmap, so you cannot expect
  /// to pass it to LargeListViewArray::FromArrays() and get back the same list
  /// array if the original one has nulls.
  std::shared_ptr<Array> sizes() const;

 protected:
  // This constructor defers SetData to a derived array class
  LargeListViewArray() = default;

  void SetData(const std::shared_ptr<ArrayData>& data);
};

// ----------------------------------------------------------------------
// MapArray

/// Concrete Array class for map data
///
/// NB: "value" in this context refers to a pair of a key and the corresponding item
class ARROW_EXPORT MapArray : public ListArray {
 public:
  using TypeClass = MapType;

  explicit MapArray(const std::shared_ptr<ArrayData>& data);

  MapArray(const std::shared_ptr<DataType>& type, int64_t length,
           const std::shared_ptr<Buffer>& value_offsets,
           const std::shared_ptr<Array>& keys, const std::shared_ptr<Array>& items,
           const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
           int64_t null_count = kUnknownNullCount, int64_t offset = 0);

  MapArray(const std::shared_ptr<DataType>& type, int64_t length, BufferVector buffers,
           const std::shared_ptr<Array>& keys, const std::shared_ptr<Array>& items,
           int64_t null_count = kUnknownNullCount, int64_t offset = 0);

  MapArray(const std::shared_ptr<DataType>& type, int64_t length,
           const std::shared_ptr<Buffer>& value_offsets,
           const std::shared_ptr<Array>& values,
           const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
           int64_t null_count = kUnknownNullCount, int64_t offset = 0);

  /// \brief Construct MapArray from array of offsets and child key, item arrays
  ///
  /// This function does the bare minimum of validation of the offsets and
  /// input types, and will allocate a new offsets array if necessary (i.e. if
  /// the offsets contain any nulls). If the offsets do not have nulls, they
  /// are assumed to be well-formed
  ///
  /// \param[in] offsets Array containing n + 1 offsets encoding length and
  /// size. Must be of int32 type
  /// \param[in] keys Array containing key values
  /// \param[in] items Array containing item values
  /// \param[in] pool MemoryPool in case new offsets array needs to be
  /// \param[in] null_bitmap Optional validity bitmap
  /// allocated because of null values
  static Result<std::shared_ptr<Array>> FromArrays(
      const std::shared_ptr<Array>& offsets, const std::shared_ptr<Array>& keys,
      const std::shared_ptr<Array>& items, MemoryPool* pool = default_memory_pool(),
      std::shared_ptr<Buffer> null_bitmap = NULLPTR);

  static Result<std::shared_ptr<Array>> FromArrays(
      std::shared_ptr<DataType> type, const std::shared_ptr<Array>& offsets,
      const std::shared_ptr<Array>& keys, const std::shared_ptr<Array>& items,
      MemoryPool* pool = default_memory_pool(),
      std::shared_ptr<Buffer> null_bitmap = NULLPTR);

  const MapType* map_type() const { return map_type_; }

  /// \brief Return array object containing all map keys
  const std::shared_ptr<Array>& keys() const { return keys_; }

  /// \brief Return array object containing all mapped items
  const std::shared_ptr<Array>& items() const { return items_; }

  /// Validate child data before constructing the actual MapArray.
  static Status ValidateChildData(
      const std::vector<std::shared_ptr<ArrayData>>& child_data);

 protected:
  void SetData(const std::shared_ptr<ArrayData>& data);

  static Result<std::shared_ptr<Array>> FromArraysInternal(
      std::shared_ptr<DataType> type, const std::shared_ptr<Array>& offsets,
      const std::shared_ptr<Array>& keys, const std::shared_ptr<Array>& items,
      MemoryPool* pool, std::shared_ptr<Buffer> null_bitmap = NULLPTR);

 private:
  const MapType* map_type_;
  std::shared_ptr<Array> keys_, items_;
};

// ----------------------------------------------------------------------
// FixedSizeListArray

/// Concrete Array class for fixed size list data
class ARROW_EXPORT FixedSizeListArray : public Array {
 public:
  using TypeClass = FixedSizeListType;
  using offset_type = TypeClass::offset_type;

  explicit FixedSizeListArray(const std::shared_ptr<ArrayData>& data);

  FixedSizeListArray(const std::shared_ptr<DataType>& type, int64_t length,
                     const std::shared_ptr<Array>& values,
                     const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
                     int64_t null_count = kUnknownNullCount, int64_t offset = 0);

  const FixedSizeListType* list_type() const;

  /// \brief Return array object containing the list's values
  const std::shared_ptr<Array>& values() const;

  const std::shared_ptr<DataType>& value_type() const;

  // The following functions will not perform boundschecking
  int64_t value_offset(int64_t i) const {
    i += data_->offset;
    return list_size_ * i;
  }
  /// \brief Return the fixed-size of the values
  ///
  /// No matter the value of the index parameter, the result is the same.
  /// So even when the value at slot i is null, this function will return a
  /// non-zero size.
  ///
  /// \pre IsValid(i)
  int32_t value_length(int64_t i = 0) const {
    ARROW_UNUSED(i);
    return list_size_;
  }
  /// \pre IsValid(i)
  std::shared_ptr<Array> value_slice(int64_t i) const {
    return values_->Slice(value_offset(i), value_length(i));
  }

  /// \brief Return an Array that is a concatenation of the lists in this array.
  ///
  /// Note that it's different from `values()` in that it takes into
  /// consideration null elements (they are skipped, thus copying may be needed).
  Result<std::shared_ptr<Array>> Flatten(
      MemoryPool* memory_pool = default_memory_pool()) const;

  /// \brief Flatten all level recursively until reach a non-list type, and return
  /// a non-list type Array.
  ///
  /// \see internal::FlattenLogicalListRecursively
  Result<std::shared_ptr<Array>> FlattenRecursively(
      MemoryPool* memory_pool = default_memory_pool()) const {
    return internal::FlattenLogicalListRecursively(*this, memory_pool);
  }

  /// \brief Construct FixedSizeListArray from child value array and value_length
  ///
  /// \param[in] values Array containing list values
  /// \param[in] list_size The fixed length of each list
  /// \param[in] null_bitmap Optional validity bitmap
  /// \param[in] null_count Optional null count in null_bitmap
  /// \return Will have length equal to values.length() / list_size
  static Result<std::shared_ptr<Array>> FromArrays(
      const std::shared_ptr<Array>& values, int32_t list_size,
      std::shared_ptr<Buffer> null_bitmap = NULLPTR,
      int64_t null_count = kUnknownNullCount);

  /// \brief Construct FixedSizeListArray from child value array and type
  ///
  /// \param[in] values Array containing list values
  /// \param[in] type The fixed sized list type
  /// \param[in] null_bitmap Optional validity bitmap
  /// \param[in] null_count Optional null count in null_bitmap
  /// \return Will have length equal to values.length() / type.list_size()
  static Result<std::shared_ptr<Array>> FromArrays(
      const std::shared_ptr<Array>& values, std::shared_ptr<DataType> type,
      std::shared_ptr<Buffer> null_bitmap = NULLPTR,
      int64_t null_count = kUnknownNullCount);

 protected:
  void SetData(const std::shared_ptr<ArrayData>& data);
  int32_t list_size_;

 private:
  std::shared_ptr<Array> values_;
};

// ----------------------------------------------------------------------
// Struct

/// Concrete Array class for struct data
class ARROW_EXPORT StructArray : public Array {
 public:
  using TypeClass = StructType;

  explicit StructArray(const std::shared_ptr<ArrayData>& data);

  StructArray(const std::shared_ptr<DataType>& type, int64_t length,
              const std::vector<std::shared_ptr<Array>>& children,
              std::shared_ptr<Buffer> null_bitmap = NULLPTR,
              int64_t null_count = kUnknownNullCount, int64_t offset = 0);

  /// \brief Return a StructArray from child arrays and field names.
  ///
  /// The length and data type are automatically inferred from the arguments.
  /// There should be at least one child array.
  static Result<std::shared_ptr<StructArray>> Make(
      const ArrayVector& children, const std::vector<std::string>& field_names,
      std::shared_ptr<Buffer> null_bitmap = NULLPTR,
      int64_t null_count = kUnknownNullCount, int64_t offset = 0);

  /// \brief Return a StructArray from child arrays and fields.
  ///
  /// The length is automatically inferred from the arguments.
  /// There should be at least one child array.  This method does not
  /// check that field types and child array types are consistent.
  static Result<std::shared_ptr<StructArray>> Make(
      const ArrayVector& children, const FieldVector& fields,
      std::shared_ptr<Buffer> null_bitmap = NULLPTR,
      int64_t null_count = kUnknownNullCount, int64_t offset = 0);

  const StructType* struct_type() const;

  // Return a shared pointer in case the requestor desires to share ownership
  // with this array.  The returned array has its offset, length and null
  // count adjusted.
  const std::shared_ptr<Array>& field(int pos) const;

  const ArrayVector& fields() const;

  /// Returns null if name not found
  std::shared_ptr<Array> GetFieldByName(const std::string& name) const;

  /// Indicate if field named `name` can be found unambiguously in the struct.
  Status CanReferenceFieldByName(const std::string& name) const;

  /// Indicate if fields named `names` can be found unambiguously in the struct.
  Status CanReferenceFieldsByNames(const std::vector<std::string>& names) const;

  /// \brief Flatten this array as a vector of arrays, one for each field
  ///
  /// \param[in] pool The pool to allocate null bitmaps from, if necessary
  Result<ArrayVector> Flatten(MemoryPool* pool = default_memory_pool()) const;

  /// \brief Get one of the child arrays, combining its null bitmap
  /// with the parent struct array's bitmap.
  ///
  /// \param[in] index Which child array to get
  /// \param[in] pool The pool to allocate null bitmaps from, if necessary
  Result<std::shared_ptr<Array>> GetFlattenedField(
      int index, MemoryPool* pool = default_memory_pool()) const;

 private:
  // For caching boxed child data
  // XXX This is not handled in a thread-safe manner.
  mutable ArrayVector boxed_fields_;
};

// ----------------------------------------------------------------------
// Union

/// Base class for SparseUnionArray and DenseUnionArray
class ARROW_EXPORT UnionArray : public Array {
 public:
  using type_code_t = int8_t;

  /// Note that this buffer does not account for any slice offset
  const std::shared_ptr<Buffer>& type_codes() const { return data_->buffers[1]; }

  const type_code_t* raw_type_codes() const { return raw_type_codes_; }

  /// The logical type code of the value at index.
  type_code_t type_code(int64_t i) const { return raw_type_codes_[i]; }

  /// The physical child id containing value at index.
  int child_id(int64_t i) const { return union_type_->child_ids()[raw_type_codes_[i]]; }

  const UnionType* union_type() const { return union_type_; }

  UnionMode::type mode() const { return union_type_->mode(); }

  /// \brief Return the given field as an individual array.
  ///
  /// For sparse unions, the returned array has its offset, length and null
  /// count adjusted.
  std::shared_ptr<Array> field(int pos) const;

 protected:
  void SetData(std::shared_ptr<ArrayData> data);

  const type_code_t* raw_type_codes_;
  const UnionType* union_type_;

  // For caching boxed child data
  mutable std::vector<std::shared_ptr<Array>> boxed_fields_;
};

/// Concrete Array class for sparse union data
class ARROW_EXPORT SparseUnionArray : public UnionArray {
 public:
  using TypeClass = SparseUnionType;

  explicit SparseUnionArray(std::shared_ptr<ArrayData> data);

  SparseUnionArray(std::shared_ptr<DataType> type, int64_t length, ArrayVector children,
                   std::shared_ptr<Buffer> type_ids, int64_t offset = 0);

  /// \brief Construct SparseUnionArray from type_ids and children
  ///
  /// This function does the bare minimum of validation of the input types.
  ///
  /// \param[in] type_ids An array of logical type ids for the union type
  /// \param[in] children Vector of children Arrays containing the data for each type.
  /// \param[in] type_codes Vector of type codes.
  static Result<std::shared_ptr<Array>> Make(const Array& type_ids, ArrayVector children,
                                             std::vector<type_code_t> type_codes) {
    return Make(std::move(type_ids), std::move(children), std::vector<std::string>{},
                std::move(type_codes));
  }

  /// \brief Construct SparseUnionArray with custom field names from type_ids and children
  ///
  /// This function does the bare minimum of validation of the input types.
  ///
  /// \param[in] type_ids An array of logical type ids for the union type
  /// \param[in] children Vector of children Arrays containing the data for each type.
  /// \param[in] field_names Vector of strings containing the name of each field.
  /// \param[in] type_codes Vector of type codes.
  static Result<std::shared_ptr<Array>> Make(const Array& type_ids, ArrayVector children,
                                             std::vector<std::string> field_names = {},
                                             std::vector<type_code_t> type_codes = {});

  const SparseUnionType* union_type() const {
    return internal::checked_cast<const SparseUnionType*>(union_type_);
  }

  /// \brief Get one of the child arrays, adjusting its null bitmap
  /// where the union array type code does not match.
  ///
  /// \param[in] index Which child array to get (i.e. the physical index, not the type
  /// code) \param[in] pool The pool to allocate null bitmaps from, if necessary
  Result<std::shared_ptr<Array>> GetFlattenedField(
      int index, MemoryPool* pool = default_memory_pool()) const;

 protected:
  void SetData(std::shared_ptr<ArrayData> data);
};

/// \brief Concrete Array class for dense union data
///
/// Note that union types do not have a validity bitmap
class ARROW_EXPORT DenseUnionArray : public UnionArray {
 public:
  using TypeClass = DenseUnionType;

  explicit DenseUnionArray(const std::shared_ptr<ArrayData>& data);

  DenseUnionArray(std::shared_ptr<DataType> type, int64_t length, ArrayVector children,
                  std::shared_ptr<Buffer> type_ids,
                  std::shared_ptr<Buffer> value_offsets = NULLPTR, int64_t offset = 0);

  /// \brief Construct DenseUnionArray from type_ids, value_offsets, and children
  ///
  /// This function does the bare minimum of validation of the offsets and
  /// input types.
  ///
  /// \param[in] type_ids An array of logical type ids for the union type
  /// \param[in] value_offsets An array of signed int32 values indicating the
  /// relative offset into the respective child array for the type in a given slot.
  /// The respective offsets for each child value array must be in order / increasing.
  /// \param[in] children Vector of children Arrays containing the data for each type.
  /// \param[in] type_codes Vector of type codes.
  static Result<std::shared_ptr<Array>> Make(const Array& type_ids,
                                             const Array& value_offsets,
                                             ArrayVector children,
                                             std::vector<type_code_t> type_codes) {
    return Make(type_ids, value_offsets, std::move(children), std::vector<std::string>{},
                std::move(type_codes));
  }

  /// \brief Construct DenseUnionArray with custom field names from type_ids,
  /// value_offsets, and children
  ///
  /// This function does the bare minimum of validation of the offsets and
  /// input types.
  ///
  /// \param[in] type_ids An array of logical type ids for the union type
  /// \param[in] value_offsets An array of signed int32 values indicating the
  /// relative offset into the respective child array for the type in a given slot.
  /// The respective offsets for each child value array must be in order / increasing.
  /// \param[in] children Vector of children Arrays containing the data for each type.
  /// \param[in] field_names Vector of strings containing the name of each field.
  /// \param[in] type_codes Vector of type codes.
  static Result<std::shared_ptr<Array>> Make(const Array& type_ids,
                                             const Array& value_offsets,
                                             ArrayVector children,
                                             std::vector<std::string> field_names = {},
                                             std::vector<type_code_t> type_codes = {});

  const DenseUnionType* union_type() const {
    return internal::checked_cast<const DenseUnionType*>(union_type_);
  }

  /// Note that this buffer does not account for any slice offset
  const std::shared_ptr<Buffer>& value_offsets() const { return data_->buffers[2]; }

  int32_t value_offset(int64_t i) const { return raw_value_offsets_[i]; }

  const int32_t* raw_value_offsets() const { return raw_value_offsets_; }

 protected:
  const int32_t* raw_value_offsets_;

  void SetData(const std::shared_ptr<ArrayData>& data);
};

/// @}

}  // namespace arrow