Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Debian packages RPM packages NuGet packages

Repository URL to install this package:

Details    
pyarrow / include / arrow / type.h
Size: Mime:
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <atomic>
#include <climits>
#include <cstdint>
#include <iosfwd>
#include <limits>
#include <memory>
#include <optional>
#include <string>
#include <utility>
#include <variant>
#include <vector>

#include "arrow/result.h"
#include "arrow/type_fwd.h"  // IWYU pragma: export
#include "arrow/util/checked_cast.h"
#include "arrow/util/endian.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
#include "arrow/visitor.h"  // IWYU pragma: keep

namespace arrow {
namespace detail {

/// \defgroup numeric-datatypes Datatypes for numeric data
/// @{
/// @}

/// \defgroup binary-datatypes Datatypes for binary/string data
/// @{
/// @}

/// \defgroup temporal-datatypes Datatypes for temporal data
/// @{
/// @}

/// \defgroup nested-datatypes Datatypes for nested data
/// @{
/// @}

class ARROW_EXPORT Fingerprintable {
 public:
  virtual ~Fingerprintable();

  const std::string& fingerprint() const {
    auto p = fingerprint_.load();
    if (ARROW_PREDICT_TRUE(p != NULLPTR)) {
      return *p;
    }
    return LoadFingerprintSlow();
  }

  const std::string& metadata_fingerprint() const {
    auto p = metadata_fingerprint_.load();
    if (ARROW_PREDICT_TRUE(p != NULLPTR)) {
      return *p;
    }
    return LoadMetadataFingerprintSlow();
  }

 protected:
  const std::string& LoadFingerprintSlow() const;
  const std::string& LoadMetadataFingerprintSlow() const;

  virtual std::string ComputeFingerprint() const = 0;
  virtual std::string ComputeMetadataFingerprint() const = 0;

  mutable std::atomic<std::string*> fingerprint_{NULLPTR};
  mutable std::atomic<std::string*> metadata_fingerprint_{NULLPTR};
};

}  // namespace detail

/// EXPERIMENTAL: Layout specification for a data type
struct ARROW_EXPORT DataTypeLayout {
  enum BufferKind { FIXED_WIDTH, VARIABLE_WIDTH, BITMAP, ALWAYS_NULL };

  /// Layout specification for a single data type buffer
  struct BufferSpec {
    BufferKind kind;
    int64_t byte_width;  // For FIXED_WIDTH

    bool operator==(const BufferSpec& other) const {
      return kind == other.kind &&
             (kind != FIXED_WIDTH || byte_width == other.byte_width);
    }
    bool operator!=(const BufferSpec& other) const { return !(*this == other); }
  };

  static BufferSpec FixedWidth(int64_t w) { return BufferSpec{FIXED_WIDTH, w}; }
  static BufferSpec VariableWidth() { return BufferSpec{VARIABLE_WIDTH, -1}; }
  static BufferSpec Bitmap() { return BufferSpec{BITMAP, -1}; }
  static BufferSpec AlwaysNull() { return BufferSpec{ALWAYS_NULL, -1}; }

  /// A vector of buffer layout specifications, one for each expected buffer
  std::vector<BufferSpec> buffers;
  /// Whether this type expects an associated dictionary array.
  bool has_dictionary = false;
  /// If this is provided, the number of buffers expected is only lower-bounded by
  /// buffers.size(). Buffers beyond this lower bound are expected to conform to
  /// variadic_spec.
  std::optional<BufferSpec> variadic_spec;

  explicit DataTypeLayout(std::vector<BufferSpec> buffers,
                          std::optional<BufferSpec> variadic_spec = {})
      : buffers(std::move(buffers)), variadic_spec(variadic_spec) {}
};

/// \brief Base class for all data types
///
/// Data types in this library are all *logical*. They can be expressed as
/// either a primitive physical type (bytes or bits of some fixed size), a
/// nested type consisting of other data types, or another data type (e.g. a
/// timestamp encoded as an int64).
///
/// Simple datatypes may be entirely described by their Type::type id, but
/// complex datatypes are usually parametric.
class ARROW_EXPORT DataType : public std::enable_shared_from_this<DataType>,
                              public detail::Fingerprintable,
                              public util::EqualityComparable<DataType> {
 public:
  explicit DataType(Type::type id) : detail::Fingerprintable(), id_(id) {}
  ~DataType() override;

  /// \brief Return whether the types are equal
  ///
  /// Types that are logically convertible from one to another (e.g. List<UInt8>
  /// and Binary) are NOT equal.
  bool Equals(const DataType& other, bool check_metadata = false) const;

  /// \brief Return whether the types are equal
  bool Equals(const std::shared_ptr<DataType>& other, bool check_metadata = false) const;

  /// \brief Return the child field at index i.
  const std::shared_ptr<Field>& field(int i) const { return children_[i]; }

  /// \brief Return the children fields associated with this type.
  const FieldVector& fields() const { return children_; }

  /// \brief Return the number of children fields associated with this type.
  int num_fields() const { return static_cast<int>(children_.size()); }

  /// \brief Apply the TypeVisitor::Visit() method specialized to the data type
  Status Accept(TypeVisitor* visitor) const;

  /// \brief A string representation of the type, including any children
  virtual std::string ToString(bool show_metadata = false) const = 0;

  /// \brief Return hash value (excluding metadata in child fields)
  size_t Hash() const;

  /// \brief A string name of the type, omitting any child fields
  ///
  /// \since 0.7.0
  virtual std::string name() const = 0;

  /// \brief Return the data type layout.  Children are not included.
  ///
  /// \note Experimental API
  virtual DataTypeLayout layout() const = 0;

  /// \brief Return the type category
  Type::type id() const { return id_; }

  /// \brief Return the type category of the storage type
  virtual Type::type storage_id() const { return id_; }

  /// \brief Returns the type's fixed byte width, if any. Returns -1
  /// for non-fixed-width types, and should only be used for
  /// subclasses of FixedWidthType
  virtual int32_t byte_width() const {
    int32_t num_bits = this->bit_width();
    return num_bits > 0 ? num_bits / 8 : -1;
  }

  /// \brief Returns the type's fixed bit width, if any. Returns -1
  /// for non-fixed-width types, and should only be used for
  /// subclasses of FixedWidthType
  virtual int bit_width() const { return -1; }

  // \brief EXPERIMENTAL: Enable retrieving shared_ptr<DataType> from a const
  // context.
  std::shared_ptr<DataType> GetSharedPtr() const {
    return const_cast<DataType*>(this)->shared_from_this();
  }

 protected:
  // Dummy version that returns a null string (indicating not implemented).
  // Subclasses should override for fast equality checks.
  std::string ComputeFingerprint() const override;

  // Generic versions that works for all regular types, nested or not.
  std::string ComputeMetadataFingerprint() const override;

  Type::type id_;
  FieldVector children_;

 private:
  ARROW_DISALLOW_COPY_AND_ASSIGN(DataType);
};

/// \brief EXPERIMENTAL: Container for a type pointer which can hold a
/// dynamically created shared_ptr<DataType> if it needs to.
struct ARROW_EXPORT TypeHolder {
  const DataType* type = NULLPTR;
  std::shared_ptr<DataType> owned_type;

  TypeHolder() = default;
  TypeHolder(const TypeHolder& other) = default;
  TypeHolder& operator=(const TypeHolder& other) = default;
  TypeHolder(TypeHolder&& other) = default;
  TypeHolder& operator=(TypeHolder&& other) = default;

  TypeHolder(std::shared_ptr<DataType> owned_type)  // NOLINT implicit construction
      : type(owned_type.get()), owned_type(std::move(owned_type)) {}

  TypeHolder(const DataType* type)  // NOLINT implicit construction
      : type(type) {}

  Type::type id() const { return this->type->id(); }

  std::shared_ptr<DataType> GetSharedPtr() const {
    return this->type != NULLPTR ? this->type->GetSharedPtr() : NULLPTR;
  }

  const DataType& operator*() const { return *this->type; }

  operator bool() const { return this->type != NULLPTR; }

  bool operator==(const TypeHolder& other) const {
    if (type == other.type) return true;
    if (type == NULLPTR || other.type == NULLPTR) return false;
    return type->Equals(*other.type);
  }

  bool operator==(decltype(NULLPTR)) const { return this->type == NULLPTR; }

  bool operator==(const DataType& other) const {
    if (this->type == NULLPTR) return false;
    return other.Equals(*this->type);
  }

  bool operator!=(const DataType& other) const { return !(*this == other); }

  bool operator==(const std::shared_ptr<DataType>& other) const {
    return *this == *other;
  }

  bool operator!=(const TypeHolder& other) const { return !(*this == other); }

  std::string ToString(bool show_metadata = false) const {
    return this->type ? this->type->ToString(show_metadata) : "<NULLPTR>";
  }

  static std::string ToString(const std::vector<TypeHolder>&, bool show_metadata = false);

  static std::vector<TypeHolder> FromTypes(
      const std::vector<std::shared_ptr<DataType>>& types);
};

ARROW_EXPORT
std::ostream& operator<<(std::ostream& os, const DataType& type);

ARROW_EXPORT
std::ostream& operator<<(std::ostream& os, const TypeHolder& type);

/// \brief Return the compatible physical data type
///
/// Some types may have distinct logical meanings but the exact same physical
/// representation.  For example, TimestampType has Int64Type as a physical
/// type (defined as TimestampType::PhysicalType).
///
/// The return value is as follows:
/// - if a `PhysicalType` alias exists in the concrete type class, return
///   an instance of `PhysicalType`.
/// - otherwise, return the input type itself.
std::shared_ptr<DataType> GetPhysicalType(const std::shared_ptr<DataType>& type);

/// \brief Base class for all fixed-width data types
class ARROW_EXPORT FixedWidthType : public DataType {
 public:
  using DataType::DataType;
  // This is only for preventing defining this class in each
  // translation unit to avoid one-definition-rule violation.
  ~FixedWidthType() override;
};

/// \brief Base class for all data types representing primitive values
class ARROW_EXPORT PrimitiveCType : public FixedWidthType {
 public:
  using FixedWidthType::FixedWidthType;
  // This is only for preventing defining this class in each
  // translation unit to avoid one-definition-rule violation.
  ~PrimitiveCType() override;
};

/// \brief Base class for all numeric data types
class ARROW_EXPORT NumberType : public PrimitiveCType {
 public:
  using PrimitiveCType::PrimitiveCType;
  // This is only for preventing defining this class in each
  // translation unit to avoid one-definition-rule violation.
  ~NumberType() override;
};

/// \brief Base class for all integral data types
class ARROW_EXPORT IntegerType : public NumberType {
 public:
  using NumberType::NumberType;
  // This is only for preventing defining this class in each
  // translation unit to avoid one-definition-rule violation.
  ~IntegerType() override;
  virtual bool is_signed() const = 0;
};

/// \brief Base class for all floating-point data types
class ARROW_EXPORT FloatingPointType : public NumberType {
 public:
  using NumberType::NumberType;
  // This is only for preventing defining this class in each
  // translation unit to avoid one-definition-rule violation.
  ~FloatingPointType() override;
  enum Precision { HALF, SINGLE, DOUBLE };
  virtual Precision precision() const = 0;
};

/// \brief Base class for all parametric data types
class ParametricType {};

class ARROW_EXPORT NestedType : public DataType, public ParametricType {
 public:
  using DataType::DataType;
  // This is only for preventing defining this class in each
  // translation unit to avoid one-definition-rule violation.
  ~NestedType() override;
};

/// \brief The combination of a field name and data type, with optional metadata
///
/// Fields are used to describe the individual constituents of a
/// nested DataType or a Schema.
///
/// A field's metadata is represented by a KeyValueMetadata instance,
/// which holds arbitrary key-value pairs.
class ARROW_EXPORT Field : public detail::Fingerprintable,
                           public util::EqualityComparable<Field> {
 public:
  Field(std::string name, std::shared_ptr<DataType> type, bool nullable = true,
        std::shared_ptr<const KeyValueMetadata> metadata = NULLPTR)
      : detail::Fingerprintable(),
        name_(std::move(name)),
        type_(std::move(type)),
        nullable_(nullable),
        metadata_(std::move(metadata)) {}

  ~Field() override;

  /// \brief Return the field's attached metadata
  std::shared_ptr<const KeyValueMetadata> metadata() const { return metadata_; }

  /// \brief Return whether the field has non-empty metadata
  bool HasMetadata() const;

  /// \brief Return a copy of this field with the given metadata attached to it
  std::shared_ptr<Field> WithMetadata(
      const std::shared_ptr<const KeyValueMetadata>& metadata) const;

  /// \brief EXPERIMENTAL: Return a copy of this field with the given metadata
  /// merged with existing metadata (any colliding keys will be overridden by
  /// the passed metadata)
  std::shared_ptr<Field> WithMergedMetadata(
      const std::shared_ptr<const KeyValueMetadata>& metadata) const;

  /// \brief Return a copy of this field without any metadata attached to it
  std::shared_ptr<Field> RemoveMetadata() const;

  /// \brief Return a copy of this field with the replaced type.
  std::shared_ptr<Field> WithType(const std::shared_ptr<DataType>& type) const;

  /// \brief Return a copy of this field with the replaced name.
  std::shared_ptr<Field> WithName(const std::string& name) const;

  /// \brief Return a copy of this field with the replaced nullability.
  std::shared_ptr<Field> WithNullable(bool nullable) const;

  /// \brief Options that control the behavior of `MergeWith`.
  /// Options are to be added to allow type conversions, including integer
  /// widening, promotion from integer to float, or conversion to or from boolean.
  struct ARROW_EXPORT MergeOptions : public util::ToStringOstreamable<MergeOptions> {
    /// If true, a Field of NullType can be unified with a Field of another type.
    /// The unified field will be of the other type and become nullable.
    /// Nullability will be promoted to the looser option (nullable if one is not
    /// nullable).
    bool promote_nullability = true;

    /// Allow a decimal to be unified with another decimal of the same
    /// width, adjusting scale and precision as appropriate. May fail
    /// if the adjustment is not possible.
    bool promote_decimal = false;

    /// Allow a decimal to be promoted to a float. The float type will
    /// not itself be promoted (e.g. Decimal128 + Float32 = Float32).
    bool promote_decimal_to_float = false;

    /// Allow an integer to be promoted to a decimal.
    ///
    /// May fail if the decimal has insufficient precision to
    /// accommodate the integer (see promote_numeric_width).
    bool promote_integer_to_decimal = false;

    /// Allow an integer of a given bit width to be promoted to a
    /// float; the result will be a float of an equal or greater bit
    /// width to both of the inputs. Examples:
    ///  - int8 + float32 = float32
    ///  - int32 + float32 = float64
    ///  - int32 + float64 = float64
    /// Because an int32 cannot always be represented exactly in the
    /// 24 bits of a float32 mantissa.
    bool promote_integer_to_float = false;

    /// Allow an unsigned integer of a given bit width to be promoted
    /// to a signed integer that fits into the signed type:
    /// uint + int16 = int16
    /// When widening is needed, set promote_numeric_width to true:
    /// uint16 + int16 = int32
    bool promote_integer_sign = false;

    /// Allow an integer, float, or decimal of a given bit width to be
    /// promoted to an equivalent type of a greater bit width.
    bool promote_numeric_width = false;

    /// Allow strings to be promoted to binary types. Promotion of fixed size
    /// binary types to variable sized formats, and binary to large binary,
    /// and string to large string.
    bool promote_binary = false;

    /// Second to millisecond, Time32 to Time64, Time32(SECOND) to Time32(MILLI), etc
    bool promote_temporal_unit = false;

    /// Allow promotion from a list to a large-list and from a fixed-size list to a
    /// variable sized list
    bool promote_list = false;

    /// Unify dictionary index types and dictionary value types.
    bool promote_dictionary = false;

    /// Allow merging ordered and non-ordered dictionaries.
    /// The result will be ordered if and only if both inputs
    /// are ordered.
    bool promote_dictionary_ordered = false;

    /// Get default options. Only NullType will be merged with other types.
    static MergeOptions Defaults() { return MergeOptions(); }
    /// Get permissive options. All options are enabled, except
    /// promote_dictionary_ordered.
    static MergeOptions Permissive();
    /// Get a human-readable representation of the options.
    std::string ToString() const;
  };

  /// \brief Merge the current field with a field of the same name.
  ///
  /// The two fields must be compatible, i.e:
  ///   - have the same name
  ///   - have the same type, or of compatible types according to `options`.
  ///
  /// The metadata of the current field is preserved; the metadata of the other
  /// field is discarded.
  Result<std::shared_ptr<Field>> MergeWith(
      const Field& other, MergeOptions options = MergeOptions::Defaults()) const;
  Result<std::shared_ptr<Field>> MergeWith(
      const std::shared_ptr<Field>& other,
      MergeOptions options = MergeOptions::Defaults()) const;

  FieldVector Flatten() const;

  /// \brief Indicate if fields are equals.
  ///
  /// \param[in] other field to check equality with.
  /// \param[in] check_metadata controls if it should check for metadata
  ///            equality.
  ///
  /// \return true if fields are equal, false otherwise.
  bool Equals(const Field& other, bool check_metadata = false) const;
  bool Equals(const std::shared_ptr<Field>& other, bool check_metadata = false) const;

  /// \brief Indicate if fields are compatibles.
  ///
  /// See the criteria of MergeWith.
  ///
  /// \return true if fields are compatible, false otherwise.
  bool IsCompatibleWith(const Field& other) const;
  bool IsCompatibleWith(const std::shared_ptr<Field>& other) const;

  /// \brief Return a string representation ot the field
  /// \param[in] show_metadata when true, if KeyValueMetadata is non-empty,
  /// print keys and values in the output
  std::string ToString(bool show_metadata = false) const;

  /// \brief Return the field name
  const std::string& name() const { return name_; }
  /// \brief Return the field data type
  const std::shared_ptr<DataType>& type() const { return type_; }
  /// \brief Return whether the field is nullable
  bool nullable() const { return nullable_; }

  std::shared_ptr<Field> Copy() const;

 private:
  std::string ComputeFingerprint() const override;
  std::string ComputeMetadataFingerprint() const override;

  // Field name
  std::string name_;

  // The field's data type
  std::shared_ptr<DataType> type_;

  // Fields can be nullable
  bool nullable_;

  // The field's metadata, if any
  std::shared_ptr<const KeyValueMetadata> metadata_;

  ARROW_DISALLOW_COPY_AND_ASSIGN(Field);
};

ARROW_EXPORT void PrintTo(const Field& field, std::ostream* os);

namespace detail {

template <typename DERIVED, typename BASE, Type::type TYPE_ID, typename C_TYPE>
class CTypeImpl : public BASE {
 public:
  static constexpr Type::type type_id = TYPE_ID;
  using c_type = C_TYPE;
  using PhysicalType = DERIVED;

  CTypeImpl() : BASE(TYPE_ID) {}

  int bit_width() const override { return static_cast<int>(sizeof(C_TYPE) * CHAR_BIT); }

  DataTypeLayout layout() const override {
    return DataTypeLayout(
        {DataTypeLayout::Bitmap(), DataTypeLayout::FixedWidth(sizeof(C_TYPE))});
  }

  std::string name() const override { return DERIVED::type_name(); }

  std::string ToString(bool show_metadata = false) const override { return this->name(); }
};

template <typename DERIVED, typename BASE, Type::type TYPE_ID, typename C_TYPE>
constexpr Type::type CTypeImpl<DERIVED, BASE, TYPE_ID, C_TYPE>::type_id;

template <typename DERIVED, Type::type TYPE_ID, typename C_TYPE>
class IntegerTypeImpl : public detail::CTypeImpl<DERIVED, IntegerType, TYPE_ID, C_TYPE> {
  bool is_signed() const override { return std::is_signed<C_TYPE>::value; }
};

}  // namespace detail

/// Concrete type class for always-null data
class ARROW_EXPORT NullType : public DataType {
 public:
  static constexpr Type::type type_id = Type::NA;

  static constexpr const char* type_name() { return "null"; }

  NullType() : DataType(Type::NA) {}

  std::string ToString(bool show_metadata = false) const override;

  DataTypeLayout layout() const override {
    return DataTypeLayout({DataTypeLayout::AlwaysNull()});
  }

  std::string name() const override { return "null"; }

 protected:
  std::string ComputeFingerprint() const override;
};

/// Concrete type class for boolean data
class ARROW_EXPORT BooleanType
    : public detail::CTypeImpl<BooleanType, PrimitiveCType, Type::BOOL, bool> {
 public:
  static constexpr const char* type_name() { return "bool"; }

  // BooleanType within arrow use a single bit instead of the C 8-bits layout.
  int bit_width() const final { return 1; }

  DataTypeLayout layout() const override {
    return DataTypeLayout({DataTypeLayout::Bitmap(), DataTypeLayout::Bitmap()});
  }

 protected:
  std::string ComputeFingerprint() const override;
};

/// \addtogroup numeric-datatypes
///
/// @{

/// Concrete type class for unsigned 8-bit integer data
class ARROW_EXPORT UInt8Type
    : public detail::IntegerTypeImpl<UInt8Type, Type::UINT8, uint8_t> {
 public:
  static constexpr const char* type_name() { return "uint8"; }

 protected:
  std::string ComputeFingerprint() const override;
};

/// Concrete type class for signed 8-bit integer data
class ARROW_EXPORT Int8Type
    : public detail::IntegerTypeImpl<Int8Type, Type::INT8, int8_t> {
 public:
  static constexpr const char* type_name() { return "int8"; }

 protected:
  std::string ComputeFingerprint() const override;
};

/// Concrete type class for unsigned 16-bit integer data
class ARROW_EXPORT UInt16Type
    : public detail::IntegerTypeImpl<UInt16Type, Type::UINT16, uint16_t> {
 public:
  static constexpr const char* type_name() { return "uint16"; }

 protected:
  std::string ComputeFingerprint() const override;
};

/// Concrete type class for signed 16-bit integer data
class ARROW_EXPORT Int16Type
    : public detail::IntegerTypeImpl<Int16Type, Type::INT16, int16_t> {
 public:
  static constexpr const char* type_name() { return "int16"; }

 protected:
  std::string ComputeFingerprint() const override;
};

/// Concrete type class for unsigned 32-bit integer data
class ARROW_EXPORT UInt32Type
    : public detail::IntegerTypeImpl<UInt32Type, Type::UINT32, uint32_t> {
 public:
  static constexpr const char* type_name() { return "uint32"; }

 protected:
  std::string ComputeFingerprint() const override;
};

/// Concrete type class for signed 32-bit integer data
class ARROW_EXPORT Int32Type
    : public detail::IntegerTypeImpl<Int32Type, Type::INT32, int32_t> {
 public:
  static constexpr const char* type_name() { return "int32"; }

 protected:
  std::string ComputeFingerprint() const override;
};

/// Concrete type class for unsigned 64-bit integer data
class ARROW_EXPORT UInt64Type
    : public detail::IntegerTypeImpl<UInt64Type, Type::UINT64, uint64_t> {
 public:
  static constexpr const char* type_name() { return "uint64"; }

 protected:
  std::string ComputeFingerprint() const override;
};

/// Concrete type class for signed 64-bit integer data
class ARROW_EXPORT Int64Type
    : public detail::IntegerTypeImpl<Int64Type, Type::INT64, int64_t> {
 public:
  static constexpr const char* type_name() { return "int64"; }

 protected:
  std::string ComputeFingerprint() const override;
};

/// Concrete type class for 16-bit floating-point data
class ARROW_EXPORT HalfFloatType
    : public detail::CTypeImpl<HalfFloatType, FloatingPointType, Type::HALF_FLOAT,
                               uint16_t> {
 public:
  Precision precision() const override;
  static constexpr const char* type_name() { return "halffloat"; }

 protected:
  std::string ComputeFingerprint() const override;
};

/// Concrete type class for 32-bit floating-point data (C "float")
class ARROW_EXPORT FloatType
    : public detail::CTypeImpl<FloatType, FloatingPointType, Type::FLOAT, float> {
 public:
  Precision precision() const override;
  static constexpr const char* type_name() { return "float"; }

 protected:
  std::string ComputeFingerprint() const override;
};

/// Concrete type class for 64-bit floating-point data (C "double")
class ARROW_EXPORT DoubleType
    : public detail::CTypeImpl<DoubleType, FloatingPointType, Type::DOUBLE, double> {
 public:
  Precision precision() const override;
  static constexpr const char* type_name() { return "double"; }

 protected:
  std::string ComputeFingerprint() const override;
};

/// @}

/// \brief Base class for all variable-size binary data types
class ARROW_EXPORT BaseBinaryType : public DataType {
 public:
  using DataType::DataType;
  // This is only for preventing defining this class in each
  // translation unit to avoid one-definition-rule violation.
  ~BaseBinaryType() override;
};

constexpr int64_t kBinaryMemoryLimit = std::numeric_limits<int32_t>::max() - 1;

/// \addtogroup binary-datatypes
///
/// @{

/// \brief Concrete type class for variable-size binary data
class ARROW_EXPORT BinaryType : public BaseBinaryType {
 public:
  static constexpr Type::type type_id = Type::BINARY;
  static constexpr bool is_utf8 = false;
  using offset_type = int32_t;
  using PhysicalType = BinaryType;

  static constexpr const char* type_name() { return "binary"; }

  BinaryType() : BinaryType(Type::BINARY) {}

  DataTypeLayout layout() const override {
    return DataTypeLayout({DataTypeLayout::Bitmap(),
                           DataTypeLayout::FixedWidth(sizeof(offset_type)),
                           DataTypeLayout::VariableWidth()});
  }

  std::string ToString(bool show_metadata = false) const override;
  std::string name() const override { return "binary"; }

 protected:
  std::string ComputeFingerprint() const override;

  // Allow subclasses like StringType to change the logical type.
  explicit BinaryType(Type::type logical_type) : BaseBinaryType(logical_type) {}
};

/// \brief Concrete type class for variable-size binary view data
class ARROW_EXPORT BinaryViewType : public DataType {
 public:
  static constexpr Type::type type_id = Type::BINARY_VIEW;
  static constexpr bool is_utf8 = false;
  using PhysicalType = BinaryViewType;

  static constexpr int kSize = 16;
  static constexpr int kInlineSize = 12;
  static constexpr int kPrefixSize = 4;

  /// Variable length string or binary with inline optimization for small values (12 bytes
  /// or fewer). This is similar to std::string_view except limited in size to INT32_MAX
  /// and at least the first four bytes of the string are copied inline (accessible
  /// without pointer dereference). This inline prefix allows failing comparisons early.
  /// Furthermore when dealing with short strings the CPU cache working set is reduced
  /// since many can be inline.
  ///
  /// This union supports two states:
  ///
  /// - Entirely inlined string data
  /// \code{.unparsed}
  ///                |----|--------------|
  ///                 ^    ^
  ///                 |    |
  ///              size    in-line string data, zero padded
  /// \endcode
  ///
  /// - Reference into a buffer
  /// \code{.unparsed}
  ///                |----|----|----|----|
  ///                 ^    ^    ^    ^
  ///                 |    |    |    |
  ///              size    |    |    `------.
  ///                  prefix   |           |
  ///                        buffer index   |
  ///                                  offset in buffer
  /// \endcode
  ///
  /// Adapted from TU Munich's UmbraDB [1], Velox, DuckDB.
  ///
  /// [1]: https://db.in.tum.de/~freitag/papers/p29-neumann-cidr20.pdf
  ///
  /// Alignment to 64 bits enables an aligned load of the size and prefix into
  /// a single 64 bit integer, which is useful to the comparison fast path.
  union alignas(int64_t) c_type {
    struct {
      int32_t size;
      std::array<uint8_t, kInlineSize> data;
    } inlined;

    struct {
      int32_t size;
      std::array<uint8_t, kPrefixSize> prefix;
      int32_t buffer_index;
      int32_t offset;
    } ref;

    /// The number of bytes viewed.
    int32_t size() const {
      // Size is in the common initial subsequence of each member of the union,
      // so accessing `inlined.size` is legal even if another member is active.
      return inlined.size;
    }

    /// True if the view's data is entirely stored inline.
    bool is_inline() const { return size() <= kInlineSize; }

    /// Return a pointer to the inline data of a view.
    ///
    /// For inline views, this points to the entire data of the view.
    /// For other views, this points to the 4 byte prefix.
    const uint8_t* inline_data() const& {
      // Since `ref.prefix` has the same address as `inlined.data`,
      // the branch will be trivially optimized out.
      return is_inline() ? inlined.data.data() : ref.prefix.data();
    }
    const uint8_t* inline_data() && = delete;
  };
  static_assert(sizeof(c_type) == kSize);
  static_assert(std::is_trivial_v<c_type>);

  static constexpr const char* type_name() { return "binary_view"; }

  BinaryViewType() : BinaryViewType(Type::BINARY_VIEW) {}

  DataTypeLayout layout() const override {
    return DataTypeLayout({DataTypeLayout::Bitmap(), DataTypeLayout::FixedWidth(kSize)},
                          DataTypeLayout::VariableWidth());
  }

  std::string ToString(bool show_metadata = false) const override;
  std::string name() const override { return "binary_view"; }

 protected:
  std::string ComputeFingerprint() const override;

  // Allow subclasses like StringType to change the logical type.
  explicit BinaryViewType(Type::type logical_type) : DataType(logical_type) {}
};

/// \brief Concrete type class for large variable-size binary data
class ARROW_EXPORT LargeBinaryType : public BaseBinaryType {
 public:
  static constexpr Type::type type_id = Type::LARGE_BINARY;
  static constexpr bool is_utf8 = false;
  using offset_type = int64_t;
  using PhysicalType = LargeBinaryType;

  static constexpr const char* type_name() { return "large_binary"; }

  LargeBinaryType() : LargeBinaryType(Type::LARGE_BINARY) {}

  DataTypeLayout layout() const override {
    return DataTypeLayout({DataTypeLayout::Bitmap(),
                           DataTypeLayout::FixedWidth(sizeof(offset_type)),
                           DataTypeLayout::VariableWidth()});
  }

  std::string ToString(bool show_metadata = false) const override;
  std::string name() const override { return "large_binary"; }

 protected:
  std::string ComputeFingerprint() const override;

  // Allow subclasses like LargeStringType to change the logical type.
  explicit LargeBinaryType(Type::type logical_type) : BaseBinaryType(logical_type) {}
};

/// \brief Concrete type class for variable-size string data, utf8-encoded
class ARROW_EXPORT StringType : public BinaryType {
 public:
  static constexpr Type::type type_id = Type::STRING;
  static constexpr bool is_utf8 = true;
  using PhysicalType = BinaryType;

  static constexpr const char* type_name() { return "utf8"; }

  StringType() : BinaryType(Type::STRING) {}

  std::string ToString(bool show_metadata = false) const override;
  std::string name() const override { return "utf8"; }

 protected:
  std::string ComputeFingerprint() const override;
};

/// \brief Concrete type class for variable-size string data, utf8-encoded
class ARROW_EXPORT StringViewType : public BinaryViewType {
 public:
  static constexpr Type::type type_id = Type::STRING_VIEW;
  static constexpr bool is_utf8 = true;
  using PhysicalType = BinaryViewType;

  static constexpr const char* type_name() { return "utf8_view"; }

  StringViewType() : BinaryViewType(Type::STRING_VIEW) {}

  std::string ToString(bool show_metadata = false) const override;
  std::string name() const override { return "utf8_view"; }

 protected:
  std::string ComputeFingerprint() const override;
};

/// \brief Concrete type class for large variable-size string data, utf8-encoded
class ARROW_EXPORT LargeStringType : public LargeBinaryType {
 public:
  static constexpr Type::type type_id = Type::LARGE_STRING;
  static constexpr bool is_utf8 = true;
  using PhysicalType = LargeBinaryType;

  static constexpr const char* type_name() { return "large_utf8"; }

  LargeStringType() : LargeBinaryType(Type::LARGE_STRING) {}

  std::string ToString(bool show_metadata = false) const override;
  std::string name() const override { return "large_utf8"; }

 protected:
  std::string ComputeFingerprint() const override;
};

/// \brief Concrete type class for fixed-size binary data
class ARROW_EXPORT FixedSizeBinaryType : public FixedWidthType, public ParametricType {
 public:
  static constexpr Type::type type_id = Type::FIXED_SIZE_BINARY;
  static constexpr bool is_utf8 = false;

  static constexpr const char* type_name() { return "fixed_size_binary"; }

  explicit FixedSizeBinaryType(int32_t byte_width)
      : FixedWidthType(Type::FIXED_SIZE_BINARY), byte_width_(byte_width) {}
  explicit FixedSizeBinaryType(int32_t byte_width, Type::type override_type_id)
      : FixedWidthType(override_type_id), byte_width_(byte_width) {}

  std::string ToString(bool show_metadata = false) const override;
  std::string name() const override { return "fixed_size_binary"; }

  DataTypeLayout layout() const override {
    return DataTypeLayout(
        {DataTypeLayout::Bitmap(), DataTypeLayout::FixedWidth(byte_width())});
  }

  int byte_width() const override { return byte_width_; }

  int bit_width() const override;

  // Validating constructor
  static Result<std::shared_ptr<DataType>> Make(int32_t byte_width);

 protected:
  std::string ComputeFingerprint() const override;

  int32_t byte_width_;
};

/// @}

/// \addtogroup numeric-datatypes
///
/// @{

/// \brief Base type class for (fixed-size) decimal data
class ARROW_EXPORT DecimalType : public FixedSizeBinaryType {
 public:
  explicit DecimalType(Type::type type_id, int32_t byte_width, int32_t precision,
                       int32_t scale)
      : FixedSizeBinaryType(byte_width, type_id), precision_(precision), scale_(scale) {}

  /// Constructs concrete decimal types
  static Result<std::shared_ptr<DataType>> Make(Type::type type_id, int32_t precision,
                                                int32_t scale);

  int32_t precision() const { return precision_; }
  int32_t scale() const { return scale_; }

  /// \brief Returns the number of bytes needed for precision.
  ///
  /// precision must be >= 1
  static int32_t DecimalSize(int32_t precision);

 protected:
  std::string ComputeFingerprint() const override;

  int32_t precision_;
  int32_t scale_;
};

/// \brief Concrete type class for 32-bit decimal data
///
/// Arrow decimals are fixed-point decimal numbers encoded as a scaled
/// integer.  The precision is the number of significant digits that the
/// decimal type can represent; the scale is the number of digits after
/// the decimal point (note the scale can be negative).
///
/// As an example, `Decimal32Type(7, 3)` can exactly represent the numbers
/// 1234.567 and -1234.567 (encoded internally as the 32-bit integers
/// 1234567 and -1234567, respectively), but neither 12345.67 nor 123.4567.
///
/// Decimal32Type has a maximum precision of 9 significant digits
/// (also available as Decimal32Type::kMaxPrecision).
/// If higher precision is needed, consider using Decimal64Type,
/// Decimal128Type or Decimal256Type.
class ARROW_EXPORT Decimal32Type : public DecimalType {
 public:
  static constexpr Type::type type_id = Type::DECIMAL32;

  static constexpr const char* type_name() { return "decimal32"; }

  /// Decimal32Type constructor that aborts on invalid input.
  explicit Decimal32Type(int32_t precision, int32_t scale);

  /// Decimal32Type constructor that returns an error on invalid input
  static Result<std::shared_ptr<DataType>> Make(int32_t precision, int32_t scale);

  std::string ToString(bool show_metadata = false) const override;
  std::string name() const override { return "decimal32"; }

  static constexpr int32_t kMinPrecision = 1;
  static constexpr int32_t kMaxPrecision = 9;
  static constexpr int32_t kByteWidth = 4;
};

/// \brief Concrete type class for 64-bit decimal data
///
/// Arrow decimals are fixed-point decimal numbers encoded as a scaled
/// integer.  The precision is the number of significant digits that the
/// decimal type can represent; the scale is the number of digits after
/// the decimal point (note the scale can be negative).
///
/// As an example, `Decimal64Type(7, 3)` can exactly represent the numbers
/// 1234.567 and -1234.567 (encoded internally as the 64-bit integers
/// 1234567 and -1234567, respectively), but neither 12345.67 nor 123.4567.
///
/// Decimal64Type has a maximum precision of 18 significant digits
/// (also available as Decimal64Type::kMaxPrecision).
/// If higher precision is needed, consider using Decimal128Type or
/// Decimal256Type.
class ARROW_EXPORT Decimal64Type : public DecimalType {
 public:
  static constexpr Type::type type_id = Type::DECIMAL64;

  static constexpr const char* type_name() { return "decimal64"; }

  /// Decimal32Type constructor that aborts on invalid input.
  explicit Decimal64Type(int32_t precision, int32_t scale);

  /// Decimal32Type constructor that returns an error on invalid input
  static Result<std::shared_ptr<DataType>> Make(int32_t precision, int32_t scale);

  std::string ToString(bool show_metadata = false) const override;
  std::string name() const override { return "decimal64"; }

  static constexpr int32_t kMinPrecision = 1;
  static constexpr int32_t kMaxPrecision = 18;
  static constexpr int32_t kByteWidth = 8;
};

/// \brief Concrete type class for 128-bit decimal data
///
/// Arrow decimals are fixed-point decimal numbers encoded as a scaled
/// integer.  The precision is the number of significant digits that the
/// decimal type can represent; the scale is the number of digits after
/// the decimal point (note the scale can be negative).
///
/// As an example, `Decimal128Type(7, 3)` can exactly represent the numbers
/// 1234.567 and -1234.567 (encoded internally as the 128-bit integers
/// 1234567 and -1234567, respectively), but neither 12345.67 nor 123.4567.
///
/// Decimal128Type has a maximum precision of 38 significant digits
/// (also available as Decimal128Type::kMaxPrecision).
/// If higher precision is needed, consider using Decimal256Type.
class ARROW_EXPORT Decimal128Type : public DecimalType {
 public:
  static constexpr Type::type type_id = Type::DECIMAL128;

  static constexpr const char* type_name() { return "decimal128"; }

  /// Decimal128Type constructor that aborts on invalid input.
  explicit Decimal128Type(int32_t precision, int32_t scale);

  /// Decimal128Type constructor that returns an error on invalid input.
  static Result<std::shared_ptr<DataType>> Make(int32_t precision, int32_t scale);

  std::string ToString(bool show_metadata = false) const override;
  std::string name() const override { return "decimal128"; }

  static constexpr int32_t kMinPrecision = 1;
  static constexpr int32_t kMaxPrecision = 38;
  static constexpr int32_t kByteWidth = 16;
};

/// \brief Concrete type class for 256-bit decimal data
///
/// Arrow decimals are fixed-point decimal numbers encoded as a scaled
/// integer.  The precision is the number of significant digits that the
/// decimal type can represent; the scale is the number of digits after
/// the decimal point (note the scale can be negative).
///
/// Decimal256Type has a maximum precision of 76 significant digits.
/// (also available as Decimal256Type::kMaxPrecision).
///
/// For most use cases, the maximum precision offered by Decimal128Type
/// is sufficient, and it will result in a more compact and more efficient
/// encoding.
class ARROW_EXPORT Decimal256Type : public DecimalType {
 public:
  static constexpr Type::type type_id = Type::DECIMAL256;

  static constexpr const char* type_name() { return "decimal256"; }

  /// Decimal256Type constructor that aborts on invalid input.
  explicit Decimal256Type(int32_t precision, int32_t scale);

  /// Decimal256Type constructor that returns an error on invalid input.
  static Result<std::shared_ptr<DataType>> Make(int32_t precision, int32_t scale);

  std::string ToString(bool show_metadata = false) const override;
  std::string name() const override { return "decimal256"; }

  static constexpr int32_t kMinPrecision = 1;
  static constexpr int32_t kMaxPrecision = 76;
  static constexpr int32_t kByteWidth = 32;
};

/// @}

/// \addtogroup nested-datatypes
///
/// @{

/// \brief Base class for all variable-size list data types
class ARROW_EXPORT BaseListType : public NestedType {
 public:
  using NestedType::NestedType;
  // This is only for preventing defining this class in each
  // translation unit to avoid one-definition-rule violation.
  ~BaseListType() override;
  const std::shared_ptr<Field>& value_field() const { return children_[0]; }

  const std::shared_ptr<DataType>& value_type() const { return children_[0]->type(); }
};

/// \brief Concrete type class for list data
///
/// List data is nested data where each value is a variable number of
/// child items.  Lists can be recursively nested, for example
/// list(list(int32)).
class ARROW_EXPORT ListType : public BaseListType {
 public:
  static constexpr Type::type type_id = Type::LIST;
  using offset_type = int32_t;

  static constexpr const char* type_name() { return "list"; }

  // List can contain any other logical value type
  explicit ListType(std::shared_ptr<DataType> value_type)
      : ListType(std::make_shared<Field>("item", std::move(value_type))) {}

  explicit ListType(std::shared_ptr<Field> value_field) : BaseListType(type_id) {
    children_ = {std::move(value_field)};
  }

  DataTypeLayout layout() const override {
    return DataTypeLayout(
        {DataTypeLayout::Bitmap(), DataTypeLayout::FixedWidth(sizeof(offset_type))});
  }

  std::string ToString(bool show_metadata = false) const override;

  std::string name() const override { return "list"; }

 protected:
  std::string ComputeFingerprint() const override;
};

/// \brief Concrete type class for large list data
///
/// LargeListType is like ListType but with 64-bit rather than 32-bit offsets.
class ARROW_EXPORT LargeListType : public BaseListType {
 public:
  static constexpr Type::type type_id = Type::LARGE_LIST;
  using offset_type = int64_t;

  static constexpr const char* type_name() { return "large_list"; }

  // List can contain any other logical value type
  explicit LargeListType(std::shared_ptr<DataType> value_type)
      : LargeListType(std::make_shared<Field>("item", std::move(value_type))) {}

  explicit LargeListType(std::shared_ptr<Field> value_field) : BaseListType(type_id) {
    children_ = {std::move(value_field)};
  }

  DataTypeLayout layout() const override {
    return DataTypeLayout(
        {DataTypeLayout::Bitmap(), DataTypeLayout::FixedWidth(sizeof(offset_type))});
  }

  std::string ToString(bool show_metadata = false) const override;

  std::string name() const override { return "large_list"; }

 protected:
  std::string ComputeFingerprint() const override;
};

/// \brief Type class for array of list views
class ARROW_EXPORT ListViewType : public BaseListType {
 public:
  static constexpr Type::type type_id = Type::LIST_VIEW;
  using offset_type = int32_t;

  static constexpr const char* type_name() { return "list_view"; }

  // ListView can contain any other logical value type
  explicit ListViewType(const std::shared_ptr<DataType>& value_type)
      : ListViewType(std::make_shared<Field>("item", value_type)) {}

  explicit ListViewType(const std::shared_ptr<Field>& value_field)
      : BaseListType(type_id) {
    children_ = {value_field};
  }

  DataTypeLayout layout() const override {
    return DataTypeLayout({DataTypeLayout::Bitmap(),
                           DataTypeLayout::FixedWidth(sizeof(offset_type)),
                           DataTypeLayout::FixedWidth(sizeof(offset_type))});
  }

  std::string ToString(bool show_metadata = false) const override;

  std::string name() const override { return "list_view"; }

 protected:
  std::string ComputeFingerprint() const override;
};

/// \brief Concrete type class for large list-view data
///
/// LargeListViewType is like ListViewType but with 64-bit rather than 32-bit offsets and
/// sizes.
class ARROW_EXPORT LargeListViewType : public BaseListType {
 public:
  static constexpr Type::type type_id = Type::LARGE_LIST_VIEW;
  using offset_type = int64_t;

  static constexpr const char* type_name() { return "large_list_view"; }

  // LargeListView can contain any other logical value type
  explicit LargeListViewType(const std::shared_ptr<DataType>& value_type)
      : LargeListViewType(std::make_shared<Field>("item", value_type)) {}

  explicit LargeListViewType(const std::shared_ptr<Field>& value_field)
      : BaseListType(type_id) {
    children_ = {value_field};
  }

  DataTypeLayout layout() const override {
    return DataTypeLayout({DataTypeLayout::Bitmap(),
                           DataTypeLayout::FixedWidth(sizeof(offset_type)),
                           DataTypeLayout::FixedWidth(sizeof(offset_type))});
  }

  std::string ToString(bool show_metadata = false) const override;

  std::string name() const override { return "large_list_view"; }

 protected:
  std::string ComputeFingerprint() const override;
};

/// \brief Concrete type class for map data
///
/// Map data is nested data where each value is a variable number of
/// key-item pairs.  Its physical representation is the same as
/// a list of `{key, item}` structs.
///
/// Maps can be recursively nested, for example map(utf8, map(utf8, int32)).
class ARROW_EXPORT MapType : public ListType {
 public:
  static constexpr Type::type type_id = Type::MAP;

  static constexpr const char* type_name() { return "map"; }

  MapType(std::shared_ptr<DataType> key_type, std::shared_ptr<DataType> item_type,
          bool keys_sorted = false);

  MapType(std::shared_ptr<DataType> key_type, std::shared_ptr<Field> item_field,
          bool keys_sorted = false);

  MapType(std::shared_ptr<Field> key_field, std::shared_ptr<Field> item_field,
          bool keys_sorted = false);

  explicit MapType(std::shared_ptr<Field> value_field, bool keys_sorted = false);

  // Validating constructor
  static Result<std::shared_ptr<DataType>> Make(std::shared_ptr<Field> value_field,
                                                bool keys_sorted = false);

  std::shared_ptr<Field> key_field() const { return value_type()->field(0); }
  std::shared_ptr<DataType> key_type() const { return key_field()->type(); }

  std::shared_ptr<Field> item_field() const { return value_type()->field(1); }
  std::shared_ptr<DataType> item_type() const { return item_field()->type(); }

  std::string ToString(bool show_metadata = false) const override;

  std::string name() const override { return "map"; }

  bool keys_sorted() const { return keys_sorted_; }

 private:
  std::string ComputeFingerprint() const override;

  bool keys_sorted_;
};

/// \brief Concrete type class for fixed size list data
class ARROW_EXPORT FixedSizeListType : public BaseListType {
 public:
  static constexpr Type::type type_id = Type::FIXED_SIZE_LIST;
  // While the individual item size is 32-bit, the overall data size
  // (item size * list length) may not fit in a 32-bit int.
  using offset_type = int64_t;

  static constexpr const char* type_name() { return "fixed_size_list"; }

  // List can contain any other logical value type
  FixedSizeListType(std::shared_ptr<DataType> value_type, int32_t list_size)
      : FixedSizeListType(std::make_shared<Field>("item", std::move(value_type)),
                          list_size) {}

  FixedSizeListType(std::shared_ptr<Field> value_field, int32_t list_size)
      : BaseListType(type_id), list_size_(list_size) {
    children_ = {std::move(value_field)};
  }

  DataTypeLayout layout() const override {
    return DataTypeLayout({DataTypeLayout::Bitmap()});
  }

  std::string ToString(bool show_metadata = false) const override;

  std::string name() const override { return "fixed_size_list"; }

  int32_t list_size() const { return list_size_; }

 protected:
  std::string ComputeFingerprint() const override;

  int32_t list_size_;
};

/// \brief Concrete type class for struct data
class ARROW_EXPORT StructType : public NestedType {
 public:
  static constexpr Type::type type_id = Type::STRUCT;

  static constexpr const char* type_name() { return "struct"; }

  explicit StructType(const FieldVector& fields);

  ~StructType() override;

  DataTypeLayout layout() const override {
    return DataTypeLayout({DataTypeLayout::Bitmap()});
  }

  std::string ToString(bool show_metadata = false) const override;
  std::string name() const override { return "struct"; }

  /// Returns null if name not found
  std::shared_ptr<Field> GetFieldByName(const std::string& name) const;

  /// Return all fields having this name
  FieldVector GetAllFieldsByName(const std::string& name) const;

  /// Returns -1 if name not found or if there are multiple fields having the
  /// same name
  int GetFieldIndex(const std::string& name) const;

  /// \brief Return the indices of all fields having this name in sorted order
  std::vector<int> GetAllFieldIndices(const std::string& name) const;

  /// \brief Create a new StructType with field added at given index
  Result<std::shared_ptr<StructType>> AddField(int i,
                                               const std::shared_ptr<Field>& field) const;
  /// \brief Create a new StructType by removing the field at given index
  Result<std::shared_ptr<StructType>> RemoveField(int i) const;
  /// \brief Create a new StructType by changing the field at given index
  Result<std::shared_ptr<StructType>> SetField(int i,
                                               const std::shared_ptr<Field>& field) const;

 private:
  std::string ComputeFingerprint() const override;

  class Impl;
  std::unique_ptr<Impl> impl_;
};

/// \brief Base type class for union data
class ARROW_EXPORT UnionType : public NestedType {
 public:
  static constexpr int8_t kMaxTypeCode = 127;
  static constexpr int kInvalidChildId = -1;

  static Result<std::shared_ptr<DataType>> Make(
      const FieldVector& fields, const std::vector<int8_t>& type_codes,
      UnionMode::type mode = UnionMode::SPARSE) {
    if (mode == UnionMode::SPARSE) {
      return sparse_union(fields, type_codes);
    } else {
      return dense_union(fields, type_codes);
    }
  }

  DataTypeLayout layout() const override;

  std::string ToString(bool show_metadata = false) const override;

  /// The array of logical type ids.
  ///
  /// For example, the first type in the union might be denoted by the id 5
  /// (instead of 0).
  const std::vector<int8_t>& type_codes() const { return type_codes_; }

  /// An array mapping logical type ids to physical child ids.
  const std::vector<int>& child_ids() const { return child_ids_; }

  uint8_t max_type_code() const;

  UnionMode::type mode() const;

 protected:
  UnionType(FieldVector fields, std::vector<int8_t> type_codes, Type::type id);

  static Status ValidateParameters(const FieldVector& fields,
                                   const std::vector<int8_t>& type_codes,
                                   UnionMode::type mode);

 private:
  std::string ComputeFingerprint() const override;

  std::vector<int8_t> type_codes_;
  std::vector<int> child_ids_;
};

/// \brief Concrete type class for sparse union data
///
/// A sparse union is a nested type where each logical value is taken from
/// a single child.  A buffer of 8-bit type ids indicates which child
/// a given logical value is to be taken from.
///
/// In a sparse union, each child array should have the same length as the
/// union array, regardless of the actual number of union values that
/// refer to it.
///
/// Note that, unlike most other types, unions don't have a top-level validity bitmap.
class ARROW_EXPORT SparseUnionType : public UnionType {
 public:
  static constexpr Type::type type_id = Type::SPARSE_UNION;

  static constexpr const char* type_name() { return "sparse_union"; }

  SparseUnionType(FieldVector fields, std::vector<int8_t> type_codes);

  // A constructor variant that validates input parameters
  static Result<std::shared_ptr<DataType>> Make(FieldVector fields,
                                                std::vector<int8_t> type_codes);

  std::string name() const override { return "sparse_union"; }
};

/// \brief Concrete type class for dense union data
///
/// A dense union is a nested type where each logical value is taken from
/// a single child, at a specific offset.  A buffer of 8-bit type ids
/// indicates which child a given logical value is to be taken from,
/// and a buffer of 32-bit offsets indicates at which physical position
/// in the given child array the logical value is to be taken from.
///
/// Unlike a sparse union, a dense union allows encoding only the child array
/// values which are actually referred to by the union array.  This is
/// counterbalanced by the additional footprint of the offsets buffer, and
/// the additional indirection cost when looking up values.
///
/// Note that, unlike most other types, unions don't have a top-level validity bitmap.
class ARROW_EXPORT DenseUnionType : public UnionType {
 public:
  static constexpr Type::type type_id = Type::DENSE_UNION;

  static constexpr const char* type_name() { return "dense_union"; }

  DenseUnionType(FieldVector fields, std::vector<int8_t> type_codes);

  // A constructor variant that validates input parameters
  static Result<std::shared_ptr<DataType>> Make(FieldVector fields,
                                                std::vector<int8_t> type_codes);

  std::string name() const override { return "dense_union"; }
};

/// \brief Type class for run-end encoded data
class ARROW_EXPORT RunEndEncodedType : public NestedType {
 public:
  static constexpr Type::type type_id = Type::RUN_END_ENCODED;

  static constexpr const char* type_name() { return "run_end_encoded"; }

  explicit RunEndEncodedType(std::shared_ptr<DataType> run_end_type,
                             std::shared_ptr<DataType> value_type);
  ~RunEndEncodedType() override;

  DataTypeLayout layout() const override {
    // A lot of existing code expects at least one buffer
    return DataTypeLayout({DataTypeLayout::AlwaysNull()});
  }

  const std::shared_ptr<DataType>& run_end_type() const { return fields()[0]->type(); }
  const std::shared_ptr<DataType>& value_type() const { return fields()[1]->type(); }

  std::string ToString(bool show_metadata = false) const override;

  std::string name() const override { return "run_end_encoded"; }

  static bool RunEndTypeValid(const DataType& run_end_type);

 private:
  std::string ComputeFingerprint() const override;
};

/// @}

// ----------------------------------------------------------------------
// Date and time types

/// \addtogroup temporal-datatypes
///
/// @{

/// \brief Base type for all date and time types
class ARROW_EXPORT TemporalType : public FixedWidthType {
 public:
  using FixedWidthType::FixedWidthType;
  // This is only for preventing defining this class in each
  // translation unit to avoid one-definition-rule violation.
  ~TemporalType() override;

  DataTypeLayout layout() const override {
    return DataTypeLayout(
        {DataTypeLayout::Bitmap(), DataTypeLayout::FixedWidth(bit_width() / 8)});
  }
};

/// \brief Base type class for date data
class ARROW_EXPORT DateType : public TemporalType {
 public:
  virtual DateUnit unit() const = 0;

 protected:
  explicit DateType(Type::type type_id);
};

/// Concrete type class for 32-bit date data (as number of days since UNIX epoch)
class ARROW_EXPORT Date32Type : public DateType {
 public:
  static constexpr Type::type type_id = Type::DATE32;
  static constexpr DateUnit UNIT = DateUnit::DAY;
  using c_type = int32_t;
  using PhysicalType = Int32Type;

  static constexpr const char* type_name() { return "date32"; }

  Date32Type();

  int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }

  std::string ToString(bool show_metadata = false) const override;

  std::string name() const override { return "date32"; }
  DateUnit unit() const override { return UNIT; }

 protected:
  std::string ComputeFingerprint() const override;
};

/// Concrete type class for 64-bit date data (as number of milliseconds since UNIX epoch)
class ARROW_EXPORT Date64Type : public DateType {
 public:
  static constexpr Type::type type_id = Type::DATE64;
  static constexpr DateUnit UNIT = DateUnit::MILLI;
  using c_type = int64_t;
  using PhysicalType = Int64Type;

  static constexpr const char* type_name() { return "date64"; }

  Date64Type();

  int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }

  std::string ToString(bool show_metadata = false) const override;

  std::string name() const override { return "date64"; }
  DateUnit unit() const override { return UNIT; }

 protected:
  std::string ComputeFingerprint() const override;
};

ARROW_EXPORT
std::ostream& operator<<(std::ostream& os, TimeUnit::type unit);

/// Base type class for time data
class ARROW_EXPORT TimeType : public TemporalType, public ParametricType {
 public:
  TimeUnit::type unit() const { return unit_; }

 protected:
  TimeType(Type::type type_id, TimeUnit::type unit);
  std::string ComputeFingerprint() const override;

  TimeUnit::type unit_;
};

/// Concrete type class for 32-bit time data (as number of seconds or milliseconds
/// since midnight)
class ARROW_EXPORT Time32Type : public TimeType {
 public:
  static constexpr Type::type type_id = Type::TIME32;
  using c_type = int32_t;
  using PhysicalType = Int32Type;

  static constexpr const char* type_name() { return "time32"; }

  int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }

  explicit Time32Type(TimeUnit::type unit = TimeUnit::MILLI);

  std::string ToString(bool show_metadata = false) const override;

  std::string name() const override { return "time32"; }
};

/// Concrete type class for 64-bit time data (as number of microseconds or nanoseconds
/// since midnight)
class ARROW_EXPORT Time64Type : public TimeType {
 public:
  static constexpr Type::type type_id = Type::TIME64;
  using c_type = int64_t;
  using PhysicalType = Int64Type;

  static constexpr const char* type_name() { return "time64"; }

  int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }

  explicit Time64Type(TimeUnit::type unit = TimeUnit::NANO);

  std::string ToString(bool show_metadata = false) const override;

  std::string name() const override { return "time64"; }
};

/// \brief Concrete type class for datetime data (as number of seconds, milliseconds,
/// microseconds or nanoseconds since UNIX epoch)
///
/// If supplied, the timezone string should take either the form (i) "Area/Location",
/// with values drawn from the names in the IANA Time Zone Database (such as
/// "Europe/Zurich"); or (ii) "(+|-)HH:MM" indicating an absolute offset from GMT
/// (such as "-08:00").  To indicate a native UTC timestamp, one of the strings "UTC",
/// "Etc/UTC" or "+00:00" should be used.
///
/// If any non-empty string is supplied as the timezone for a TimestampType, then the
/// Arrow field containing that timestamp type (and by extension the column associated
/// with such a field) is considered "timezone-aware".  The integer arrays that comprise
/// a timezone-aware column must contain UTC normalized datetime values, regardless of
/// the contents of their timezone string.  More precisely, (i) the producer of a
/// timezone-aware column must populate its constituent arrays with valid UTC values
/// (performing offset conversions from non-UTC values if necessary); and (ii) the
/// consumer of a timezone-aware column may assume that the column's values are directly
/// comparable (that is, with no offset adjustment required) to the values of any other
/// timezone-aware column or to any other valid UTC datetime value (provided all values
/// are expressed in the same units).
///
/// If a TimestampType is constructed without a timezone (or, equivalently, if the
/// timezone supplied is an empty string) then the resulting Arrow field (column) is
/// considered "timezone-naive".  The producer of a timezone-naive column may populate
/// its constituent integer arrays with datetime values from any timezone; the consumer
/// of a timezone-naive column should make no assumptions about the interoperability or
/// comparability of the values of such a column with those of any other timestamp
/// column or datetime value.
///
/// If a timezone-aware field contains a recognized timezone, its values may be
/// localized to that locale upon display; the values of timezone-naive fields must
/// always be displayed "as is", with no localization performed on them.
class ARROW_EXPORT TimestampType : public TemporalType, public ParametricType {
 public:
  using Unit = TimeUnit;

  static constexpr Type::type type_id = Type::TIMESTAMP;
  using c_type = int64_t;
  using PhysicalType = Int64Type;

  static constexpr const char* type_name() { return "timestamp"; }

  int bit_width() const override { return static_cast<int>(sizeof(int64_t) * CHAR_BIT); }

  explicit TimestampType(TimeUnit::type unit = TimeUnit::MILLI)
      : TemporalType(Type::TIMESTAMP), unit_(unit) {}

  explicit TimestampType(TimeUnit::type unit, const std::string& timezone)
      : TemporalType(Type::TIMESTAMP), unit_(unit), timezone_(timezone) {}

  std::string ToString(bool show_metadata = false) const override;
  std::string name() const override { return "timestamp"; }

  TimeUnit::type unit() const { return unit_; }
  const std::string& timezone() const { return timezone_; }

 protected:
  std::string ComputeFingerprint() const override;

 private:
  TimeUnit::type unit_;
  std::string timezone_;
};

// Base class for the different kinds of calendar intervals.
class ARROW_EXPORT IntervalType : public TemporalType, public ParametricType {
 public:
  enum type { MONTHS, DAY_TIME, MONTH_DAY_NANO };

  virtual type interval_type() const = 0;

 protected:
  explicit IntervalType(Type::type subtype) : TemporalType(subtype) {}
  std::string ComputeFingerprint() const override;
};

/// \brief Represents a number of months.
///
/// Type representing a number of months.  Corresponds to YearMonth type
/// in Schema.fbs (years are defined as 12 months).
class ARROW_EXPORT MonthIntervalType : public IntervalType {
 public:
  static constexpr Type::type type_id = Type::INTERVAL_MONTHS;
  using c_type = int32_t;
  using PhysicalType = Int32Type;

  static constexpr const char* type_name() { return "month_interval"; }

  IntervalType::type interval_type() const override { return IntervalType::MONTHS; }

  int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }

  MonthIntervalType() : IntervalType(type_id) {}

  std::string ToString(bool ARROW_ARG_UNUSED(show_metadata) = false) const override {
    return name();
  }
  std::string name() const override { return "month_interval"; }
};

/// \brief Represents a number of days and milliseconds (fraction of day).
class ARROW_EXPORT DayTimeIntervalType : public IntervalType {
 public:
  struct DayMilliseconds {
    int32_t days = 0;
    int32_t milliseconds = 0;
    constexpr DayMilliseconds() = default;
    constexpr DayMilliseconds(int32_t days, int32_t milliseconds)
        : days(days), milliseconds(milliseconds) {}
    bool operator==(DayMilliseconds other) const {
      return this->days == other.days && this->milliseconds == other.milliseconds;
    }
    bool operator!=(DayMilliseconds other) const { return !(*this == other); }
    bool operator<(DayMilliseconds other) const {
      return this->days < other.days || this->milliseconds < other.milliseconds;
    }
  };
  using c_type = DayMilliseconds;
  using PhysicalType = DayTimeIntervalType;

  static_assert(sizeof(DayMilliseconds) == 8,
                "DayMilliseconds struct assumed to be of size 8 bytes");
  static constexpr Type::type type_id = Type::INTERVAL_DAY_TIME;

  static constexpr const char* type_name() { return "day_time_interval"; }

  IntervalType::type interval_type() const override { return IntervalType::DAY_TIME; }

  DayTimeIntervalType() : IntervalType(type_id) {}

  int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }

  std::string ToString(bool ARROW_ARG_UNUSED(show_metadata) = false) const override {
    return name();
  }
  std::string name() const override { return "day_time_interval"; }
};

ARROW_EXPORT
std::ostream& operator<<(std::ostream& os, DayTimeIntervalType::DayMilliseconds interval);

/// \brief Represents a number of months, days and nanoseconds between
/// two dates.
///
/// All fields are independent from one another.
class ARROW_EXPORT MonthDayNanoIntervalType : public IntervalType {
 public:
  struct MonthDayNanos {
    int32_t months;
    int32_t days;
    int64_t nanoseconds;
    bool operator==(MonthDayNanos other) const {
      return this->months == other.months && this->days == other.days &&
             this->nanoseconds == other.nanoseconds;
    }
    bool operator!=(MonthDayNanos other) const { return !(*this == other); }
  };
  using c_type = MonthDayNanos;
  using PhysicalType = MonthDayNanoIntervalType;

  static_assert(sizeof(MonthDayNanos) == 16,
                "MonthDayNanos struct assumed to be of size 16 bytes");
  static constexpr Type::type type_id = Type::INTERVAL_MONTH_DAY_NANO;

  static constexpr const char* type_name() { return "month_day_nano_interval"; }

  IntervalType::type interval_type() const override {
    return IntervalType::MONTH_DAY_NANO;
  }

  MonthDayNanoIntervalType() : IntervalType(type_id) {}

  int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }

  std::string ToString(bool ARROW_ARG_UNUSED(show_metadata) = false) const override {
    return name();
  }
  std::string name() const override { return "month_day_nano_interval"; }
};

ARROW_EXPORT
std::ostream& operator<<(std::ostream& os,
                         MonthDayNanoIntervalType::MonthDayNanos interval);

/// \brief Represents an elapsed time without any relation to a calendar artifact.
class ARROW_EXPORT DurationType : public TemporalType, public ParametricType {
 public:
  using Unit = TimeUnit;

  static constexpr Type::type type_id = Type::DURATION;
  using c_type = int64_t;
  using PhysicalType = Int64Type;

  static constexpr const char* type_name() { return "duration"; }

  int bit_width() const override { return static_cast<int>(sizeof(int64_t) * CHAR_BIT); }

  explicit DurationType(TimeUnit::type unit = TimeUnit::MILLI)
      : TemporalType(Type::DURATION), unit_(unit) {}

  std::string ToString(bool show_metadata = false) const override;
  std::string name() const override { return "duration"; }

  TimeUnit::type unit() const { return unit_; }

 protected:
  std::string ComputeFingerprint() const override;

 private:
  TimeUnit::type unit_;
};

/// @}

// ----------------------------------------------------------------------
// Dictionary type (for representing categorical or dictionary-encoded
// in memory)

/// \brief Dictionary-encoded value type with data-dependent
/// dictionary. Indices are represented by any integer types.
class ARROW_EXPORT DictionaryType : public FixedWidthType {
 public:
  static constexpr Type::type type_id = Type::DICTIONARY;

  static constexpr const char* type_name() { return "dictionary"; }

  DictionaryType(const std::shared_ptr<DataType>& index_type,
                 const std::shared_ptr<DataType>& value_type, bool ordered = false);

  // A constructor variant that validates its input parameters
  static Result<std::shared_ptr<DataType>> Make(
      const std::shared_ptr<DataType>& index_type,
      const std::shared_ptr<DataType>& value_type, bool ordered = false);

  std::string ToString(bool show_metadata = false) const override;
  std::string name() const override { return "dictionary"; }

  int bit_width() const override;

  DataTypeLayout layout() const override;

  const std::shared_ptr<DataType>& index_type() const { return index_type_; }
  const std::shared_ptr<DataType>& value_type() const { return value_type_; }

  bool ordered() const { return ordered_; }

 protected:
  static Status ValidateParameters(const DataType& index_type,
                                   const DataType& value_type);

  std::string ComputeFingerprint() const override;

  // Must be an integer type (not currently checked)
  std::shared_ptr<DataType> index_type_;
  std::shared_ptr<DataType> value_type_;
  bool ordered_;
};

// ----------------------------------------------------------------------
// FieldRef

/// \class FieldPath
///
/// Represents a path to a nested field using indices of child fields.
/// For example, given indices {5, 9, 3} the field would be retrieved with
/// schema->field(5)->type()->field(9)->type()->field(3)
///
/// Attempting to retrieve a child field using a FieldPath which is not valid for
/// a given schema will raise an error. Invalid FieldPaths include:
/// - an index is out of range
/// - the path is empty (note: a default constructed FieldPath will be empty)
///
/// FieldPaths provide a number of accessors for drilling down to potentially nested
/// children. They are overloaded for convenience to support Schema (returns a field),
/// DataType (returns a child field), Field (returns a child field of this field's type)
/// Array (returns a child array), RecordBatch (returns a column).
class ARROW_EXPORT FieldPath {
 public:
  FieldPath() = default;

  FieldPath(std::vector<int> indices)  // NOLINT runtime/explicit
      : indices_(std::move(indices)) {}

  FieldPath(std::initializer_list<int> indices)  // NOLINT runtime/explicit
      : indices_(std::move(indices)) {}

  std::string ToString() const;

  size_t hash() const;
  struct Hash {
    size_t operator()(const FieldPath& path) const { return path.hash(); }
  };

  bool empty() const { return indices_.empty(); }
  bool operator==(const FieldPath& other) const { return indices() == other.indices(); }
  bool operator!=(const FieldPath& other) const { return indices() != other.indices(); }

  const std::vector<int>& indices() const { return indices_; }
  int operator[](size_t i) const { return indices_[i]; }
  std::vector<int>::const_iterator begin() const { return indices_.begin(); }
  std::vector<int>::const_iterator end() const { return indices_.end(); }

  /// \brief Retrieve the referenced child Field from a Schema, Field, or DataType
  Result<std::shared_ptr<Field>> Get(const Schema& schema) const;
  Result<std::shared_ptr<Field>> Get(const Field& field) const;
  Result<std::shared_ptr<Field>> Get(const DataType& type) const;
  Result<std::shared_ptr<Field>> Get(const FieldVector& fields) const;

  static Result<std::shared_ptr<Schema>> GetAll(const Schema& schema,
                                                const std::vector<FieldPath>& paths);

  /// \brief Retrieve the referenced column from a RecordBatch or Table
  Result<std::shared_ptr<Array>> Get(const RecordBatch& batch) const;
  Result<std::shared_ptr<ChunkedArray>> Get(const Table& table) const;

  /// \brief Retrieve the referenced child from an Array or ArrayData
  Result<std::shared_ptr<Array>> Get(const Array& array) const;
  Result<std::shared_ptr<ArrayData>> Get(const ArrayData& data) const;

  /// \brief Retrieve the referenced child from a ChunkedArray
  Result<std::shared_ptr<ChunkedArray>> Get(const ChunkedArray& chunked_array) const;

  /// \brief Retrieve the referenced child/column from an Array, ArrayData, ChunkedArray,
  /// RecordBatch, or Table
  ///
  /// Unlike `FieldPath::Get`, these variants are not zero-copy and the retrieved child's
  /// null bitmap is ANDed with its ancestors'
  Result<std::shared_ptr<Array>> GetFlattened(const Array& array,
                                              MemoryPool* pool = NULLPTR) const;
  Result<std::shared_ptr<ArrayData>> GetFlattened(const ArrayData& data,
                                                  MemoryPool* pool = NULLPTR) const;
  Result<std::shared_ptr<ChunkedArray>> GetFlattened(const ChunkedArray& chunked_array,
                                                     MemoryPool* pool = NULLPTR) const;
  Result<std::shared_ptr<Array>> GetFlattened(const RecordBatch& batch,
                                              MemoryPool* pool = NULLPTR) const;
  Result<std::shared_ptr<ChunkedArray>> GetFlattened(const Table& table,
                                                     MemoryPool* pool = NULLPTR) const;

 private:
  std::vector<int> indices_;
};

/// \class FieldRef
/// \brief Descriptor of a (potentially nested) field within a schema.
///
/// Unlike FieldPath (which exclusively uses indices of child fields), FieldRef may
/// reference a field by name. It is intended to replace parameters like `int field_index`
/// and `const std::string& field_name`; it can be implicitly constructed from either a
/// field index or a name.
///
/// Nested fields can be referenced as well. Given
///     schema({field("a", struct_({field("n", null())})), field("b", int32())})
///
/// the following all indicate the nested field named "n":
///     FieldRef ref1(0, 0);
///     FieldRef ref2("a", 0);
///     FieldRef ref3("a", "n");
///     FieldRef ref4(0, "n");
///     ARROW_ASSIGN_OR_RAISE(FieldRef ref5,
///                           FieldRef::FromDotPath(".a[0]"));
///
/// FieldPaths matching a FieldRef are retrieved using the member function FindAll.
/// Multiple matches are possible because field names may be duplicated within a schema.
/// For example:
///     Schema a_is_ambiguous({field("a", int32()), field("a", float32())});
///     auto matches = FieldRef("a").FindAll(a_is_ambiguous);
///     assert(matches.size() == 2);
///     assert(matches[0].Get(a_is_ambiguous)->Equals(a_is_ambiguous.field(0)));
///     assert(matches[1].Get(a_is_ambiguous)->Equals(a_is_ambiguous.field(1)));
///
/// Convenience accessors are available which raise a helpful error if the field is not
/// found or ambiguous, and for immediately calling FieldPath::Get to retrieve any
/// matching children:
///     auto maybe_match = FieldRef("struct", "field_i32").FindOneOrNone(schema);
///     auto maybe_column = FieldRef("struct", "field_i32").GetOne(some_table);
class ARROW_EXPORT FieldRef : public util::EqualityComparable<FieldRef> {
 public:
  FieldRef() = default;

  /// Construct a FieldRef using a string of indices. The reference will be retrieved as:
  /// schema.fields[self.indices[0]].type.fields[self.indices[1]] ...
  ///
  /// Empty indices are not valid.
  FieldRef(FieldPath indices);  // NOLINT runtime/explicit

  /// Construct a by-name FieldRef. Multiple fields may match a by-name FieldRef:
  /// [f for f in schema.fields where f.name == self.name]
  FieldRef(std::string name) : impl_(std::move(name)) {}    // NOLINT runtime/explicit
  FieldRef(const char* name) : impl_(std::string(name)) {}  // NOLINT runtime/explicit

  /// Equivalent to a single index string of indices.
  FieldRef(int index) : impl_(FieldPath({index})) {}  // NOLINT runtime/explicit

  /// Construct a nested FieldRef.
  explicit FieldRef(std::vector<FieldRef> refs) { Flatten(std::move(refs)); }

  /// Convenience constructor for nested FieldRefs: each argument will be used to
  /// construct a FieldRef
  template <typename A0, typename A1, typename... A>
  FieldRef(A0&& a0, A1&& a1, A&&... a) {
    Flatten({// cpplint thinks the following are constructor decls
             FieldRef(std::forward<A0>(a0)),     // NOLINT runtime/explicit
             FieldRef(std::forward<A1>(a1)),     // NOLINT runtime/explicit
             FieldRef(std::forward<A>(a))...});  // NOLINT runtime/explicit
  }

  /// Parse a dot path into a FieldRef.
  ///
  /// dot_path = '.' name
  ///          | '[' digit+ ']'
  ///          | dot_path+
  ///
  /// Examples:
  ///   ".alpha" => FieldRef("alpha")
  ///   "[2]" => FieldRef(2)
  ///   ".beta[3]" => FieldRef("beta", 3)
  ///   "[5].gamma.delta[7]" => FieldRef(5, "gamma", "delta", 7)
  ///   ".hello world" => FieldRef("hello world")
  ///   R"(.\[y\]\\tho\.\)" => FieldRef(R"([y]\tho.\)")
  ///
  /// Note: When parsing a name, a '\' preceding any other character will be dropped from
  /// the resulting name. Therefore if a name must contain the characters '.', '\', or '['
  /// those must be escaped with a preceding '\'.
  static Result<FieldRef> FromDotPath(const std::string& dot_path);
  std::string ToDotPath() const;

  bool Equals(const FieldRef& other) const { return impl_ == other.impl_; }

  std::string ToString() const;

  size_t hash() const;
  struct Hash {
    size_t operator()(const FieldRef& ref) const { return ref.hash(); }
  };

  explicit operator bool() const { return Equals(FieldPath{}); }
  bool operator!() const { return !Equals(FieldPath{}); }

  bool IsFieldPath() const { return std::holds_alternative<FieldPath>(impl_); }
  bool IsName() const { return std::holds_alternative<std::string>(impl_); }
  bool IsNested() const {
    if (IsName()) return false;
    if (IsFieldPath()) return std::get<FieldPath>(impl_).indices().size() > 1;
    return true;
  }

  /// \brief Return true if this ref is a name or a nested sequence of only names
  ///
  /// Useful for determining if iteration is possible without recursion or inner loops
  bool IsNameSequence() const {
    if (IsName()) return true;
    if (const auto* nested = nested_refs()) {
      for (const auto& ref : *nested) {
        if (!ref.IsName()) return false;
      }
      return !nested->empty();
    }
    return false;
  }

  const FieldPath* field_path() const {
    return IsFieldPath() ? &std::get<FieldPath>(impl_) : NULLPTR;
  }
  const std::string* name() const {
    return IsName() ? &std::get<std::string>(impl_) : NULLPTR;
  }
  const std::vector<FieldRef>* nested_refs() const {
    return std::holds_alternative<std::vector<FieldRef>>(impl_)
               ? &std::get<std::vector<FieldRef>>(impl_)
               : NULLPTR;
  }

  /// \brief Retrieve FieldPath of every child field which matches this FieldRef.
  std::vector<FieldPath> FindAll(const Schema& schema) const;
  std::vector<FieldPath> FindAll(const Field& field) const;
  std::vector<FieldPath> FindAll(const DataType& type) const;
  std::vector<FieldPath> FindAll(const FieldVector& fields) const;

  /// \brief Convenience function which applies FindAll to arg's type or schema.
  std::vector<FieldPath> FindAll(const ArrayData& array) const;
  std::vector<FieldPath> FindAll(const Array& array) const;
  std::vector<FieldPath> FindAll(const ChunkedArray& chunked_array) const;
  std::vector<FieldPath> FindAll(const RecordBatch& batch) const;
  std::vector<FieldPath> FindAll(const Table& table) const;

  /// \brief Convenience function: raise an error if matches is empty.
  template <typename T>
  Status CheckNonEmpty(const std::vector<FieldPath>& matches, const T& root) const {
    if (matches.empty()) {
      return Status::Invalid("No match for ", ToString(), " in ", root.ToString());
    }
    return Status::OK();
  }

  /// \brief Convenience function: raise an error if matches contains multiple FieldPaths.
  template <typename T>
  Status CheckNonMultiple(const std::vector<FieldPath>& matches, const T& root) const {
    if (matches.size() > 1) {
      return Status::Invalid("Multiple matches for ", ToString(), " in ",
                             root.ToString());
    }
    return Status::OK();
  }

  /// \brief Retrieve FieldPath of a single child field which matches this
  /// FieldRef. Emit an error if none or multiple match.
  template <typename T>
  Result<FieldPath> FindOne(const T& root) const {
    auto matches = FindAll(root);
    ARROW_RETURN_NOT_OK(CheckNonEmpty(matches, root));
    ARROW_RETURN_NOT_OK(CheckNonMultiple(matches, root));
    return std::move(matches[0]);
  }

  /// \brief Retrieve FieldPath of a single child field which matches this
  /// FieldRef. Emit an error if multiple match. An empty (invalid) FieldPath
  /// will be returned if none match.
  template <typename T>
  Result<FieldPath> FindOneOrNone(const T& root) const {
    auto matches = FindAll(root);
    ARROW_RETURN_NOT_OK(CheckNonMultiple(matches, root));
    if (matches.empty()) {
      return FieldPath();
    }
    return std::move(matches[0]);
  }

  template <typename T>
  using GetType = decltype(std::declval<FieldPath>().Get(std::declval<T>()).ValueOrDie());

  /// \brief Get all children matching this FieldRef.
  template <typename T>
  std::vector<GetType<T>> GetAll(const T& root) const {
    std::vector<GetType<T>> out;
    for (const auto& match : FindAll(root)) {
      out.push_back(match.Get(root).ValueOrDie());
    }
    return out;
  }
  /// \brief Get all children matching this FieldRef.
  ///
  /// Unlike `FieldRef::GetAll`, this variant is not zero-copy and the retrieved
  /// children's null bitmaps are ANDed with their ancestors'
  template <typename T>
  Result<std::vector<GetType<T>>> GetAllFlattened(const T& root,
                                                  MemoryPool* pool = NULLPTR) const {
    std::vector<GetType<T>> out;
    for (const auto& match : FindAll(root)) {
      ARROW_ASSIGN_OR_RAISE(auto child, match.GetFlattened(root, pool));
      out.push_back(std::move(child));
    }
    return out;
  }

  /// \brief Get the single child matching this FieldRef.
  /// Emit an error if none or multiple match.
  template <typename T>
  Result<GetType<T>> GetOne(const T& root) const {
    ARROW_ASSIGN_OR_RAISE(auto match, FindOne(root));
    return match.Get(root).ValueOrDie();
  }
  /// \brief Get the single child matching this FieldRef.
  ///
  /// Unlike `FieldRef::GetOne`, this variant is not zero-copy and the retrieved
  /// child's null bitmap is ANDed with its ancestors'
  template <typename T>
  Result<GetType<T>> GetOneFlattened(const T& root, MemoryPool* pool = NULLPTR) const {
    ARROW_ASSIGN_OR_RAISE(auto match, FindOne(root));
    return match.GetFlattened(root, pool);
  }

  /// \brief Get the single child matching this FieldRef.
  /// Return nullptr if none match, emit an error if multiple match.
  template <typename T>
  Result<GetType<T>> GetOneOrNone(const T& root) const {
    ARROW_ASSIGN_OR_RAISE(auto match, FindOneOrNone(root));
    if (match.empty()) {
      return static_cast<GetType<T>>(NULLPTR);
    }
    return match.Get(root).ValueOrDie();
  }
  /// \brief Get the single child matching this FieldRef.
  ///
  /// Return nullptr if none match, emit an error if multiple match.
  /// Unlike `FieldRef::GetOneOrNone`, this variant is not zero-copy and the
  /// retrieved child's null bitmap is ANDed with its ancestors'
  template <typename T>
  Result<GetType<T>> GetOneOrNoneFlattened(const T& root,
                                           MemoryPool* pool = NULLPTR) const {
    ARROW_ASSIGN_OR_RAISE(auto match, FindOneOrNone(root));
    if (match.empty()) {
      return static_cast<GetType<T>>(NULLPTR);
    }
    return match.GetFlattened(root, pool);
  }

 private:
  void Flatten(std::vector<FieldRef> children);

  std::variant<FieldPath, std::string, std::vector<FieldRef>> impl_;
};

ARROW_EXPORT void PrintTo(const FieldRef& ref, std::ostream* os);

ARROW_EXPORT
std::ostream& operator<<(std::ostream& os, const FieldRef&);

// ----------------------------------------------------------------------
// Schema

enum class Endianness {
  Little = 0,
  Big = 1,
#if ARROW_LITTLE_ENDIAN
  Native = Little
#else
  Native = Big
#endif
};

/// \class Schema
/// \brief Sequence of arrow::Field objects describing the columns of a record
/// batch or table data structure
class ARROW_EXPORT Schema : public detail::Fingerprintable,
                            public util::EqualityComparable<Schema>,
                            public util::ToStringOstreamable<Schema> {
 public:
  explicit Schema(FieldVector fields, Endianness endianness,
                  std::shared_ptr<const KeyValueMetadata> metadata = NULLPTR);

  explicit Schema(FieldVector fields,
                  std::shared_ptr<const KeyValueMetadata> metadata = NULLPTR);

  Schema(const Schema&);

  ~Schema() override;

  /// Returns true if all of the schema fields are equal
  bool Equals(const Schema& other, bool check_metadata = false) const;
  bool Equals(const std::shared_ptr<Schema>& other, bool check_metadata = false) const;

  /// \brief Set endianness in the schema
  ///
  /// \return new Schema
  std::shared_ptr<Schema> WithEndianness(Endianness endianness) const;

  /// \brief Return endianness in the schema
  Endianness endianness() const;

  /// \brief Indicate if endianness is equal to platform-native endianness
  bool is_native_endian() const;

  /// \brief Return the number of fields (columns) in the schema
  int num_fields() const;

  /// Return the ith schema element. Does not boundscheck
  const std::shared_ptr<Field>& field(int i) const;

  const FieldVector& fields() const;

  std::vector<std::string> field_names() const;

  /// Returns null if name not found
  std::shared_ptr<Field> GetFieldByName(const std::string& name) const;

  /// \brief Return the indices of all fields having this name in sorted order
  FieldVector GetAllFieldsByName(const std::string& name) const;

  /// Returns -1 if name not found
  int GetFieldIndex(const std::string& name) const;

  /// Return the indices of all fields having this name
  std::vector<int> GetAllFieldIndices(const std::string& name) const;

  /// Indicate if field named `name` can be found unambiguously in the schema.
  Status CanReferenceFieldByName(const std::string& name) const;

  /// Indicate if fields named `names` can be found unambiguously in the schema.
  Status CanReferenceFieldsByNames(const std::vector<std::string>& names) const;

  /// \brief The custom key-value metadata, if any
  ///
  /// \return metadata may be null
  const std::shared_ptr<const KeyValueMetadata>& metadata() const;

  /// \brief Render a string representation of the schema suitable for debugging
  /// \param[in] show_metadata when true, if KeyValueMetadata is non-empty,
  /// print keys and values in the output
  std::string ToString(bool show_metadata = false) const;

  Result<std::shared_ptr<Schema>> AddField(int i,
                                           const std::shared_ptr<Field>& field) const;
  Result<std::shared_ptr<Schema>> RemoveField(int i) const;
  Result<std::shared_ptr<Schema>> SetField(int i,
                                           const std::shared_ptr<Field>& field) const;

  /// \brief Replace field names with new names
  ///
  /// \param[in] names new names
  /// \return new Schema
  Result<std::shared_ptr<Schema>> WithNames(const std::vector<std::string>& names) const;

  /// \brief Replace key-value metadata with new metadata
  ///
  /// \param[in] metadata new KeyValueMetadata
  /// \return new Schema
  std::shared_ptr<Schema> WithMetadata(
      const std::shared_ptr<const KeyValueMetadata>& metadata) const;

  /// \brief Return copy of Schema without the KeyValueMetadata
  std::shared_ptr<Schema> RemoveMetadata() const;

  /// \brief Indicate that the Schema has non-empty KevValueMetadata
  bool HasMetadata() const;

  /// \brief Indicate that the Schema has distinct field names.
  bool HasDistinctFieldNames() const;

 protected:
  std::string ComputeFingerprint() const override;
  std::string ComputeMetadataFingerprint() const override;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

ARROW_EXPORT void PrintTo(const Schema& s, std::ostream* os);

ARROW_EXPORT
std::string EndiannessToString(Endianness endianness);

// ----------------------------------------------------------------------

/// \brief Convenience class to incrementally construct/merge schemas.
///
/// This class amortizes the cost of validating field name conflicts by
/// maintaining the mapping. The caller also controls the conflict resolution
/// scheme.
class ARROW_EXPORT SchemaBuilder {
 public:
  // Indicate how field conflict(s) should be resolved when building a schema. A
  // conflict arise when a field is added to the builder and one or more field(s)
  // with the same name already exists.
  enum ConflictPolicy {
    // Ignore the conflict and append the field. This is the default behavior of the
    // Schema constructor and the `arrow::schema` factory function.
    CONFLICT_APPEND = 0,
    // Keep the existing field and ignore the newer one.
    CONFLICT_IGNORE,
    // Replace the existing field with the newer one.
    CONFLICT_REPLACE,
    // Merge the fields. The merging behavior can be controlled by `Field::MergeOptions`
    // specified at construction time. Also see documentation of `Field::MergeWith`.
    CONFLICT_MERGE,
    // Refuse the new field and error out.
    CONFLICT_ERROR
  };

  /// \brief Construct an empty SchemaBuilder
  /// `field_merge_options` is only effective when `conflict_policy` == `CONFLICT_MERGE`.
  SchemaBuilder(
      ConflictPolicy conflict_policy = CONFLICT_APPEND,
      Field::MergeOptions field_merge_options = Field::MergeOptions::Defaults());
  /// \brief Construct a SchemaBuilder from a list of fields
  /// `field_merge_options` is only effective when `conflict_policy` == `CONFLICT_MERGE`.
  SchemaBuilder(
      FieldVector fields, ConflictPolicy conflict_policy = CONFLICT_APPEND,
      Field::MergeOptions field_merge_options = Field::MergeOptions::Defaults());
  /// \brief Construct a SchemaBuilder from a schema, preserving the metadata
  /// `field_merge_options` is only effective when `conflict_policy` == `CONFLICT_MERGE`.
  SchemaBuilder(
      const std::shared_ptr<Schema>& schema,
      ConflictPolicy conflict_policy = CONFLICT_APPEND,
      Field::MergeOptions field_merge_options = Field::MergeOptions::Defaults());

  /// \brief Return the conflict resolution method.
  ConflictPolicy policy() const;

  /// \brief Set the conflict resolution method.
  void SetPolicy(ConflictPolicy resolution);

  /// \brief Add a field to the constructed schema.
  ///
  /// \param[in] field to add to the constructed Schema.
  /// \return A failure if encountered.
  Status AddField(const std::shared_ptr<Field>& field);

  /// \brief Add multiple fields to the constructed schema.
  ///
  /// \param[in] fields to add to the constructed Schema.
  /// \return The first failure encountered, if any.
  Status AddFields(const FieldVector& fields);

  /// \brief Add fields of a Schema to the constructed Schema.
  ///
  /// \param[in] schema to take fields to add to the constructed Schema.
  /// \return The first failure encountered, if any.
  Status AddSchema(const std::shared_ptr<Schema>& schema);

  /// \brief Add fields of multiple Schemas to the constructed Schema.
  ///
  /// \param[in] schemas to take fields to add to the constructed Schema.
  /// \return The first failure encountered, if any.
  Status AddSchemas(const std::vector<std::shared_ptr<Schema>>& schemas);

  Status AddMetadata(const KeyValueMetadata& metadata);

  /// \brief Return the constructed Schema.
  ///
  /// The builder internal state is not affected by invoking this method, i.e.
  /// a single builder can yield multiple incrementally constructed schemas.
  ///
  /// \return the constructed schema.
  Result<std::shared_ptr<Schema>> Finish() const;

  /// \brief Merge schemas in a unified schema according to policy.
  static Result<std::shared_ptr<Schema>> Merge(
      const std::vector<std::shared_ptr<Schema>>& schemas,
      ConflictPolicy policy = CONFLICT_MERGE);

  /// \brief Indicate if schemas are compatible to merge according to policy.
  static Status AreCompatible(const std::vector<std::shared_ptr<Schema>>& schemas,
                              ConflictPolicy policy = CONFLICT_MERGE);

  /// \brief Reset internal state with an empty schema (and metadata).
  void Reset();

  ~SchemaBuilder();

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;

  Status AppendField(const std::shared_ptr<Field>& field);
};

/// \brief Unifies schemas by merging fields by name.
///
/// The behavior of field merging can be controlled via `Field::MergeOptions`.
///
/// The resulting schema will contain the union of fields from all schemas.
/// Fields with the same name will be merged. See `Field::MergeOptions`.
/// - They are expected to be mergeable under provided `field_merge_options`.
/// - The unified field will inherit the metadata from the schema where
///   that field is first defined.
/// - The first N fields in the schema will be ordered the same as the
///   N fields in the first schema.
/// The resulting schema will inherit its metadata from the first input schema.
/// Returns an error if:
/// - Any input schema contains fields with duplicate names.
/// - Fields of the same name are not mergeable.
ARROW_EXPORT
Result<std::shared_ptr<Schema>> UnifySchemas(
    const std::vector<std::shared_ptr<Schema>>& schemas,
    Field::MergeOptions field_merge_options = Field::MergeOptions::Defaults());

namespace internal {

constexpr bool may_have_validity_bitmap(Type::type id) {
  switch (id) {
    case Type::NA:
    case Type::DENSE_UNION:
    case Type::SPARSE_UNION:
    case Type::RUN_END_ENCODED:
      return false;
    default:
      return true;
  }
}

ARROW_DEPRECATED("Deprecated in 17.0.0. Use may_have_validity_bitmap() instead.")
constexpr bool HasValidityBitmap(Type::type id) { return may_have_validity_bitmap(id); }

ARROW_EXPORT
std::string ToString(Type::type id);

ARROW_EXPORT
std::string ToTypeName(Type::type id);

ARROW_EXPORT
std::string ToString(TimeUnit::type unit);

}  // namespace internal

// Helpers to get instances of data types based on general categories

/// \brief Signed integer types
ARROW_EXPORT
const std::vector<std::shared_ptr<DataType>>& SignedIntTypes();
/// \brief Unsigned integer types
ARROW_EXPORT
const std::vector<std::shared_ptr<DataType>>& UnsignedIntTypes();
/// \brief Signed and unsigned integer types
ARROW_EXPORT
const std::vector<std::shared_ptr<DataType>>& IntTypes();
/// \brief Floating point types
ARROW_EXPORT
const std::vector<std::shared_ptr<DataType>>& FloatingPointTypes();
/// \brief Number types without boolean - integer and floating point types
ARROW_EXPORT
const std::vector<std::shared_ptr<DataType>>& NumericTypes();
/// \brief Binary and string-like types (except fixed-size binary)
ARROW_EXPORT
const std::vector<std::shared_ptr<DataType>>& BaseBinaryTypes();
/// \brief Binary and large-binary types
ARROW_EXPORT
const std::vector<std::shared_ptr<DataType>>& BinaryTypes();
/// \brief String and large-string types
ARROW_EXPORT
const std::vector<std::shared_ptr<DataType>>& StringTypes();
/// \brief String-view and Binary-view
ARROW_EXPORT
const std::vector<std::shared_ptr<DataType>>& BinaryViewTypes();
/// \brief Temporal types including date, time and timestamps for each unit
ARROW_EXPORT
const std::vector<std::shared_ptr<DataType>>& TemporalTypes();
/// \brief Interval types
ARROW_EXPORT
const std::vector<std::shared_ptr<DataType>>& IntervalTypes();
/// \brief Duration types for each unit
ARROW_EXPORT
const std::vector<std::shared_ptr<DataType>>& DurationTypes();
/// \brief Numeric, base binary, date, boolean and null types
ARROW_EXPORT
const std::vector<std::shared_ptr<DataType>>& PrimitiveTypes();

/// \brief Decimal type ids
ARROW_EXPORT
const std::vector<Type::type>& DecimalTypeIds();

}  // namespace arrow