Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Debian packages RPM packages NuGet packages

Repository URL to install this package:

Details    
pyarrow / include / arrow / testing / random.h
Size: Mime:
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <limits>
#include <memory>
#include <random>
#include <vector>

#include "arrow/testing/uniform_real.h"
#include "arrow/testing/visibility.h"
#include "arrow/type.h"

namespace arrow {

class Array;

namespace random {

using SeedType = int32_t;
constexpr SeedType kSeedMax = std::numeric_limits<SeedType>::max();

class ARROW_TESTING_EXPORT RandomArrayGenerator {
 public:
  explicit RandomArrayGenerator(SeedType seed)
      : seed_distribution_(static_cast<SeedType>(1), kSeedMax), seed_rng_(seed) {}

  /// \brief Generate a null bitmap
  ///
  /// \param[in] size the size of the bitmap to generate
  /// \param[in] null_probability the probability of a bit being zero
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Buffer
  std::shared_ptr<Buffer> NullBitmap(int64_t size, double null_probability = 0,
                                     int64_t alignment = kDefaultBufferAlignment,
                                     MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random BooleanArray
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] true_probability the probability of a value being 1 / bit-set
  /// \param[in] null_probability the probability of a value being null
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> Boolean(int64_t size, double true_probability,
                                 double null_probability = 0,
                                 int64_t alignment = kDefaultBufferAlignment,
                                 MemoryPool* memory_pool = default_memory_pool());
  /// \brief Generate a random UInt8Array
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] min the lower bound of the uniform distribution
  /// \param[in] max the upper bound of the uniform distribution
  /// \param[in] null_probability the probability of a value being null
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> UInt8(int64_t size, uint8_t min, uint8_t max,
                               double null_probability = 0,
                               int64_t alignment = kDefaultBufferAlignment,
                               MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random Int8Array
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] min the lower bound of the uniform distribution
  /// \param[in] max the upper bound of the uniform distribution
  /// \param[in] null_probability the probability of a value being null
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> Int8(int64_t size, int8_t min, int8_t max,
                              double null_probability = 0,
                              int64_t alignment = kDefaultBufferAlignment,
                              MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random UInt16Array
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] min the lower bound of the uniform distribution
  /// \param[in] max the upper bound of the uniform distribution
  /// \param[in] null_probability the probability of a value being null
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> UInt16(int64_t size, uint16_t min, uint16_t max,
                                double null_probability = 0,
                                int64_t alignment = kDefaultBufferAlignment,
                                MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random Int16Array
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] min the lower bound of the uniform distribution
  /// \param[in] max the upper bound of the uniform distribution
  /// \param[in] null_probability the probability of a value being null
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> Int16(int64_t size, int16_t min, int16_t max,
                               double null_probability = 0,
                               int64_t alignment = kDefaultBufferAlignment,
                               MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random UInt32Array
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] min the lower bound of the uniform distribution
  /// \param[in] max the upper bound of the uniform distribution
  /// \param[in] null_probability the probability of a value being null
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> UInt32(int64_t size, uint32_t min, uint32_t max,
                                double null_probability = 0,
                                int64_t alignment = kDefaultBufferAlignment,
                                MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random Int32Array
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] min the lower bound of the uniform distribution
  /// \param[in] max the upper bound of the uniform distribution
  /// \param[in] null_probability the probability of a value being null
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> Int32(int64_t size, int32_t min, int32_t max,
                               double null_probability = 0,
                               int64_t alignment = kDefaultBufferAlignment,
                               MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random UInt64Array
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] min the lower bound of the uniform distribution
  /// \param[in] max the upper bound of the uniform distribution
  /// \param[in] null_probability the probability of a value being null
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> UInt64(int64_t size, uint64_t min, uint64_t max,
                                double null_probability = 0,
                                int64_t alignment = kDefaultBufferAlignment,
                                MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random Int64Array
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] min the lower bound of the uniform distribution
  /// \param[in] max the upper bound of the uniform distribution
  /// \param[in] null_probability the probability of a value being null
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> Int64(int64_t size, int64_t min, int64_t max,
                               double null_probability = 0,
                               int64_t alignment = kDefaultBufferAlignment,
                               MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random HalfFloatArray
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] min the lower bound of the distribution
  /// \param[in] max the upper bound of the distribution
  /// \param[in] null_probability the probability of a value being null
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> Float16(int64_t size, int16_t min, int16_t max,
                                 double null_probability = 0,
                                 int64_t alignment = kDefaultBufferAlignment,
                                 MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random FloatArray
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] min the lower bound of the uniform distribution
  /// \param[in] max the upper bound of the uniform distribution
  /// \param[in] null_probability the probability of a value being null
  /// \param[in] nan_probability the probability of a value being NaN
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> Float32(int64_t size, float min, float max,
                                 double null_probability = 0, double nan_probability = 0,
                                 int64_t alignment = kDefaultBufferAlignment,
                                 MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random DoubleArray
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] min the lower bound of the uniform distribution
  /// \param[in] max the upper bound of the uniform distribution
  /// \param[in] null_probability the probability of a value being null
  /// \param[in] nan_probability the probability of a value being NaN
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> Float64(int64_t size, double min, double max,
                                 double null_probability = 0, double nan_probability = 0,
                                 int64_t alignment = kDefaultBufferAlignment,
                                 MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random Date64Array
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] min the lower bound of the uniform distribution
  /// \param[in] max the upper bound of the uniform distribution
  /// \param[in] null_probability the probability of a value being null
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> Date64(int64_t size, int64_t min, int64_t max,
                                double null_probability = 0,
                                int64_t alignment = kDefaultBufferAlignment,
                                MemoryPool* memory_pool = default_memory_pool());

  template <typename ArrowType, typename CType = typename ArrowType::c_type>
  std::shared_ptr<Array> Numeric(int64_t size, CType min, CType max,
                                 double null_probability = 0,
                                 int64_t alignment = kDefaultBufferAlignment,
                                 MemoryPool* memory_pool = default_memory_pool()) {
    switch (ArrowType::type_id) {
      case Type::UINT8:
        return UInt8(size, static_cast<uint8_t>(min), static_cast<uint8_t>(max),
                     null_probability, alignment, memory_pool);
      case Type::INT8:
        return Int8(size, static_cast<int8_t>(min), static_cast<int8_t>(max),
                    null_probability, alignment, memory_pool);
      case Type::UINT16:
        return UInt16(size, static_cast<uint16_t>(min), static_cast<uint16_t>(max),
                      null_probability, alignment, memory_pool);
      case Type::INT16:
        return Int16(size, static_cast<int16_t>(min), static_cast<int16_t>(max),
                     null_probability, alignment, memory_pool);
      case Type::UINT32:
        return UInt32(size, static_cast<uint32_t>(min), static_cast<uint32_t>(max),
                      null_probability, alignment, memory_pool);
      case Type::INT32:
        return Int32(size, static_cast<int32_t>(min), static_cast<int32_t>(max),
                     null_probability, alignment, memory_pool);
      case Type::UINT64:
        return UInt64(size, static_cast<uint64_t>(min), static_cast<uint64_t>(max),
                      null_probability, alignment, memory_pool);
      case Type::INT64:
        return Int64(size, static_cast<int64_t>(min), static_cast<int64_t>(max),
                     null_probability, alignment, memory_pool);
      case Type::HALF_FLOAT:
        return Float16(size, static_cast<int16_t>(min), static_cast<int16_t>(max),
                       null_probability, alignment, memory_pool);
      case Type::FLOAT:
        return Float32(size, static_cast<float>(min), static_cast<float>(max),
                       null_probability, /*nan_probability=*/0, alignment, memory_pool);
      case Type::DOUBLE:
        return Float64(size, static_cast<double>(min), static_cast<double>(max),
                       null_probability, /*nan_probability=*/0, alignment, memory_pool);
      case Type::DATE64:
        return Date64(size, static_cast<int64_t>(min), static_cast<int64_t>(max),
                      null_probability, alignment, memory_pool);
      default:
        return nullptr;
    }
  }

  /// \brief Generate a random Decimal32Array
  ///
  /// \param[in] type the type of the array to generate
  ///            (must be an instance of Decimal32Type)
  /// \param[in] size the size of the array to generate
  /// \param[in] null_probability the probability of a value being null
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> Decimal32(std::shared_ptr<DataType> type, int64_t size,
                                   double null_probability = 0,
                                   int64_t alignment = kDefaultBufferAlignment,
                                   MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random Decimal64Array
  ///
  /// \param[in] type the type of the array to generate
  ///            (must be an instance of Decimal64Type)
  /// \param[in] size the size of the array to generate
  /// \param[in] null_probability the probability of a value being null
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> Decimal64(std::shared_ptr<DataType> type, int64_t size,
                                   double null_probability = 0,
                                   int64_t alignment = kDefaultBufferAlignment,
                                   MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random Decimal128Array
  ///
  /// \param[in] type the type of the array to generate
  ///            (must be an instance of Decimal128Type)
  /// \param[in] size the size of the array to generate
  /// \param[in] null_probability the probability of a value being null
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> Decimal128(std::shared_ptr<DataType> type, int64_t size,
                                    double null_probability = 0,
                                    int64_t alignment = kDefaultBufferAlignment,
                                    MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random Decimal256Array
  ///
  /// \param[in] type the type of the array to generate
  ///            (must be an instance of Decimal256Type)
  /// \param[in] size the size of the array to generate
  /// \param[in] null_probability the probability of a value being null
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> Decimal256(std::shared_ptr<DataType> type, int64_t size,
                                    double null_probability = 0,
                                    int64_t alignment = kDefaultBufferAlignment,
                                    MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate an array of offsets (for use in e.g. ListArray::FromArrays)
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] first_offset the first offset value (usually 0)
  /// \param[in] last_offset the last offset value (usually the size of the child array)
  /// \param[in] null_probability the probability of an offset being null
  /// \param[in] force_empty_nulls if true, null offsets must have 0 "length"
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> Offsets(int64_t size, int32_t first_offset, int32_t last_offset,
                                 double null_probability = 0,
                                 bool force_empty_nulls = false,
                                 int64_t alignment = kDefaultBufferAlignment,
                                 MemoryPool* memory_pool = default_memory_pool());

  std::shared_ptr<Array> LargeOffsets(int64_t size, int64_t first_offset,
                                      int64_t last_offset, double null_probability = 0,
                                      bool force_empty_nulls = false,
                                      int64_t alignment = kDefaultBufferAlignment,
                                      MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random StringArray
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] min_length the lower bound of the string length
  ///            determined by the uniform distribution
  /// \param[in] max_length the upper bound of the string length
  ///            determined by the uniform distribution
  /// \param[in] null_probability the probability of a value being null
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> String(int64_t size, int32_t min_length, int32_t max_length,
                                double null_probability = 0,
                                int64_t alignment = kDefaultBufferAlignment,
                                MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random StringViewArray
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] min_length the lower bound of the string length
  ///            determined by the uniform distribution
  /// \param[in] max_length the upper bound of the string length
  ///            determined by the uniform distribution
  /// \param[in] null_probability the probability of a value being null
  /// \param[in] max_data_buffer_length the data buffer size at which
  ///            a new chunk will be generated
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> StringView(int64_t size, int32_t min_length, int32_t max_length,
                                    double null_probability = 0,
                                    std::optional<int64_t> max_data_buffer_length = {},
                                    int64_t alignment = kDefaultBufferAlignment,
                                    MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random LargeStringArray
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] min_length the lower bound of the string length
  ///            determined by the uniform distribution
  /// \param[in] max_length the upper bound of the string length
  ///            determined by the uniform distribution
  /// \param[in] null_probability the probability of a value being null
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> LargeString(int64_t size, int32_t min_length, int32_t max_length,
                                     double null_probability = 0,
                                     int64_t alignment = kDefaultBufferAlignment,
                                     MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random StringArray with repeated values
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] unique the number of unique string values used
  ///            to populate the array
  /// \param[in] min_length the lower bound of the string length
  ///            determined by the uniform distribution
  /// \param[in] max_length the upper bound of the string length
  ///            determined by the uniform distribution
  /// \param[in] null_probability the probability of a value being null
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> StringWithRepeats(
      int64_t size, int64_t unique, int32_t min_length, int32_t max_length,
      double null_probability = 0, int64_t alignment = kDefaultBufferAlignment,
      MemoryPool* memory_pool = default_memory_pool());

  /// \brief Like StringWithRepeats but return BinaryArray
  std::shared_ptr<Array> BinaryWithRepeats(
      int64_t size, int64_t unique, int32_t min_length, int32_t max_length,
      double null_probability = 0, int64_t alignment = kDefaultBufferAlignment,
      MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random FixedSizeBinaryArray
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] byte_width the byte width of fixed-size binary items
  /// \param[in] null_probability the probability of a value being null
  /// \param[in] min_byte the lower bound of each byte in the binary determined by the
  ///            uniform distribution
  /// \param[in] max_byte the upper bound of each byte in the binary determined by the
  ///            uniform distribution
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> FixedSizeBinary(int64_t size, int32_t byte_width,
                                         double null_probability = 0,
                                         uint8_t min_byte = static_cast<uint8_t>('A'),
                                         uint8_t max_byte = static_cast<uint8_t>('z'),
                                         int64_t alignment = kDefaultBufferAlignment,
                                         MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random ListArray
  ///
  /// \param[in] values The underlying values array
  /// \param[in] size The size of the generated list array
  /// \param[in] null_probability the probability of a list value being null
  /// \param[in] force_empty_nulls if true, null list entries must have 0 length
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> List(const Array& values, int64_t size,
                              double null_probability = 0, bool force_empty_nulls = false,
                              int64_t alignment = kDefaultBufferAlignment,
                              MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random ListViewArray
  ///
  /// \param[in] values The underlying values array
  /// \param[in] size The size of the generated list array
  /// \param[in] null_probability the probability of a list value being null
  /// \param[in] force_empty_nulls if true, null list entries must have 0 length
  /// must be set to 0
  /// \param[in] coverage proportion of the values array covered by list-views
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> ListView(const Array& values, int64_t size,
                                  double null_probability = 0,
                                  bool force_empty_nulls = false, double coverage = 1.0,
                                  int64_t alignment = kDefaultBufferAlignment,
                                  MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random LargeListViewArray
  ///
  /// \param[in] values The underlying values array
  /// \param[in] size The size of the generated list array
  /// \param[in] null_probability the probability of a list value being null
  /// \param[in] force_empty_nulls if true, null list entries must have 0 length
  /// must be set to 0
  /// \param[in] coverage proportion of the values array covered by list-views
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> LargeListView(const Array& values, int64_t size,
                                       double null_probability = 0,
                                       bool force_empty_nulls = false,
                                       double coverage = 1.0,
                                       int64_t alignment = kDefaultBufferAlignment,
                                       MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random MapArray
  ///
  /// \param[in] keys The underlying keys array
  /// \param[in] items The underlying items array
  /// \param[in] size The size of the generated map array
  /// \param[in] null_probability the probability of a map value being null
  /// \param[in] force_empty_nulls if true, null map entries must have 0 length
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> Map(const std::shared_ptr<Array>& keys,
                             const std::shared_ptr<Array>& items, int64_t size,
                             double null_probability = 0, bool force_empty_nulls = false,
                             int64_t alignment = kDefaultBufferAlignment,
                             MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random RunEndEncodedArray
  ///
  /// \param[in] value_type The DataType of the encoded values
  /// \param[in] logical_size The logical length of the generated array
  /// \param[in] null_probability the probability of a value being null
  ///
  /// \return a generated Array
  std::shared_ptr<Array> RunEndEncoded(std::shared_ptr<DataType> value_type,
                                       int64_t logical_size,
                                       double null_probability = 0.0);

  /// \brief Generate a random SparseUnionArray
  ///
  /// The type ids are chosen randomly, according to a uniform distribution,
  /// amongst the given child fields.
  ///
  /// \param[in] fields Vector of Arrays containing the data for each union field
  /// \param[in] size The size of the generated sparse union array
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  std::shared_ptr<Array> SparseUnion(const ArrayVector& fields, int64_t size,
                                     int64_t alignment = kDefaultBufferAlignment,
                                     MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random DenseUnionArray
  ///
  /// The type ids are chosen randomly, according to a uniform distribution,
  /// amongst the given child fields.  The offsets are incremented along
  /// each child field.
  ///
  /// \param[in] fields Vector of Arrays containing the data for each union field
  /// \param[in] size The size of the generated sparse union array
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  std::shared_ptr<Array> DenseUnion(const ArrayVector& fields, int64_t size,
                                    int64_t alignment = kDefaultBufferAlignment,
                                    MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random Array of the specified type, size, and null_probability.
  ///
  /// Generation parameters other than size and null_probability are determined based on
  /// the type of Array to be generated.
  /// If boolean the probabilities of true,false values are 0.25,0.75 respectively.
  /// If numeric min,max will be the least and greatest representable values.
  /// If string min_length,max_length will be 0,sqrt(size) respectively.
  ///
  /// \param[in] type the type of Array to generate
  /// \param[in] size the size of the Array to generate
  /// \param[in] null_probability the probability of a slot being null
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  /// \return a generated Array
  std::shared_ptr<Array> ArrayOf(std::shared_ptr<DataType> type, int64_t size,
                                 double null_probability = 0,
                                 int64_t alignment = kDefaultBufferAlignment,
                                 MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate an array with random data based on the given field. See BatchOf
  /// for usage info.
  std::shared_ptr<Array> ArrayOf(const Field& field, int64_t size,
                                 int64_t alignment = kDefaultBufferAlignment,
                                 MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a record batch with random data of the specified length.
  ///
  /// Generation options are read from key-value metadata for each field, and may be
  /// specified at any nesting level. For example, generation options for the child
  /// values of a list array can be specified by constructing the list type with
  /// list(field("item", int8(), options_metadata))
  ///
  /// The following options are supported:
  ///
  /// For all types except NullType:
  /// - null_probability (double): range [0.0, 1.0] the probability of a null value.
  /// Default/value is 0.0 if the field is marked non-nullable, else it is 0.01
  ///
  /// For all numeric types T:
  /// - min (T::c_type): the minimum value to generate (inclusive), default
  ///   std::numeric_limits<T::c_type>::min()
  /// - max (T::c_type): the maximum value to generate (inclusive), default
  ///   std::numeric_limits<T::c_type>::max()
  /// Note this means that, for example, min/max are int16_t values for HalfFloatType.
  ///
  /// For floating point types T for which is_physical_floating_type<T>:
  /// - nan_probability (double): range [0.0, 1.0] the probability of a NaN value.
  ///
  /// For BooleanType:
  /// - true_probability (double): range [0.0, 1.0] the probability of a true.
  ///
  /// For DictionaryType:
  /// - values (int32_t): the size of the dictionary.
  /// Other properties are passed to the generator for the dictionary indices. However,
  /// min and max cannot be specified. Note it is not possible to otherwise customize
  /// the generation of dictionary values.
  ///
  /// For list, string, and binary types T, including their large variants:
  /// - min_length (T::offset_type): the minimum length of the child to generate,
  ///   default 0
  /// - max_length (T::offset_type): the minimum length of the child to generate,
  ///   default 1024
  ///
  /// For string and binary types T (not including their large or view variants):
  /// - unique (int32_t): if positive, this many distinct values will be generated
  ///   and all array values will be one of these values, default -1
  ///
  /// For string and binary view types T:
  /// - max_data_buffer_length (int64_t): the data buffer size at which a new chunk
  ///   will be generated, default 32KB
  ///
  /// For MapType:
  /// - values (int32_t): the number of key-value pairs to generate, which will be
  ///   partitioned among the array values.
  ///
  /// For extension types:
  /// - extension_allow_random_storage (bool): in general an extension array may have
  ///   invariants on its storage beyond those already imposed by the arrow format,
  ///   which may result in an invalid array if we just wrap randomly generated
  ///   storage. Set this flag to explicitly allow wrapping of randomly generated
  ///   storage.
  std::shared_ptr<arrow::RecordBatch> BatchOf(
      const FieldVector& fields, int64_t size,
      int64_t alignment = kDefaultBufferAlignment,
      MemoryPool* memory_pool = default_memory_pool());

  SeedType seed() { return seed_distribution_(seed_rng_); }

 private:
  std::uniform_int_distribution<SeedType> seed_distribution_;
  std::default_random_engine seed_rng_;
};

/// Generate a batch with random data. See RandomArrayGenerator::BatchOf.
ARROW_TESTING_EXPORT
std::shared_ptr<arrow::RecordBatch> GenerateBatch(
    const FieldVector& fields, int64_t size, SeedType seed,
    int64_t alignment = kDefaultBufferAlignment,
    MemoryPool* memory_pool = default_memory_pool());

/// Generate an array with random data. See RandomArrayGenerator::BatchOf.
ARROW_TESTING_EXPORT
std::shared_ptr<arrow::Array> GenerateArray(
    const Field& field, int64_t size, SeedType seed,
    int64_t alignment = kDefaultBufferAlignment,
    MemoryPool* memory_pool = default_memory_pool());

}  // namespace random

//
// Assorted functions
//

ARROW_TESTING_EXPORT
void rand_day_millis(int64_t N, std::vector<DayTimeIntervalType::DayMilliseconds>* out);
ARROW_TESTING_EXPORT
void rand_month_day_nanos(int64_t N,
                          std::vector<MonthDayNanoIntervalType::MonthDayNanos>* out);

template <typename T, typename U>
void randint(int64_t N, T lower, T upper, std::vector<U>* out) {
  const int random_seed = 0;
  std::default_random_engine gen(random_seed);
  std::uniform_int_distribution<T> d(lower, upper);
  out->resize(N, static_cast<T>(0));
  std::generate(out->begin(), out->end(), [&d, &gen] { return static_cast<U>(d(gen)); });
}

template <typename T, typename U>
void random_real(int64_t n, uint32_t seed, T min_value, T max_value,
                 std::vector<U>* out) {
  std::default_random_engine gen(seed);
  ::arrow::random::uniform_real_distribution<T> d(min_value, max_value);
  out->resize(n, static_cast<T>(0));
  std::generate(out->begin(), out->end(), [&d, &gen] { return static_cast<U>(d(gen)); });
}

template <typename T, typename U>
void rand_uniform_int(int64_t n, uint32_t seed, T min_value, T max_value, U* out) {
  assert(out || (n == 0));
  std::default_random_engine gen(seed);
  std::uniform_int_distribution<T> d(min_value, max_value);
  std::generate(out, out + n, [&d, &gen] { return static_cast<U>(d(gen)); });
}

}  // namespace arrow