Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

arrow-nightlies / pyarrow   python

Repository URL to install this package:

Version: 19.0.0.dev70 

/ include / arrow / testing / random.h

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <limits>
#include <memory>
#include <random>
#include <vector>

#include "arrow/testing/uniform_real.h"
#include "arrow/testing/visibility.h"
#include "arrow/type.h"

namespace arrow {

class Array;

namespace random {

using SeedType = int32_t;
constexpr SeedType kSeedMax = std::numeric_limits<SeedType>::max();

class ARROW_TESTING_EXPORT RandomArrayGenerator {
 public:
  explicit RandomArrayGenerator(SeedType seed)
      : seed_distribution_(static_cast<SeedType>(1), kSeedMax), seed_rng_(seed) {}

  /// \brief Generate a null bitmap
  ///
  /// \param[in] size the size of the bitmap to generate
  /// \param[in] null_probability the probability of a bit being zero
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Buffer
  std::shared_ptr<Buffer> NullBitmap(int64_t size, double null_probability = 0,
                                     int64_t alignment = kDefaultBufferAlignment,
                                     MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random BooleanArray
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] true_probability the probability of a value being 1 / bit-set
  /// \param[in] null_probability the probability of a value being null
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> Boolean(int64_t size, double true_probability,
                                 double null_probability = 0,
                                 int64_t alignment = kDefaultBufferAlignment,
                                 MemoryPool* memory_pool = default_memory_pool());
  /// \brief Generate a random UInt8Array
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] min the lower bound of the uniform distribution
  /// \param[in] max the upper bound of the uniform distribution
  /// \param[in] null_probability the probability of a value being null
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> UInt8(int64_t size, uint8_t min, uint8_t max,
                               double null_probability = 0,
                               int64_t alignment = kDefaultBufferAlignment,
                               MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random Int8Array
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] min the lower bound of the uniform distribution
  /// \param[in] max the upper bound of the uniform distribution
  /// \param[in] null_probability the probability of a value being null
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> Int8(int64_t size, int8_t min, int8_t max,
                              double null_probability = 0,
                              int64_t alignment = kDefaultBufferAlignment,
                              MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random UInt16Array
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] min the lower bound of the uniform distribution
  /// \param[in] max the upper bound of the uniform distribution
  /// \param[in] null_probability the probability of a value being null
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> UInt16(int64_t size, uint16_t min, uint16_t max,
                                double null_probability = 0,
                                int64_t alignment = kDefaultBufferAlignment,
                                MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random Int16Array
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] min the lower bound of the uniform distribution
  /// \param[in] max the upper bound of the uniform distribution
  /// \param[in] null_probability the probability of a value being null
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> Int16(int64_t size, int16_t min, int16_t max,
                               double null_probability = 0,
                               int64_t alignment = kDefaultBufferAlignment,
                               MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random UInt32Array
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] min the lower bound of the uniform distribution
  /// \param[in] max the upper bound of the uniform distribution
  /// \param[in] null_probability the probability of a value being null
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> UInt32(int64_t size, uint32_t min, uint32_t max,
                                double null_probability = 0,
                                int64_t alignment = kDefaultBufferAlignment,
                                MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random Int32Array
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] min the lower bound of the uniform distribution
  /// \param[in] max the upper bound of the uniform distribution
  /// \param[in] null_probability the probability of a value being null
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> Int32(int64_t size, int32_t min, int32_t max,
                               double null_probability = 0,
                               int64_t alignment = kDefaultBufferAlignment,
                               MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random UInt64Array
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] min the lower bound of the uniform distribution
  /// \param[in] max the upper bound of the uniform distribution
  /// \param[in] null_probability the probability of a value being null
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> UInt64(int64_t size, uint64_t min, uint64_t max,
                                double null_probability = 0,
                                int64_t alignment = kDefaultBufferAlignment,
                                MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random Int64Array
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] min the lower bound of the uniform distribution
  /// \param[in] max the upper bound of the uniform distribution
  /// \param[in] null_probability the probability of a value being null
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> Int64(int64_t size, int64_t min, int64_t max,
                               double null_probability = 0,
                               int64_t alignment = kDefaultBufferAlignment,
                               MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random HalfFloatArray
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] min the lower bound of the distribution
  /// \param[in] max the upper bound of the distribution
  /// \param[in] null_probability the probability of a value being null
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> Float16(int64_t size, int16_t min, int16_t max,
                                 double null_probability = 0,
                                 int64_t alignment = kDefaultBufferAlignment,
                                 MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random FloatArray
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] min the lower bound of the uniform distribution
  /// \param[in] max the upper bound of the uniform distribution
  /// \param[in] null_probability the probability of a value being null
  /// \param[in] nan_probability the probability of a value being NaN
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> Float32(int64_t size, float min, float max,
                                 double null_probability = 0, double nan_probability = 0,
                                 int64_t alignment = kDefaultBufferAlignment,
                                 MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random DoubleArray
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] min the lower bound of the uniform distribution
  /// \param[in] max the upper bound of the uniform distribution
  /// \param[in] null_probability the probability of a value being null
  /// \param[in] nan_probability the probability of a value being NaN
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> Float64(int64_t size, double min, double max,
                                 double null_probability = 0, double nan_probability = 0,
                                 int64_t alignment = kDefaultBufferAlignment,
                                 MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random Date64Array
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] min the lower bound of the uniform distribution
  /// \param[in] max the upper bound of the uniform distribution
  /// \param[in] null_probability the probability of a value being null
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> Date64(int64_t size, int64_t min, int64_t max,
                                double null_probability = 0,
                                int64_t alignment = kDefaultBufferAlignment,
                                MemoryPool* memory_pool = default_memory_pool());

  template <typename ArrowType, typename CType = typename ArrowType::c_type>
  std::shared_ptr<Array> Numeric(int64_t size, CType min, CType max,
                                 double null_probability = 0,
                                 int64_t alignment = kDefaultBufferAlignment,
                                 MemoryPool* memory_pool = default_memory_pool()) {
    switch (ArrowType::type_id) {
      case Type::UINT8:
        return UInt8(size, static_cast<uint8_t>(min), static_cast<uint8_t>(max),
                     null_probability, alignment, memory_pool);
      case Type::INT8:
        return Int8(size, static_cast<int8_t>(min), static_cast<int8_t>(max),
                    null_probability, alignment, memory_pool);
      case Type::UINT16:
        return UInt16(size, static_cast<uint16_t>(min), static_cast<uint16_t>(max),
                      null_probability, alignment, memory_pool);
      case Type::INT16:
        return Int16(size, static_cast<int16_t>(min), static_cast<int16_t>(max),
                     null_probability, alignment, memory_pool);
      case Type::UINT32:
        return UInt32(size, static_cast<uint32_t>(min), static_cast<uint32_t>(max),
                      null_probability, alignment, memory_pool);
      case Type::INT32:
        return Int32(size, static_cast<int32_t>(min), static_cast<int32_t>(max),
                     null_probability, alignment, memory_pool);
      case Type::UINT64:
        return UInt64(size, static_cast<uint64_t>(min), static_cast<uint64_t>(max),
                      null_probability, alignment, memory_pool);
      case Type::INT64:
        return Int64(size, static_cast<int64_t>(min), static_cast<int64_t>(max),
                     null_probability, alignment, memory_pool);
      case Type::HALF_FLOAT:
        return Float16(size, static_cast<int16_t>(min), static_cast<int16_t>(max),
                       null_probability, alignment, memory_pool);
      case Type::FLOAT:
        return Float32(size, static_cast<float>(min), static_cast<float>(max),
                       null_probability, /*nan_probability=*/0, alignment, memory_pool);
      case Type::DOUBLE:
        return Float64(size, static_cast<double>(min), static_cast<double>(max),
                       null_probability, /*nan_probability=*/0, alignment, memory_pool);
      case Type::DATE64:
        return Date64(size, static_cast<int64_t>(min), static_cast<int64_t>(max),
                      null_probability, alignment, memory_pool);
      default:
        return nullptr;
    }
  }

  /// \brief Generate a random Decimal32Array
  ///
  /// \param[in] type the type of the array to generate
  ///            (must be an instance of Decimal32Type)
  /// \param[in] size the size of the array to generate
  /// \param[in] null_probability the probability of a value being null
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> Decimal32(std::shared_ptr<DataType> type, int64_t size,
                                   double null_probability = 0,
                                   int64_t alignment = kDefaultBufferAlignment,
                                   MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random Decimal64Array
  ///
  /// \param[in] type the type of the array to generate
  ///            (must be an instance of Decimal64Type)
  /// \param[in] size the size of the array to generate
  /// \param[in] null_probability the probability of a value being null
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> Decimal64(std::shared_ptr<DataType> type, int64_t size,
                                   double null_probability = 0,
                                   int64_t alignment = kDefaultBufferAlignment,
                                   MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random Decimal128Array
  ///
  /// \param[in] type the type of the array to generate
  ///            (must be an instance of Decimal128Type)
  /// \param[in] size the size of the array to generate
  /// \param[in] null_probability the probability of a value being null
  /// \param[in] alignment alignment for memory allocations (in bytes)
  /// \param[in] memory_pool memory pool to allocate memory from
  ///
  /// \return a generated Array
  std::shared_ptr<Array> Decimal128(std::shared_ptr<DataType> type, int64_t size,
                                    double null_probability = 0,
                                    int64_t alignment = kDefaultBufferAlignment,
                                    MemoryPool* memory_pool = default_memory_pool());

  /// \brief Generate a random Decimal256Array
Loading ...