Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

arrow-nightlies / pyarrow   python

Repository URL to install this package:

Version: 19.0.0.dev70 

/ include / arrow / compute / kernel.h

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

// NOTE: API is EXPERIMENTAL and will change without going through a
// deprecation cycle

#pragma once

#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "arrow/buffer.h"
#include "arrow/compute/exec.h"
#include "arrow/datum.h"
#include "arrow/device_allocation_type_set.h"
#include "arrow/memory_pool.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"

// macOS defines PREALLOCATE as a preprocessor macro in the header sys/vnode.h.
// No other BSD seems to do so. The name is used as an identifier in MemAllocation enum.
#if defined(__APPLE__) && defined(PREALLOCATE)
#  undef PREALLOCATE
#endif

namespace arrow {
namespace compute {

class FunctionOptions;

/// \brief Base class for opaque kernel-specific state. For example, if there
/// is some kind of initialization required.
struct ARROW_EXPORT KernelState {
  virtual ~KernelState() = default;
};

/// \brief Context/state for the execution of a particular kernel.
class ARROW_EXPORT KernelContext {
 public:
  // Can pass optional backreference; not used consistently for the
  // moment but will be made so in the future
  explicit KernelContext(ExecContext* exec_ctx, const Kernel* kernel = NULLPTR)
      : exec_ctx_(exec_ctx), kernel_(kernel) {}

  /// \brief Allocate buffer from the context's memory pool. The contents are
  /// not initialized.
  Result<std::shared_ptr<ResizableBuffer>> Allocate(int64_t nbytes);

  /// \brief Allocate buffer for bitmap from the context's memory pool. Like
  /// Allocate, the contents of the buffer are not initialized but the last
  /// byte is preemptively zeroed to help avoid ASAN or valgrind issues.
  Result<std::shared_ptr<ResizableBuffer>> AllocateBitmap(int64_t num_bits);

  /// \brief Assign the active KernelState to be utilized for each stage of
  /// kernel execution. Ownership and memory lifetime of the KernelState must
  /// be minded separately.
  void SetState(KernelState* state) { state_ = state; }

  // Set kernel that is being invoked since some kernel
  // implementations will examine the kernel state.
  void SetKernel(const Kernel* kernel) { kernel_ = kernel; }

  KernelState* state() { return state_; }

  /// \brief Configuration related to function execution that is to be shared
  /// across multiple kernels.
  ExecContext* exec_context() { return exec_ctx_; }

  /// \brief The memory pool to use for allocations. For now, it uses the
  /// MemoryPool contained in the ExecContext used to create the KernelContext.
  MemoryPool* memory_pool() { return exec_ctx_->memory_pool(); }

  const Kernel* kernel() const { return kernel_; }

 private:
  ExecContext* exec_ctx_;
  KernelState* state_ = NULLPTR;
  const Kernel* kernel_ = NULLPTR;
};

/// \brief An type-checking interface to permit customizable validation rules
/// for use with InputType and KernelSignature. This is for scenarios where the
/// acceptance is not an exact type instance, such as a TIMESTAMP type for a
/// specific TimeUnit, but permitting any time zone.
struct ARROW_EXPORT TypeMatcher {
  virtual ~TypeMatcher() = default;

  /// \brief Return true if this matcher accepts the data type.
  virtual bool Matches(const DataType& type) const = 0;

  /// \brief A human-interpretable string representation of what the type
  /// matcher checks for, usable when printing KernelSignature or formatting
  /// error messages.
  virtual std::string ToString() const = 0;

  /// \brief Return true if this TypeMatcher contains the same matching rule as
  /// the other. Currently depends on RTTI.
  virtual bool Equals(const TypeMatcher& other) const = 0;
};

namespace match {

/// \brief Match any DataType instance having the same DataType::id.
ARROW_EXPORT std::shared_ptr<TypeMatcher> SameTypeId(Type::type type_id);

/// \brief Match any TimestampType instance having the same unit, but the time
/// zones can be different.
ARROW_EXPORT std::shared_ptr<TypeMatcher> TimestampTypeUnit(TimeUnit::type unit);
ARROW_EXPORT std::shared_ptr<TypeMatcher> Time32TypeUnit(TimeUnit::type unit);
ARROW_EXPORT std::shared_ptr<TypeMatcher> Time64TypeUnit(TimeUnit::type unit);
ARROW_EXPORT std::shared_ptr<TypeMatcher> DurationTypeUnit(TimeUnit::type unit);

// \brief Match any integer type
ARROW_EXPORT std::shared_ptr<TypeMatcher> Integer();

// Match types using 32-bit varbinary representation
ARROW_EXPORT std::shared_ptr<TypeMatcher> BinaryLike();

// Match types using 64-bit varbinary representation
ARROW_EXPORT std::shared_ptr<TypeMatcher> LargeBinaryLike();

// Match any fixed binary type
ARROW_EXPORT std::shared_ptr<TypeMatcher> FixedSizeBinaryLike();

// \brief Match any primitive type (boolean or any type representable as a C
// Type)
ARROW_EXPORT std::shared_ptr<TypeMatcher> Primitive();

// \brief Match any integer type that can be used as run-end in run-end encoded
// arrays
ARROW_EXPORT std::shared_ptr<TypeMatcher> RunEndInteger();

/// \brief Match run-end encoded types that use any valid run-end type and
/// encode specific value types
///
/// @param[in] value_type_matcher a matcher that is applied to the values field
ARROW_EXPORT std::shared_ptr<TypeMatcher> RunEndEncoded(
    std::shared_ptr<TypeMatcher> value_type_matcher);

/// \brief Match run-end encoded types that use any valid run-end type and
/// encode specific value types
///
/// @param[in] value_type_id a type id that the type of the values field should match
ARROW_EXPORT std::shared_ptr<TypeMatcher> RunEndEncoded(Type::type value_type_id);

/// \brief Match run-end encoded types that encode specific run-end and value types
///
/// @param[in] run_end_type_matcher a matcher that is applied to the run_ends field
/// @param[in] value_type_matcher a matcher that is applied to the values field
ARROW_EXPORT std::shared_ptr<TypeMatcher> RunEndEncoded(
    std::shared_ptr<TypeMatcher> run_end_type_matcher,
    std::shared_ptr<TypeMatcher> value_type_matcher);

}  // namespace match

/// \brief An object used for type-checking arguments to be passed to a kernel
/// and stored in a KernelSignature. The type-checking rule can be supplied
/// either with an exact DataType instance or a custom TypeMatcher.
class ARROW_EXPORT InputType {
 public:
  /// \brief The kind of type-checking rule that the InputType contains.
  enum Kind {
    /// \brief Accept any value type.
    ANY_TYPE,

    /// \brief A fixed arrow::DataType and will only exact match having this
    /// exact type (e.g. same TimestampType unit, same decimal scale and
    /// precision, or same nested child types).
    EXACT_TYPE,

    /// \brief Uses a TypeMatcher implementation to check the type.
    USE_TYPE_MATCHER
  };

  /// \brief Accept any value type
  InputType() : kind_(ANY_TYPE) {}

  /// \brief Accept an exact value type.
  InputType(std::shared_ptr<DataType> type)  // NOLINT implicit construction
      : kind_(EXACT_TYPE), type_(std::move(type)) {}

  /// \brief Use the passed TypeMatcher to type check.
  InputType(std::shared_ptr<TypeMatcher> type_matcher)  // NOLINT implicit construction
      : kind_(USE_TYPE_MATCHER), type_matcher_(std::move(type_matcher)) {}

  /// \brief Match any type with the given Type::type. Uses a TypeMatcher for
  /// its implementation.
  InputType(Type::type type_id)  // NOLINT implicit construction
      : InputType(match::SameTypeId(type_id)) {}

  InputType(const InputType& other) { CopyInto(other); }

  void operator=(const InputType& other) { CopyInto(other); }

  InputType(InputType&& other) { MoveInto(std::forward<InputType>(other)); }

  void operator=(InputType&& other) { MoveInto(std::forward<InputType>(other)); }

  // \brief Match any input (array, scalar of any type)
  static InputType Any() { return InputType(); }

  /// \brief Return true if this input type matches the same type cases as the
  /// other.
  bool Equals(const InputType& other) const;

  bool operator==(const InputType& other) const { return this->Equals(other); }

  bool operator!=(const InputType& other) const { return !(*this == other); }

  /// \brief Return hash code.
  size_t Hash() const;

  /// \brief Render a human-readable string representation.
  std::string ToString() const;

  /// \brief Return true if the Datum matches this argument kind in
  /// type (and only allows scalar or array-like Datums).
  bool Matches(const Datum& value) const;

  /// \brief Return true if the type matches this InputType
  bool Matches(const DataType& type) const;

  /// \brief The type matching rule that this InputType uses.
  Kind kind() const { return kind_; }

  /// \brief For InputType::EXACT_TYPE kind, the exact type that this InputType
  /// must match. Otherwise this function should not be used and will assert in
  /// debug builds.
  const std::shared_ptr<DataType>& type() const;

  /// \brief For InputType::USE_TYPE_MATCHER, the TypeMatcher to be used for
  /// checking the type of a value. Otherwise this function should not be used
  /// and will assert in debug builds.
  const TypeMatcher& type_matcher() const;

 private:
  void CopyInto(const InputType& other) {
    this->kind_ = other.kind_;
    this->type_ = other.type_;
    this->type_matcher_ = other.type_matcher_;
  }

  void MoveInto(InputType&& other) {
    this->kind_ = other.kind_;
    this->type_ = std::move(other.type_);
    this->type_matcher_ = std::move(other.type_matcher_);
  }

  Kind kind_;

  // For EXACT_TYPE Kind
  std::shared_ptr<DataType> type_;

  // For USE_TYPE_MATCHER Kind
  std::shared_ptr<TypeMatcher> type_matcher_;
};

/// \brief Container to capture both exact and input-dependent output types.
class ARROW_EXPORT OutputType {
 public:
  /// \brief An enum indicating whether the value type is an invariant fixed
  /// value or one that's computed by a kernel-defined resolver function.
  enum ResolveKind { FIXED, COMPUTED };

  /// Type resolution function. Given input types, return output type.  This
  /// function MAY may use the kernel state to decide the output type based on
  /// the FunctionOptions.
  ///
  /// This function SHOULD _not_ be used to check for arity, that is to be
  /// performed one or more layers above.
  using Resolver =
      std::function<Result<TypeHolder>(KernelContext*, const std::vector<TypeHolder>&)>;

  /// \brief Output an exact type
  OutputType(std::shared_ptr<DataType> type)  // NOLINT implicit construction
      : kind_(FIXED), type_(std::move(type)) {}

  /// \brief Output a computed type depending on actual input types
  template <typename Fn>
  OutputType(Fn resolver)  // NOLINT implicit construction
      : kind_(COMPUTED), resolver_(std::move(resolver)) {}

  OutputType(const OutputType& other) {
    this->kind_ = other.kind_;
    this->type_ = other.type_;
    this->resolver_ = other.resolver_;
  }

  OutputType(OutputType&& other) {
    this->kind_ = other.kind_;
    this->type_ = std::move(other.type_);
    this->resolver_ = other.resolver_;
  }

  OutputType& operator=(const OutputType&) = default;
  OutputType& operator=(OutputType&&) = default;

  /// \brief Return the type of the expected output value of the kernel given
  /// the input argument types. The resolver may make use of state information
  /// kept in the KernelContext.
  Result<TypeHolder> Resolve(KernelContext* ctx,
                             const std::vector<TypeHolder>& args) const;

  /// \brief The exact output value type for the FIXED kind.
  const std::shared_ptr<DataType>& type() const;

  /// \brief For use with COMPUTED resolution strategy. It may be more
  /// convenient to invoke this with OutputType::Resolve returned from this
  /// method.
  const Resolver& resolver() const;

  /// \brief Render a human-readable string representation.
  std::string ToString() const;

  /// \brief Return the kind of type resolution of this output type, whether
  /// fixed/invariant or computed by a resolver.
  ResolveKind kind() const { return kind_; }

 private:
  ResolveKind kind_;

  // For FIXED resolution
  std::shared_ptr<DataType> type_;
Loading ...