// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// NOTE: API is EXPERIMENTAL and will change without going through a
// deprecation cycle
#pragma once
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "arrow/buffer.h"
#include "arrow/compute/exec.h"
#include "arrow/datum.h"
#include "arrow/device_allocation_type_set.h"
#include "arrow/memory_pool.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
// macOS defines PREALLOCATE as a preprocessor macro in the header sys/vnode.h.
// No other BSD seems to do so. The name is used as an identifier in MemAllocation enum,
// so the macro is removed here (only on Apple platforms, and only when it is
// actually defined) to keep that identifier from being macro-expanded.
#if defined(__APPLE__) && defined(PREALLOCATE)
# undef PREALLOCATE
#endif
namespace arrow {
namespace compute {
class FunctionOptions;
/// \brief Base class for opaque kernel-specific state. For example, if there
/// is some kind of initialization required.
///
/// The struct declares nothing but a virtual destructor, so objects of derived
/// state types can be destroyed correctly through a KernelState pointer.
struct ARROW_EXPORT KernelState {
virtual ~KernelState() = default;
};
/// \brief Context/state for the execution of a particular kernel.
class ARROW_EXPORT KernelContext {
public:
// Can pass optional backreference; not used consistently for the
// moment but will be made so in the future
explicit KernelContext(ExecContext* exec_ctx, const Kernel* kernel = NULLPTR)
: exec_ctx_(exec_ctx), kernel_(kernel) {}
/// \brief Allocate buffer from the context's memory pool. The contents are
/// not initialized.
Result<std::shared_ptr<ResizableBuffer>> Allocate(int64_t nbytes);
/// \brief Allocate buffer for bitmap from the context's memory pool. Like
/// Allocate, the contents of the buffer are not initialized but the last
/// byte is preemptively zeroed to help avoid ASAN or valgrind issues.
Result<std::shared_ptr<ResizableBuffer>> AllocateBitmap(int64_t num_bits);
/// \brief Assign the active KernelState to be utilized for each stage of
/// kernel execution. Ownership and memory lifetime of the KernelState must
/// be minded separately.
void SetState(KernelState* state) { state_ = state; }
// Set kernel that is being invoked since some kernel
// implementations will examine the kernel state.
void SetKernel(const Kernel* kernel) { kernel_ = kernel; }
KernelState* state() { return state_; }
/// \brief Configuration related to function execution that is to be shared
/// across multiple kernels.
ExecContext* exec_context() { return exec_ctx_; }
/// \brief The memory pool to use for allocations. For now, it uses the
/// MemoryPool contained in the ExecContext used to create the KernelContext.
MemoryPool* memory_pool() { return exec_ctx_->memory_pool(); }
const Kernel* kernel() const { return kernel_; }
private:
ExecContext* exec_ctx_;
KernelState* state_ = NULLPTR;
const Kernel* kernel_ = NULLPTR;
};
/// \brief A type-checking interface to permit customizable validation rules
/// for use with InputType and KernelSignature. This is for scenarios where the
/// acceptance is not an exact type instance, such as a TIMESTAMP type for a
/// specific TimeUnit, but permitting any time zone.
///
/// Every operation is pure virtual; concrete matchers supply all three.
struct ARROW_EXPORT TypeMatcher {
virtual ~TypeMatcher() = default;
/// \brief Return true if this matcher accepts the data type.
/// \param[in] type the data type to test against this matcher's rule
virtual bool Matches(const DataType& type) const = 0;
/// \brief A human-interpretable string representation of what the type
/// matcher checks for, usable when printing KernelSignature or formatting
/// error messages.
virtual std::string ToString() const = 0;
/// \brief Return true if this TypeMatcher contains the same matching rule as
/// the other. Currently depends on RTTI.
/// \param[in] other the matcher to compare against
virtual bool Equals(const TypeMatcher& other) const = 0;
};
// Factory functions returning ready-made TypeMatcher instances.
namespace match {
/// \brief Match any DataType instance having the same DataType::id.
ARROW_EXPORT std::shared_ptr<TypeMatcher> SameTypeId(Type::type type_id);
/// \brief Match any TimestampType instance having the same unit, but the time
/// zones can be different.
ARROW_EXPORT std::shared_ptr<TypeMatcher> TimestampTypeUnit(TimeUnit::type unit);
/// \brief Time32 counterpart of TimestampTypeUnit.
/// NOTE(review): presumably matches any Time32Type with the given unit; the
/// implementation is not visible from this header -- confirm.
ARROW_EXPORT std::shared_ptr<TypeMatcher> Time32TypeUnit(TimeUnit::type unit);
/// \brief Time64 counterpart of TimestampTypeUnit.
/// NOTE(review): presumably matches any Time64Type with the given unit; the
/// implementation is not visible from this header -- confirm.
ARROW_EXPORT std::shared_ptr<TypeMatcher> Time64TypeUnit(TimeUnit::type unit);
/// \brief Duration counterpart of TimestampTypeUnit.
/// NOTE(review): presumably matches any DurationType with the given unit; the
/// implementation is not visible from this header -- confirm.
ARROW_EXPORT std::shared_ptr<TypeMatcher> DurationTypeUnit(TimeUnit::type unit);
/// \brief Match any integer type
ARROW_EXPORT std::shared_ptr<TypeMatcher> Integer();
/// \brief Match types using 32-bit varbinary representation
ARROW_EXPORT std::shared_ptr<TypeMatcher> BinaryLike();
/// \brief Match types using 64-bit varbinary representation
ARROW_EXPORT std::shared_ptr<TypeMatcher> LargeBinaryLike();
/// \brief Match any fixed binary type
ARROW_EXPORT std::shared_ptr<TypeMatcher> FixedSizeBinaryLike();
/// \brief Match any primitive type (boolean or any type representable as a C
/// Type)
ARROW_EXPORT std::shared_ptr<TypeMatcher> Primitive();
/// \brief Match any integer type that can be used as run-end in run-end encoded
/// arrays
ARROW_EXPORT std::shared_ptr<TypeMatcher> RunEndInteger();
/// \brief Match run-end encoded types that use any valid run-end type and
/// encode specific value types
///
/// @param[in] value_type_matcher a matcher that is applied to the values field
ARROW_EXPORT std::shared_ptr<TypeMatcher> RunEndEncoded(
std::shared_ptr<TypeMatcher> value_type_matcher);
/// \brief Match run-end encoded types that use any valid run-end type and
/// encode specific value types
///
/// @param[in] value_type_id a type id that the type of the values field should match
ARROW_EXPORT std::shared_ptr<TypeMatcher> RunEndEncoded(Type::type value_type_id);
/// \brief Match run-end encoded types that encode specific run-end and value types
///
/// @param[in] run_end_type_matcher a matcher that is applied to the run_ends field
/// @param[in] value_type_matcher a matcher that is applied to the values field
ARROW_EXPORT std::shared_ptr<TypeMatcher> RunEndEncoded(
std::shared_ptr<TypeMatcher> run_end_type_matcher,
std::shared_ptr<TypeMatcher> value_type_matcher);
} // namespace match
/// \brief An object used for type-checking arguments to be passed to a kernel
/// and stored in a KernelSignature. The type-checking rule can be supplied
/// either with an exact DataType instance or a custom TypeMatcher.
class ARROW_EXPORT InputType {
public:
/// \brief The kind of type-checking rule that the InputType contains.
enum Kind {
/// \brief Accept any value type.
ANY_TYPE,
/// \brief A fixed arrow::DataType and will only exact match having this
/// exact type (e.g. same TimestampType unit, same decimal scale and
/// precision, or same nested child types).
EXACT_TYPE,
/// \brief Uses a TypeMatcher implementation to check the type.
USE_TYPE_MATCHER
};
/// \brief Accept any value type
InputType() : kind_(ANY_TYPE) {}
/// \brief Accept an exact value type.
InputType(std::shared_ptr<DataType> type) // NOLINT implicit construction
: kind_(EXACT_TYPE), type_(std::move(type)) {}
/// \brief Use the passed TypeMatcher to type check.
InputType(std::shared_ptr<TypeMatcher> type_matcher) // NOLINT implicit construction
: kind_(USE_TYPE_MATCHER), type_matcher_(std::move(type_matcher)) {}
/// \brief Match any type with the given Type::type. Uses a TypeMatcher for
/// its implementation.
InputType(Type::type type_id) // NOLINT implicit construction
: InputType(match::SameTypeId(type_id)) {}
InputType(const InputType& other) { CopyInto(other); }
void operator=(const InputType& other) { CopyInto(other); }
InputType(InputType&& other) { MoveInto(std::forward<InputType>(other)); }
void operator=(InputType&& other) { MoveInto(std::forward<InputType>(other)); }
// \brief Match any input (array, scalar of any type)
static InputType Any() { return InputType(); }
/// \brief Return true if this input type matches the same type cases as the
/// other.
bool Equals(const InputType& other) const;
bool operator==(const InputType& other) const { return this->Equals(other); }
bool operator!=(const InputType& other) const { return !(*this == other); }
/// \brief Return hash code.
size_t Hash() const;
/// \brief Render a human-readable string representation.
std::string ToString() const;
/// \brief Return true if the Datum matches this argument kind in
/// type (and only allows scalar or array-like Datums).
bool Matches(const Datum& value) const;
/// \brief Return true if the type matches this InputType
bool Matches(const DataType& type) const;
/// \brief The type matching rule that this InputType uses.
Kind kind() const { return kind_; }
/// \brief For InputType::EXACT_TYPE kind, the exact type that this InputType
/// must match. Otherwise this function should not be used and will assert in
/// debug builds.
const std::shared_ptr<DataType>& type() const;
/// \brief For InputType::USE_TYPE_MATCHER, the TypeMatcher to be used for
/// checking the type of a value. Otherwise this function should not be used
/// and will assert in debug builds.
const TypeMatcher& type_matcher() const;
private:
void CopyInto(const InputType& other) {
this->kind_ = other.kind_;
this->type_ = other.type_;
this->type_matcher_ = other.type_matcher_;
}
void MoveInto(InputType&& other) {
this->kind_ = other.kind_;
this->type_ = std::move(other.type_);
this->type_matcher_ = std::move(other.type_matcher_);
}
Kind kind_;
// For EXACT_TYPE Kind
std::shared_ptr<DataType> type_;
// For USE_TYPE_MATCHER Kind
std::shared_ptr<TypeMatcher> type_matcher_;
};
/// \brief Container to capture both exact and input-dependent output types.
class ARROW_EXPORT OutputType {
public:
/// \brief An enum indicating whether the value type is an invariant fixed
/// value or one that's computed by a kernel-defined resolver function.
enum ResolveKind { FIXED, COMPUTED };
/// Type resolution function. Given input types, return output type. This
/// function MAY use the kernel state to decide the output type based on
/// the FunctionOptions.
///
/// This function SHOULD _not_ be used to check for arity, that is to be
/// performed one or more layers above.
using Resolver =
std::function<Result<TypeHolder>(KernelContext*, const std::vector<TypeHolder>&)>;
/// \brief Output an exact type
OutputType(std::shared_ptr<DataType> type) // NOLINT implicit construction
: kind_(FIXED), type_(std::move(type)) {}
/// \brief Output a computed type depending on actual input types
template <typename Fn>
OutputType(Fn resolver) // NOLINT implicit construction
: kind_(COMPUTED), resolver_(std::move(resolver)) {}
OutputType(const OutputType& other) {
this->kind_ = other.kind_;
this->type_ = other.type_;
this->resolver_ = other.resolver_;
}
OutputType(OutputType&& other) {
this->kind_ = other.kind_;
this->type_ = std::move(other.type_);
this->resolver_ = other.resolver_;
}
OutputType& operator=(const OutputType&) = default;
OutputType& operator=(OutputType&&) = default;
/// \brief Return the type of the expected output value of the kernel given
/// the input argument types. The resolver may make use of state information
/// kept in the KernelContext.
Result<TypeHolder> Resolve(KernelContext* ctx,
const std::vector<TypeHolder>& args) const;
/// \brief The exact output value type for the FIXED kind.
const std::shared_ptr<DataType>& type() const;
/// \brief For use with COMPUTED resolution strategy. It may be more
/// convenient to invoke this with OutputType::Resolve returned from this
/// method.
const Resolver& resolver() const;
/// \brief Render a human-readable string representation.
std::string ToString() const;
/// \brief Return the kind of type resolution of this output type, whether
/// fixed/invariant or computed by a resolver.
ResolveKind kind() const { return kind_; }
private:
ResolveKind kind_;
// For FIXED resolution
std::shared_ptr<DataType> type_;
Loading ...