// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <atomic>
#include <climits>
#include <cstdint>
#include <iosfwd>
#include <limits>
#include <memory>
#include <optional>
#include <string>
#include <utility>
#include <variant>
#include <vector>
#include "arrow/result.h"
#include "arrow/type_fwd.h" // IWYU pragma: export
#include "arrow/util/checked_cast.h"
#include "arrow/util/endian.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
#include "arrow/visitor.h" // IWYU pragma: keep
namespace arrow {
namespace detail {
/// \defgroup numeric-datatypes Datatypes for numeric data
/// @{
/// @}
/// \defgroup binary-datatypes Datatypes for binary/string data
/// @{
/// @}
/// \defgroup temporal-datatypes Datatypes for temporal data
/// @{
/// @}
/// \defgroup nested-datatypes Datatypes for nested data
/// @{
/// @}
/// \brief Mixin holding two lazily populated fingerprint strings.
///
/// Each fingerprint sits behind an atomic pointer that starts out null. The
/// inline accessors hand back the pointed-to string when the pointer is set
/// and otherwise defer to an out-of-line slow path declared below.
class ARROW_EXPORT Fingerprintable {
 public:
  virtual ~Fingerprintable();

  /// \brief Return the fingerprint string.
  ///
  /// Performs one atomic load of the cached pointer; a non-null pointer is the
  /// expected case. When the pointer is still null the result comes from
  /// LoadFingerprintSlow().
  const std::string& fingerprint() const {
    const std::string* cached = fingerprint_.load();
    return ARROW_PREDICT_TRUE(cached != NULLPTR) ? *cached : LoadFingerprintSlow();
  }

  /// \brief Return the metadata fingerprint string.
  ///
  /// Same shape as fingerprint(), but reads metadata_fingerprint_ and falls
  /// back to LoadMetadataFingerprintSlow().
  const std::string& metadata_fingerprint() const {
    const std::string* cached = metadata_fingerprint_.load();
    return ARROW_PREDICT_TRUE(cached != NULLPTR) ? *cached
                                                 : LoadMetadataFingerprintSlow();
  }

 protected:
  // Out-of-line fallbacks used when the corresponding cache pointer is null.
  // NOTE(review): presumably these call the Compute* hooks and publish the
  // result into the atomic members -- confirm against the definitions.
  const std::string& LoadFingerprintSlow() const;
  const std::string& LoadMetadataFingerprintSlow() const;

  // Hooks every concrete subclass must implement to produce the strings.
  virtual std::string ComputeFingerprint() const = 0;
  virtual std::string ComputeMetadataFingerprint() const = 0;

  // Cache slots; mutable so the const accessors above can read (and the slow
  // paths can fill) them. Both start out null.
  mutable std::atomic<std::string*> fingerprint_{NULLPTR};
  mutable std::atomic<std::string*> metadata_fingerprint_{NULLPTR};
};
} // namespace detail
/// EXPERIMENTAL: Layout specification for a data type
struct ARROW_EXPORT DataTypeLayout {
enum BufferKind { FIXED_WIDTH, VARIABLE_WIDTH, BITMAP, ALWAYS_NULL };
/// Layout specification for a single data type buffer
struct BufferSpec {
BufferKind kind;
int64_t byte_width; // For FIXED_WIDTH
bool operator==(const BufferSpec& other) const {
return kind == other.kind &&
(kind != FIXED_WIDTH || byte_width == other.byte_width);
}
bool operator!=(const BufferSpec& other) const { return !(*this == other); }
};
static BufferSpec FixedWidth(int64_t w) { return BufferSpec{FIXED_WIDTH, w}; }
static BufferSpec VariableWidth() { return BufferSpec{VARIABLE_WIDTH, -1}; }
static BufferSpec Bitmap() { return BufferSpec{BITMAP, -1}; }
static BufferSpec AlwaysNull() { return BufferSpec{ALWAYS_NULL, -1}; }
/// A vector of buffer layout specifications, one for each expected buffer
std::vector<BufferSpec> buffers;
/// Whether this type expects an associated dictionary array.
bool has_dictionary = false;
/// If this is provided, the number of buffers expected is only lower-bounded by
/// buffers.size(). Buffers beyond this lower bound are expected to conform to
/// variadic_spec.
std::optional<BufferSpec> variadic_spec;
explicit DataTypeLayout(std::vector<BufferSpec> buffers,
std::optional<BufferSpec> variadic_spec = {})
: buffers(std::move(buffers)), variadic_spec(variadic_spec) {}
};
/// \brief Base class for all data types
///
/// Every data type in this library is *logical*. A type may be expressed as a
/// primitive physical type (bytes or bits of some fixed size), as a nested
/// type built from other data types, or in terms of another data type (e.g. a
/// timestamp encoded as an int64).
///
/// Simple data types can be fully described by their Type::type id, whereas
/// complex data types are usually parametric.
class ARROW_EXPORT DataType : public std::enable_shared_from_this<DataType>,
                              public detail::Fingerprintable,
                              public util::EqualityComparable<DataType> {
 public:
  /// \brief Construct a type carrying the given id; the child list starts empty.
  explicit DataType(Type::type id) : detail::Fingerprintable(), id_(id) {}

  ~DataType() override;

  /// \brief Return whether the types are equal
  ///
  /// Types that are merely logically convertible into one another (e.g.
  /// List<UInt8> and Binary) do NOT compare equal.
  bool Equals(const DataType& other, bool check_metadata = false) const;

  /// \brief Return whether the types are equal (shared_ptr overload)
  bool Equals(const std::shared_ptr<DataType>& other, bool check_metadata = false) const;

  /// \brief Return the child field at index i.
  ///
  /// The index is used as-is to subscript the child vector; no bounds check
  /// is performed here.
  const std::shared_ptr<Field>& field(int i) const { return children_[i]; }

  /// \brief Return the children fields associated with this type.
  const FieldVector& fields() const { return children_; }

  /// \brief Return the number of children fields associated with this type.
  int num_fields() const {
    const auto child_count = children_.size();
    return static_cast<int>(child_count);
  }

  /// \brief Apply the TypeVisitor::Visit() method specialized to the data type
  Status Accept(TypeVisitor* visitor) const;

  /// \brief A string representation of the type, including any children
  virtual std::string ToString(bool show_metadata = false) const = 0;

  /// \brief Return hash value (excluding metadata in child fields)
  size_t Hash() const;

  /// \brief A string name of the type, omitting any child fields
  ///
  /// \since 0.7.0
  virtual std::string name() const = 0;

  /// \brief Return the data type layout. Children are not included.
  ///
  /// \note Experimental API
  virtual DataTypeLayout layout() const = 0;

  /// \brief Return the type category
  Type::type id() const { return id_; }

  /// \brief Return the type category of the storage type
  ///
  /// The base implementation simply returns id().
  virtual Type::type storage_id() const { return id_; }

  /// \brief Returns the type's fixed byte width, if any. Returns -1
  /// for non-fixed-width types, and should only be used for
  /// subclasses of FixedWidthType
  ///
  /// The default derives the value from bit_width() using integer division
  /// by 8, so a positive bit width below 8 produces 0.
  virtual int32_t byte_width() const {
    const int32_t bits = this->bit_width();
    if (bits <= 0) {
      return -1;
    }
    return bits / 8;
  }

  /// \brief Returns the type's fixed bit width, if any. Returns -1
  /// for non-fixed-width types, and should only be used for
  /// subclasses of FixedWidthType
  virtual int bit_width() const { return -1; }

  /// \brief EXPERIMENTAL: Enable retrieving shared_ptr<DataType> from a const
  /// context.
  ///
  /// Casts away constness and forwards to shared_from_this(), so the object
  /// must already be managed by a std::shared_ptr.
  std::shared_ptr<DataType> GetSharedPtr() const {
    auto* self = const_cast<DataType*>(this);
    return self->shared_from_this();
  }

 protected:
  // Dummy version that returns a null string (indicating not implemented).
  // Subclasses should override it to enable fast equality checks.
  std::string ComputeFingerprint() const override;

  // Generic version that works for all regular types, nested or not.
  std::string ComputeMetadataFingerprint() const override;

  Type::type id_;
  FieldVector children_;

 private:
  ARROW_DISALLOW_COPY_AND_ASSIGN(DataType);
};
/// \brief EXPERIMENTAL: Container for a type pointer which can hold a
/// dynamically created shared_ptr<DataType> if it needs to.
struct ARROW_EXPORT TypeHolder {
const DataType* type = NULLPTR;
std::shared_ptr<DataType> owned_type;
TypeHolder() = default;
TypeHolder(const TypeHolder& other) = default;
TypeHolder& operator=(const TypeHolder& other) = default;
TypeHolder(TypeHolder&& other) = default;
TypeHolder& operator=(TypeHolder&& other) = default;
TypeHolder(std::shared_ptr<DataType> owned_type) // NOLINT implicit construction
: type(owned_type.get()), owned_type(std::move(owned_type)) {}
TypeHolder(const DataType* type) // NOLINT implicit construction
: type(type) {}
Type::type id() const { return this->type->id(); }
std::shared_ptr<DataType> GetSharedPtr() const {
return this->type != NULLPTR ? this->type->GetSharedPtr() : NULLPTR;
}
const DataType& operator*() const { return *this->type; }
operator bool() const { return this->type != NULLPTR; }
bool operator==(const TypeHolder& other) const {
if (type == other.type) return true;
if (type == NULLPTR || other.type == NULLPTR) return false;
return type->Equals(*other.type);
}
bool operator==(decltype(NULLPTR)) const { return this->type == NULLPTR; }
bool operator==(const DataType& other) const {
if (this->type == NULLPTR) return false;
return other.Equals(*this->type);
}
bool operator!=(const DataType& other) const { return !(*this == other); }
bool operator==(const std::shared_ptr<DataType>& other) const {
return *this == *other;
}
bool operator!=(const TypeHolder& other) const { return !(*this == other); }
std::string ToString(bool show_metadata = false) const {
return this->type ? this->type->ToString(show_metadata) : "<NULLPTR>";
}
static std::string ToString(const std::vector<TypeHolder>&, bool show_metadata = false);
static std::vector<TypeHolder> FromTypes(
const std::vector<std::shared_ptr<DataType>>& types);
};
/// \brief Stream insertion operator for a DataType.
///
/// Declaration only. This header includes <iosfwd>, so std::ostream is merely
/// forward-declared here.
/// NOTE(review): presumably writes the type's ToString() text -- confirm
/// against the definition.
ARROW_EXPORT
std::ostream& operator<<(std::ostream& os, const DataType& type);
/// \brief Stream insertion operator for a TypeHolder.
///
/// Declaration only.
/// NOTE(review): presumably writes TypeHolder::ToString() -- confirm against
/// the definition.
ARROW_EXPORT
std::ostream& operator<<(std::ostream& os, const TypeHolder& type);
/// \brief Return the compatible physical data type
///
/// Some types may have distinct logical meanings but the exact same physical
/// representation. For example, TimestampType has Int64Type as a physical
/// type (defined as TimestampType::PhysicalType).
///
/// The return value is as follows:
/// - if a `PhysicalType` alias exists in the concrete type class, return
/// an instance of `PhysicalType`.
/// - otherwise, return the input type itself.
///
/// \param[in] type the logical type to map to its physical type
/// \return the physical type, per the rules listed above
///
/// NOTE(review): unlike the operator<< declarations just above, this
/// declaration carries no ARROW_EXPORT -- confirm whether that is intended.
std::shared_ptr<DataType> GetPhysicalType(const std::shared_ptr<DataType>& type);
/// \brief Base class for all fixed-width data types
///
/// Adds no members of its own in this header; it reuses DataType's
/// constructors and serves as a common base for the fixed-width hierarchy.
class ARROW_EXPORT FixedWidthType : public DataType {
public:
// Inherit DataType's constructors unchanged.
using DataType::DataType;
// The destructor is declared here without a body, so it is defined out of
// line. This is only for preventing defining this class in each
// translation unit to avoid one-definition-rule violation.
~FixedWidthType() override;
};
/// \brief Base class for all data types representing primitive values
///
/// Adds no members of its own in this header; it reuses FixedWidthType's
/// constructors.
class ARROW_EXPORT PrimitiveCType : public FixedWidthType {
public:
// Inherit FixedWidthType's constructors unchanged.
using FixedWidthType::FixedWidthType;
// The destructor is declared here without a body, so it is defined out of
// line. This is only for preventing defining this class in each
// translation unit to avoid one-definition-rule violation.
~PrimitiveCType() override;
};
/// \brief Base class for all numeric data types
///
/// Adds no members of its own in this header; it reuses PrimitiveCType's
/// constructors and is the common base of IntegerType and FloatingPointType.
class ARROW_EXPORT NumberType : public PrimitiveCType {
public:
// Inherit PrimitiveCType's constructors unchanged.
using PrimitiveCType::PrimitiveCType;
// The destructor is declared here without a body, so it is defined out of
// line. This is only for preventing defining this class in each
// translation unit to avoid one-definition-rule violation.
~NumberType() override;
};
/// \brief Base class for all integral data types
///
/// Extends NumberType with a signedness query that each concrete integer
/// type must implement.
class ARROW_EXPORT IntegerType : public NumberType {
public:
// Inherit NumberType's constructors unchanged.
using NumberType::NumberType;
// The destructor is declared here without a body, so it is defined out of
// line. This is only for preventing defining this class in each
// translation unit to avoid one-definition-rule violation.
~IntegerType() override;
/// \brief Signedness query for the concrete integer type.
///
/// Pure virtual: every concrete subclass has to provide the answer.
virtual bool is_signed() const = 0;
};
/// \brief Base class for all floating-point data types
///
/// Extends NumberType with a precision query that each concrete
/// floating-point type must implement.
class ARROW_EXPORT FloatingPointType : public NumberType {
public:
// Inherit NumberType's constructors unchanged.
using NumberType::NumberType;
// The destructor is declared here without a body, so it is defined out of
// line. This is only for preventing defining this class in each
// translation unit to avoid one-definition-rule violation.
~FloatingPointType() override;
/// \brief Precision categories a floating-point type can report.
///
/// NOTE(review): presumably half (16-bit), single (32-bit) and double
/// (64-bit) precision -- confirm against the concrete subclasses.
enum Precision { HALF, SINGLE, DOUBLE };
/// \brief Precision category of the concrete floating-point type.
///
/// Pure virtual: every concrete subclass has to provide the answer.
virtual Precision precision() const = 0;
};
/// \brief Base class for all parametric data types
Loading ...