Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

arrow-nightlies / pyarrow   python

Repository URL to install this package:

Version: 19.0.0.dev70 

/ include / arrow / type.h

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <atomic>
#include <climits>
#include <cstdint>
#include <iosfwd>
#include <limits>
#include <memory>
#include <optional>
#include <string>
#include <utility>
#include <variant>
#include <vector>

#include "arrow/result.h"
#include "arrow/type_fwd.h"  // IWYU pragma: export
#include "arrow/util/checked_cast.h"
#include "arrow/util/endian.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
#include "arrow/visitor.h"  // IWYU pragma: keep

namespace arrow {
namespace detail {

/// \defgroup numeric-datatypes Datatypes for numeric data
/// @{
/// @}

/// \defgroup binary-datatypes Datatypes for binary/string data
/// @{
/// @}

/// \defgroup temporal-datatypes Datatypes for temporal data
/// @{
/// @}

/// \defgroup nested-datatypes Datatypes for nested data
/// @{
/// @}

/// \brief Base class exposing two cached fingerprint strings
///
/// Each accessor reads an atomic pointer to a cached string and returns the
/// pointee when it is set; otherwise it defers to an out-of-line slow path.
/// The slow paths are not defined in this header; presumably they call the
/// Compute*() hooks and publish the result -- confirm against the .cc file.
class ARROW_EXPORT Fingerprintable {
 public:
  virtual ~Fingerprintable();

  /// \brief Return the fingerprint string
  ///
  /// Returns the cached string if one is stored, else whatever
  /// LoadFingerprintSlow() returns.
  const std::string& fingerprint() const {
    const std::string* cached = fingerprint_.load();
    return ARROW_PREDICT_TRUE(cached != NULLPTR) ? *cached : LoadFingerprintSlow();
  }

  /// \brief Return the metadata fingerprint string
  ///
  /// Returns the cached string if one is stored, else whatever
  /// LoadMetadataFingerprintSlow() returns.
  const std::string& metadata_fingerprint() const {
    const std::string* cached = metadata_fingerprint_.load();
    return ARROW_PREDICT_TRUE(cached != NULLPTR) ? *cached
                                                 : LoadMetadataFingerprintSlow();
  }

 protected:
  // Out-of-line fallbacks taken when the corresponding cache pointer is null.
  const std::string& LoadFingerprintSlow() const;
  const std::string& LoadMetadataFingerprintSlow() const;

  // Hooks that concrete subclasses must implement.
  virtual std::string ComputeFingerprint() const = 0;
  virtual std::string ComputeMetadataFingerprint() const = 0;

  // Cache slots; both start out null. Mutable so that the const accessors
  // (and, presumably, the slow paths) can fill them in.
  mutable std::atomic<std::string*> fingerprint_{NULLPTR};
  mutable std::atomic<std::string*> metadata_fingerprint_{NULLPTR};
};

}  // namespace detail

/// EXPERIMENTAL: Layout specification for a data type
struct ARROW_EXPORT DataTypeLayout {
  enum BufferKind { FIXED_WIDTH, VARIABLE_WIDTH, BITMAP, ALWAYS_NULL };

  /// Layout specification for a single data type buffer
  struct BufferSpec {
    BufferKind kind;
    int64_t byte_width;  // For FIXED_WIDTH

    bool operator==(const BufferSpec& other) const {
      return kind == other.kind &&
             (kind != FIXED_WIDTH || byte_width == other.byte_width);
    }
    bool operator!=(const BufferSpec& other) const { return !(*this == other); }
  };

  static BufferSpec FixedWidth(int64_t w) { return BufferSpec{FIXED_WIDTH, w}; }
  static BufferSpec VariableWidth() { return BufferSpec{VARIABLE_WIDTH, -1}; }
  static BufferSpec Bitmap() { return BufferSpec{BITMAP, -1}; }
  static BufferSpec AlwaysNull() { return BufferSpec{ALWAYS_NULL, -1}; }

  /// A vector of buffer layout specifications, one for each expected buffer
  std::vector<BufferSpec> buffers;
  /// Whether this type expects an associated dictionary array.
  bool has_dictionary = false;
  /// If this is provided, the number of buffers expected is only lower-bounded by
  /// buffers.size(). Buffers beyond this lower bound are expected to conform to
  /// variadic_spec.
  std::optional<BufferSpec> variadic_spec;

  explicit DataTypeLayout(std::vector<BufferSpec> buffers,
                          std::optional<BufferSpec> variadic_spec = {})
      : buffers(std::move(buffers)), variadic_spec(variadic_spec) {}
};

/// \brief Root of the data type class hierarchy
///
/// Every data type in this library is *logical*. A logical type may be
/// expressed as a primitive physical type (bytes or bits of some fixed size),
/// as a nested type built from other data types, or as another data type
/// (for example a timestamp encoded as an int64).
///
/// A simple data type can be described completely by its Type::type id,
/// whereas complex data types are usually parametric.
class ARROW_EXPORT DataType : public std::enable_shared_from_this<DataType>,
                              public detail::Fingerprintable,
                              public util::EqualityComparable<DataType> {
 public:
  /// \brief Construct a type carrying the given type id
  explicit DataType(Type::type id) : detail::Fingerprintable(), id_{id} {}
  ~DataType() override;

  /// \brief Test two types for equality
  ///
  /// Types that can be logically converted into one another (e.g.
  /// List<UInt8> and Binary) do NOT compare equal.
  bool Equals(const DataType& other, bool check_metadata = false) const;

  /// \brief Test two types for equality (shared_ptr overload)
  bool Equals(const std::shared_ptr<DataType>& other, bool check_metadata = false) const;

  /// \brief Access the child field at position i (no bounds checking here).
  const std::shared_ptr<Field>& field(int i) const { return children_[i]; }

  /// \brief Access all child fields of this type.
  const FieldVector& fields() const { return children_; }

  /// \brief Number of child fields of this type, as an int.
  int num_fields() const {
    const auto count = children_.size();
    return static_cast<int>(count);
  }

  /// \brief Dispatch to the TypeVisitor::Visit() overload for this data type
  Status Accept(TypeVisitor* visitor) const;

  /// \brief Render the type, children included, as a string
  virtual std::string ToString(bool show_metadata = false) const = 0;

  /// \brief Compute a hash value (metadata in child fields is excluded)
  size_t Hash() const;

  /// \brief The type's name as a string, without any child fields
  ///
  /// \since 0.7.0
  virtual std::string name() const = 0;

  /// \brief The layout of this data type; children are not included.
  ///
  /// \note Experimental API
  virtual DataTypeLayout layout() const = 0;

  /// \brief The type category
  Type::type id() const { return id_; }

  /// \brief The type category of the storage type (id_ unless overridden)
  virtual Type::type storage_id() const { return id_; }

  /// \brief The type's fixed byte width, if any
  ///
  /// Derived from bit_width(): a positive bit width is divided by 8, and
  /// anything else yields -1. Should only be used for subclasses of
  /// FixedWidthType.
  virtual int32_t byte_width() const {
    const int32_t bits = this->bit_width();
    if (bits <= 0) {
      return -1;
    }
    return bits / 8;
  }

  /// \brief The type's fixed bit width, if any
  ///
  /// This base implementation returns -1 (no fixed width). Should only be
  /// used for subclasses of FixedWidthType.
  virtual int bit_width() const { return -1; }

  /// \brief EXPERIMENTAL: Obtain a shared_ptr<DataType> from a const context.
  ///
  /// Goes through shared_from_this(), so the object must already be managed
  /// by a shared_ptr.
  std::shared_ptr<DataType> GetSharedPtr() const {
    DataType* self = const_cast<DataType*>(this);
    return self->shared_from_this();
  }

 protected:
  // Placeholder implementation that returns a null string, which signals
  // "not implemented". Subclasses should override it to get fast equality
  // checks.
  std::string ComputeFingerprint() const override;

  // Generic implementation that works for every regular type, nested or not.
  std::string ComputeMetadataFingerprint() const override;

  Type::type id_;
  FieldVector children_;

 private:
  ARROW_DISALLOW_COPY_AND_ASSIGN(DataType);
};

/// \brief EXPERIMENTAL: Container for a type pointer which can hold a
/// dynamically created shared_ptr<DataType> if it needs to.
struct ARROW_EXPORT TypeHolder {
  const DataType* type = NULLPTR;
  std::shared_ptr<DataType> owned_type;

  TypeHolder() = default;
  TypeHolder(const TypeHolder& other) = default;
  TypeHolder& operator=(const TypeHolder& other) = default;
  TypeHolder(TypeHolder&& other) = default;
  TypeHolder& operator=(TypeHolder&& other) = default;

  TypeHolder(std::shared_ptr<DataType> owned_type)  // NOLINT implicit construction
      : type(owned_type.get()), owned_type(std::move(owned_type)) {}

  TypeHolder(const DataType* type)  // NOLINT implicit construction
      : type(type) {}

  Type::type id() const { return this->type->id(); }

  std::shared_ptr<DataType> GetSharedPtr() const {
    return this->type != NULLPTR ? this->type->GetSharedPtr() : NULLPTR;
  }

  const DataType& operator*() const { return *this->type; }

  operator bool() const { return this->type != NULLPTR; }

  bool operator==(const TypeHolder& other) const {
    if (type == other.type) return true;
    if (type == NULLPTR || other.type == NULLPTR) return false;
    return type->Equals(*other.type);
  }

  bool operator==(decltype(NULLPTR)) const { return this->type == NULLPTR; }

  bool operator==(const DataType& other) const {
    if (this->type == NULLPTR) return false;
    return other.Equals(*this->type);
  }

  bool operator!=(const DataType& other) const { return !(*this == other); }

  bool operator==(const std::shared_ptr<DataType>& other) const {
    return *this == *other;
  }

  bool operator!=(const TypeHolder& other) const { return !(*this == other); }

  std::string ToString(bool show_metadata = false) const {
    return this->type ? this->type->ToString(show_metadata) : "<NULLPTR>";
  }

  static std::string ToString(const std::vector<TypeHolder>&, bool show_metadata = false);

  static std::vector<TypeHolder> FromTypes(
      const std::vector<std::shared_ptr<DataType>>& types);
};

/// \brief Stream insertion operator for DataType
///
/// Declared here and defined out of line; the exact text written to `os` is
/// not visible from this header.
ARROW_EXPORT
std::ostream& operator<<(std::ostream& os, const DataType& type);

/// \brief Stream insertion operator for TypeHolder
///
/// Declared here and defined out of line; the exact text written to `os` is
/// not visible from this header.
ARROW_EXPORT
std::ostream& operator<<(std::ostream& os, const TypeHolder& type);

/// \brief Return the compatible physical data type
///
/// Some types may have distinct logical meanings but the exact same physical
/// representation.  For example, TimestampType has Int64Type as a physical
/// type (defined as TimestampType::PhysicalType).
///
/// The return value is as follows:
/// - if a `PhysicalType` alias exists in the concrete type class, return
///   an instance of `PhysicalType`.
/// - otherwise, return the input type itself.
///
/// \param[in] type the data type to map to its physical type
/// \return the physical type, or `type` itself when no alias exists
///
/// NOTE(review): unlike the neighbouring operator<< declarations this one
/// carries no ARROW_EXPORT -- confirm whether that is intentional.
std::shared_ptr<DataType> GetPhysicalType(const std::shared_ptr<DataType>& type);

/// \brief Base class for all fixed-width data types
///
/// Adds no members of its own: it inherits DataType's constructors and only
/// declares a destructor.
class ARROW_EXPORT FixedWidthType : public DataType {
 public:
  using DataType::DataType;
  // The destructor is declared here and defined out of line so that this
  // class is not defined in each translation unit, which avoids
  // one-definition-rule violations.
  ~FixedWidthType() override;
};

/// \brief Base class for all data types representing primitive values
///
/// Adds no members of its own: it inherits FixedWidthType's constructors and
/// only declares a destructor.
class ARROW_EXPORT PrimitiveCType : public FixedWidthType {
 public:
  using FixedWidthType::FixedWidthType;
  // The destructor is declared here and defined out of line so that this
  // class is not defined in each translation unit, which avoids
  // one-definition-rule violations.
  ~PrimitiveCType() override;
};

/// \brief Base class for all numeric data types
///
/// Adds no members of its own: it inherits PrimitiveCType's constructors and
/// only declares a destructor.
class ARROW_EXPORT NumberType : public PrimitiveCType {
 public:
  using PrimitiveCType::PrimitiveCType;
  // The destructor is declared here and defined out of line so that this
  // class is not defined in each translation unit, which avoids
  // one-definition-rule violations.
  ~NumberType() override;
};

/// \brief Base class for all integral data types
///
/// Inherits NumberType's constructors and adds one pure virtual query,
/// is_signed(), that concrete integer types must implement.
class ARROW_EXPORT IntegerType : public NumberType {
 public:
  using NumberType::NumberType;
  // The destructor is declared here and defined out of line so that this
  // class is not defined in each translation unit, which avoids
  // one-definition-rule violations.
  ~IntegerType() override;
  /// \brief Signedness query implemented by each concrete subclass
  /// (presumably true for signed integer types -- confirm against the
  /// implementations).
  virtual bool is_signed() const = 0;
};

/// \brief Base class for all floating-point data types
///
/// Inherits NumberType's constructors, defines the Precision enumeration and
/// adds one pure virtual query, precision(), that concrete floating-point
/// types must implement.
class ARROW_EXPORT FloatingPointType : public NumberType {
 public:
  using NumberType::NumberType;
  // The destructor is declared here and defined out of line so that this
  // class is not defined in each translation unit, which avoids
  // one-definition-rule violations.
  ~FloatingPointType() override;
  /// Precision categories. NOTE(review): presumably half-, single- and
  /// double-precision floating point respectively -- confirm.
  enum Precision { HALF, SINGLE, DOUBLE };
  /// \brief The precision category of the concrete type
  virtual Precision precision() const = 0;
};

/// \brief Base class for all parametric data types
Loading ...