Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

arrow-nightlies / pyarrow   python

Repository URL to install this package:

Version: 19.0.0.dev70 

/ include / arrow / scalar.h

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

// Object model for scalar (non-Array) values. Not intended for use with large
// amounts of data

#pragma once

#include <iosfwd>
#include <memory>
#include <ratio>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

#include "arrow/compare.h"
#include "arrow/extension_type.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/type_fwd.h"
#include "arrow/type_traits.h"
#include "arrow/util/compare.h"
#include "arrow/util/decimal.h"
#include "arrow/util/visibility.h"
#include "arrow/visit_type_inline.h"

namespace arrow {

class Array;

/// \brief Base class for scalar values
///
/// A Scalar represents a single value with a specific DataType.
/// Scalars are useful for passing single value inputs to compute functions,
/// or for representing individual array elements (with a non-trivial
/// wrapping cost, though).
///
/// Every instance carries its DataType and a validity flag. The only
/// constructor is protected, so values are created through subclasses.
struct ARROW_EXPORT Scalar : public std::enable_shared_from_this<Scalar>,
                             public util::EqualityComparable<Scalar> {
  virtual ~Scalar() = default;

  /// \brief The type of the scalar value
  std::shared_ptr<DataType> type;

  /// \brief Whether the value is valid (not null) or not
  bool is_valid = false;

  /// \brief Compare this scalar with another one
  ///
  /// \param[in] other the scalar to compare against
  /// \param[in] options comparison options; EqualOptions::Defaults() when omitted
  /// \return whether the two scalars are considered equal
  bool Equals(const Scalar& other,
              const EqualOptions& options = EqualOptions::Defaults()) const;

  /// \brief Compare this scalar with another one, approximately
  ///
  /// NOTE(review): presumably tolerates small floating-point differences as
  /// configured through `options` -- confirm against the implementation.
  ///
  /// \param[in] other the scalar to compare against
  /// \param[in] options comparison options; EqualOptions::Defaults() when omitted
  /// \return whether the two scalars are considered approximately equal
  bool ApproxEquals(const Scalar& other,
                    const EqualOptions& options = EqualOptions::Defaults()) const;

  /// \brief Hash functor forwarding to Scalar::hash()
  ///
  /// Accepts either a Scalar reference or a shared_ptr<Scalar>.
  struct ARROW_EXPORT Hash {
    size_t operator()(const Scalar& scalar) const { return scalar.hash(); }

    // The pointer is dereferenced without a null check, so callers must not
    // pass a null shared_ptr.
    size_t operator()(const std::shared_ptr<Scalar>& scalar) const {
      return scalar->hash();
    }
  };

  /// \brief Compute a hash value for this scalar
  size_t hash() const;

  /// \brief Return a string representation of this scalar
  std::string ToString() const;

  /// \brief Perform cheap validation checks
  ///
  /// This is O(k) where k is the number of descendants.
  ///
  /// \return Status
  Status Validate() const;

  /// \brief Perform extensive data validation checks
  ///
  /// This is potentially O(k*n) where k is the number of descendants and n
  /// is the length of descendants (if list scalars are involved).
  ///
  /// \return Status
  Status ValidateFull() const;

  /// \brief Build a scalar of the given type from a textual representation
  ///
  /// \param[in] type the type of the scalar to create
  /// \param[in] repr the text to parse
  /// \return the parsed scalar, or an error
  static Result<std::shared_ptr<Scalar>> Parse(const std::shared_ptr<DataType>& type,
                                               std::string_view repr);

  /// \brief Convert this scalar to another type
  ///
  /// \param[in] to the target type
  /// \return the converted scalar, or an error
  // TODO(bkietz) add compute::CastOptions
  Result<std::shared_ptr<Scalar>> CastTo(std::shared_ptr<DataType> to) const;

  /// \brief Apply the ScalarVisitor::Visit() method specialized to the scalar type
  Status Accept(ScalarVisitor* visitor) const;

  /// \brief EXPERIMENTAL Enable obtaining shared_ptr<Scalar> from a const
  /// Scalar& context.
  ///
  /// This goes through shared_from_this(), so the scalar must already be owned
  /// by a std::shared_ptr. The const_cast only strips constness in order to
  /// reach the non-const shared_from_this() overload.
  std::shared_ptr<Scalar> GetSharedPtr() const {
    return const_cast<Scalar*>(this)->shared_from_this();
  }

 protected:
  /// \brief Initialize the type and validity flag; reserved for subclasses
  Scalar(std::shared_ptr<DataType> type, bool is_valid)
      : type(std::move(type)), is_valid(is_valid) {}
};

/// \brief Write a representation of `scalar` to the stream pointed to by `os`
///
/// NOTE(review): the PrintTo(const T&, std::ostream*) signature matches the
/// GoogleTest custom value-printer hook; presumably that is why it exists --
/// confirm against the implementation.
ARROW_EXPORT void PrintTo(const Scalar& scalar, std::ostream* os);

/// \defgroup concrete-scalar-classes Concrete Scalar subclasses
///
/// @{

/// \brief Scalar of NullType; it is always constructed as invalid (null)
struct ARROW_EXPORT NullScalar : public Scalar {
  using TypeClass = NullType;

  // There is no value to carry: the type is fixed to null() and the validity
  // flag starts out false.
  NullScalar() : Scalar(null(), false) {}
};

/// @}

namespace internal {

// Size in bytes of the per-scalar scratch space: room for two 64-bit integers.
constexpr auto kScalarScratchSpaceSize = 2 * sizeof(int64_t);

/// \brief Base that gives a scalar class a small scratch buffer filled on construction
///
/// \tparam Impl the scalar class deriving from this base; it must provide a
/// static FillScratchSpace(uint8_t*, ...) accepting the constructor arguments
template <typename Impl>
struct ArraySpanFillFromScalarScratchSpace {
  // kScalarScratchSpaceSize (16) bytes that allow an ArraySpan to be a view
  // onto any Scalar, including binary scalars, for which a buffer that looks
  // like two 32-bit or 64-bit offsets has to be created. The array is mutable,
  // so it can be written even through a const scalar.
  alignas(int64_t) mutable uint8_t scratch_space_[kScalarScratchSpaceSize];

 private:
  // The constructors are private; this friendship is what lets Impl use them.
  friend Impl;

  ArraySpanFillFromScalarScratchSpace() = delete;

  // Forward every argument to Impl::FillScratchSpace so that the scratch space
  // is populated as part of construction.
  template <typename... CtorArgs>
  explicit ArraySpanFillFromScalarScratchSpace(CtorArgs&&... ctor_args) {
    Impl::FillScratchSpace(scratch_space_, std::forward<CtorArgs>(ctor_args)...);
  }
};

/// \brief Common base for scalars that expose their value as raw bytes
struct ARROW_EXPORT PrimitiveScalarBase : public Scalar {
  // Make Scalar's (type, is_valid) constructor available to subclasses.
  using Scalar::Scalar;

  /// \brief Construct a null (invalid) scalar of the given type
  explicit PrimitiveScalarBase(std::shared_ptr<DataType> type)
      : Scalar{std::move(type), false} {}

  /// \brief Get a const pointer to the value of this scalar. May be null.
  virtual const void* data() const = 0;
  /// \brief Get an immutable view of the value of this scalar as bytes.
  virtual std::string_view view() const = 0;
};

/// \brief Scalar that stores its value inline as a plain C++ value
///
/// \tparam T the type class this scalar corresponds to (exposed as TypeClass)
/// \tparam CType the C++ type used to store the value; T::c_type by default
template <typename T, typename CType = typename T::c_type>
struct PrimitiveScalar : public PrimitiveScalarBase {
  using PrimitiveScalarBase::PrimitiveScalarBase;
  using TypeClass = T;
  using ValueType = CType;

  /// \brief Construct a valid (non-null) scalar holding `value`
  PrimitiveScalar(ValueType value, std::shared_ptr<DataType> type)
      : PrimitiveScalarBase(std::move(type), true), value(value) {}

  /// \brief Construct a null scalar; `value` keeps its value-initialized default
  explicit PrimitiveScalar(std::shared_ptr<DataType> type)
      : PrimitiveScalarBase(std::move(type), false) {}

  /// \brief The stored value, value-initialized unless set by a constructor
  ValueType value{};

  /// \brief Address of the inline value; never null for this class
  const void* data() const override { return &value; }

  /// \brief The sizeof(ValueType) bytes of the inline value, viewed in place
  std::string_view view() const override {
    return std::string_view(reinterpret_cast<const char*>(&value), sizeof(ValueType));
  }
};

}  // namespace internal

/// \addtogroup concrete-scalar-classes Concrete Scalar subclasses
///
/// @{

/// \brief Scalar holding a single bool value
struct ARROW_EXPORT BooleanScalar : public internal::PrimitiveScalar<BooleanType, bool> {
  using Base = internal::PrimitiveScalar<BooleanType, bool>;
  using Base::Base;

  /// \brief Construct a null scalar of type boolean()
  BooleanScalar() : Base(boolean()) {}

  /// \brief Construct a valid scalar of type boolean() holding `value`
  explicit BooleanScalar(bool value) : Base(value, boolean()) {}
};

/// \brief Primitive scalar whose DataType comes from TypeTraits<T>::type_singleton()
///
/// \tparam T the numeric type class
template <typename T>
struct NumericScalar : public internal::PrimitiveScalar<T> {
  using Base = internal::PrimitiveScalar<T>;
  using Base::Base;
  using TypeClass = typename Base::TypeClass;
  using ValueType = typename Base::ValueType;

  /// \brief Construct a null scalar of the singleton type
  NumericScalar() : Base(TypeTraits<T>::type_singleton()) {}

  /// \brief Construct a valid scalar of the singleton type holding `value`
  explicit NumericScalar(ValueType value)
      : Base(value, TypeTraits<T>::type_singleton()) {}
};

/// \brief Concrete NumericScalar for Int8Type; inherits all of its constructors
struct ARROW_EXPORT Int8Scalar : public NumericScalar<Int8Type> {
  using NumericScalar<Int8Type>::NumericScalar;
};

/// \brief Concrete NumericScalar for Int16Type; inherits all of its constructors
struct ARROW_EXPORT Int16Scalar : public NumericScalar<Int16Type> {
  using NumericScalar<Int16Type>::NumericScalar;
};

/// \brief Concrete NumericScalar for Int32Type; inherits all of its constructors
struct ARROW_EXPORT Int32Scalar : public NumericScalar<Int32Type> {
  using NumericScalar<Int32Type>::NumericScalar;
};

/// \brief Concrete NumericScalar for Int64Type; inherits all of its constructors
struct ARROW_EXPORT Int64Scalar : public NumericScalar<Int64Type> {
  using NumericScalar<Int64Type>::NumericScalar;
};

/// \brief Concrete NumericScalar for UInt8Type; inherits all of its constructors
struct ARROW_EXPORT UInt8Scalar : public NumericScalar<UInt8Type> {
  using NumericScalar<UInt8Type>::NumericScalar;
};

/// \brief Concrete NumericScalar for UInt16Type; inherits all of its constructors
struct ARROW_EXPORT UInt16Scalar : public NumericScalar<UInt16Type> {
  using NumericScalar<UInt16Type>::NumericScalar;
};

/// \brief Concrete NumericScalar for UInt32Type; inherits all of its constructors
struct ARROW_EXPORT UInt32Scalar : public NumericScalar<UInt32Type> {
  using NumericScalar<UInt32Type>::NumericScalar;
};

/// \brief Concrete NumericScalar for UInt64Type; inherits all of its constructors
struct ARROW_EXPORT UInt64Scalar : public NumericScalar<UInt64Type> {
  using NumericScalar<UInt64Type>::NumericScalar;
};

/// \brief Concrete NumericScalar for HalfFloatType; inherits all of its constructors
struct ARROW_EXPORT HalfFloatScalar : public NumericScalar<HalfFloatType> {
  using NumericScalar<HalfFloatType>::NumericScalar;
};

/// \brief Concrete NumericScalar for FloatType; inherits all of its constructors
struct ARROW_EXPORT FloatScalar : public NumericScalar<FloatType> {
  using NumericScalar<FloatType>::NumericScalar;
};

/// \brief Concrete NumericScalar for DoubleType; inherits all of its constructors
struct ARROW_EXPORT DoubleScalar : public NumericScalar<DoubleType> {
  using NumericScalar<DoubleType>::NumericScalar;
};

/// \brief Base class for scalars whose value lives in a shared Buffer
struct ARROW_EXPORT BaseBinaryScalar : public internal::PrimitiveScalarBase {
  using ValueType = std::shared_ptr<Buffer>;

  // Treat the value as frozen once the scalar has been constructed: subclasses
  // keep a scratch space whose content has to stay consistent with it. Users of
  // this class are also responsible for making sure the buffer is not written
  // to by accident.
  const std::shared_ptr<Buffer> value = NULLPTR;

  /// \brief Pointer to the buffer's data, or null when no buffer is attached
  const void* data() const override {
    if (!value) {
      return NULLPTR;
    }
    return reinterpret_cast<const void*>(value->data());
  }

  /// \brief View of the buffer's contents, empty when no buffer is attached
  std::string_view view() const override {
    if (!value) {
      return std::string_view();
    }
    return std::string_view(*value);
  }

  /// \brief Construct a null scalar of the given type, without a buffer
  explicit BaseBinaryScalar(std::shared_ptr<DataType> type)
      : internal::PrimitiveScalarBase(std::move(type)) {}

  /// \brief Construct a scalar marked valid that takes ownership of `value`
  BaseBinaryScalar(std::shared_ptr<Buffer> value, std::shared_ptr<DataType> type)
      : internal::PrimitiveScalarBase{std::move(type), true}, value(std::move(value)) {}

  /// \brief Construct a scalar from a string; defined out of line
  BaseBinaryScalar(std::string s, std::shared_ptr<DataType> type);
};

/// \brief Scalar holding a binary value; the type is binary() unless one is given
///
/// Besides the buffer inherited from BaseBinaryScalar, every instance carries a
/// scratch space that each constructor fills from the value through
/// FillScratchSpace().
struct ARROW_EXPORT BinaryScalar
    : public BaseBinaryScalar,
      private internal::ArraySpanFillFromScalarScratchSpace<BinaryScalar> {
  using TypeClass = BinaryType;
  using ArraySpanFillFromScalarScratchSpace =
      internal::ArraySpanFillFromScalarScratchSpace<BinaryScalar>;

  // In the three constructors below, BaseBinaryScalar is listed first among the
  // bases and is therefore initialized first, so `this->value` is already set
  // when it is handed to the scratch-space base.

  /// \brief Construct a null scalar of the given type
  explicit BinaryScalar(std::shared_ptr<DataType> type)
      : BaseBinaryScalar(std::move(type)),
        ArraySpanFillFromScalarScratchSpace(this->value) {}

  /// \brief Construct a scalar of the given type from a buffer
  BinaryScalar(std::shared_ptr<Buffer> value, std::shared_ptr<DataType> type)
      : BaseBinaryScalar(std::move(value), std::move(type)),
        ArraySpanFillFromScalarScratchSpace(this->value) {}

  /// \brief Construct a scalar of the given type from a string
  BinaryScalar(std::string s, std::shared_ptr<DataType> type)
      : BaseBinaryScalar(std::move(s), std::move(type)),
        ArraySpanFillFromScalarScratchSpace(this->value) {}

  /// \brief Construct a scalar of type binary() from a buffer
  explicit BinaryScalar(std::shared_ptr<Buffer> value)
      : BinaryScalar(std::move(value), binary()) {}

  /// \brief Construct a scalar of type binary() from a string
  explicit BinaryScalar(std::string s) : BinaryScalar(std::move(s), binary()) {}

  /// \brief Construct a null scalar of type binary()
  BinaryScalar() : BinaryScalar(binary()) {}

 private:
  // Called by the scratch-space base during construction to populate
  // `scratch_space` from `value`; defined out of line.
  static void FillScratchSpace(uint8_t* scratch_space,
                               const std::shared_ptr<Buffer>& value);

  // NOTE(review): ArraySpan presumably needs access to the privately inherited
  // scratch space -- confirm against ArraySpan's implementation.
  friend ArraySpan;
  // Lets the scratch-space base call the private FillScratchSpace() above.
  friend ArraySpanFillFromScalarScratchSpace;
};

/// \brief Binary scalar whose type is utf8() unless one is given explicitly
struct ARROW_EXPORT StringScalar : public BinaryScalar {
  using TypeClass = StringType;
  // Reuse every BinaryScalar constructor, including the (value, type) forms
  // that the constructors below delegate to.
  using BinaryScalar::BinaryScalar;

  /// \brief Construct a null scalar of type utf8()
  StringScalar() : StringScalar(utf8()) {}

  /// \brief Construct a scalar of type utf8() from a string
  explicit StringScalar(std::string s) : StringScalar(std::move(s), utf8()) {}

  /// \brief Construct a scalar of type utf8() from a buffer
  explicit StringScalar(std::shared_ptr<Buffer> value)
      : StringScalar(std::move(value), utf8()) {}
};

struct ARROW_EXPORT BinaryViewScalar
    : public BaseBinaryScalar,
      private internal::ArraySpanFillFromScalarScratchSpace<BinaryViewScalar> {
  using TypeClass = BinaryViewType;
  using ArraySpanFillFromScalarScratchSpace =
      internal::ArraySpanFillFromScalarScratchSpace<BinaryViewScalar>;

  explicit BinaryViewScalar(std::shared_ptr<DataType> type)
      : BaseBinaryScalar(std::move(type)),
        ArraySpanFillFromScalarScratchSpace(this->value) {}

  BinaryViewScalar(std::shared_ptr<Buffer> value, std::shared_ptr<DataType> type)
      : BaseBinaryScalar(std::move(value), std::move(type)),
        ArraySpanFillFromScalarScratchSpace(this->value) {}

  BinaryViewScalar(std::string s, std::shared_ptr<DataType> type)
      : BaseBinaryScalar(std::move(s), std::move(type)),
Loading ...