Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

arrow-nightlies / pyarrow   python

Repository URL to install this package:

Version: 19.0.0.dev259 

/ include / arrow / util / float16.h

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <array>
#include <cstdint>
#include <cstring>
#include <iosfwd>
#include <limits>
#include <type_traits>

#include "arrow/util/endian.h"
#include "arrow/util/macros.h"
#include "arrow/util/ubsan.h"
#include "arrow/util/visibility.h"

namespace arrow {
namespace util {

/// \brief Class representing an IEEE half-precision float, encoded as a `uint16_t`
///
/// The exact format is as follows (from LSB to MSB, bit ranges inclusive):
/// - bits 0-9:   mantissa (10 bits)
/// - bits 10-14: exponent (5 bits)
/// - bit 15:     sign
///
/// The class only wraps the raw bits: it offers classification, conversion,
/// byte (de)serialization, negation and IEEE-style comparisons.
class ARROW_EXPORT Float16 {
 public:
  /// \brief Trivial default constructor
  ///
  /// Default-initialization leaves the stored bits indeterminate;
  /// value-initialization (`Float16{}`) zeroes them, i.e. yields +0.
  Float16() = default;
  /// \brief Construct from a 32-bit float (see FromFloat)
  explicit Float16(float f) : Float16(FromFloat(f)) {}
  /// \brief Construct from a 64-bit float (see FromDouble)
  explicit Float16(double d) : Float16(FromDouble(d)) {}
  /// \brief Construct from any other type implicitly convertible to `double`
  ///
  /// The value is first converted to `double`, so e.g. an integer argument is
  /// interpreted as a number, not as a bit pattern (use FromBits for that).
  template <typename T,
            typename std::enable_if_t<std::is_convertible_v<T, double>>* = NULLPTR>
  explicit Float16(T v) : Float16(static_cast<double>(v)) {}

  /// \brief Create a `Float16` from its exact binary representation
  constexpr static Float16 FromBits(uint16_t bits) { return Float16{bits, bool{}}; }
  /// \brief Create a `Float16` from a 32-bit float (may lose precision)
  static Float16 FromFloat(float f);
  /// \brief Create a `Float16` from a 64-bit float (may lose precision)
  static Float16 FromDouble(double d);

  /// \brief Read a `Float16` from memory in native-endian byte order
  static Float16 FromBytes(const uint8_t* src) {
    return FromBits(SafeLoadAs<uint16_t>(src));
  }

  /// \brief Read a `Float16` from memory in little-endian byte order
  static Float16 FromLittleEndian(const uint8_t* src) {
    return FromBits(::arrow::bit_util::FromLittleEndian(SafeLoadAs<uint16_t>(src)));
  }

  /// \brief Read a `Float16` from memory in big-endian byte order
  static Float16 FromBigEndian(const uint8_t* src) {
    return FromBits(::arrow::bit_util::FromBigEndian(SafeLoadAs<uint16_t>(src)));
  }

  /// \brief Return the value's binary representation as a `uint16_t`
  constexpr uint16_t bits() const { return bits_; }

  /// \brief Return true if the value is negative (sign bit is set)
  constexpr bool signbit() const { return (bits_ & 0x8000) != 0; }

  /// \brief Return true if the value is NaN
  ///
  /// I.e. the exponent bits are all set and the mantissa is non-zero.
  constexpr bool is_nan() const { return (bits_ & 0x7fff) > 0x7c00; }
  /// \brief Return true if the value is positive/negative infinity
  ///
  /// I.e. the exponent bits are all set and the mantissa is zero.
  constexpr bool is_infinity() const { return (bits_ & 0x7fff) == 0x7c00; }
  /// \brief Return true if the value is finite and not NaN
  ///
  /// I.e. the exponent bits are not all set.
  constexpr bool is_finite() const { return (bits_ & 0x7c00) != 0x7c00; }
  /// \brief Return true if the value is positive/negative zero
  constexpr bool is_zero() const { return (bits_ & 0x7fff) == 0; }

  /// \brief Convert to a 32-bit float
  float ToFloat() const;
  /// \brief Convert to a 64-bit float
  double ToDouble() const;

  /// \brief Explicit conversion to a 32-bit float (same as ToFloat)
  explicit operator float() const { return ToFloat(); }
  /// \brief Explicit conversion to a 64-bit float (same as ToDouble)
  explicit operator double() const { return ToDouble(); }

  /// \brief Copy the value's bytes in native-endian byte order
  ///
  /// Writes `sizeof(uint16_t)` bytes to `dest`.
  void ToBytes(uint8_t* dest) const { std::memcpy(dest, &bits_, sizeof(bits_)); }
  /// \brief Return the value's bytes in native-endian byte order
  constexpr std::array<uint8_t, 2> ToBytes() const {
#if ARROW_LITTLE_ENDIAN
    return ToLittleEndian();
#else
    return ToBigEndian();
#endif
  }

  /// \brief Copy the value's bytes in little-endian byte order
  ///
  /// Writes two bytes to `dest`.
  void ToLittleEndian(uint8_t* dest) const {
    const auto bytes = ToLittleEndian();
    std::memcpy(dest, bytes.data(), bytes.size());
  }
  /// \brief Return the value's bytes in little-endian byte order
  constexpr std::array<uint8_t, 2> ToLittleEndian() const {
    // Low byte first, then high byte
    return {static_cast<uint8_t>(bits_ & 0xff), static_cast<uint8_t>(bits_ >> 8)};
  }

  /// \brief Copy the value's bytes in big-endian byte order
  ///
  /// Writes two bytes to `dest`.
  void ToBigEndian(uint8_t* dest) const {
    const auto bytes = ToBigEndian();
    std::memcpy(dest, bytes.data(), bytes.size());
  }
  /// \brief Return the value's bytes in big-endian byte order
  constexpr std::array<uint8_t, 2> ToBigEndian() const {
    // High byte first, then low byte
    return {static_cast<uint8_t>(bits_ >> 8), static_cast<uint8_t>(bits_ & 0xff)};
  }

  /// \brief Return the value with its sign bit flipped (also applies to zero and NaN)
  constexpr Float16 operator-() const {
    // `bits_ ^ 0x8000` is computed as `int`; cast back explicitly instead of
    // relying on an implicit narrowing conversion.
    return FromBits(static_cast<uint16_t>(bits_ ^ 0x8000));
  }
  /// \brief Return a copy of the value
  constexpr Float16 operator+() const { return FromBits(bits_); }

  /// \brief Equality: NaN is unequal to everything (itself included), +0 equals -0
  friend constexpr bool operator==(Float16 lhs, Float16 rhs) {
    if (lhs.is_nan() || rhs.is_nan()) return false;
    return Float16::CompareEq(lhs, rhs);
  }
  /// \brief Negation of `operator==` (hence true whenever an operand is NaN)
  friend constexpr bool operator!=(Float16 lhs, Float16 rhs) { return !(lhs == rhs); }

  /// \brief Strict ordering; false if either operand is NaN
  friend constexpr bool operator<(Float16 lhs, Float16 rhs) {
    if (lhs.is_nan() || rhs.is_nan()) return false;
    return Float16::CompareLt(lhs, rhs);
  }
  /// \brief Strict ordering; false if either operand is NaN
  friend constexpr bool operator>(Float16 lhs, Float16 rhs) { return rhs < lhs; }

  /// \brief Non-strict ordering; false if either operand is NaN
  friend constexpr bool operator<=(Float16 lhs, Float16 rhs) {
    if (lhs.is_nan() || rhs.is_nan()) return false;
    return !Float16::CompareLt(rhs, lhs);
  }
  /// \brief Non-strict ordering; false if either operand is NaN
  friend constexpr bool operator>=(Float16 lhs, Float16 rhs) { return rhs <= lhs; }

  /// \brief Write the value to an output stream (defined out of line)
  ARROW_FRIEND_EXPORT friend std::ostream& operator<<(std::ostream& os, Float16 arg);

 protected:
  // Raw binary16 bit pattern. Deliberately has no default member initializer:
  // one would make the type non-trivial.
  uint16_t bits_;

 private:
  // Bit-pattern constructor used by FromBits. The unused `bool` tag keeps it
  // distinct from the numeric converting constructors, which would otherwise
  // treat a `uint16_t` argument as a number.
  constexpr Float16(uint16_t bits, bool) : bits_(bits) {}

  // Comparison helpers that assume neither operand is NaN
  static constexpr bool CompareEq(Float16 lhs, Float16 rhs) {
    // Identical bit patterns, or two zeros of any sign
    return (lhs.bits() == rhs.bits()) || (lhs.is_zero() && rhs.is_zero());
  }
  static constexpr bool CompareLt(Float16 lhs, Float16 rhs) {
    if (lhs.signbit()) {
      if (rhs.signbit()) {
        // Both are negative: the larger bit pattern has the larger magnitude,
        // hence is the smaller value
        return lhs.bits() > rhs.bits();
      } else {
        // Handle +/-0: a negative lhs is below a non-negative rhs,
        // except that -0 is not less than +0
        return !lhs.is_zero() || rhs.bits() != 0;
      }
    } else if (rhs.signbit()) {
      // lhs is non-negative, rhs is negative (or -0)
      return false;
    } else {
      // Both are positive
      return lhs.bits() < rhs.bits();
    }
  }
};

// Guards against accidentally adding a user-provided constructor, a default
// member initializer or a virtual function, any of which would make the type
// non-trivial.
static_assert(std::is_trivial_v<Float16>);

}  // namespace util
}  // namespace arrow

// TODO: Not complete
template <>
class std::numeric_limits<arrow::util::Float16> {
  using T = arrow::util::Float16;

 public:
  static constexpr bool is_specialized = true;
  static constexpr bool is_signed = true;
  static constexpr bool has_infinity = true;
  static constexpr bool has_quiet_NaN = true;

  static constexpr T min() { return T::FromBits(0b0000010000000000); }
  static constexpr T max() { return T::FromBits(0b0111101111111111); }
  static constexpr T lowest() { return -max(); }

  static constexpr T infinity() { return T::FromBits(0b0111110000000000); }

  static constexpr T quiet_NaN() { return T::FromBits(0b0111111111111111); }
};