Gemfury

flet / flet-libgdal python

Repository URL to install this package:
Details
flet-libgdal / include / gdal_minmax_element.hpp
/******************************************************************************
 * Project:  GDAL Core
 * Purpose:  Utility functions to find minimum and maximum values in a buffer
 * Author:   Even Rouault, <even dot rouault at spatialys.com>
 *
 ******************************************************************************
 * Copyright (c) 2024, Even Rouault <even dot rouault at spatialys.com>
 *
 * SPDX-License-Identifier: MIT
 ****************************************************************************/

#ifndef GDAL_MINMAX_ELEMENT_INCLUDED
#define GDAL_MINMAX_ELEMENT_INCLUDED

// NOTE: This header requires C++17

// This file may be vendored by other applications than GDAL
// WARNING: if modifying this file, please also update the upstream GDAL version
// at https://github.com/OSGeo/gdal/blob/master/gcore/gdal_minmax_element.hpp

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>
#include <limits>
#include <type_traits>
#include <utility>

#include "gdal.h"

// Just to please cppcheck
#ifndef GDAL_COMPUTE_VERSION
#define GDAL_COMPUTE_VERSION(maj, min, rev)                                    \
    ((maj) * 1000000 + (min) * 10000 + (rev) * 100)
#endif

#ifdef GDAL_COMPILATION
#include "cpl_float.h"
#define GDAL_MINMAXELT_NS gdal
#elif !defined(GDAL_MINMAXELT_NS)
#error "Please define the GDAL_MINMAXELT_NS macro to define the namespace"
#elif GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3, 11, 0)
#include "cpl_float.h"
#endif

#ifdef USE_NEON_OPTIMIZATIONS
#include "include_sse2neon.h"
#define GDAL_MINMAX_ELEMENT_USE_SSE2
#else
#if defined(__x86_64) || defined(_M_X64)
#define GDAL_MINMAX_ELEMENT_USE_SSE2
#endif
#ifdef GDAL_MINMAX_ELEMENT_USE_SSE2
// SSE2 header
#include <emmintrin.h>
#if defined(__SSE4_1__) || defined(__AVX__)
#include <smmintrin.h>
#endif
#endif
#endif

#include "gdal_priv_templates.hpp"

namespace GDAL_MINMAXELT_NS
{
namespace detail
{

template <class T> struct is_floating_point
{
    static constexpr bool value = std::is_floating_point_v<T>
#if GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3, 11, 0)
                                  || std::is_same_v<T, GFloat16>
#endif
        ;
};

template <class T>
constexpr bool is_floating_point_v = is_floating_point<T>::value;

/************************************************************************/
/*                             compScalar()                             */
/************************************************************************/

template <class T, bool IS_MAX> inline static bool compScalar(T x, T y)
{
    if constexpr (IS_MAX)
        return x > y;
    else
        return x < y;
}

template <typename T> inline static bool IsNan(T x)
{
    // We need to write `using std::isnan` instead of directly using
    // `std::isnan` because `std::isnan` only supports the types
    // `float` and `double`. The `isnan` for `cpl::Float16` is found in the
    // `cpl` namespace via argument-dependent lookup
    // <https://en.cppreference.com/w/cpp/language/adl>.
    using std::isnan;
    return isnan(x);
}

template <class T> inline static bool compEqual(T x, T y)
{
    return x == y;
}

// On Intel/Neon, we do comparisons on uint16_t instead of casting to float for
// faster execution.
#if GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3, 11, 0) &&                      \
    defined(GDAL_MINMAX_ELEMENT_USE_SSE2)
template <> bool IsNan<GFloat16>(GFloat16 x)
{
    uint16_t iX;
    memcpy(&iX, &x, sizeof(x));
    // Check that the 5 bits of the exponent are set and the mantissa is not zero.
    return (iX & 0x7fff) > 0x7c00;
}

template <> bool compEqual<GFloat16>(GFloat16 x, GFloat16 y)
{
    uint16_t iX, iY;
    memcpy(&iX, &x, sizeof(x));
    memcpy(&iY, &y, sizeof(y));
    // Given our usage where y cannot be NaN we can skip the IsNan tests
    assert(!IsNan(y));
    return (iX == iY ||
            // Also check +0 == -0
            ((iX | iY) == (1 << 15)));
}

template <> bool compScalar<GFloat16, true>(GFloat16 x, GFloat16 y)
{
    uint16_t iX, iY;
    memcpy(&iX, &x, sizeof(x));
    memcpy(&iY, &y, sizeof(y));
    bool ret;
    if (IsNan(x) || IsNan(y))
    {
        ret = false;
    }
    else if (!(iX >> 15))
    {
        // +0 considered > -0. We don't really care
        ret = (iY >> 15) || iX > iY;
    }
    else
    {
        ret = (iY >> 15) && iX < iY;
    }
    return ret;
}

template <> bool compScalar<GFloat16, false>(GFloat16 x, GFloat16 y)
{
    return compScalar<GFloat16, true>(y, x);
}
#endif

/************************************************************************/
/*                 extremum_element_with_nan_generic()                  */
/************************************************************************/

template <class T, bool IS_MAX>
inline size_t extremum_element_with_nan_generic(const T *v, size_t size)
{
    if (size == 0)
        return 0;
    size_t idx_of_extremum = 0;
    auto extremum = v[0];
    bool extremum_is_nan = IsNan(extremum);
    size_t i = 1;
    for (; i < size; ++i)
    {
        if (compScalar<T, IS_MAX>(v[i], extremum) ||
            (extremum_is_nan && !IsNan(v[i])))
        {
            extremum = v[i];
            idx_of_extremum = i;
            extremum_is_nan = false;
        }
    }
    return idx_of_extremum;
}

/************************************************************************/
/*                 extremum_element_with_nan_generic()                  */
/************************************************************************/

template <class T, bool IS_MAX>
inline size_t extremum_element_with_nan_generic(const T *v, size_t size,
                                                T noDataValue)
{
    if (IsNan(noDataValue))
        return extremum_element_with_nan_generic<T, IS_MAX>(v, size);
    if (size == 0)
        return 0;
    size_t idx_of_extremum = 0;
    auto extremum = v[0];
    bool extremum_is_nan_or_nodata =
        IsNan(extremum) || compEqual(extremum, noDataValue);
    size_t i = 1;
    for (; i < size; ++i)
    {
        if (!compEqual(v[i], noDataValue) &&
            (compScalar<T, IS_MAX>(v[i], extremum) ||
             (extremum_is_nan_or_nodata && !IsNan(v[i]))))
        {
            extremum = v[i];
            idx_of_extremum = i;
            extremum_is_nan_or_nodata = false;
        }
    }
    return idx_of_extremum;
}

/************************************************************************/
/*                      extremum_element_generic()                      */
/************************************************************************/

template <class T, bool IS_MAX>
inline size_t extremum_element_generic(const T *buffer, size_t size,
                                       bool bHasNoData, T noDataValue)
{
    if (bHasNoData)
    {
        if constexpr (is_floating_point_v<T>)
        {
            if (IsNan(noDataValue))
            {
                if constexpr (IS_MAX)
                {
                    return std::max_element(buffer, buffer + size,
                                            [](T a, T b)
                                            {
                                                return IsNan(b)   ? false
                                                       : IsNan(a) ? true
                                                                  : a < b;
                                            }) -
                           buffer;
                }
                else
                {
                    return std::min_element(buffer, buffer + size,
                                            [](T a, T b)
                                            {
                                                return IsNan(b)   ? true
                                                       : IsNan(a) ? false
                                                                  : a < b;
                                            }) -
                           buffer;
                }
            }
            else
            {
                if constexpr (IS_MAX)
                {
                    return std::max_element(buffer, buffer + size,
                                            [noDataValue](T a, T b)
                                            {
                                                return IsNan(b)   ? false
                                                       : IsNan(a) ? true
                                                       : (b == noDataValue)
                                                           ? false
                                                       : (a == noDataValue)
                                                           ? true
                                                           : a < b;
                                            }) -
                           buffer;
                }
                else
                {
                    return std::min_element(buffer, buffer + size,
                                            [noDataValue](T a, T b)
                                            {
                                                return IsNan(b)   ? true
                                                       : IsNan(a) ? false
                                                       : (b == noDataValue)
                                                           ? true
                                                       : (a == noDataValue)
                                                           ? false
                                                           : a < b;
                                            }) -
                           buffer;
                }
            }
        }
        else
        {
            if constexpr (IS_MAX)
            {
                return std::max_element(buffer, buffer + size,
                                        [noDataValue](T a, T b)
                                        {
                                            return (b == noDataValue)   ? false
                                                   : (a == noDataValue) ? true
                                                                        : a < b;
                                        }) -
                       buffer;
            }
            else
            {
                return std::min_element(buffer, buffer + size,
                                        [noDataValue](T a, T b)
                                        {
                                            return (b == noDataValue)   ? true
                                                   : (a == noDataValue) ? false
                                                                        : a < b;
                                        }) -
                       buffer;
            }
        }
    }
    else
    {
        if constexpr (is_floating_point_v<T>)
        {
            if constexpr (IS_MAX)
            {
                return std::max_element(buffer, buffer + size,
                                        [](T a, T b)
                                        {
                                            return IsNan(b)   ? false
                                                   : IsNan(a) ? true
                                                              : a < b;
                                        }) -
                       buffer;
            }
            else
            {
                return std::min_element(buffer, buffer + size,
                                        [](T a, T b)
                                        {
                                            return IsNan(b)   ? true
                                                   : IsNan(a) ? false
                                                              : a < b;
                                        }) -
                       buffer;
            }
        }
        else
        {
            if constexpr (IS_MAX)
            {
                return std::max_element(buffer, buffer + size) - buffer;
            }
            else
            {
                return std::min_element(buffer, buffer + size) - buffer;
            }
        }
    }
}

#ifdef GDAL_MINMAX_ELEMENT_USE_SSE2

/************************************************************************/
/*                       extremum_element_sse2()                        */
/************************************************************************/

static inline int8_t Shift8(uint8_t x)
{
    return static_cast<int8_t>(x + std::numeric_limits<int8_t>::min());
}

static inline int16_t Shift16(uint16_t x)
{
    return static_cast<int16_t>(x + std::numeric_limits<int16_t>::min());
}

CPL_NOSANITIZE_UNSIGNED_INT_OVERFLOW
static inline int32_t Shift32(uint32_t x)
{
    x += static_cast<uint32_t>(std::numeric_limits<int32_t>::min());
    int32_t ret;
    memcpy(&ret, &x, sizeof(x));
    return ret;
}

CPL_NOSANITIZE_UNSIGNED_INT_OVERFLOW
static inline int64_t Shift64(uint64_t x)
{
    x += static_cast<uint64_t>(std::numeric_limits<int64_t>::min());
    int64_t ret;
    memcpy(&ret, &x, sizeof(x));
    return ret;
}

// Return a _mm128[i|d] register with all its elements set to x
template <class T> static inline auto set1(T x)
{
    if constexpr (std::is_same_v<T, uint8_t>)
        return _mm_set1_epi8(Shift8(x));
    else if constexpr (std::is_same_v<T, int8_t>)
        return _mm_set1_epi8(x);
    else if constexpr (std::is_same_v<T, uint16_t>)
        return _mm_set1_epi16(Shift16(x));
    else if constexpr (std::is_same_v<T, int16_t>)
        return _mm_set1_epi16(x);
    else if constexpr (std::is_same_v<T, uint32_t>)
        return _mm_set1_epi32(Shift32(x));
    else if constexpr (std::is_same_v<T, int32_t>)
        return _mm_set1_epi32(x);
    else if constexpr (std::is_same_v<T, uint64_t>)
        return _mm_set1_epi64x(Shift64(x));
    else if constexpr (std::is_same_v<T, int64_t>)
        return _mm_set1_epi64x(x);
#if GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3, 11, 0)
    else if constexpr (std::is_same_v<T, GFloat16>)
    {
        int16_t iX;
        memcpy(&iX, &x, sizeof(x));
        return _mm_set1_epi16(iX);
    }
#endif
    else if constexpr (std::is_same_v<T, float>)
        return _mm_set1_ps(x);
    else
    {
        static_assert(std::is_same_v<T, double>);
        return _mm_set1_pd(x);
    }
}

// Return a _mm128[i|d] register with all its elements set to x
template <class T> static inline auto set1_unshifted(T x)
{
    if constexpr (std::is_same_v<T, uint8_t>)
    {
        int8_t xSigned;
        memcpy(&xSigned, &x, sizeof(xSigned));
        return _mm_set1_epi8(xSigned);
    }
    else if constexpr (std::is_same_v<T, int8_t>)
        return _mm_set1_epi8(x);
    else if constexpr (std::is_same_v<T, uint16_t>)
    {
        int16_t xSigned;
        memcpy(&xSigned, &x, sizeof(xSigned));
        return _mm_set1_epi16(xSigned);
    }
    else if constexpr (std::is_same_v<T, int16_t>)
        return _mm_set1_epi16(x);
    else if constexpr (std::is_same_v<T, uint32_t>)
    {
        int32_t xSigned;
        memcpy(&xSigned, &x, sizeof(xSigned));
        return _mm_set1_epi32(xSigned);
    }
    else if constexpr (std::is_same_v<T, int32_t>)
        return _mm_set1_epi32(x);
    else if constexpr (std::is_same_v<T, uint64_t>)
    {
        int64_t xSigned;
        memcpy(&xSigned, &x, sizeof(xSigned));
        return _mm_set1_epi64x(xSigned);
    }
    else if constexpr (std::is_same_v<T, int64_t>)
        return _mm_set1_epi64x(x);
#if GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3, 11, 0)
    else if constexpr (std::is_same_v<T, GFloat16>)
    {
        int16_t iX;
        memcpy(&iX, &x, sizeof(x));
        return _mm_set1_epi16(iX);
    }
#endif
    else if constexpr (std::is_same_v<T, float>)
        return _mm_set1_ps(x);
    else
    {
        static_assert(std::is_same_v<T, double>);
        return _mm_set1_pd(x);
    }
}

// Load as many values of type T at a _mm128[i|d] register can contain from x
template <class T> static inline auto loadv(const T *x)
{
    if constexpr (std::is_same_v<T, float>)
        return _mm_loadu_ps(x);
    else if constexpr (std::is_same_v<T, double>)
        return _mm_loadu_pd(x);
    else
        return _mm_loadu_si128(reinterpret_cast<const __m128i *>(x));
}

inline __m128i IsNanGFloat16(__m128i x)
{
    // (iX & 0x7fff) > 0x7c00
    return _mm_cmpgt_epi16(_mm_and_si128(x, _mm_set1_epi16(0x7fff)),
                           _mm_set1_epi16(0x7c00));
}

template <class T, class SSE_T>
static inline SSE_T blendv(SSE_T a, SSE_T b, SSE_T mask)
{
    if constexpr (std::is_same_v<T, float>)
    {
#if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
        return _mm_blendv_ps(a, b, mask);
#else
        return _mm_or_ps(_mm_andnot_ps(mask, a), _mm_and_ps(mask, b));
#endif
    }
    else if constexpr (std::is_same_v<T, double>)
    {
#if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
        return _mm_blendv_pd(a, b, mask);
#else
        return _mm_or_pd(_mm_andnot_pd(mask, a), _mm_and_pd(mask, b));
#endif
    }
    else
    {
#if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
        return _mm_blendv_epi8(a, b, mask);
#else
        return _mm_or_si128(_mm_andnot_si128(mask, a), _mm_and_si128(mask, b));
#endif
    }
}

inline __m128i cmpgt_ph(__m128i x, __m128i y)
{
#ifdef slow
    GFloat16 vx[8], vy[8];
    int16_t res[8];
    _mm_storeu_si128(reinterpret_cast<__m128i *>(vx), x);
    _mm_storeu_si128(reinterpret_cast<__m128i *>(vy), y);
    for (int i = 0; i < 8; ++i)
    {
        res[i] = vx[i] > vy[i] ? -1 : 0;
    }
    return _mm_loadu_si128(reinterpret_cast<const __m128i *>(res));
#else
    const auto x_is_negative = _mm_srai_epi16(x, 15);
    const auto y_is_negative = _mm_srai_epi16(y, 15);
    return _mm_andnot_si128(
        // only x can be NaN given how we use this method
        IsNanGFloat16(x),
        blendv<int16_t>(_mm_or_si128(y_is_negative, _mm_cmpgt_epi16(x, y)),
                        _mm_and_si128(y_is_negative, _mm_cmpgt_epi16(y, x)),
                        x_is_negative));
#endif
}

inline __m128i cmpgt_epi64(__m128i x, __m128i y)
{
#if defined(__SSE4_2__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
    return _mm_cmpgt_epi64(x, y);
#else
    auto tmp = _mm_and_si128(_mm_sub_epi64(y, x), _mm_cmpeq_epi32(x, y));
    tmp = _mm_or_si128(tmp, _mm_cmpgt_epi32(x, y));
    // Replicate the 2 odd-indexed (hi) 32-bit word into the 2 even-indexed (lo)
    // ones
    return _mm_shuffle_epi32(tmp, _MM_SHUFFLE(3, 3, 1, 1));
#endif
}

// Return a __m128i register with bits set when x[i] < y[i] when !IS_MAX
// or x[i] > y[i] when IS_MAX
template <class T, bool IS_MAX, class SSE_T>
static inline __m128i comp(SSE_T x, SSE_T y)
{
    if constexpr (IS_MAX)
    {
        if constexpr (std::is_same_v<T, uint8_t>)
            return _mm_cmpgt_epi8(
                _mm_add_epi8(x,
                             _mm_set1_epi8(std::numeric_limits<int8_t>::min())),
                y);
        else if constexpr (std::is_same_v<T, int8_t>)
            return _mm_cmpgt_epi8(x, y);
        else if constexpr (std::is_same_v<T, uint16_t>)
            return _mm_cmpgt_epi16(
                _mm_add_epi16(
                    x, _mm_set1_epi16(std::numeric_limits<int16_t>::min())),
                y);
        else if constexpr (std::is_same_v<T, int16_t>)
            return _mm_cmpgt_epi16(x, y);
        else if constexpr (std::is_same_v<T, uint32_t>)
            return _mm_cmpgt_epi32(
                _mm_add_epi32(
                    x, _mm_set1_epi32(std::numeric_limits<int32_t>::min())),
                y);
        else if constexpr (std::is_same_v<T, int32_t>)
            return _mm_cmpgt_epi32(x, y);
        else if constexpr (std::is_same_v<T, uint64_t>)
        {
            return cmpgt_epi64(
                _mm_add_epi64(
                    x, _mm_set1_epi64x(std::numeric_limits<int64_t>::min())),
                y);
        }
        else if constexpr (std::is_same_v<T, int64_t>)
        {
            return cmpgt_epi64(x, y);
        }
#if GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3, 11, 0)
        else if constexpr (std::is_same_v<T, GFloat16>)
        {
            return cmpgt_ph(x, y);
        }
#endif
        else if constexpr (std::is_same_v<T, float>)
            return _mm_castps_si128(_mm_cmpgt_ps(x, y));
        else
        {
            static_assert(std::is_same_v<T, double>);
            return _mm_castpd_si128(_mm_cmpgt_pd(x, y));
        }
    }
    else
    {
        if constexpr (std::is_same_v<T, uint8_t>)
            return _mm_cmplt_epi8(
                _mm_add_epi8(x,
                             _mm_set1_epi8(std::numeric_limits<int8_t>::min())),
                y);
        else if constexpr (std::is_same_v<T, int8_t>)
            return _mm_cmplt_epi8(x, y);
        else if constexpr (std::is_same_v<T, uint16_t>)
            return _mm_cmplt_epi16(
                _mm_add_epi16(
                    x, _mm_set1_epi16(std::numeric_limits<int16_t>::min())),
                y);
        else if constexpr (std::is_same_v<T, int16_t>)
            return _mm_cmplt_epi16(x, y);
        else if constexpr (std::is_same_v<T, uint32_t>)
            return _mm_cmplt_epi32(
                _mm_add_epi32(
                    x, _mm_set1_epi32(std::numeric_limits<int32_t>::min())),
                y);
        else if constexpr (std::is_same_v<T, int32_t>)
            return _mm_cmplt_epi32(x, y);
        else if constexpr (std::is_same_v<T, uint64_t>)
        {
            return cmpgt_epi64(
                y, _mm_add_epi64(x, _mm_set1_epi64x(
                                        std::numeric_limits<int64_t>::min())));
        }
        else if constexpr (std::is_same_v<T, int64_t>)
        {
            return cmpgt_epi64(y, x);
        }
#if GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3, 11, 0)
        else if constexpr (std::is_same_v<T, GFloat16>)
        {
            return cmpgt_ph(y, x);
        }
#endif
        else if constexpr (std::is_same_v<T, float>)
            return _mm_castps_si128(_mm_cmplt_ps(x, y));
        else
        {
            static_assert(std::is_same_v<T, double>);
            return _mm_castpd_si128(_mm_cmplt_pd(x, y));
        }
    }
}

template <class T, class SSE_T> static inline SSE_T compeq(SSE_T a, SSE_T b);

template <> __m128i compeq<uint8_t>(__m128i a, __m128i b)
{
    return _mm_cmpeq_epi8(a, b);
}

template <> __m128i compeq<int8_t>(__m128i a, __m128i b)
{
    return _mm_cmpeq_epi8(a, b);
}

template <> __m128i compeq<uint16_t>(__m128i a, __m128i b)
{
    return _mm_cmpeq_epi16(a, b);
}

template <> __m128i compeq<int16_t>(__m128i a, __m128i b)
{
    return _mm_cmpeq_epi16(a, b);
}

template <> __m128i compeq<uint32_t>(__m128i a, __m128i b)
{
    return _mm_cmpeq_epi32(a, b);
}

template <> __m128i compeq<int32_t>(__m128i a, __m128i b)
{
    return _mm_cmpeq_epi32(a, b);
}

template <> __m128i compeq<uint64_t>(__m128i a, __m128i b)
{
#if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
    return _mm_cmpeq_epi64(a, b);
#else
    auto tmp = _mm_cmpeq_epi32(a, b);
    // The shuffle swaps hi-lo 32-bit words
    return _mm_and_si128(tmp, _mm_shuffle_epi32(tmp, _MM_SHUFFLE(2, 3, 0, 1)));
#endif
}

template <>
#if defined(__INTEL_CLANG_COMPILER) &&                                         \
    !(defined(__SSE4_1__) || defined(__AVX__))
// ICC 2024 has a bug with the following code when -fiopenmp is enabled.
// Disabling inlining works around it...
__attribute__((noinline))
#endif
__m128i compeq<int64_t>(__m128i a, __m128i b)
{
    return compeq<uint64_t>(a, b);
}

template <> __m128 compeq<float>(__m128 a, __m128 b)
{
    return _mm_cmpeq_ps(a, b);
}

template <> __m128d compeq<double>(__m128d a, __m128d b)
{
    return _mm_cmpeq_pd(a, b);
}

#if GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3, 11, 0)
template <> __m128i compeq<GFloat16>(__m128i a, __m128i b)
{
    // !isnan(a) && !isnan(b) && (a == b || (a|b) == 0x8000)
    return _mm_andnot_si128(
        IsNanGFloat16(a),  // b cannot be NaN given how we use this method
        _mm_or_si128(_mm_cmpeq_epi16(a, b),
                     _mm_cmpeq_epi16(
                         _mm_or_si128(a, b),
                         _mm_set1_epi16(std::numeric_limits<int16_t>::min()))));
}
#endif

// Using SSE2
template <class T, bool IS_MAX, bool HAS_NODATA>
#if defined(__GNUC__)
__attribute__((noinline))
#endif
size_t extremum_element_sse2(const T *v, size_t size, T noDataValue)
{
    static_assert(std::is_same_v<T, uint8_t> || std::is_same_v<T, int8_t> ||
                  std::is_same_v<T, uint16_t> || std::is_same_v<T, int16_t> ||
                  std::is_same_v<T, uint32_t> || std::is_same_v<T, int32_t> ||
                  std::is_same_v<T, uint64_t> || std::is_same_v<T, int64_t> ||
                  is_floating_point_v<T>);
    if (size == 0)
        return 0;
    size_t idx_of_extremum = 0;
    T extremum = v[0];
    [[maybe_unused]] bool extremum_is_invalid = false;
    if constexpr (is_floating_point_v<T>)
    {
        if constexpr (HAS_NODATA)
        {
            if (IsNan(noDataValue))
            {
                return extremum_element_sse2<T, IS_MAX, false>(
                    v, size, static_cast<T>(0));
            }
        }
        extremum_is_invalid = IsNan(extremum);
    }
    if constexpr (HAS_NODATA)
    {
        if (compEqual(extremum, noDataValue))
            extremum_is_invalid = true;
    }
    size_t i = 1;

    constexpr size_t VALS_PER_REG = sizeof(set1(extremum)) / sizeof(extremum);
    constexpr int LOOP_UNROLLING = 4;
    // If changing the value, then we need to adjust the number of sse_valX
    // loading in the loop.
    static_assert(LOOP_UNROLLING == 4);
    constexpr size_t VALS_PER_ITER = VALS_PER_REG * LOOP_UNROLLING;

    const auto update = [v, noDataValue, &extremum, &idx_of_extremum,
                         &extremum_is_invalid](size_t idx)
    {
        if constexpr (HAS_NODATA)
        {
            if (compEqual(v[idx], noDataValue))
                return;
            if (extremum_is_invalid)
            {
                if constexpr (is_floating_point_v<T>)
                {
                    if (IsNan(v[idx]))
                        return;
                }
                extremum = v[idx];
                idx_of_extremum = idx;
                extremum_is_invalid = false;
                return;
            }
        }
        else
        {
            CPL_IGNORE_RET_VAL(noDataValue);
        }
        if (compScalar<T, IS_MAX>(v[idx], extremum))
        {
            extremum = v[idx];
            idx_of_extremum = idx;
            extremum_is_invalid = false;
        }
        else if constexpr (is_floating_point_v<T>)
        {
            if (extremum_is_invalid && !IsNan(v[idx]))
            {
                extremum = v[idx];
                idx_of_extremum = idx;
                extremum_is_invalid = false;
            }
        }
    };

    for (; i < VALS_PER_ITER && i < size; ++i)
    {
        update(i);
    }

    [[maybe_unused]] auto sse_neutral = set1_unshifted(static_cast<T>(0));
    [[maybe_unused]] auto sse_nodata = set1_unshifted(noDataValue);
    if constexpr (HAS_NODATA || is_floating_point_v<T>)
    {
        for (; i < size && extremum_is_invalid; ++i)
        {
            update(i);
        }
        if (!extremum_is_invalid)
        {
            for (; i < size && (i % VALS_PER_ITER) != 0; ++i)
            {
                update(i);
            }
            sse_neutral = set1_unshifted(extremum);
        }
    }

    auto sse_extremum = set1(extremum);

    [[maybe_unused]] size_t hits = 0;
    const auto sse_iter_count = (size / VALS_PER_ITER) * VALS_PER_ITER;
    for (; i < sse_iter_count; i += VALS_PER_ITER)
    {
        // A bit of loop unrolling to save 3/4 of slow movemask operations.
        auto sse_val0 = loadv(v + i + 0 * VALS_PER_REG);
        auto sse_val1 = loadv(v + i + 1 * VALS_PER_REG);
        auto sse_val2 = loadv(v + i + 2 * VALS_PER_REG);
        auto sse_val3 = loadv(v + i + 3 * VALS_PER_REG);

        if constexpr (HAS_NODATA)
        {
            // Replace all components that are at the nodata value by a
            // neutral value (current minimum)
            const auto replaceNoDataByNeutral =
                [sse_neutral, sse_nodata](auto sse_val)
            {
                const auto eq_nodata = compeq<T>(sse_val, sse_nodata);
                return blendv<T>(sse_val, sse_neutral, eq_nodata);
            };

            sse_val0 = replaceNoDataByNeutral(sse_val0);
            sse_val1 = replaceNoDataByNeutral(sse_val1);
            sse_val2 = replaceNoDataByNeutral(sse_val2);
            sse_val3 = replaceNoDataByNeutral(sse_val3);
        }

        if (_mm_movemask_epi8(_mm_or_si128(
                _mm_or_si128(comp<T, IS_MAX>(sse_val0, sse_extremum),
                             comp<T, IS_MAX>(sse_val1, sse_extremum)),
                _mm_or_si128(comp<T, IS_MAX>(sse_val2, sse_extremum),
                             comp<T, IS_MAX>(sse_val3, sse_extremum)))) != 0)
        {
            if constexpr (!std::is_same_v<T, int8_t> &&
                          !std::is_same_v<T, uint8_t>)
            {
                // The above tests excluding int8_t/uint8_t is due to the fact
                // with those small ranges of values we will quickly converge
                // to the minimum, so no need to do the below "smart" test.

                if (++hits == size / 16)
                {
                    // If we have an almost sorted array, then using this code path
                    // will hurt performance. Arbitrary give up if we get here
                    // more than 1. / 16 of the size of the array.
                    // fprintf(stderr, "going to non-vector path\n");
                    break;
                }
            }
            for (size_t j = 0; j < VALS_PER_ITER; j++)
            {
                update(i + j);
            }

            sse_extremum = set1(extremum);
            if constexpr (HAS_NODATA)
            {
                sse_neutral = set1_unshifted(extremum);
            }
        }
    }
    for (; i < size; ++i)
    {
        update(i);
    }
    return idx_of_extremum;
}

/************************************************************************/
/*                          extremum_element()                          */
/************************************************************************/

template <class T, bool IS_MAX>
inline size_t extremum_element(const T *buffer, size_t size, T noDataValue)
{
    return extremum_element_sse2<T, IS_MAX, true>(buffer, size, noDataValue);
}

template <class T, bool IS_MAX>
inline size_t extremum_element(const T *buffer, size_t size)
{
    return extremum_element_sse2<T, IS_MAX, false>(buffer, size,
                                                   static_cast<T>(0));
}

#else

template <class T, bool IS_MAX>
inline size_t extremum_element(const T *buffer, size_t size, T noDataValue)
{
    if constexpr (is_floating_point_v<T>)
        return extremum_element_with_nan_generic<T, IS_MAX>(buffer, size,
                                                            noDataValue);
    else
        return extremum_element_generic<T, IS_MAX>(buffer, size, true,
                                                   noDataValue);
}

template <class T, bool IS_MAX>
inline size_t extremum_element(const T *buffer, size_t size)
{
    if constexpr (is_floating_point_v<T>)
        return extremum_element_with_nan_generic<T, IS_MAX>(buffer, size);
    else
        return extremum_element_generic<T, IS_MAX>(buffer, size, false,
                                                   static_cast<T>(0));
}

#endif

/************************************************************************/
/*                          extremum_element()                          */
/************************************************************************/

template <class T, bool IS_MAX>
inline size_t extremum_element(const T *buffer, size_t size, bool bHasNoData,
                               T noDataValue)
{
#if GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3, 11, 0) &&                      \
    !defined(GDAL_MINMAX_ELEMENT_USE_SSE2)
    if constexpr (std::is_same_v<T, GFloat16>)
    {
        if (bHasNoData)
            return extremum_element_with_nan_generic<T, IS_MAX>(buffer, size,
                                                                noDataValue);
        else
            return extremum_element_with_nan_generic<T, IS_MAX>(buffer, size);
    }
    else
#endif
        if (bHasNoData)
        return extremum_element<T, IS_MAX>(buffer, size, noDataValue);
    else
        return extremum_element<T, IS_MAX>(buffer, size);
}

template <class T, class NODATAType>
inline bool IsValueExactAs([[maybe_unused]] NODATAType noDataValue)
{
    if constexpr (std::is_same_v<T, NODATAType>)
        return true;
    else
        return GDALIsValueExactAs<T>(static_cast<double>(noDataValue));
}

template <bool IS_MAX, class NODATAType>
size_t extremum_element(const void *buffer, size_t nElts, GDALDataType eDT,
                        bool bHasNoData, NODATAType noDataValue)
{
    switch (eDT)
    {
#if GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3, 7, 0)
        case GDT_Int8:
        {
            using T = int8_t;
            bHasNoData = bHasNoData && IsValueExactAs<T>(noDataValue);
            return extremum_element<T, IS_MAX>(
                static_cast<const T *>(buffer), nElts, bHasNoData,
                bHasNoData ? static_cast<T>(noDataValue) : 0);
        }
#endif
        case GDT_Byte:
        {
            using T = uint8_t;
            bHasNoData = bHasNoData && IsValueExactAs<T>(noDataValue);
            return extremum_element<T, IS_MAX>(
                static_cast<const T *>(buffer), nElts, bHasNoData,
                bHasNoData ? static_cast<T>(noDataValue) : 0);
        }
        case GDT_Int16:
        {
            using T = int16_t;
            bHasNoData = bHasNoData && IsValueExactAs<T>(noDataValue);
            return extremum_element<T, IS_MAX>(
                static_cast<const T *>(buffer), nElts, bHasNoData,
                bHasNoData ? static_cast<T>(noDataValue) : 0);
        }
        case GDT_UInt16:
        {
            using T = uint16_t;
            bHasNoData = bHasNoData && IsValueExactAs<T>(noDataValue);
            return extremum_element<T, IS_MAX>(
                static_cast<const T *>(buffer), nElts, bHasNoData,
                bHasNoData ? static_cast<T>(noDataValue) : 0);
        }
        case GDT_Int32:
        {
            using T = int32_t;
            bHasNoData = bHasNoData && IsValueExactAs<T>(noDataValue);
            return extremum_element<T, IS_MAX>(
                static_cast<const T *>(buffer), nElts, bHasNoData,
                bHasNoData ? static_cast<T>(noDataValue) : 0);
        }
        case GDT_UInt32:
        {
            using T = uint32_t;
            bHasNoData = bHasNoData && IsValueExactAs<T>(noDataValue);
            return extremum_element<T, IS_MAX>(
                static_cast<const T *>(buffer), nElts, bHasNoData,
                bHasNoData ? static_cast<T>(noDataValue) : 0);
        }
#if GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3, 5, 0)
        case GDT_Int64:
        {
            using T = int64_t;
            bHasNoData = bHasNoData && IsValueExactAs<T>(noDataValue);
            return extremum_element<T, IS_MAX>(
                static_cast<const T *>(buffer), nElts, bHasNoData,
                bHasNoData ? static_cast<T>(noDataValue) : 0);
        }
        case GDT_UInt64:
        {
            using T = uint64_t;
            bHasNoData = bHasNoData && IsValueExactAs<T>(noDataValue);
            return extremum_element<T, IS_MAX>(
                static_cast<const T *>(buffer), nElts, bHasNoData,
                bHasNoData ? static_cast<T>(noDataValue) : 0);
        }
#endif
#if GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3, 11, 0)
        case GDT_Float16:
        {
            using T = GFloat16;
            bHasNoData = bHasNoData && IsValueExactAs<T>(noDataValue);
            return extremum_element<T, IS_MAX>(
                static_cast<const T *>(buffer), nElts, bHasNoData,
                bHasNoData ? static_cast<T>(noDataValue) : static_cast<T>(0));
        }
#endif
        case GDT_Float32:
        {
            using T = float;
            bHasNoData = bHasNoData && IsValueExactAs<T>(noDataValue);
            return extremum_element<T, IS_MAX>(
                static_cast<const T *>(buffer), nElts, bHasNoData,
                bHasNoData ? static_cast<T>(noDataValue) : 0);
        }
        case GDT_Float64:
        {
            using T = double;
            bHasNoData = bHasNoData && IsValueExactAs<T>(noDataValue);
            return extremum_element<T, IS_MAX>(
                static_cast<const T *>(buffer), nElts, bHasNoData,
                bHasNoData ? static_cast<T>(noDataValue) : 0);
        }
        case GDT_CInt16:
        case GDT_CInt32:
#if GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3, 11, 0)
        case GDT_CFloat16:
#endif
        case GDT_CFloat32:
        case GDT_CFloat64:
        case GDT_Unknown:
        case GDT_TypeCount:
            break;
    }
    CPLError(CE_Failure, CPLE_NotSupported,
             "%s not supported for this data type.", __FUNCTION__);
    return 0;
}

}  // namespace detail

/************************************************************************/
/*                            max_element()                             */
/************************************************************************/

/** Return the index of the element where the maximum value is hit.
 *
 * If it is hit in several locations, it is not specified which one will be
 * returned.
 *
 * @param buffer Vector of nElts elements of type eDT.
 * @param nElts Number of elements in buffer.
 * @param eDT Data type of the elements of buffer.
 * @param bHasNoData Whether noDataValue is valid.
 * @param noDataValue Nodata value, only taken into account if bHasNoData == true
 *
 * @since GDAL 3.11
 */
template <class NODATAType>
inline size_t max_element(const void *buffer, size_t nElts, GDALDataType eDT,
                          bool bHasNoData, NODATAType noDataValue)
{
    return detail::extremum_element<true>(buffer, nElts, eDT, bHasNoData,
                                          noDataValue);
}

/************************************************************************/
/*                            min_element()                             */
/************************************************************************/

/** Return the index of the element where the minimum value is hit.
 *
 * If it is hit in several locations, it is not specified which one will be
 * returned.
 *
 * @param buffer Vector of nElts elements of type eDT.
 * @param nElts Number of elements in buffer.
 * @param eDT Data type of the elements of buffer.
 * @param bHasNoData Whether noDataValue is valid.
 * @param noDataValue Nodata value, only taken into account if bHasNoData == true
 *
 * @since GDAL 3.11
 */
template <class NODATAType>
inline size_t min_element(const void *buffer, size_t nElts, GDALDataType eDT,
                          bool bHasNoData, NODATAType noDataValue)
{
    return detail::extremum_element<false>(buffer, nElts, eDT, bHasNoData,
                                           noDataValue);
}

namespace detail
{

#ifdef NOT_EFFICIENT

/************************************************************************/
/*                           minmax_element()                           */
/************************************************************************/

template <class T>
std::pair<size_t, size_t> minmax_element(const T *v, size_t size, T noDataValue)
{
    static_assert(!(std::is_floating_point_v<T>));
#if GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3, 11, 0)
    static_assert(!std::is_same_v<T, GFloat16>);
#endif
    if (size == 0)
        return std::pair(0, 0);
    size_t idx_of_min = 0;
    size_t idx_of_max = 0;
    T vmin = v[0];
    T vmax = v[0];
    bool extremum_is_nodata = vmin == noDataValue;
    size_t i = 1;
    for (; i < size; ++i)
    {
        if (v[i] != noDataValue && (v[i] < vmin || extremum_is_nodata))
        {
            vmin = v[i];
            idx_of_min = i;
            extremum_is_nodata = false;
        }
        if (v[i] != noDataValue && (v[i] > vmax || extremum_is_nodata))
        {
            vmax = v[i];
            idx_of_max = i;
            extremum_is_nodata = false;
        }
    }
    return std::pair(idx_of_min, idx_of_max);
}

template <class T>
std::pair<size_t, size_t> minmax_element(const T *v, size_t size)
{
    static_assert(!(std::is_floating_point_v<T>));
#if GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3, 11, 0)
    static_assert(!std::is_same_v<T, GFloat16>);
#endif
    if (size == 0)
        return std::pair(0, 0);
    size_t idx_of_min = 0;
    size_t idx_of_max = 0;
    T vmin = v[0];
    T vmax = v[0];
    size_t i = 1;
    for (; i < size; ++i)
    {
        if (v[i] < vmin)
        {
            vmin = v[i];
            idx_of_min = i;
        }
        if (v[i] > vmax)
        {
            vmax = v[i];
            idx_of_max = i;
        }
    }
    return std::pair(idx_of_min, idx_of_max);
}

template <class T>
inline std::pair<size_t, size_t> minmax_element_with_nan(const T *v,
                                                         size_t size)
{
    if (size == 0)
        return std::pair(0, 0);
    size_t idx_of_min = 0;
    size_t idx_of_max = 0;
    T vmin = v[0];
    T vmax = v[0];
    size_t i = 1;
    if (IsNan(v[0]))
    {
        for (; i < size; ++i)
        {
            if (!IsNan(v[i]))
            {
                vmin = v[i];
                idx_of_min = i;
                vmax = v[i];
                idx_of_max = i;
                break;
            }
        }
    }
    for (; i < size; ++i)
    {
        if (v[i] < vmin)
        {
            vmin = v[i];
            idx_of_min = i;
        }
        if (v[i] > vmax)
        {
            vmax = v[i];
            idx_of_max = i;
        }
    }
    return std::pair(idx_of_min, idx_of_max);
}

template <>
std::pair<size_t, size_t> minmax_element<float>(const float *v, size_t size)
{
    return minmax_element_with_nan<float>(v, size);
}

template <>
std::pair<size_t, size_t> minmax_element<double>(const double *v, size_t size)
{
    return minmax_element_with_nan<double>(v, size);
}

template <class T>
inline std::pair<size_t, size_t> minmax_element(const T *buffer, size_t size,
                                                bool bHasNoData, T noDataValue)
{
    if (bHasNoData)
    {
        return minmax_element<T>(buffer, size, noDataValue);
    }
    else
    {
        return minmax_element<T>(buffer, size);
    }
}
#else

/************************************************************************/
/*                           minmax_element()                           */
/************************************************************************/

template <class T>
inline std::pair<size_t, size_t> minmax_element(const T *buffer, size_t size,
                                                bool bHasNoData, T noDataValue)
{
#ifdef NOT_EFFICIENT
    if (bHasNoData)
    {
        return minmax_element<T>(buffer, size, noDataValue);
    }
    else
    {
        return minmax_element<T>(buffer, size);
        //auto [imin, imax] = std::minmax_element(buffer, buffer + size);
        //return std::pair(imin - buffer, imax - buffer);
    }
#else

#if !defined(GDAL_MINMAX_ELEMENT_USE_SSE2)
    if constexpr (!std::is_floating_point_v<T>
#if GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3, 11, 0)
                  && !std::is_same_v<T, GFloat16>
#endif
    )
    {
        if (!bHasNoData)
        {
            auto [min_iter, max_iter] =
                std::minmax_element(buffer, buffer + size);
            return std::pair(min_iter - buffer, max_iter - buffer);
        }
    }
#endif

    // Using separately min and max is more efficient than computing them
    // within the same loop
    return std::pair(
        extremum_element<T, false>(buffer, size, bHasNoData, noDataValue),
        extremum_element<T, true>(buffer, size, bHasNoData, noDataValue));

#endif
}
#endif

}  // namespace detail

/************************************************************************/
/*                           minmax_element()                           */
/************************************************************************/

/** Return the index of the elements where the minimum and maximum values are hit.
 *
 * If they are hit in several locations, it is not specified which one will be
 * returned (contrary to std::minmax_element).
 *
 * @param buffer Vector of nElts elements of type eDT.
 * @param nElts Number of elements in buffer.
 * @param eDT Data type of the elements of buffer.
 * @param bHasNoData Whether noDataValue is valid.
 * @param noDataValue Nodata value, only taken into account if bHasNoData == true
 *
 * @since GDAL 3.11
 */
template <class NODATAType>
inline std::pair<size_t, size_t>
minmax_element(const void *buffer, size_t nElts, GDALDataType eDT,
               bool bHasNoData, NODATAType noDataValue)
{
    switch (eDT)
    {
#if GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3, 7, 0)
        case GDT_Int8:
        {
            using T = int8_t;
            bHasNoData = bHasNoData && detail::IsValueExactAs<T>(noDataValue);
            return detail::minmax_element<T>(
                static_cast<const T *>(buffer), nElts, bHasNoData,
                bHasNoData ? static_cast<T>(noDataValue) : 0);
        }
#endif
        case GDT_Byte:
        {
            using T = uint8_t;
            bHasNoData = bHasNoData && detail::IsValueExactAs<T>(noDataValue);
            return detail::minmax_element<T>(
                static_cast<const T *>(buffer), nElts, bHasNoData,
                bHasNoData ? static_cast<T>(noDataValue) : 0);
        }
        case GDT_Int16:
        {
            using T = int16_t;
            bHasNoData = bHasNoData && detail::IsValueExactAs<T>(noDataValue);
            return detail::minmax_element<T>(
                static_cast<const T *>(buffer), nElts, bHasNoData,
                bHasNoData ? static_cast<T>(noDataValue) : 0);
        }
        case GDT_UInt16:
        {
            using T = uint16_t;
            bHasNoData = bHasNoData && detail::IsValueExactAs<T>(noDataValue);
            return detail::minmax_element<T>(
                static_cast<const T *>(buffer), nElts, bHasNoData,
                bHasNoData ? static_cast<T>(noDataValue) : 0);
        }
        case GDT_Int32:
        {
            using T = int32_t;
            bHasNoData = bHasNoData && detail::IsValueExactAs<T>(noDataValue);
            return detail::minmax_element<T>(
                static_cast<const T *>(buffer), nElts, bHasNoData,
                bHasNoData ? static_cast<T>(noDataValue) : 0);
        }
        case GDT_UInt32:
        {
            using T = uint32_t;
            bHasNoData = bHasNoData && detail::IsValueExactAs<T>(noDataValue);
            return detail::minmax_element<T>(
                static_cast<const T *>(buffer), nElts, bHasNoData,
                bHasNoData ? static_cast<T>(noDataValue) : 0);
        }
#if GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3, 5, 0)
        case GDT_Int64:
        {
            using T = int64_t;
            bHasNoData = bHasNoData && detail::IsValueExactAs<T>(noDataValue);
            return detail::minmax_element<T>(
                static_cast<const T *>(buffer), nElts, bHasNoData,
                bHasNoData ? static_cast<T>(noDataValue) : 0);
        }
        case GDT_UInt64:
        {
            using T = uint64_t;
            bHasNoData = bHasNoData && detail::IsValueExactAs<T>(noDataValue);
            return detail::minmax_element<T>(
                static_cast<const T *>(buffer), nElts, bHasNoData,
                bHasNoData ? static_cast<T>(noDataValue) : 0);
        }
#endif
#if GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3, 11, 0)
        case GDT_Float16:
        {
            using T = GFloat16;
            bHasNoData = bHasNoData && detail::IsValueExactAs<T>(noDataValue);
            return detail::minmax_element<T>(
                static_cast<const T *>(buffer), nElts, bHasNoData,
                bHasNoData ? static_cast<T>(noDataValue) : static_cast<T>(0));
        }
#endif
        case GDT_Float32:
        {
            using T = float;
            bHasNoData = bHasNoData && detail::IsValueExactAs<T>(noDataValue);
            return detail::minmax_element<T>(
                static_cast<const T *>(buffer), nElts, bHasNoData,
                bHasNoData ? static_cast<T>(noDataValue) : 0);
        }
        case GDT_Float64:
        {
            using T = double;
            bHasNoData = bHasNoData && detail::IsValueExactAs<T>(noDataValue);
            return detail::minmax_element<T>(
                static_cast<const T *>(buffer), nElts, bHasNoData,
                bHasNoData ? static_cast<T>(noDataValue) : 0);
        }
        case GDT_CInt16:
        case GDT_CInt32:
#if GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3, 11, 0)
        case GDT_CFloat16:
#endif
        case GDT_CFloat32:
        case GDT_CFloat64:
        case GDT_Unknown:
        case GDT_TypeCount:
            break;
    }
    CPLError(CE_Failure, CPLE_NotSupported,
             "%s not supported for this data type.", __FUNCTION__);
    return std::pair(0, 0);
}

}  // namespace GDAL_MINMAXELT_NS

#endif  // GDAL_MINMAX_ELEMENT_INCLUDED
flet / flet-libgdal python

Products

About

Resources

Contact Gemfury