Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

arrow-nightlies / pyarrow   python

Repository URL to install this package:

Version: 19.0.0.dev259 

/ include / arrow / util / range.h

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <cstddef>
#include <cstdint>
#include <iterator>
#include <numeric>
#include <tuple>
#include <utility>
#include <vector>

namespace arrow::internal {

/// Build a vector holding the consecutive values start, start + 1, ...,
/// ending just before stop. The result is empty when start is greater than
/// stop (and also when the two are equal).
template <typename T>
std::vector<T> Iota(T start, T stop) {
  std::vector<T> values;
  if (!(start > stop)) {
    // stop - start elements, filled with increasing values beginning at start
    values.resize(static_cast<size_t>(stop - start));
    std::iota(values.begin(), values.end(), start);
  }
  return values;
}

/// Build a vector holding the consecutive values 0, 1, ..., ending just
/// before length.
template <typename T>
std::vector<T> Iota(T length) {
  const T zero = static_cast<T>(0);
  return Iota<T>(zero, length);
}

/// Create a range from a callable which takes a single index parameter
/// and returns the value at that index, together with a length.
///
/// Values are not stored: every dereference of an iterator invokes the
/// callable with the iterator's current index.
///
/// Iterators keep a pointer to the range they were created from, so the
/// range must stay alive (at the same address) while its iterators are in
/// use. Only iterators obtained from the same range should be compared,
/// as with other STL containers.
template <typename Generator>
class LazyRange {
 private:
  // callable which generates the values
  // has to be defined at the beginning of the class for type deduction
  const Generator gen_;
  // the length of the range
  int64_t length_;
#ifdef _MSC_VER
  // workaround to VS2010 not supporting decltype properly
  // see https://stackoverflow.com/questions/21782846/decltype-for-class-member-function
  static Generator gen_static_;
#endif

 public:
  /// Type produced by invoking the generator with an index
#ifdef _MSC_VER
  using return_type = decltype(gen_static_(0));
#else
  using return_type = decltype(gen_(0));
#endif

  /// Construct a new range from a callable and length
  ///
  /// \param gen callable invoked as gen(index) to produce each value
  /// \param length number of values in the range; end() is at this index
  LazyRange(Generator gen, int64_t length)
      // Generator may be an lvalue reference type (e.g. when deduced from a
      // forwarding reference), so forward rather than move: this moves
      // by-value callables (allowing move-only ones) and rebinds references.
      : gen_(std::forward<Generator>(gen)), length_(length) {}

  // Class of the dependent iterator, created implicitly by begin and end
  class RangeIter {
   public:
    using difference_type = int64_t;
    using value_type = return_type;
    using reference = const value_type&;
    using pointer = const value_type*;
    using iterator_category = std::forward_iterator_tag;

#ifdef _MSC_VER
    // msvc complains about unchecked iterators,
    // see https://stackoverflow.com/questions/21655496/error-c4996-checked-iterators
    using _Unchecked_type = typename LazyRange<Generator>::RangeIter;
#endif

    RangeIter() = delete;
    RangeIter(const RangeIter& other) = default;
    RangeIter& operator=(const RangeIter& other) = default;

    /// Create an iterator over `range` positioned at `index`.
    /// Only the address of `range` is stored.
    RangeIter(const LazyRange<Generator>& range, int64_t index)
        : range_(&range), index_(index) {}

    /// Invoke the parent range's generator with the current index.
    /// The value is returned by value, not by reference.
    const return_type operator*() const { return range_->gen_(index_); }

    /// Iterator over the same range, `length` positions further on
    RangeIter operator+(difference_type length) const {
      return RangeIter(*range_, index_ + length);
    }

    // pre-increment
    RangeIter& operator++() {
      ++index_;
      return *this;
    }

    // post-increment
    RangeIter operator++(int) {
      auto copy = RangeIter(*this);
      ++index_;
      return copy;
    }

    // Equal only when both the index and the parent range match
    bool operator==(const typename LazyRange<Generator>::RangeIter& other) const {
      return this->index_ == other.index_ && this->range_ == other.range_;
    }

    bool operator!=(const typename LazyRange<Generator>::RangeIter& other) const {
      return this->index_ != other.index_ || this->range_ != other.range_;
    }

    // Distance and ordering look at the indices only; the parent range is
    // not taken into account here.
    int64_t operator-(const typename LazyRange<Generator>::RangeIter& other) const {
      return this->index_ - other.index_;
    }

    bool operator<(const typename LazyRange<Generator>::RangeIter& other) const {
      return this->index_ < other.index_;
    }

   private:
    // parent range reference
    const LazyRange* range_;
    // current index
    int64_t index_;
  };

  friend class RangeIter;

  // Create a new begin const iterator.
  // Const-qualified so that const ranges can be iterated as well.
  RangeIter begin() const { return RangeIter(*this, 0); }

  // Create a new end const iterator (positioned at index `length_`).
  RangeIter end() const { return RangeIter(*this, length_); }
};

/// Convenience factory: wraps a callable (e.g. a lambda) and a length into a
/// LazyRange whose template argument is deduced from the callable.
template <typename Generator>
LazyRange<Generator> MakeLazyRange(Generator&& gen, int64_t length) {
  using Range = LazyRange<Generator>;
  return Range(std::forward<Generator>(gen), length);
}

/// \brief A helper for iterating multiple ranges simultaneously, similar to C++23's
/// zip() view adapter modelled after python's built-in zip() function.
///
/// \code {.cpp}
/// const std::vector<SomeTable>& tables = ...
/// std::function<std::vector<std::string>()> GetNames = ...
/// for (auto [table, name] : Zip(tables, GetNames())) {
///   static_assert(std::is_same_v<decltype(table), const SomeTable&>);
///   static_assert(std::is_same_v<decltype(name), std::string&>);
///   // temporaries (like this vector of strings) are kept alive for the
///   // duration of the loop and are safely movable.
///   RegisterTableWithName(std::move(name), &table);
/// }
/// \endcode
///
/// The zipped sequence ends as soon as any of its member ranges ends.
///
/// Always use `auto` for the loop's declaration; it will always be a tuple
/// of references so for example using `const auto&` will compile but will
/// *look* like forcing const-ness even though the members of the tuple are
/// still mutable references.
///
/// NOTE: we *could* make Zip a more full fledged range and enable things like
/// - gtest recognizing it as a container; it currently doesn't since Zip is
///   always mutable so this breaks:
///       EXPECT_THAT(Zip(std::vector{0}, std::vector{1}),
///                   ElementsAre(std::tuple{0, 1}));
/// - letting it be random access when possible so we can do things like *sort*
///   parallel ranges
/// - ...
///
/// However doing this will increase the compile time overhead of using Zip as
/// long as we're still using headers. Therefore until we can use c++20 modules:
/// *don't* extend Zip.
template <typename Ranges, typename Indices>
struct Zip;

// Deduction guide: each argument's deduced type becomes a tuple element, so
// lvalue ranges are held by reference and temporaries are held by value.
template <typename... Ranges>
Zip(Ranges&&...) -> Zip<std::tuple<Ranges...>, std::index_sequence_for<Ranges...>>;

template <typename... Ranges, size_t... I>
struct Zip<std::tuple<Ranges...>, std::index_sequence<I...>> {
  explicit Zip(Ranges... ranges) : ranges_(std::forward<Ranges>(ranges)...) {}

  // The zipped ranges. Declared ahead of the aliases below because they
  // name it inside decltype.
  std::tuple<Ranges...> ranges_;

  // Tuple with one end iterator per zipped range
  using sentinel = std::tuple<decltype(std::end(std::get<I>(ranges_)))...>;

  // Tuple with one begin iterator per zipped range, advanced in lockstep
  struct iterator : std::tuple<decltype(std::begin(std::get<I>(ranges_)))...> {
    using std::tuple<decltype(std::begin(std::get<I>(ranges_)))...>::tuple;

    // Dereference every member iterator and bundle the results in a tuple
    constexpr auto operator*() {
      using Dereferenced = std::tuple<decltype(*std::get<I>(*this))...>;
      return Dereferenced{*std::get<I>(*this)...};
    }

    // Advance every member iterator by one
    constexpr iterator& operator++() {
      (++std::get<I>(*this), ...);
      return *this;
    }

    // True only while every member iterator differs from its matching end,
    // so iteration stops as soon as any one range is exhausted
    constexpr bool operator!=(const sentinel& s) const {
      return (... && (std::get<I>(*this) != std::get<I>(s)));
    }
  };

  constexpr iterator begin() { return {std::begin(std::get<I>(ranges_))...}; }
  constexpr sentinel end() { return {std::end(std::get<I>(ranges_))...}; }
};

/// \brief An endless, lazily generated sequence of integers counting up from 0.
///
/// Paired with Zip() this emulates python's built-in enumerate() function:
///
/// \code {.cpp}
/// const std::vector<SomeTable>& tables = ...
/// for (auto [i, table] : Zip(Enumerate<>, tables)) {
///   std::cout << "#" << i << ": " << table.name() << std::endl;
/// }
/// \endcode
template <typename I = size_t>
constexpr auto Enumerate = [] {
  struct Counting {
    // Tag type marking the end of the sequence; it is never reached.
    struct sentinel {};

    struct iterator {
      // Current count, starting at zero
      I value{0};

      constexpr I operator*() { return value; }

      constexpr iterator& operator++() {
        ++value;
        return *this;
      }

      // Comparing against the sentinel always reports "not at the end"
      constexpr std::true_type operator!=(sentinel) const { return {}; }
    };

    constexpr iterator begin() const { return {}; }
    constexpr sentinel end() const { return {}; }
  };

  return Counting{};
}();

}  // namespace arrow::internal