Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

arrow-nightlies / pyarrow   python

Repository URL to install this package:

Version: 19.0.0.dev259 

/ include / arrow / util / iterator.h

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <cassert>
#include <functional>
#include <memory>
#include <optional>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>

#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/util/compare.h"
#include "arrow/util/functional.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"

namespace arrow {

// Forward declaration; the full definition of Iterator appears later in this header.
template <typename T>
class Iterator;

template <typename T>
struct IterationTraits {
  /// \brief Produce the reserved value that marks the end of iteration.
  ///
  /// The primary template constructs that value from NULLPTR, which suits the
  /// common case of iterators yielding pointer-like types. Provide a
  /// specialization of IterationTraits when a type needs a different sentinel.
  ///
  /// Note: only use this to *return* a terminal value. To *test* whether a
  /// value is terminal, go through IsIterationEnd (which calls IsEnd).
  static T End() {
    return T(NULLPTR);
  }

  /// \brief Report whether \p val is the terminal value.
  ///
  /// This is a customization point (rather than a plain ==) because a
  /// specialization may have a distinct final value without T being
  /// comparable. The primary template simply compares against End().
  static bool IsEnd(const T& val) {
    const bool is_terminal = (val == End());
    return is_terminal;
  }
};

template <typename T>
T IterationEnd() {
  return IterationTraits<T>::End();
}

template <typename T>
bool IsIterationEnd(const T& val) {
  return IterationTraits<T>::IsEnd(val);
}

template <typename T>
struct IterationTraits<std::optional<T>> {
  /// \brief For a sequence of optionals, the empty optional (nullopt) is the
  /// value that signals the end of iteration.
  /// Specialize IterationTraits if different end semantics are required.
  static std::optional<T> End() {
    return std::optional<T>{};
  }

  /// \brief A value is terminal exactly when it is empty (equal to nullopt).
  /// Specialize IterationTraits if different end semantics are required.
  static bool IsEnd(const std::optional<T>& val) {
    return val == std::nullopt;
  }

  // TODO(bkietz) The range-for loop over Iterator<optional<T>> yields
  // Result<optional<T>>, which is unnecessary (since only the unyielded end
  // optional is nullopt). Add IterationTraits::GetRangeElement() to handle
  // this case.
};

/// \brief A generic Iterator that can return errors
///
/// Iterator<T> type-erases any object exposing `Result<T> Next()`. The wrapped
/// object lives on the heap behind a `unique_ptr<void>`; a pair of function
/// pointers (the deleter and next_) recover the concrete type when needed.
template <typename T>
class Iterator : public util::EqualityComparable<Iterator<T>> {
 public:
  /// \brief Iterator may be constructed from any type which has a member function
  /// with signature Result<T> Next();
  /// End of iterator is signalled by returning IterationTraits<T>::End();
  ///
  /// The argument is moved or copied to the heap and kept in a unique_ptr<void>. Only
  /// its destructor and its Next method (which are stored in function pointers) are
  /// referenced after construction.
  ///
  /// This approach is used to dodge MSVC linkage hell (ARROW-6244, ARROW-6558) when using
  /// an abstract template base class: instead of being inlined as usual for a template
  /// function the base's virtual destructor will be exported, leading to multiple
  /// definition errors when linking to any other TU where the base is instantiated.
  template <typename Wrapped>
  explicit Iterator(Wrapped has_next)
      : ptr_(new Wrapped(std::move(has_next)), Delete<Wrapped>), next_(Next<Wrapped>) {}

  /// \brief Construct a null iterator: it wraps nothing (the deleter is a no-op) and
  /// Next() immediately returns IterationTraits<T>::End().
  Iterator() : ptr_(NULLPTR, [](void*) {}) {}

  /// \brief Return the next element of the sequence, IterationTraits<T>::End() when the
  /// iteration is completed.
  ///
  /// Once the wrapped object successfully returns the end value it is destroyed
  /// (ptr_ is reset), so every later call takes the else-branch and returns the end
  /// value without touching the wrapped object again. An error result does NOT reset
  /// ptr_; the wrapped object stays alive and will be asked again on the next call.
  Result<T> Next() {
    if (ptr_) {
      auto next_result = next_(ptr_.get());
      if (next_result.ok() && IsIterationEnd(next_result.ValueUnsafe())) {
        ptr_.reset(NULLPTR);
      }
      return next_result;
    } else {
      return IterationTraits<T>::End();
    }
  }

  /// Pass each element of the sequence to a visitor. Will return any error status
  /// returned by the visitor, terminating iteration.
  ///
  /// An error produced by Next() is likewise returned and stops the loop. The end
  /// value itself is never handed to the visitor.
  template <typename Visitor>
  Status Visit(Visitor&& visitor) {
    for (;;) {
      ARROW_ASSIGN_OR_RAISE(auto value, Next());

      if (IsIterationEnd(value)) break;

      ARROW_RETURN_NOT_OK(visitor(std::move(value)));
    }

    return Status::OK();
  }

  /// Iterators will only compare equal if they are both null.
  /// Equality comparability is required to make an Iterator of Iterators
  /// (to check for the end condition).
  bool Equals(const Iterator& other) const { return ptr_ == other.ptr_; }

  /// \brief True while the iterator still wraps an object, i.e. it is neither
  /// default constructed nor already reset by Next() at the end of the sequence.
  explicit operator bool() const { return ptr_ != NULLPTR; }

  /// \brief Minimal input-iterator adapter backing range-for over an Iterator.
  ///
  /// It buffers one Result<T> in value_. The end sentinel (default constructed)
  /// holds IterationTraits<T>::End() and no underlying iterator.
  class RangeIterator {
   public:
    RangeIterator() : value_(IterationTraits<T>::End()) {}

    /// Takes ownership of \p i and eagerly fetches the first element.
    explicit RangeIterator(Iterator i)
        : value_(IterationTraits<T>::End()),
          iterator_(std::make_shared<Iterator>(std::move(i))) {
      Next();
    }

    /// Compares the buffered results only; the underlying iterators are not compared.
    bool operator!=(const RangeIterator& other) const { return value_ != other.value_; }

    RangeIterator& operator++() {
      Next();
      return *this;
    }

    /// Hands out the buffered element. On success the value is moved out and the
    /// buffer is overwritten with the end value, so each element can be dereferenced
    /// only once per increment. A buffered error is returned as-is and left in place
    /// so that the following increment can terminate the loop.
    Result<T> operator*() {
      ARROW_RETURN_NOT_OK(value_.status());

      auto value = std::move(value_);
      value_ = IterationTraits<T>::End();
      return value;
    }

   private:
    /// Advance the buffer. If the buffer currently holds an error, iteration is cut
    /// short by storing the end value instead of pulling from the iterator again.
    void Next() {
      if (!value_.ok()) {
        value_ = IterationTraits<T>::End();
        return;
      }
      value_ = iterator_->Next();
    }

    Result<T> value_;
    std::shared_ptr<Iterator> iterator_;
  };

  /// \brief Start a range-for traversal. Note that this moves *this into the
  /// returned RangeIterator, so the Iterator is consumed by the traversal.
  RangeIterator begin() { return RangeIterator(std::move(*this)); }

  RangeIterator end() { return RangeIterator(); }

  /// \brief Move every element of this iterator into a vector.
  ///
  /// Returns the first error encountered, if any. Implemented with range-for, so
  /// the iterator is consumed (see begin()).
  Result<std::vector<T>> ToVector() {
    std::vector<T> out;
    for (auto maybe_element : *this) {
      ARROW_ASSIGN_OR_RAISE(auto element, maybe_element);
      out.push_back(std::move(element));
    }
    return out;
  }

 private:
  /// Implementation of deleter for ptr_: Casts from void* to the wrapped type and
  /// deletes that.
  template <typename HasNext>
  static void Delete(void* ptr) {
    delete static_cast<HasNext*>(ptr);
  }

  /// Implementation of Next: Casts from void* to the wrapped type and invokes that
  /// type's Next member function.
  template <typename HasNext>
  static Result<T> Next(void* ptr) {
    return static_cast<HasNext*>(ptr)->Next();
  }

  /// ptr_ is a unique_ptr to void with a custom deleter: a function pointer which first
  /// casts from void* to a pointer to the wrapped type then deletes that.
  std::unique_ptr<void, void (*)(void*)> ptr_;

  /// next_ is a function pointer which first casts from void* to a pointer to the wrapped
  /// type then invokes its Next member function.
  /// It stays NULLPTR for a default-constructed Iterator, which is safe because Next()
  /// only calls it when ptr_ is non-null.
  Result<T> (*next_)(void*) = NULLPTR;
};

/// \brief Instruction returned by a transformer describing what happened to the
/// current source item: an optional yielded value, whether the whole iteration
/// is finished, and whether the source item has been consumed.
template <typename T>
struct TransformFlow {
  using YieldValueType = T;

  /// Yield \p value. Yielding never marks the flow as finished.
  TransformFlow(YieldValueType value, bool ready_for_next)
      : ready_for_next_(ready_for_next), yield_value_(std::move(value)) {}

  /// Yield nothing; only carry the finished / ready-for-next flags.
  TransformFlow(bool finished, bool ready_for_next)
      : finished_(finished), ready_for_next_(ready_for_next) {}

  /// True when a value was yielded.
  bool HasValue() const { return static_cast<bool>(yield_value_); }

  /// True when the transformer asked to stop the iteration.
  bool Finished() const { return finished_; }

  /// True when the current source item is consumed and the next may be fetched.
  bool ReadyForNext() const { return ready_for_next_; }

  /// Copy of the yielded value. Must only be called when HasValue() is true.
  T Value() const { return *yield_value_; }

  // The default member initializers supply whatever a constructor leaves out.
  bool finished_ = false;
  bool ready_for_next_ = false;
  std::optional<YieldValueType> yield_value_;
};

/// \brief Tag a transformer can return to end the iteration without yielding.
/// An rvalue of this type converts to a TransformFlow<T> (for any T) that is
/// finished and ready for the next item.
struct TransformFinish {
  template <typename T>
  operator TransformFlow<T>() && {  // NOLINT explicit
    const bool finished = true;
    const bool ready_for_next = true;
    return TransformFlow<T>(finished, ready_for_next);
  }
};

/// \brief Tag a transformer can return to drop the current source item without
/// yielding. An rvalue of this type converts to a TransformFlow<T> (for any T)
/// that is not finished and is ready for the next item.
struct TransformSkip {
  template <typename T>
  operator TransformFlow<T>() && {  // NOLINT explicit
    const bool finished = false;
    const bool ready_for_next = true;
    return TransformFlow<T>(finished, ready_for_next);
  }
};

/// \brief Build a TransformFlow<T> that yields \p value.
///
/// \param value the value to yield (value-initialized when omitted)
/// \param ready_for_next pass false to be called again with the same source item
template <typename T>
TransformFlow<T> TransformYield(T value = {}, bool ready_for_next = true) {
  TransformFlow<T> flow(std::move(value), ready_for_next);
  return flow;
}

/// \brief Callable type used to transform an iterator: it receives a source item of
/// type T (by value) and returns either an error or a TransformFlow<V> instruction.
template <typename T, typename V>
using Transformer = std::function<Result<TransformFlow<V>>(T)>;

/// \brief Implementation object behind a transformed iterator: pulls items of type T
/// from a source iterator, feeds them to a transformer and yields the resulting V values.
template <typename T, typename V>
class TransformIterator {
 public:
  explicit TransformIterator(Iterator<T> it, Transformer<T, V> transformer)
      : it_(std::move(it)),
        transformer_(std::move(transformer)),
        last_value_(),
        finished_() {}

  /// \brief Produce the next transformed value, or IterationTraits<V>::End() once
  /// finished.
  ///
  /// Each loop turn first lets Pump() work on the pending source item (if any). When
  /// Pump() has nothing to yield, the next source item is fetched into last_value_ and
  /// the loop repeats. On the very first call last_value_ is empty, so Pump() yields
  /// nothing and the first source item is fetched.
  Result<V> Next() {
    while (!finished_) {
      ARROW_ASSIGN_OR_RAISE(std::optional<V> next, Pump());
      if (next.has_value()) {
        return std::move(*next);
      }
      // An error from the source is propagated by the macro; finished_ is left untouched.
      ARROW_ASSIGN_OR_RAISE(last_value_, it_.Next());
    }
    return IterationTraits<V>::End();
  }

 private:
  // Calls the transform function on the current value.  Can return in several ways
  // * If the next value is requested (e.g. skip) it will return an empty optional
  // * If an invalid status is encountered that will be returned
  // * If finished it will return IterationTraits<V>::End()
  // * If a value is returned by the transformer that will be returned
  Result<std::optional<V>> Pump() {
    if (!finished_ && last_value_.has_value()) {
      // The pending item is passed as an lvalue, so it stays available in
      // last_value_ in case the transformer is not ready for the next item.
      auto next_res = transformer_(*last_value_);
      if (!next_res.ok()) {
        // A transformer error permanently ends the iteration.
        finished_ = true;
        return next_res.status();
      }
      auto next = std::move(*next_res);
      if (next.ReadyForNext()) {
        // The pending item is consumed. If it was the source's end marker there is
        // nothing more to fetch, so the iteration is over.
        if (IsIterationEnd(*last_value_)) {
          finished_ = true;
        }
        last_value_.reset();
      }
      if (next.Finished()) {
        finished_ = true;
      }
      // A yielded value takes precedence, even when finished_ was just set above.
      if (next.HasValue()) {
        return next.Value();
      }
    }
    if (finished_) {
      return IterationTraits<V>::End();
    }
    return std::nullopt;
  }

  Iterator<T> it_;
  Transformer<T, V> transformer_;
  // The source item currently being transformed; empty when a new one must be fetched.
  std::optional<T> last_value_;
  bool finished_ = false;
};

/// \brief Wrap \p it in a new Iterator whose elements are produced by \p op.
///
/// The transformer is invoked on each element of the source iterator. On every
/// invocation it may yield a value, skip, or finish the iteration. When yielding,
/// it either consumes the source item (the default, ready_for_next = true) or keeps
/// it, in which case it is invoked again on that same item.
///
/// This is essentially a more generic form of the map operation that can return 0, 1,
/// or many values for each of the source items.
///
/// The transformer also sees the end of the source sequence
/// (IterationTraits::End), in case it needs to emit some final item(s).
///
/// Any invalid status returned by the transformer will be returned immediately.
template <typename T, typename V>
Iterator<V> MakeTransformedIterator(Iterator<T> it, Transformer<T, V> op) {
  TransformIterator<T, V> transformed(std::move(it), std::move(op));
  return Iterator<V>(std::move(transformed));
}

/// \brief End-of-iteration semantics for an Iterator of Iterators: the terminal
/// value is a default constructed (null) Iterator.
template <typename T>
struct IterationTraits<Iterator<T>> {
  /// A null Iterator marks the end of the outer sequence.
  static Iterator<T> End() {
    return Iterator<T>{};
  }

  /// An inner iterator is terminal when it converts to false (it is null).
  static bool IsEnd(const Iterator<T>& val) {
    return !static_cast<bool>(val);
  }
};

/// \brief Adapter giving a nullary callable the `Result<T> Next()` shape
/// expected by Iterator<T>: each Next() is one invocation of the callable.
template <typename Fn, typename T>
class FunctionIterator {
 public:
  explicit FunctionIterator(Fn fn) : fn_(std::move(fn)) {}

  /// Invoke the stored callable and return whatever it produced.
  Result<T> Next() {
    Result<T> produced = fn_();
    return produced;
  }

 private:
  Fn fn_;
};

/// \brief Build an Iterator that calls \p fn every time Next() is requested.
///
/// The element type is deduced from the ValueType of the callable's return type.
template <typename Fn,
          typename Ret = typename internal::call_traits::return_type<Fn>::ValueType>
Iterator<Ret> MakeFunctionIterator(Fn fn) {
  using Impl = FunctionIterator<Fn, Ret>;
  return Iterator<Ret>(Impl(std::move(fn)));
}

/// \brief Build an Iterator with no elements: every Next() yields
/// IterationTraits<T>::End().
template <typename T>
Iterator<T> MakeEmptyIterator() {
  auto yield_end = []() -> Result<T> { return IterationTraits<T>::End(); };
  return MakeFunctionIterator(std::move(yield_end));
}

/// \brief Build an Iterator whose Next() reports \p s.
///
/// The status is captured by copy. When it is not OK, each Next() returns it;
/// when it is OK, Next() yields IterationTraits<T>::End() instead.
template <typename T>
Iterator<T> MakeErrorIterator(Status s) {
  auto report_status = [s]() -> Result<T> {
    ARROW_RETURN_NOT_OK(s);
    return IterationTraits<T>::End();
  };
  return MakeFunctionIterator(std::move(report_status));
}

/// \brief Iterator implementation that owns a std::vector and hands out its
/// elements one by one, in order.
template <typename T>
class VectorIterator {
 public:
  explicit VectorIterator(std::vector<T> v) : elements_(std::move(v)) {}

  /// Move the element at the cursor out of the vector and advance; once the
  /// cursor has reached the end, yield IterationTraits<T>::End().
  Result<T> Next() {
    if (i_ != elements_.size()) {
      const size_t current = i_++;
      return std::move(elements_[current]);
    }
    return IterationTraits<T>::End();
  }

 private:
  std::vector<T> elements_;
  // Index of the next element to yield.
  size_t i_ = 0;
};

/// \brief Build an Iterator<T> that takes ownership of \p v and yields its elements.
template <typename T>
Iterator<T> MakeVectorIterator(std::vector<T> v) {
  using Impl = VectorIterator<T>;
  return Iterator<T>(Impl(std::move(v)));
}

/// \brief Iterator implementation that owns a std::vector<T> and yields *pointers*
/// to its elements rather than the elements themselves.
/// Useful for a T for which IterationTraits<T>::End is not specialized: the
/// end of the sequence is signalled by a null pointer.
template <typename T>
class VectorPointingIterator {
 public:
  explicit VectorPointingIterator(std::vector<T> v) : elements_(std::move(v)) {}

  /// Yield the address of the element at the cursor and advance; yield NULLPTR
  /// once every element has been visited. The pointers refer to storage owned by
  /// this object.
  Result<T*> Next() {
    if (i_ != elements_.size()) {
      const size_t current = i_++;
      return &elements_[current];
    }
    return NULLPTR;
  }

 private:
  std::vector<T> elements_;
  // Index of the next element to point at.
  size_t i_ = 0;
};

/// \brief Build an Iterator<T*> that takes ownership of \p v and yields pointers
/// to its elements.
template <typename T>
Iterator<T*> MakeVectorPointingIterator(std::vector<T> v) {
  using Impl = VectorPointingIterator<T>;
  return Iterator<T*>(Impl(std::move(v)));
}

/// \brief MapIterator owns a source iterator together with a function, and
/// yields the function applied to each source element.
///
/// Whatever the function returns is converted to Result<O>. The end of the
/// source sequence is never passed to the function; it is translated directly
/// into IterationTraits<O>::End().
template <typename Fn, typename I, typename O>
class MapIterator {
 public:
  explicit MapIterator(Fn map, Iterator<I> it)
      : map_(std::move(map)), it_(std::move(it)) {}

  /// Fetch one source element (propagating any error) and map it.
  Result<O> Next() {
    ARROW_ASSIGN_OR_RAISE(I input, it_.Next());

    if (!IsIterationEnd(input)) {
      return map_(std::move(input));
    }

    return IterationTraits<O>::End();
  }

 private:
  Fn map_;
  Iterator<I> it_;
};

/// \brief Build an Iterator that applies \p map to every element of \p it.
/// Ownership of both arguments is taken. The mapped function is not allowed to
/// fail: its return type is used directly as the element type.
template <typename Fn, typename From = internal::call_traits::argument_type<0, Fn>,
          typename To = internal::call_traits::return_type<Fn>>
Iterator<To> MakeMapIterator(Fn map, Iterator<From> it) {
  using Impl = MapIterator<Fn, From, To>;
  return Iterator<To>(Impl(std::move(map), std::move(it)));
}

/// \brief Variant of MakeMapIterator for a function that can fail: the element
/// type is the ValueType of the function's return type.
template <typename Fn, typename From = internal::call_traits::argument_type<0, Fn>,
          typename To = typename internal::call_traits::return_type<Fn>::ValueType>
Iterator<To> MakeMaybeMapIterator(Fn map, Iterator<From> it) {
  using Impl = MapIterator<Fn, From, To>;
  return Iterator<To>(Impl(std::move(map), std::move(it)));
}

/// \brief Building blocks for filtering iterators.
///
/// A filter function receives a source element and returns
/// Result<std::pair<To, Action>>: the (possibly converted) element together with
/// the decision to ACCEPT or REJECT it, or an error. The static helpers below
/// build such return values; Impl is the iterator implementation driving a filter.
struct FilterIterator {
  enum Action { ACCEPT, REJECT };

  /// Result telling the iterator to drop the current element. The first member of
  /// the pair is a placeholder (the end value of To) and is never yielded.
  template <typename To>
  static Result<std::pair<To, Action>> Reject() {
    return std::make_pair(IterationTraits<To>::End(), REJECT);
  }

  /// Result telling the iterator to yield \p out.
  template <typename To>
  static Result<std::pair<To, Action>> Accept(To out) {
    return std::make_pair(std::move(out), ACCEPT);
  }

  /// Accept the value held by \p maybe_out; the conversion is delegated to
  /// Result::Map.
  template <typename To>
  static Result<std::pair<To, Action>> MaybeAccept(Result<To> maybe_out) {
    return std::move(maybe_out).Map(Accept<To>);
  }

  /// Result carrying the error \p s.
  template <typename To>
  static Result<std::pair<To, Action>> Error(Status s) {
    return s;
  }

  /// \brief Iterator implementation that yields only the elements accepted by a
  /// filter function.
  template <typename Fn, typename From, typename To>
  class Impl {
   public:
    // The filter is a sink parameter: move it into place (as done for the iterator)
    // instead of copying, which also allows move-only callables.
    explicit Impl(Fn filter, Iterator<From> it)
        : filter_(std::move(filter)), it_(std::move(it)) {}

    /// Pull source elements until one is accepted and return it. Returns
    /// IterationTraits<To>::End() when the source is exhausted, and propagates any
    /// error raised by the source or by the filter.
    Result<To> Next() {
      To out = IterationTraits<To>::End();
      // Always overwritten before being read; initialized so that it never holds
      // an indeterminate value.
      Action action = REJECT;

      for (;;) {
        ARROW_ASSIGN_OR_RAISE(From i, it_.Next());

        if (IsIterationEnd(i)) {
          return IterationTraits<To>::End();
        }

        ARROW_ASSIGN_OR_RAISE(std::tie(out, action), filter_(std::move(i)));

        if (action == ACCEPT) return out;
      }
    }

   private:
    Fn filter_;
    Iterator<From> it_;
  };
};

/// \brief Build an Iterator from \p it and a filter function that can convert,
/// reject, or fail on each element.
///
/// Only participates in overload resolution when the filter's result holds a
/// two-element tuple-like whose second member is FilterIterator::Action; the first
/// member determines the element type of the returned Iterator.
template <
    typename Fn, typename From = typename internal::call_traits::argument_type<0, Fn>,
    typename Ret = typename internal::call_traits::return_type<Fn>::ValueType,
    typename To = typename std::tuple_element<0, Ret>::type,
    typename Enable = typename std::enable_if<std::is_same<
        typename std::tuple_element<1, Ret>::type, FilterIterator::Action>::value>::type>
Iterator<To> MakeFilterIterator(Fn filter, Iterator<From> it) {
  using Impl = FilterIterator::Impl<Fn, From, To>;
  return Iterator<To>(Impl(std::move(filter), std::move(it)));
}

/// \brief FlattenIterator takes an iterator generating iterators and yields a
/// unified iterator that flattens/concatenates in a single stream.
///
/// Child iterators are drained one after another, in the order the parent
/// produces them. Empty children are skipped.
template <typename T>
class FlattenIterator {
 public:
  explicit FlattenIterator(Iterator<Iterator<T>> it) : parent_(std::move(it)) {}

  /// \brief Return the next element of the current child, moving on to the next
  /// child whenever the current one is depleted.
  ///
  /// Returns IterationTraits<T>::End() once the parent is exhausted, and propagates
  /// any error raised by the parent or by a child.
  ///
  /// Implemented as a loop rather than by recursion so that a long run of empty
  /// children cannot grow the call stack.
  Result<T> Next() {
    for (;;) {
      if (IsIterationEnd(child_)) {
        // No active child: pop the next one from the parent.
        ARROW_ASSIGN_OR_RAISE(child_, parent_.Next());

        // The parent is exhausted, hence so is the flattened stream.
        if (IsIterationEnd(child_)) {
          return IterationTraits<T>::End();
        }
      }

      // Pop from child_ and check for depletion.
      ARROW_ASSIGN_OR_RAISE(T out, child_.Next());
      if (!IsIterationEnd(out)) {
        return out;
      }

      // Reset state such that the next loop turn pops from the parent.
      child_ = IterationTraits<Iterator<T>>::End();
    }
  }

 private:
  Iterator<Iterator<T>> parent_;
  // The child currently being drained; null when a new one must be fetched.
  Iterator<T> child_ = IterationTraits<Iterator<T>>::End();
};

/// \brief Build an Iterator<T> that concatenates the iterators produced by \p it.
template <typename T>
Iterator<T> MakeFlattenIterator(Iterator<Iterator<T>> it) {
  FlattenIterator<T> flattened(std::move(it));
  return Iterator<T>(std::move(flattened));
}

/// \brief Build an Iterator that forwards each Next() to \p reader->Next().
///
/// The shared_ptr is captured by copy, so the returned Iterator shares ownership
/// of the reader.
template <typename Reader>
Iterator<typename Reader::ValueType> MakeIteratorFromReader(
    const std::shared_ptr<Reader>& reader) {
  auto read_next = [reader] { return reader->Next(); };
  return MakeFunctionIterator(std::move(read_next));
}

}  // namespace arrow