Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

arrow-nightlies / pyarrow   python

Repository URL to install this package:

Version: 19.0.0.dev70 

/ include / arrow / compute / api_vector.h

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <memory>
#include <utility>

#include "arrow/compute/function_options.h"
#include "arrow/compute/ordering.h"
#include "arrow/result.h"
#include "arrow/type_fwd.h"

namespace arrow {
namespace compute {

class ExecContext;

/// \addtogroup compute-concrete-options
/// @{

/// \brief Options for Filter(): how null slots of the selection mask are treated
class ARROW_EXPORT FilterOptions : public FunctionOptions {
 public:
  /// Configure the action taken when a slot of the selection mask is null
  enum NullSelectionBehavior {
    /// The corresponding filtered value will be removed in the output.
    DROP,
    /// The corresponding filtered value will be null in the output.
    EMIT_NULL,
  };

  /// \param[in] null_selection action for null mask slots; DROP when omitted
  explicit FilterOptions(NullSelectionBehavior null_selection = DROP);
  static constexpr char const kTypeName[] = "FilterOptions";
  /// Options built with the constructor's default argument (DROP)
  static FilterOptions Defaults() { return FilterOptions(); }

  /// Action taken for null slots of the selection mask; the in-class default is DROP
  NullSelectionBehavior null_selection_behavior = DROP;
};

/// \brief Options carrying a single `boundscheck` flag
///
/// NOTE(review): presumably consumed by the Take function to decide whether the
/// indices are validated -- confirm against the kernel implementation.
class ARROW_EXPORT TakeOptions : public FunctionOptions {
 public:
  /// \param[in] boundscheck value for the `boundscheck` flag; true when omitted
  explicit TakeOptions(bool boundscheck = true);
  static constexpr const char kTypeName[] = "TakeOptions";

  /// Options constructed with the flag switched on
  static TakeOptions BoundsCheck() { return TakeOptions(/*boundscheck=*/true); }
  /// Options constructed with the flag switched off
  static TakeOptions NoBoundsCheck() { return TakeOptions(/*boundscheck=*/false); }
  /// The defaults are the same as BoundsCheck(): the flag is switched on
  static TakeOptions Defaults() { return TakeOptions(/*boundscheck=*/true); }

  /// The flag itself; the in-class default is true
  bool boundscheck = true;
};

/// \brief Options for the dictionary encode function
class ARROW_EXPORT DictionaryEncodeOptions : public FunctionOptions {
 public:
  /// Configure how null values will be encoded
  enum NullEncodingBehavior {
    /// The null value will be added to the dictionary with a proper index.
    ENCODE,
    /// The null value will be masked in the indices array.
    MASK
  };

  /// \param[in] null_encoding how null values are encoded; MASK when omitted
  explicit DictionaryEncodeOptions(NullEncodingBehavior null_encoding = MASK);
  static constexpr char const kTypeName[] = "DictionaryEncodeOptions";
  /// Options built with the constructor's default argument (MASK)
  static DictionaryEncodeOptions Defaults() { return DictionaryEncodeOptions(); }

  /// How null values are encoded; the in-class default is MASK
  NullEncodingBehavior null_encoding_behavior = MASK;
};

/// \brief Options for the run-end encode function
class ARROW_EXPORT RunEndEncodeOptions : public FunctionOptions {
 public:
  /// \param[in] run_end_type data type for the run ends; the result of int32()
  /// when omitted
  explicit RunEndEncodeOptions(std::shared_ptr<DataType> run_end_type = int32());
  static constexpr char const kTypeName[] = "RunEndEncodeOptions";
  /// Options built with the constructor's default argument (int32())
  static RunEndEncodeOptions Defaults() { return RunEndEncodeOptions(); }

  /// Data type of the run ends. There is no in-class initializer; the value is
  /// whatever the out-of-line constructor assigns.
  std::shared_ptr<DataType> run_end_type;
};

/// \brief Options pairing a sort order with a null placement
///
/// NOTE(review): judging by the name, these configure sorting of a single array --
/// confirm against the function that consumes them.
class ARROW_EXPORT ArraySortOptions : public FunctionOptions {
 public:
  /// \param[in] order sorting order; SortOrder::Ascending when omitted
  /// \param[in] null_placement where nulls and NaNs go; NullPlacement::AtEnd
  /// when omitted
  explicit ArraySortOptions(SortOrder order = SortOrder::Ascending,
                            NullPlacement null_placement = NullPlacement::AtEnd);
  static constexpr char const kTypeName[] = "ArraySortOptions";
  /// Options built with the constructor's default arguments
  static ArraySortOptions Defaults() { return ArraySortOptions(); }

  /// Sorting order
  SortOrder order;
  /// Whether nulls and NaNs are placed at the start or at the end
  NullPlacement null_placement;
};

/// \brief Options holding the sort keys and the null placement of a sort
class ARROW_EXPORT SortOptions : public FunctionOptions {
 public:
  /// \param[in] sort_keys key(s) to order by; empty when omitted
  /// \param[in] null_placement where nulls and NaNs go; NullPlacement::AtEnd
  /// when omitted
  explicit SortOptions(std::vector<SortKey> sort_keys = {},
                       NullPlacement null_placement = NullPlacement::AtEnd);
  /// Construct from an existing Ordering
  explicit SortOptions(const Ordering& ordering);
  static constexpr char const kTypeName[] = "SortOptions";
  /// Options built with the first constructor's default arguments
  static SortOptions Defaults() { return SortOptions(); }
  /// Convenience conversion to create an Ordering from these SortOptions
  ///
  /// Note: Both classes contain the exact same information.  However,
  /// SortOptions should only be used in a "function options" context while Ordering
  /// is used more generally.
  ///
  /// This overload is selected for rvalues and moves sort_keys into the result,
  /// so the moved-from options should not be relied upon afterwards.
  Ordering AsOrdering() && { return Ordering(std::move(sort_keys), null_placement); }
  /// Overload for lvalues and const objects: passes sort_keys without moving it,
  /// leaving these options untouched.
  Ordering AsOrdering() const& { return Ordering(sort_keys, null_placement); }

  /// Column key(s) to order by and how to order by these sort keys.
  std::vector<SortKey> sort_keys;
  /// Whether nulls and NaNs are placed at the start or at the end
  NullPlacement null_placement;
};

/// \brief SelectK options
class ARROW_EXPORT SelectKOptions : public FunctionOptions {
 public:
  /// \param[in] k the number of elements to keep; -1 when omitted
  /// \param[in] sort_keys key(s) to order by; empty when omitted
  explicit SelectKOptions(int64_t k = -1, std::vector<SortKey> sort_keys = {});
  static constexpr char const kTypeName[] = "SelectKOptions";
  /// Options built with the constructor's default arguments
  static SelectKOptions Defaults() { return SelectKOptions(); }

  /// \brief Build options in which every key is ordered descending
  ///
  /// \param[in] k stored unchanged as the `k` of the returned options
  /// \param[in] key_names names of the keys to order by; when empty, a single
  /// placeholder key named "not-used" is supplied instead
  /// \return options holding one descending SortKey per name
  static SelectKOptions TopKDefault(int64_t k, std::vector<std::string> key_names = {}) {
    return WithUniformOrder(k, std::move(key_names), SortOrder::Descending);
  }
  /// \brief Build options in which every key is ordered ascending
  ///
  /// \param[in] k stored unchanged as the `k` of the returned options
  /// \param[in] key_names names of the keys to order by; when empty, a single
  /// placeholder key named "not-used" is supplied instead
  /// \return options holding one ascending SortKey per name
  static SelectKOptions BottomKDefault(int64_t k,
                                       std::vector<std::string> key_names = {}) {
    return WithUniformOrder(k, std::move(key_names), SortOrder::Ascending);
  }

  /// The number of `k` elements to keep.
  int64_t k;
  /// Column key(s) to order by and how to order by these sort keys.
  std::vector<SortKey> sort_keys;

 private:
  /// Shared implementation of TopKDefault() and BottomKDefault(): one SortKey per
  /// name, all using `order`.
  static SelectKOptions WithUniformOrder(int64_t k, std::vector<std::string> key_names,
                                         SortOrder order) {
    std::vector<SortKey> keys;
    if (key_names.empty()) {
      // NOTE(review): the placeholder name suggests the key is ignored when the
      // input has no named columns -- confirm against the SelectK kernels.
      keys.emplace_back(SortKey("not-used", order));
    } else {
      keys.reserve(key_names.size());
      // key_names is our own by-value copy, so each name can be moved into its key.
      for (auto& name : key_names) {
        keys.emplace_back(SortKey(std::move(name), order));
      }
    }
    // Hand the vector over instead of copying it into the by-value parameter.
    return SelectKOptions{k, std::move(keys)};
  }
};

/// \brief Rank options
class ARROW_EXPORT RankOptions : public FunctionOptions {
 public:
  /// How equal values (ties) are ranked
  enum Tiebreaker {
    /// Every tied value receives the smallest rank it could have in sorted order.
    Min,
    /// Every tied value receives the largest rank it could have in sorted order.
    Max,
    /// Tied values are ranked by order of appearance in the input, which
    /// makes the ranks a stable permutation of the input.
    First,
    /// Ranks cover a dense [1, M] interval, M being the number of distinct
    /// values in the input.
    Dense
  };

  /// \param[in] sort_keys key(s) to order by; empty when omitted
  /// \param[in] null_placement where nulls and NaNs go; NullPlacement::AtEnd
  /// when omitted
  /// \param[in] tiebreaker how ties are ranked; First when omitted
  explicit RankOptions(std::vector<SortKey> sort_keys = {},
                       NullPlacement null_placement = NullPlacement::AtEnd,
                       Tiebreaker tiebreaker = RankOptions::First);
  /// Shorthand for array inputs: delegates to the constructor above with a
  /// single sort key that has an empty name and the given order.
  explicit RankOptions(SortOrder order,
                       NullPlacement null_placement = NullPlacement::AtEnd,
                       Tiebreaker tiebreaker = RankOptions::First)
      : RankOptions(std::vector<SortKey>{SortKey("", order)}, null_placement,
                    tiebreaker) {}

  static constexpr const char kTypeName[] = "RankOptions";
  /// Options built with the first constructor's default arguments
  static RankOptions Defaults() { return RankOptions(); }

  /// Column key(s) to order by, each with its own sort order.
  std::vector<SortKey> sort_keys;
  /// Placement of nulls and NaNs: at the start or at the end
  NullPlacement null_placement;
  /// How equal values are ranked
  Tiebreaker tiebreaker;
};

/// \brief Partitioning options for NthToIndices
class ARROW_EXPORT PartitionNthOptions : public FunctionOptions {
 public:
  /// \param[in] pivot index of the partition pivot element (no default here)
  /// \param[in] null_placement where nulls and NaNs are partitioned;
  /// NullPlacement::AtEnd when omitted
  explicit PartitionNthOptions(int64_t pivot,
                               NullPlacement null_placement = NullPlacement::AtEnd);
  /// Default constructor: delegates to the constructor above with a pivot of 0
  PartitionNthOptions() : PartitionNthOptions(0) {}
  static constexpr char const kTypeName[] = "PartitionNthOptions";

  /// The index into the equivalent sorted array of the partition pivot element.
  int64_t pivot;
  /// Whether nulls and NaNs are partitioned at the start or at the end
  NullPlacement null_placement;
};

/// \brief Options for cumulative functions
/// \note Also aliased as CumulativeSumOptions for backward compatibility
class ARROW_EXPORT CumulativeOptions : public FunctionOptions {
 public:
  /// Construct without specifying a start value
  /// \param[in] skip_nulls see the `skip_nulls` member; false when omitted
  explicit CumulativeOptions(bool skip_nulls = false);
  /// Construct with a start value given as a double
  /// \param[in] start starting value for the cumulative operation
  /// \param[in] skip_nulls see the `skip_nulls` member; false when omitted
  explicit CumulativeOptions(double start, bool skip_nulls = false);
  /// Construct with a start value given as a Scalar
  /// \param[in] start starting value for the cumulative operation
  /// \param[in] skip_nulls see the `skip_nulls` member; false when omitted
  explicit CumulativeOptions(std::shared_ptr<Scalar> start, bool skip_nulls = false);
  static constexpr char const kTypeName[] = "CumulativeOptions";
  /// Options built with the first constructor's default argument
  static CumulativeOptions Defaults() { return CumulativeOptions(); }

  /// Optional starting value for cumulative operation computation, default depends on the
  /// operation and input type.
  /// - sum: 0
  /// - prod: 1
  /// - min: maximum of the input type
  /// - max: minimum of the input type
  /// - mean: start is ignored because it has no meaning for mean
  std::optional<std::shared_ptr<Scalar>> start;

  /// If true, nulls in the input are ignored and produce a corresponding null output.
  /// When false, the first null encountered is propagated through the remaining output.
  bool skip_nulls = false;
};
/// Old name of CumulativeOptions, kept so that existing code keeps compiling
using CumulativeSumOptions = CumulativeOptions;  // For backward compatibility

/// \brief Options for pairwise functions
class ARROW_EXPORT PairwiseOptions : public FunctionOptions {
 public:
  /// \param[in] periods see the `periods` member; 1 when omitted
  explicit PairwiseOptions(int64_t periods = 1);
  static constexpr char const kTypeName[] = "PairwiseOptions";
  /// Options built with the constructor's default argument (1)
  static PairwiseOptions Defaults() { return PairwiseOptions(); }

  /// Periods to shift for applying the binary operation, accepts negative values.
  /// The in-class default is 1.
  int64_t periods = 1;
};

/// \brief Options for list_flatten function
class ARROW_EXPORT ListFlattenOptions : public FunctionOptions {
 public:
  /// \param[in] recursive see the `recursive` member; false when omitted
  explicit ListFlattenOptions(bool recursive = false);
  static constexpr char const kTypeName[] = "ListFlattenOptions";
  /// Options built with the constructor's default argument (false)
  static ListFlattenOptions Defaults() { return ListFlattenOptions(); }

  /// \brief If true, the list is flattened recursively until a non-list
  /// array is formed. The in-class default is false.
  bool recursive = false;
};

/// @}

/// \brief Filter with a boolean selection filter
///
/// The output will be populated with values from the input at positions
/// where the selection filter is not 0. Nulls in the filter will be handled
/// based on options.null_selection_behavior.
///
/// For example, given values = ["a", "b", "c", null, "e", "f"] and
/// filter = [0, 1, 1, 0, null, 1], the output will be
/// (null_selection_behavior == DROP)      = ["b", "c", "f"]
/// (null_selection_behavior == EMIT_NULL) = ["b", "c", null, "f"]
///
/// \param[in] values array to filter
/// \param[in] filter boolean selection filter; values at positions where it is
/// not 0 are kept in the output
/// \param[in] options configures null_selection_behavior; FilterOptions::Defaults()
/// when omitted
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
ARROW_EXPORT
Result<Datum> Filter(const Datum& values, const Datum& filter,
                     const FilterOptions& options = FilterOptions::Defaults(),
                     ExecContext* ctx = NULLPTR);

namespace internal {

// These internal functions are implemented in kernels/vector_selection.cc

/// \brief Return the number of selected indices in the boolean filter
///
/// \param filter a plain or run-end encoded boolean array with or without nulls
/// \param null_selection how to handle nulls in the filter
/// \return the number of selected indices
ARROW_EXPORT
int64_t GetFilterOutputSize(const ArraySpan& filter,
                            FilterOptions::NullSelectionBehavior null_selection);

/// \brief Compute uint64 selection indices for use with Take given a boolean
/// filter
///
/// \param filter a plain or run-end encoded boolean array with or without nulls
/// \param null_selection how to handle nulls in the filter
/// \param memory_pool memory pool handed to the implementation, presumably used to
/// allocate the result; default_memory_pool() when omitted
/// \return the array data holding the selection indices, or an error
ARROW_EXPORT
Result<std::shared_ptr<ArrayData>> GetTakeIndices(
    const ArraySpan& filter, FilterOptions::NullSelectionBehavior null_selection,
    MemoryPool* memory_pool = default_memory_pool());

}  // namespace internal

/// \brief Replace each value in the array that corresponds to a true value
/// in the mask with the next element from `replacements`.
///
/// \param[in] values Array input to replace
/// \param[in] mask Array or Scalar of Boolean mask values
/// \param[in] replacements The replacement values to draw from. There must
/// be as many replacement values as true values in the mask.
/// \param[in] ctx the function execution context, optional
///
/// \return the resulting datum
///
/// \since 5.0.0
/// \note API not yet finalized
ARROW_EXPORT
Result<Datum> ReplaceWithMask(const Datum& values, const Datum& mask,
                              const Datum& replacements, ExecContext* ctx = NULLPTR);

/// \brief Fill null values in the forward direction
///
/// The output array will be of the same type as the input values
/// array, with null values replaced in the forward direction.
///
/// For example, given values = ["a", "b", "c", null, null, "f"],
/// the output will be = ["a", "b", "c", "c", "c", "f"]
///
/// \param[in] values datum whose null values are to be filled
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
ARROW_EXPORT
Result<Datum> FillNullForward(const Datum& values, ExecContext* ctx = NULLPTR);

/// \brief FillNullBackward fill null values in backward direction
///
/// The output array will be of the same type as the input values
/// array, with replaced null values in backward direction.
///
/// For example given values = ["a", "b", "c", null, null, "f"],
/// the output will be = ["a", "b", "c", "f", "f", "f"]
///
Loading ...