Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

arrow-nightlies / pyarrow   python

Repository URL to install this package:

Version: 19.0.0.dev259 

/ include / arrow / array / array_run_end.h

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

// Array accessor classes run-end encoded arrays

#pragma once

#include <cstdint>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "arrow/array/array_base.h"
#include "arrow/array/data.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/type_fwd.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"

namespace arrow {

/// \addtogroup run-end-encoded-arrays
///
/// @{

// ----------------------------------------------------------------------
// RunEndEncoded

/// \brief Array type for run-end encoded data
class ARROW_EXPORT RunEndEncodedArray : public Array {
 private:
  std::shared_ptr<Array> run_ends_array_;
  std::shared_ptr<Array> values_array_;

 public:
  using TypeClass = RunEndEncodedType;

  explicit RunEndEncodedArray(const std::shared_ptr<ArrayData>& data);

  /// \brief Construct a RunEndEncodedArray from all parameters
  ///
  /// The length and offset parameters refer to the dimensions of the logical
  /// array which is the array we would get after expanding all the runs into
  /// repeated values. As such, length can be much greater than the length of
  /// the child run_ends and values arrays.
  RunEndEncodedArray(const std::shared_ptr<DataType>& type, int64_t length,
                     const std::shared_ptr<Array>& run_ends,
                     const std::shared_ptr<Array>& values, int64_t offset = 0);

  /// \brief Construct a RunEndEncodedArray from all parameters
  ///
  /// The length and offset parameters refer to the dimensions of the logical
  /// array which is the array we would get after expanding all the runs into
  /// repeated values. As such, length can be much greater than the length of
  /// the child run_ends and values arrays.
  static Result<std::shared_ptr<RunEndEncodedArray>> Make(
      const std::shared_ptr<DataType>& type, int64_t logical_length,
      const std::shared_ptr<Array>& run_ends, const std::shared_ptr<Array>& values,
      int64_t logical_offset = 0);

  /// \brief Construct a RunEndEncodedArray from values and run ends arrays
  ///
  /// The data type is automatically inferred from the arguments.
  /// The run_ends and values arrays must have the same length.
  static Result<std::shared_ptr<RunEndEncodedArray>> Make(
      int64_t logical_length, const std::shared_ptr<Array>& run_ends,
      const std::shared_ptr<Array>& values, int64_t logical_offset = 0);

 protected:
  void SetData(const std::shared_ptr<ArrayData>& data);

 public:
  /// \brief Returns an array holding the logical indexes of each run-end
  ///
  /// The physical offset to the array is applied.
  const std::shared_ptr<Array>& run_ends() const { return run_ends_array_; }

  /// \brief Returns an array holding the values of each run
  ///
  /// The physical offset to the array is applied.
  const std::shared_ptr<Array>& values() const { return values_array_; }

  /// \brief Returns an array holding the logical indexes of each run end
  ///
  /// If a non-zero logical offset is set, this function allocates a new
  /// array and rewrites all the run end values to be relative to the logical
  /// offset and cuts the end of the array to the logical length.
  Result<std::shared_ptr<Array>> LogicalRunEnds(MemoryPool* pool) const;

  /// \brief Returns an array holding the values of each run
  ///
  /// If a non-zero logical offset is set, this function allocates a new
  /// array containing only the values within the logical range.
  std::shared_ptr<Array> LogicalValues() const;

  /// \brief Find the physical offset of this REE array
  ///
  /// This function uses binary-search, so it has a O(log N) cost.
  int64_t FindPhysicalOffset() const;

  /// \brief Find the physical length of this REE array
  ///
  /// The physical length of an REE is the number of physical values (and
  /// run-ends) necessary to represent the logical range of values from offset
  /// to length.
  ///
  /// Avoid calling this function if the physical length can be established in
  /// some other way (e.g. when iterating over the runs sequentially until the
  /// end). This function uses binary-search, so it has a O(log N) cost.
  int64_t FindPhysicalLength() const;
};

/// @}

}  // namespace arrow