Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

arrow-nightlies / pyarrow   python

Repository URL to install this package:

Version: 19.0.0.dev259 

/ include / arrow / adapters / orc / options.h

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <cstdint>
#include <string>
#include <vector>

#include "arrow/io/interfaces.h"
#include "arrow/status.h"
#include "arrow/util/type_fwd.h"
#include "arrow/util/visibility.h"

namespace arrow {

namespace adapters {

namespace orc {

/// \brief Numeric writer identifiers, stored as int32_t.
///
/// NOTE(review): the enumerator names suggest each value identifies the
/// software implementation that wrote an ORC file, mirroring the writer ids of
/// the ORC format -- confirm against the ORC specification before changing or
/// adding a value.
enum class WriterId : int32_t {
  kOrcJava = 0,
  kOrcCpp = 1,
  kPresto = 2,
  kScritchleyGo = 3,
  kTrino = 4,
  // Catch-all value; equals the largest representable int32_t.
  kUnknown = INT32_MAX
};

/// \brief Numeric writer versions, stored as int32_t.
///
/// Values 0 through 9 are assigned consecutively; kMax is a sentinel.
///
/// NOTE(review): the enumerator names look like issue-tracker ids (HIVE-xxxx,
/// ORC-xxx) marking successive writer revisions -- confirm against the ORC
/// specification before relying on that reading.
enum class WriterVersion : int32_t {
  kOriginal = 0,
  kHive8732 = 1,
  kHive4243 = 2,
  kHive12055 = 3,
  kHive13083 = 4,
  kOrc101 = 5,
  kOrc135 = 6,
  kOrc517 = 7,
  kOrc203 = 8,
  kOrc14 = 9,
  // Sentinel; equals the largest representable int32_t.
  kMax = INT32_MAX
};

/// \brief Compression trade-off selector, stored as int32_t.
///
/// WriteOptions::compression_strategy describes the choice as
/// "speed vs size reduction".
enum class CompressionStrategy : int32_t {
  kSpeed = 0,
  kCompression = 1,
};

/// \brief A (major, minor) version pair.
///
/// Two instances compare equal exactly when both their major and minor
/// components match.
class ARROW_EXPORT FileVersion {
 public:
  /// Declared here, defined out of line.
  /// NOTE(review): presumably returns the shared instance for version 0.11.
  static const FileVersion& v_0_11();
  /// Declared here, defined out of line.
  /// NOTE(review): presumably returns the shared instance for version 0.12.
  static const FileVersion& v_0_12();

  /// Build a version from its two components.
  ///
  /// \param major stored as the major component
  /// \param minor stored as the minor component
  FileVersion(int32_t major, int32_t minor)
      : major_version_{major}, minor_version_{minor} {}

  /// \return the major component
  int32_t major_version() const { return major_version_; }

  /// \return the minor component
  int32_t minor_version() const { return minor_version_; }

  /// \return true when both components of `right` match this object's
  bool operator==(const FileVersion& right) const {
    if (major_version_ != right.major_version_) {
      return false;
    }
    return minor_version_ == right.minor_version_;
  }

  /// \return the negation of operator==
  bool operator!=(const FileVersion& right) const { return !operator==(right); }

  /// String form of this version; declared here, defined out of line.
  std::string ToString() const;

 private:
  int32_t major_version_;
  int32_t minor_version_;
};

/// \brief Settings for the ORC writer.
///
/// Every field has a default member initializer, except bloom_filter_columns,
/// which default-constructs to an empty vector.
struct ARROW_EXPORT WriteOptions {
  /// How many rows the ORC writer writes at a time (default: 1024).
  int64_t batch_size{1024};
  /// ORC file version to write (default: FileVersion(0, 12)).
  FileVersion file_version{0, 12};
  /// Stripe size in bytes (default: 64 MiB).
  int64_t stripe_size{64 * 1024 * 1024};
  /// Compression codec for the ORC file (default: uncompressed).
  Compression::type compression{Compression::UNCOMPRESSED};
  /// Size in bytes of each compression block (default: 64 KiB).
  int64_t compression_block_size{64 * 1024};
  /// Compression strategy, i.e. speed vs size reduction
  /// (default: CompressionStrategy::kSpeed).
  CompressionStrategy compression_strategy{CompressionStrategy::kSpeed};
  /// Rows covered by each entry of the row index (default: 10000).
  int64_t row_index_stride{10000};
  /// Padding tolerance (default: 0.0).
  double padding_tolerance{0.0};
  /// Dictionary key size threshold: 0 disables dictionary encoding, 1 always
  /// enables it (default: 0.0).
  double dictionary_key_size_threshold{0.0};
  /// Columns that use the bloom filter (default: empty).
  std::vector<int64_t> bloom_filter_columns;
  /// Upper limit on the bloom filter's false-positive rate (default: 0.05).
  double bloom_filter_fpp{0.05};
};

}  // namespace orc
}  // namespace adapters
}  // namespace arrow