Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

arrow-nightlies / pyarrow   python

Repository URL to install this package:

/ _orc.pxd

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# distutils: language = c++
# cython: language_level = 3

from libcpp cimport bool as c_bool
from libc.string cimport const_char
from libcpp.vector cimport vector as std_vector
from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport (CArray, CSchema, CStatus,
                                        CResult, CTable, CMemoryPool,
                                        CKeyValueMetadata,
                                        CRecordBatch,
                                        CTable, CCompressionType,
                                        CRandomAccessFile, COutputStream,
                                        TimeUnit)

cdef extern from "arrow/adapters/orc/options.h" \
        namespace "arrow::adapters::orc" nogil:
    # Cython-side declaration of the C++ enum
    # arrow::adapters::orc::CompressionStrategy and its two enumerators
    # (kSpeed and kCompression). It is the type of
    # WriteOptions.compression_strategy.
    # NOTE(review): every C-name string below begins with a space. That looks
    # deliberate (presumably a workaround for how Cython emits names of
    # scoped enums) -- confirm before normalizing, and keep the strings
    # byte-for-byte.
    cdef enum CompressionStrategy \
            " arrow::adapters::orc::CompressionStrategy":
        _CompressionStrategy_SPEED \
            " arrow::adapters::orc::CompressionStrategy::kSpeed"
        _CompressionStrategy_COMPRESSION \
            " arrow::adapters::orc::CompressionStrategy::kCompression"

    # Declaration of the C++ enum arrow::adapters::orc::WriterId; this is the
    # return type of ORCFileReader.GetWriterId().
    # Each Cython name maps to the C++ enumerator spelled out in its string.
    # NOTE(review): presumably identifies which ORC implementation produced a
    # file -- confirm against options.h. The leading space in each C name
    # appears intentional; do not strip it.
    cdef enum WriterId" arrow::adapters::orc::WriterId":
        _WriterId_ORC_JAVA_WRITER" arrow::adapters::orc::WriterId::kOrcJava"
        _WriterId_ORC_CPP_WRITER" arrow::adapters::orc::WriterId::kOrcCpp"
        _WriterId_PRESTO_WRITER" arrow::adapters::orc::WriterId::kPresto"
        _WriterId_SCRITCHLEY_GO \
            " arrow::adapters::orc::WriterId::kScritchleyGo"
        _WriterId_TRINO_WRITER" arrow::adapters::orc::WriterId::kTrino"
        # Catch-all enumerator (kUnknown).
        _WriterId_UNKNOWN_WRITER" arrow::adapters::orc::WriterId::kUnknown"

    # Declaration of the C++ enum arrow::adapters::orc::WriterVersion; this is
    # the return type of ORCFileReader.GetWriterVersion().
    # NOTE(review): the enumerator names look like HIVE-/ORC- issue numbers
    # marking writer revisions -- confirm against options.h.
    # The leading space in each C name appears intentional; do not strip it.
    cdef enum WriterVersion" arrow::adapters::orc::WriterVersion":
        _WriterVersion_ORIGINAL \
            " arrow::adapters::orc::WriterVersion::kOriginal"
        _WriterVersion_HIVE_8732 \
            " arrow::adapters::orc::WriterVersion::kHive8732"
        _WriterVersion_HIVE_4243 \
            " arrow::adapters::orc::WriterVersion::kHive4243"
        _WriterVersion_HIVE_12055 \
            " arrow::adapters::orc::WriterVersion::kHive12055"
        _WriterVersion_HIVE_13083 \
            " arrow::adapters::orc::WriterVersion::kHive13083"
        _WriterVersion_ORC_101" arrow::adapters::orc::WriterVersion::kOrc101"
        _WriterVersion_ORC_135" arrow::adapters::orc::WriterVersion::kOrc135"
        _WriterVersion_ORC_517" arrow::adapters::orc::WriterVersion::kOrc517"
        _WriterVersion_ORC_203" arrow::adapters::orc::WriterVersion::kOrc203"
        _WriterVersion_ORC_14" arrow::adapters::orc::WriterVersion::kOrc14"
        # NOTE(review): kMax is presumably an upper-bound sentinel rather
        # than a real writer version -- verify before exposing it to users.
        _WriterVersion_MAX" arrow::adapters::orc::WriterVersion::kMax"

    # Declaration of the C++ class arrow::adapters::orc::FileVersion, a
    # (major, minor) version pair. It is used as the type of
    # WriteOptions.file_version and as the return type of
    # ORCFileReader.GetFileVersion().
    cdef cppclass FileVersion" arrow::adapters::orc::FileVersion":
        # Construct from explicit major and minor numbers.
        FileVersion(uint32_t major_version, uint32_t minor_version)
        # Accessors for the two components.
        uint32_t major_version()
        uint32_t minor_version()
        # String rendering of the version; the exact format is defined by the
        # C++ implementation and is not visible from this declaration.
        c_string ToString()

    # Declaration of the C++ struct arrow::adapters::orc::WriteOptions, the
    # option bundle passed by const reference to ORCFileWriter.Open().
    # Only field names and types are declared here; defaults, units and valid
    # ranges live in the C++ header (options.h) and are not visible from this
    # file -- consult it before documenting them elsewhere.
    cdef struct WriteOptions" arrow::adapters::orc::WriteOptions":
        int64_t batch_size
        FileVersion file_version
        int64_t stripe_size
        # Arrow's generic compression enum (cimported from libarrow), paired
        # with the ORC-specific CompressionStrategy two fields below.
        CCompressionType compression
        int64_t compression_block_size
        CompressionStrategy compression_strategy
        int64_t row_index_stride
        double padding_tolerance
        double dictionary_key_size_threshold
        # NOTE(review): presumably column indices (int64) for which bloom
        # filters are written, with bloom_filter_fpp as the target
        # false-positive probability -- confirm against options.h.
        std_vector[int64_t] bloom_filter_columns
        double bloom_filter_fpp


cdef extern from "arrow/adapters/orc/adapter.h" \
        namespace "arrow::adapters::orc" nogil:

    # Declaration of the C++ class ORCFileReader from
    # "arrow/adapters/orc/adapter.h". The namespace
    # (arrow::adapters::orc) and the nogil qualifier both come from the
    # enclosing extern block.
    cdef cppclass ORCFileReader:
        # Static factory: takes a shared random-access file and a memory
        # pool, and returns a uniquely-owned reader wrapped in CResult.
        @staticmethod
        CResult[unique_ptr[ORCFileReader]] Open(
            const shared_ptr[CRandomAccessFile]& file,
            CMemoryPool* pool)

        # File-level key/value metadata, returned as an immutable shared
        # pointer.
        CResult[shared_ptr[const CKeyValueMetadata]] ReadMetadata()

        CResult[shared_ptr[CSchema]] ReadSchema()

        # Two overloads returning one stripe as a record batch: by stripe
        # number alone, or with an additional vector of strings.
        # NOTE(review): the string vector is presumably the list of column
        # names to include -- confirm against adapter.h.
        CResult[shared_ptr[CRecordBatch]] ReadStripe(int64_t stripe)
        CResult[shared_ptr[CRecordBatch]] ReadStripe(
            int64_t stripe, std_vector[c_string])

        # Two overloads returning a table: no arguments, or a vector of
        # strings (same presumed meaning as for ReadStripe).
        CResult[shared_ptr[CTable]] Read()
        CResult[shared_ptr[CTable]] Read(std_vector[c_string])

        # Accessors. All of them return their value directly except
        # GetCompression, which is wrapped in CResult and so must be checked
        # for an error status by the caller.
        int64_t NumberOfStripes()
        int64_t NumberOfRows()
        FileVersion GetFileVersion()
        c_string GetSoftwareVersion()
        CResult[CCompressionType] GetCompression()
        int64_t GetCompressionSize()
        int64_t GetRowIndexStride()
        # The writer is exposed both as the WriterId enum and as a raw int32.
        WriterId GetWriterId()
        int32_t GetWriterIdValue()
        WriterVersion GetWriterVersion()
        int64_t GetNumberOfStripeStatistics()
        # Size accessors; units are not stated in this declaration
        # (presumably bytes -- TODO confirm).
        int64_t GetContentLength()
        int64_t GetStripeStatisticsLength()
        int64_t GetFileFooterLength()
        int64_t GetFilePostscriptLength()
        int64_t GetFileLength()
        c_string GetSerializedFileTail()

    # Declaration of the C++ class ORCFileWriter from
    # "arrow/adapters/orc/adapter.h". The namespace
    # (arrow::adapters::orc) and the nogil qualifier both come from the
    # enclosing extern block.
    cdef cppclass ORCFileWriter:
        # Static factory: takes an output stream and a WriteOptions bundle,
        # and returns a uniquely-owned writer wrapped in CResult.
        # NOTE(review): output_stream is a raw pointer, so the stream is
        # presumably owned by the caller and must outlive the writer --
        # confirm against adapter.h.
        @staticmethod
        CResult[unique_ptr[ORCFileWriter]] Open(
            COutputStream* output_stream, const WriteOptions& writer_options)

        # Takes a table by const reference; the outcome is reported as a
        # CStatus that the caller must check.
        CStatus Write(const CTable& table)

        # Also reports its outcome as a CStatus.
        CStatus Close()