Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

arrow-nightlies / pyarrow   python

Repository URL to install this package:

/ _parquet.pxd

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# distutils: language = c++
# cython: language_level = 3

from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport (CChunkedArray, CScalar, CSchema, CStatus,
                                        CTable, CMemoryPool, CBuffer,
                                        CKeyValueMetadata, CRandomAccessFile,
                                        COutputStream, CCacheOptions,
                                        TimeUnit, CRecordBatchReader)
from pyarrow.lib cimport _Weakrefable


cdef extern from "parquet/api/schema.h" namespace "parquet::schema" nogil:
    cdef cppclass Node:
        pass

    cdef cppclass GroupNode(Node):
        pass

    cdef cppclass PrimitiveNode(Node):
        pass

    cdef cppclass ColumnPath:
        c_string ToDotString()
        vector[c_string] ToDotVector()


cdef extern from "parquet/api/schema.h" namespace "parquet" nogil:
    enum ParquetType" parquet::Type::type":
        ParquetType_BOOLEAN" parquet::Type::BOOLEAN"
        ParquetType_INT32" parquet::Type::INT32"
        ParquetType_INT64" parquet::Type::INT64"
        ParquetType_INT96" parquet::Type::INT96"
        ParquetType_FLOAT" parquet::Type::FLOAT"
        ParquetType_DOUBLE" parquet::Type::DOUBLE"
        ParquetType_BYTE_ARRAY" parquet::Type::BYTE_ARRAY"
        ParquetType_FIXED_LEN_BYTE_ARRAY" parquet::Type::FIXED_LEN_BYTE_ARRAY"

    enum ParquetLogicalTypeId" parquet::LogicalType::Type::type":
        ParquetLogicalType_UNDEFINED" parquet::LogicalType::Type::UNDEFINED"
        ParquetLogicalType_STRING" parquet::LogicalType::Type::STRING"
        ParquetLogicalType_MAP" parquet::LogicalType::Type::MAP"
        ParquetLogicalType_LIST" parquet::LogicalType::Type::LIST"
        ParquetLogicalType_ENUM" parquet::LogicalType::Type::ENUM"
        ParquetLogicalType_DECIMAL" parquet::LogicalType::Type::DECIMAL"
        ParquetLogicalType_DATE" parquet::LogicalType::Type::DATE"
        ParquetLogicalType_TIME" parquet::LogicalType::Type::TIME"
        ParquetLogicalType_TIMESTAMP" parquet::LogicalType::Type::TIMESTAMP"
        ParquetLogicalType_INT" parquet::LogicalType::Type::INT"
        ParquetLogicalType_JSON" parquet::LogicalType::Type::JSON"
        ParquetLogicalType_BSON" parquet::LogicalType::Type::BSON"
        ParquetLogicalType_UUID" parquet::LogicalType::Type::UUID"
        ParquetLogicalType_NONE" parquet::LogicalType::Type::NONE"

    enum ParquetTimeUnit" parquet::LogicalType::TimeUnit::unit":
        ParquetTimeUnit_UNKNOWN" parquet::LogicalType::TimeUnit::UNKNOWN"
        ParquetTimeUnit_MILLIS" parquet::LogicalType::TimeUnit::MILLIS"
        ParquetTimeUnit_MICROS" parquet::LogicalType::TimeUnit::MICROS"
        ParquetTimeUnit_NANOS" parquet::LogicalType::TimeUnit::NANOS"

    enum ParquetConvertedType" parquet::ConvertedType::type":
        ParquetConvertedType_NONE" parquet::ConvertedType::NONE"
        ParquetConvertedType_UTF8" parquet::ConvertedType::UTF8"
        ParquetConvertedType_MAP" parquet::ConvertedType::MAP"
        ParquetConvertedType_MAP_KEY_VALUE \
            " parquet::ConvertedType::MAP_KEY_VALUE"
        ParquetConvertedType_LIST" parquet::ConvertedType::LIST"
        ParquetConvertedType_ENUM" parquet::ConvertedType::ENUM"
        ParquetConvertedType_DECIMAL" parquet::ConvertedType::DECIMAL"
        ParquetConvertedType_DATE" parquet::ConvertedType::DATE"
        ParquetConvertedType_TIME_MILLIS" parquet::ConvertedType::TIME_MILLIS"
        ParquetConvertedType_TIME_MICROS" parquet::ConvertedType::TIME_MICROS"
        ParquetConvertedType_TIMESTAMP_MILLIS \
            " parquet::ConvertedType::TIMESTAMP_MILLIS"
        ParquetConvertedType_TIMESTAMP_MICROS \
            " parquet::ConvertedType::TIMESTAMP_MICROS"
        ParquetConvertedType_UINT_8" parquet::ConvertedType::UINT_8"
        ParquetConvertedType_UINT_16" parquet::ConvertedType::UINT_16"
        ParquetConvertedType_UINT_32" parquet::ConvertedType::UINT_32"
        ParquetConvertedType_UINT_64" parquet::ConvertedType::UINT_64"
        ParquetConvertedType_INT_8" parquet::ConvertedType::INT_8"
        ParquetConvertedType_INT_16" parquet::ConvertedType::INT_16"
        ParquetConvertedType_INT_32" parquet::ConvertedType::INT_32"
        ParquetConvertedType_INT_64" parquet::ConvertedType::INT_64"
        ParquetConvertedType_JSON" parquet::ConvertedType::JSON"
        ParquetConvertedType_BSON" parquet::ConvertedType::BSON"
        ParquetConvertedType_INTERVAL" parquet::ConvertedType::INTERVAL"

    enum ParquetRepetition" parquet::Repetition::type":
        ParquetRepetition_REQUIRED" parquet::REPETITION::REQUIRED"
        ParquetRepetition_OPTIONAL" parquet::REPETITION::OPTIONAL"
        ParquetRepetition_REPEATED" parquet::REPETITION::REPEATED"

    enum ParquetEncoding" parquet::Encoding::type":
        ParquetEncoding_PLAIN" parquet::Encoding::PLAIN"
        ParquetEncoding_PLAIN_DICTIONARY" parquet::Encoding::PLAIN_DICTIONARY"
        ParquetEncoding_RLE" parquet::Encoding::RLE"
        ParquetEncoding_BIT_PACKED" parquet::Encoding::BIT_PACKED"
        ParquetEncoding_DELTA_BINARY_PACKED \
            " parquet::Encoding::DELTA_BINARY_PACKED"
        ParquetEncoding_DELTA_LENGTH_BYTE_ARRAY \
            " parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY"
        ParquetEncoding_DELTA_BYTE_ARRAY" parquet::Encoding::DELTA_BYTE_ARRAY"
        ParquetEncoding_RLE_DICTIONARY" parquet::Encoding::RLE_DICTIONARY"
        ParquetEncoding_BYTE_STREAM_SPLIT \
            " parquet::Encoding::BYTE_STREAM_SPLIT"

    enum ParquetCompression" parquet::Compression::type":
        ParquetCompression_UNCOMPRESSED" parquet::Compression::UNCOMPRESSED"
        ParquetCompression_SNAPPY" parquet::Compression::SNAPPY"
        ParquetCompression_GZIP" parquet::Compression::GZIP"
        ParquetCompression_LZO" parquet::Compression::LZO"
        ParquetCompression_BROTLI" parquet::Compression::BROTLI"
        ParquetCompression_LZ4" parquet::Compression::LZ4"
        ParquetCompression_ZSTD" parquet::Compression::ZSTD"

    enum ParquetVersion" parquet::ParquetVersion::type":
        ParquetVersion_V1" parquet::ParquetVersion::PARQUET_1_0"
        ParquetVersion_V2_0" parquet::ParquetVersion::PARQUET_2_0"
        ParquetVersion_V2_4" parquet::ParquetVersion::PARQUET_2_4"
        ParquetVersion_V2_6" parquet::ParquetVersion::PARQUET_2_6"

    enum ParquetSortOrder" parquet::SortOrder::type":
        ParquetSortOrder_SIGNED" parquet::SortOrder::SIGNED"
        ParquetSortOrder_UNSIGNED" parquet::SortOrder::UNSIGNED"
        ParquetSortOrder_UNKNOWN" parquet::SortOrder::UNKNOWN"

    cdef cppclass CParquetLogicalType" parquet::LogicalType":
        c_string ToString() const
        c_string ToJSON() const
        ParquetLogicalTypeId type() const

    cdef cppclass CParquetDecimalType \
            " parquet::DecimalLogicalType"(CParquetLogicalType):
        int32_t precision() const
        int32_t scale() const

    cdef cppclass CParquetIntType \
            " parquet::IntLogicalType"(CParquetLogicalType):
        int bit_width() const
        c_bool is_signed() const

    cdef cppclass CParquetTimeType \
            " parquet::TimeLogicalType"(CParquetLogicalType):
        c_bool is_adjusted_to_utc() const
        ParquetTimeUnit time_unit() const

    cdef cppclass CParquetTimestampType \
            " parquet::TimestampLogicalType"(CParquetLogicalType):
        c_bool is_adjusted_to_utc() const
        ParquetTimeUnit time_unit() const

    cdef cppclass ColumnDescriptor" parquet::ColumnDescriptor":
        c_bool Equals(const ColumnDescriptor& other)

        shared_ptr[ColumnPath] path()
        int16_t max_definition_level()
        int16_t max_repetition_level()

        ParquetType physical_type()
        const shared_ptr[const CParquetLogicalType]& logical_type()
        ParquetConvertedType converted_type()
        const c_string& name()
        int type_length()
        int type_precision()
        int type_scale()

    cdef cppclass SchemaDescriptor:
        const ColumnDescriptor* Column(int i)
        shared_ptr[Node] schema()
        GroupNode* group()
        c_bool Equals(const SchemaDescriptor& other)
        c_string ToString()
        int num_columns()

    cdef c_string FormatStatValue(ParquetType parquet_type, c_string val)

    enum ParquetCipher" parquet::ParquetCipher::type":
        ParquetCipher_AES_GCM_V1" parquet::ParquetCipher::AES_GCM_V1"
        ParquetCipher_AES_GCM_CTR_V1" parquet::ParquetCipher::AES_GCM_CTR_V1"

    struct AadMetadata:
        c_string aad_prefix
        c_string aad_file_unique
        c_bool supply_aad_prefix

    struct EncryptionAlgorithm:
        ParquetCipher algorithm
        AadMetadata aad

cdef extern from "parquet/api/reader.h" namespace "parquet" nogil:
    cdef cppclass ColumnReader:
        pass

    cdef cppclass BoolReader(ColumnReader):
        pass

    cdef cppclass Int32Reader(ColumnReader):
        pass

    cdef cppclass Int64Reader(ColumnReader):
        pass

    cdef cppclass Int96Reader(ColumnReader):
        pass

    cdef cppclass FloatReader(ColumnReader):
        pass

    cdef cppclass DoubleReader(ColumnReader):
        pass

    cdef cppclass ByteArrayReader(ColumnReader):
        pass

    cdef cppclass RowGroupReader:
        pass

    cdef cppclass CEncodedStatistics" parquet::EncodedStatistics":
        const c_string& max() const
        const c_string& min() const
        int64_t null_count
        int64_t distinct_count
        bint has_min
        bint has_max
        bint has_null_count
        bint has_distinct_count

    cdef cppclass ParquetByteArray" parquet::ByteArray":
        uint32_t len
        const uint8_t* ptr

    cdef cppclass ParquetFLBA" parquet::FLBA":
        const uint8_t* ptr

    cdef cppclass CStatistics" parquet::Statistics":
        int64_t null_count() const
        int64_t distinct_count() const
        int64_t num_values() const
        bint HasMinMax()
        bint HasNullCount()
        bint HasDistinctCount()
        c_bool Equals(const CStatistics&) const
        void Reset()
        c_string EncodeMin()
        c_string EncodeMax()
        CEncodedStatistics Encode()
        void SetComparator()
        ParquetType physical_type() const
        const ColumnDescriptor* descr() const

    cdef cppclass CBoolStatistics" parquet::BoolStatistics"(CStatistics):
        c_bool min()
        c_bool max()

    cdef cppclass CInt32Statistics" parquet::Int32Statistics"(CStatistics):
        int32_t min()
        int32_t max()

    cdef cppclass CInt64Statistics" parquet::Int64Statistics"(CStatistics):
        int64_t min()
        int64_t max()

    cdef cppclass CFloatStatistics" parquet::FloatStatistics"(CStatistics):
        float min()
        float max()

    cdef cppclass CDoubleStatistics" parquet::DoubleStatistics"(CStatistics):
        double min()
        double max()

    cdef cppclass CByteArrayStatistics \
            " parquet::ByteArrayStatistics"(CStatistics):
        ParquetByteArray min()
        ParquetByteArray max()

    cdef cppclass CFLBAStatistics" parquet::FLBAStatistics"(CStatistics):
        ParquetFLBA min()
        ParquetFLBA max()

    cdef cppclass CColumnCryptoMetaData" parquet::ColumnCryptoMetaData":
        shared_ptr[ColumnPath] path_in_schema() const
        c_bool encrypted_with_footer_key() const
        const c_string& key_metadata() const

    cdef cppclass ParquetIndexLocation" parquet::IndexLocation":
        int64_t offset
        int32_t length

    cdef cppclass CColumnChunkMetaData" parquet::ColumnChunkMetaData":
        int64_t file_offset() const
        const c_string& file_path() const

        c_bool is_metadata_set() const
        ParquetType type() const
        int64_t num_values() const
        shared_ptr[ColumnPath] path_in_schema() const
        bint is_stats_set() const
        shared_ptr[CStatistics] statistics() const
        ParquetCompression compression() const
        const vector[ParquetEncoding]& encodings() const
        c_bool Equals(const CColumnChunkMetaData&) const

        int64_t has_dictionary_page() const
        int64_t dictionary_page_offset() const
        int64_t data_page_offset() const
        int64_t index_page_offset() const
        int64_t total_compressed_size() const
        int64_t total_uncompressed_size() const
        unique_ptr[CColumnCryptoMetaData] crypto_metadata() const
        optional[ParquetIndexLocation] GetColumnIndexLocation() const
        optional[ParquetIndexLocation] GetOffsetIndexLocation() const

    struct CSortingColumn" parquet::SortingColumn":
        int column_idx
        c_bool descending
        c_bool nulls_first

    cdef cppclass CRowGroupMetaData" parquet::RowGroupMetaData":
        c_bool Equals(const CRowGroupMetaData&) const
        int num_columns() const
        int64_t num_rows() const
        int64_t total_byte_size() const
        vector[CSortingColumn] sorting_columns() const
        unique_ptr[CColumnChunkMetaData] ColumnChunk(int i) const

    cdef cppclass CFileMetaData" parquet::FileMetaData":
        c_bool Equals(const CFileMetaData&) const
Loading ...