Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

arrow-nightlies / pyarrow   python

Repository URL to install this package:

/ includes / libarrow_dataset_parquet.pxd

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# distutils: language = c++

from pyarrow.includes.libarrow_dataset cimport *
from pyarrow.includes.libparquet_encryption cimport *

from pyarrow._parquet cimport *


cdef extern from "arrow/dataset/parquet_encryption_config.h" namespace "arrow::dataset" nogil:
    # Cython-side declaration of the C++ type
    # arrow::dataset::ParquetEncryptionConfig. Only the three data members
    # listed here are exposed; each is held through a shared_ptr.
    # NOTE(review): CCryptoFactory, CKmsConnectionConfig and
    # CEncryptionConfiguration are not declared in this file; presumably they
    # come from the libparquet_encryption cimport above -- confirm.
    cdef cppclass CParquetEncryptionConfig "arrow::dataset::ParquetEncryptionConfig":
        shared_ptr[CCryptoFactory] crypto_factory
        shared_ptr[CKmsConnectionConfig] kms_connection_config
        shared_ptr[CEncryptionConfiguration] encryption_config

    # Cython-side declaration of the C++ type
    # arrow::dataset::ParquetDecryptionConfig. It has the same first two
    # members as the encryption config; the third member is a
    # CDecryptionConfiguration instead of a CEncryptionConfiguration.
    cdef cppclass CParquetDecryptionConfig "arrow::dataset::ParquetDecryptionConfig":
        shared_ptr[CCryptoFactory] crypto_factory
        shared_ptr[CKmsConnectionConfig] kms_connection_config
        shared_ptr[CDecryptionConfiguration] decryption_config


cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil:

    # Declaration of arrow::dataset::ParquetFileWriter, declared here as a
    # subclass of CFileWriter.
    cdef cppclass CParquetFileWriter \
            "arrow::dataset::ParquetFileWriter"(CFileWriter):
        # Const accessor returning a const reference to a
        # shared_ptr[FileWriter].
        # NOTE(review): FileWriter is not declared in this file; presumably
        # it is the Parquet writer type cimported from pyarrow._parquet --
        # confirm.
        const shared_ptr[FileWriter]& parquet_writer() const

    # Declaration of arrow::dataset::ParquetFileWriteOptions, declared here as
    # a subclass of CFileWriteOptions. Exposes three shared_ptr members: the
    # writer properties, the Arrow writer properties and the encryption
    # config (CParquetEncryptionConfig, declared earlier in this file).
    cdef cppclass CParquetFileWriteOptions \
            "arrow::dataset::ParquetFileWriteOptions"(CFileWriteOptions):
        shared_ptr[WriterProperties] writer_properties
        shared_ptr[ArrowWriterProperties] arrow_writer_properties
        shared_ptr[CParquetEncryptionConfig] parquet_encryption_config

    # Declaration of arrow::dataset::ParquetFileFragment, declared here as a
    # subclass of CFileFragment.
    cdef cppclass CParquetFileFragment "arrow::dataset::ParquetFileFragment"(
            CFileFragment):
        # Const accessors: row_groups() returns a const reference to a
        # vector[int]; metadata() returns a shared_ptr[CFileMetaData].
        const vector[int]& row_groups() const
        shared_ptr[CFileMetaData] metadata() const
        # Takes a predicate expression and returns a CResult wrapping a
        # vector of fragment pointers.
        CResult[vector[shared_ptr[CFragment]]] SplitByRowGroup(
            CExpression predicate)
        # The next two declarations both bind to the C++ method name "Subset".
        # They are given distinct Cython names because their parameter types
        # differ: one takes a CExpression, the other a vector[int].
        CResult[shared_ptr[CFragment]] SubsetWithFilter "Subset"(
            CExpression predicate)
        CResult[shared_ptr[CFragment]] SubsetWithIds "Subset"(
            vector[int] row_group_ids)
        # Takes no arguments and returns a CStatus.
        CStatus EnsureCompleteMetadata()

    # Declaration of the nested C++ type
    # arrow::dataset::ParquetFileFormat::ReaderOptions under a flat Cython
    # name. Two members are exposed: a set of strings (dict_columns) and a
    # TimeUnit value (coerce_int96_timestamp_unit).
    # NOTE(review): judging by the names, dict_columns holds column names to
    # read as dictionary-encoded and the TimeUnit is the resolution used for
    # INT96 timestamps -- confirm against the Arrow C++ header.
    cdef cppclass CParquetFileFormatReaderOptions \
            "arrow::dataset::ParquetFileFormat::ReaderOptions":
        unordered_set[c_string] dict_columns
        TimeUnit coerce_int96_timestamp_unit

    # Declaration of arrow::dataset::ParquetFileFormat, declared here as a
    # subclass of CFileFormat.
    cdef cppclass CParquetFileFormat "arrow::dataset::ParquetFileFormat"(
            CFileFormat):
        # Reader options held by value (type declared just above in this
        # extern block).
        CParquetFileFormatReaderOptions reader_options
        # Takes a file source, a partition expression, a physical schema and
        # a vector[int] of row groups; returns a CResult wrapping a
        # shared_ptr[CFileFragment].
        CResult[shared_ptr[CFileFragment]] MakeFragment(
            CFileSource source,
            CExpression partition_expression,
            shared_ptr[CSchema] physical_schema,
            vector[int] row_groups)

    # Declaration of arrow::dataset::ParquetFragmentScanOptions, declared
    # here as a subclass of CFragmentScanOptions. Exposes three shared_ptr
    # members: the reader properties, the Arrow reader properties and the
    # decryption config (CParquetDecryptionConfig, declared earlier in this
    # file).
    cdef cppclass CParquetFragmentScanOptions \
            "arrow::dataset::ParquetFragmentScanOptions"(CFragmentScanOptions):
        shared_ptr[CReaderProperties] reader_properties
        shared_ptr[ArrowReaderProperties] arrow_reader_properties
        shared_ptr[CParquetDecryptionConfig] parquet_decryption_config

    # Declaration of arrow::dataset::ParquetFactoryOptions. Exposes three
    # value members: a CPartitioningOrFactory, a string and a boolean.
    # This type is the `options` argument of both CParquetDatasetFactory
    # "Make" overloads declared below in this extern block.
    cdef cppclass CParquetFactoryOptions \
            "arrow::dataset::ParquetFactoryOptions":
        CPartitioningOrFactory partitioning
        c_string partition_base_dir
        c_bool validate_column_chunk_paths

    # Declaration of arrow::dataset::ParquetDatasetFactory, declared here as
    # a subclass of CDatasetFactory.
    #
    # Both static methods below bind to the same C++ name "Make"; they are
    # given distinct Cython names because their parameter lists differ. Both
    # return a CResult wrapping a shared_ptr[CDatasetFactory].
    cdef cppclass CParquetDatasetFactory \
            "arrow::dataset::ParquetDatasetFactory"(CDatasetFactory):
        # "Make" overload whose first argument is a string path.
        @staticmethod
        CResult[shared_ptr[CDatasetFactory]] MakeFromMetaDataPath "Make"(
            const c_string& metadata_path,
            shared_ptr[CFileSystem] filesystem,
            shared_ptr[CParquetFileFormat] format,
            CParquetFactoryOptions options
        )

        # "Make" overload whose first argument is a CFileSource (despite the
        # parameter being named metadata_path here) and which additionally
        # takes a base_path string.
        @staticmethod
        CResult[shared_ptr[CDatasetFactory]] MakeFromMetaDataSource "Make"(
            const CFileSource& metadata_path,
            const c_string& base_path,
            shared_ptr[CFileSystem] filesystem,
            shared_ptr[CParquetFileFormat] format,
            CParquetFactoryOptions options
        )