# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# distutils: language = c++
from pyarrow.includes.libarrow_dataset cimport *
from pyarrow.includes.libparquet_encryption cimport *
from pyarrow._parquet cimport *
# Declarations taken from Arrow Dataset's Parquet encryption header.
# Everything declared in this block lives in the C++ namespace
# ``arrow::dataset`` and is marked ``nogil``.
cdef extern from "arrow/dataset/parquet_encryption_config.h" \
        namespace "arrow::dataset" nogil:

    # Cython view of ``arrow::dataset::ParquetEncryptionConfig``.
    # Exposes three public members, each held through a shared_ptr:
    # a crypto factory, a KMS connection config and an encryption
    # configuration.
    cdef cppclass CParquetEncryptionConfig \
            "arrow::dataset::ParquetEncryptionConfig":
        shared_ptr[CCryptoFactory] crypto_factory
        shared_ptr[CKmsConnectionConfig] kms_connection_config
        shared_ptr[CEncryptionConfiguration] encryption_config

    # Cython view of ``arrow::dataset::ParquetDecryptionConfig``.
    # Same layout as the encryption counterpart, except that the third
    # member is a decryption configuration.
    cdef cppclass CParquetDecryptionConfig \
            "arrow::dataset::ParquetDecryptionConfig":
        shared_ptr[CCryptoFactory] crypto_factory
        shared_ptr[CKmsConnectionConfig] kms_connection_config
        shared_ptr[CDecryptionConfiguration] decryption_config
# Parquet-specific pieces of the Arrow Dataset C++ API, all pulled in
# through the umbrella header "arrow/dataset/api.h". Everything declared
# in this block lives in ``arrow::dataset`` and is marked ``nogil``.
cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil:

    # ``arrow::dataset::ParquetFileWriter``; derives from CFileWriter.
    cdef cppclass CParquetFileWriter "arrow::dataset::ParquetFileWriter"(
            CFileWriter):
        # Const accessor yielding a const reference to the shared
        # pointer of the wrapped FileWriter.
        const shared_ptr[FileWriter]& parquet_writer() const

    # ``arrow::dataset::ParquetFileWriteOptions``; derives from
    # CFileWriteOptions. Holds writer properties, Arrow writer
    # properties and the Parquet encryption config, each via shared_ptr.
    cdef cppclass CParquetFileWriteOptions \
            "arrow::dataset::ParquetFileWriteOptions"(CFileWriteOptions):
        shared_ptr[WriterProperties] writer_properties
        shared_ptr[ArrowWriterProperties] arrow_writer_properties
        shared_ptr[CParquetEncryptionConfig] parquet_encryption_config

    # ``arrow::dataset::ParquetFileFragment``; derives from CFileFragment.
    cdef cppclass CParquetFileFragment \
            "arrow::dataset::ParquetFileFragment"(CFileFragment):
        # Row-group indices, returned by const reference.
        const vector[int]& row_groups() const

        # Shared pointer to the file metadata object.
        shared_ptr[CFileMetaData] metadata() const

        # Takes a predicate expression; yields a vector of fragments
        # wrapped in a CResult.
        CResult[vector[shared_ptr[CFragment]]] SplitByRowGroup(
            CExpression predicate
        )

        # The next two declarations both bind the C++ method ``Subset``.
        # They carry distinct Cython names so that each overload (by
        # predicate / by row-group ids) can be addressed explicitly.
        CResult[shared_ptr[CFragment]] SubsetWithFilter "Subset"(
            CExpression predicate
        )
        CResult[shared_ptr[CFragment]] SubsetWithIds "Subset"(
            vector[int] row_group_ids
        )

        # Reports success or failure through the returned CStatus.
        CStatus EnsureCompleteMetadata()

    # Nested C++ type ``arrow::dataset::ParquetFileFormat::ReaderOptions``:
    # a set of column-name strings and a TimeUnit value.
    cdef cppclass CParquetFileFormatReaderOptions \
            "arrow::dataset::ParquetFileFormat::ReaderOptions":
        unordered_set[c_string] dict_columns
        TimeUnit coerce_int96_timestamp_unit

    # ``arrow::dataset::ParquetFileFormat``; derives from CFileFormat.
    cdef cppclass CParquetFileFormat \
            "arrow::dataset::ParquetFileFormat"(CFileFormat):
        # Reader options stored by value on the format object.
        CParquetFileFormatReaderOptions reader_options

        # Builds a file fragment from a source, a partition expression,
        # a physical schema and a list of row-group indices.
        CResult[shared_ptr[CFileFragment]] MakeFragment(
            CFileSource source, CExpression partition_expression,
            shared_ptr[CSchema] physical_schema, vector[int] row_groups)

    # ``arrow::dataset::ParquetFragmentScanOptions``; derives from
    # CFragmentScanOptions. Holds reader properties, Arrow reader
    # properties and the Parquet decryption config, each via shared_ptr.
    cdef cppclass CParquetFragmentScanOptions \
            "arrow::dataset::ParquetFragmentScanOptions"(
                CFragmentScanOptions):
        shared_ptr[CReaderProperties] reader_properties
        shared_ptr[ArrowReaderProperties] arrow_reader_properties
        shared_ptr[CParquetDecryptionConfig] parquet_decryption_config

    # ``arrow::dataset::ParquetFactoryOptions``: plain option members
    # passed to the dataset factory declared below.
    cdef cppclass CParquetFactoryOptions \
            "arrow::dataset::ParquetFactoryOptions":
        CPartitioningOrFactory partitioning
        c_string partition_base_dir
        c_bool validate_column_chunk_paths

    # ``arrow::dataset::ParquetDatasetFactory``; derives from
    # CDatasetFactory. Both static declarations bind the C++ function
    # ``Make``; the distinct Cython names select between the overload
    # taking a metadata path string and the one taking a file source
    # plus a base path.
    cdef cppclass CParquetDatasetFactory \
            "arrow::dataset::ParquetDatasetFactory"(CDatasetFactory):
        @staticmethod
        CResult[shared_ptr[CDatasetFactory]] MakeFromMetaDataPath "Make"(
            const c_string& metadata_path,
            shared_ptr[CFileSystem] filesystem,
            shared_ptr[CParquetFileFormat] format,
            CParquetFactoryOptions options)

        @staticmethod
        CResult[shared_ptr[CDatasetFactory]] MakeFromMetaDataSource "Make"(
            const CFileSource& metadata_path,
            const c_string& base_path,
            shared_ptr[CFileSystem] filesystem,
            shared_ptr[CParquetFileFormat] format,
            CParquetFactoryOptions options)