# Learn more » Push, build, and install: RubyGems, npm packages, Python packages, Maven artifacts, PHP packages, Go Modules, Bower components, Debian packages, RPM packages, NuGet packages
#
# arrow-nightlies / pyarrow (python)
#
# Repository URL to install this package:
#
# / includes / libarrow_python.pxd

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# distutils: language = c++

from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport *


# Function type of an invalid-row callback: it receives a Python object and
# a const reference to a CCSVInvalidRow, and returns a CInvalidRowResult.
# Used as the template argument of the first parameter of
# MakeInvalidRowHandler below.
ctypedef CInvalidRowResult PyInvalidRowCallback(object,
                                                const CCSVInvalidRow&)


# Declarations from "arrow/python/csv.h", C++ namespace arrow::py::csv.
# The block is not marked nogil, so callers must hold the GIL.
cdef extern from "arrow/python/csv.h" namespace "arrow::py::csv":

    # Takes a function[...] wrapper around a PyInvalidRowCallback together
    # with a Python object `handler`, and returns a
    # function[CInvalidRowHandler].
    function[CInvalidRowHandler] MakeInvalidRowHandler(
        function[PyInvalidRowCallback], object handler)


# Declarations from "arrow/python/api.h", C++ namespace arrow::py, that are
# deliberately kept out of the nogil block further down.
cdef extern from "arrow/python/api.h" namespace "arrow::py":
    # Requires GIL
    # Takes two Python objects (`obj`, `mask`) and a boolean flag; returns a
    # CResult wrapping a shared_ptr[CDataType].
    CResult[shared_ptr[CDataType]] InferArrowType(
        object obj, object mask, c_bool pandas_null_sentinels)


# Declarations from "arrow/python/api.h", C++ namespace arrow::py::internal.
# The block is not marked nogil, so callers must hold the GIL.
cdef extern from "arrow/python/api.h" namespace "arrow::py::internal":
    # Returns a Python object (declared `object`, so Cython manages the
    # reference).
    object NewMonthDayNanoTupleType()
    # The two converters below return a raw PyObject* wrapped in a CResult;
    # Cython does not manage that reference automatically.
    # NOTE(review): presumably a new reference the caller must take
    # ownership of -- confirm against the C++ header.
    CResult[PyObject*] MonthDayNanoIntervalArrayToPyList(
        const CMonthDayNanoIntervalArray& array)
    CResult[PyObject*] MonthDayNanoIntervalScalarToPyObject(
        const CMonthDayNanoIntervalScalar& scalar)


# Binding for the C++ enum arrow::py::MapConversionType from
# "arrow/python/arrow_to_pandas.h".
# The extern namespace is the enum's own qualified name, while the enum type
# itself carries an explicit C name. NOTE(review): this looks like the usual
# way to make the members resolve as arrow::py::MapConversionType::<member>
# (scoped enum) -- confirm against the header.
cdef extern from "arrow/python/arrow_to_pandas.h" namespace "arrow::py::MapConversionType":
    cdef enum MapConversionType "arrow::py::MapConversionType":
        DEFAULT,
        LOSSY,
        # Declared with a trailing underscore in this binding.
        STRICT_


# Declarations from "arrow/python/api.h", C++ namespace arrow::py.
# The whole block is marked nogil, so Cython allows every function here to
# be called without holding the GIL. As the TODO further down says, that
# marking is not accurate for all of them; several take or return Python
# objects.
cdef extern from "arrow/python/api.h" namespace "arrow::py" nogil:
    shared_ptr[CDataType] GetPrimitiveType(Type type)

    # Returns a Python object built from an npy_half value.
    object PyHalf_FromHalf(npy_half value)

    # Options struct passed by const reference to ConvertPySequence.
    cdef cppclass PyConversionOptions:
        PyConversionOptions()

        shared_ptr[CDataType] type
        int64_t size
        CMemoryPool* pool
        c_bool from_pandas
        c_bool ignore_timezone
        c_bool strict

    # TODO Some functions below are not actually "nogil"

    CResult[shared_ptr[CChunkedArray]] ConvertPySequence(
        object obj, object mask, const PyConversionOptions& options,
        CMemoryPool* pool)

    CResult[shared_ptr[CDataType]] NumPyDtypeToArrow(object dtype)

    # Two overloads of NdarrayToArrow: the second additionally takes
    # CCastOptions. Both write their result through `out`.
    CStatus NdarrayToArrow(CMemoryPool* pool, object ao, object mo,
                           c_bool from_pandas,
                           const shared_ptr[CDataType]& type,
                           shared_ptr[CChunkedArray]* out)

    CStatus NdarrayToArrow(CMemoryPool* pool, object ao, object mo,
                           c_bool from_pandas,
                           const shared_ptr[CDataType]& type,
                           const CCastOptions& cast_options,
                           shared_ptr[CChunkedArray]* out)

    CStatus NdarrayToTensor(CMemoryPool* pool, object ao,
                            const vector[c_string]& dim_names,
                            shared_ptr[CTensor]* out)

    # The *ToNdarray functions return a CStatus and hand their results back
    # as raw PyObject* through output pointers.
    CStatus TensorToNdarray(const shared_ptr[CTensor]& tensor, object base,
                            PyObject** out)

    CStatus SparseCOOTensorToNdarray(
        const shared_ptr[CSparseCOOTensor]& sparse_tensor, object base,
        PyObject** out_data, PyObject** out_coords)

    CStatus SparseCSRMatrixToNdarray(
        const shared_ptr[CSparseCSRMatrix]& sparse_tensor, object base,
        PyObject** out_data, PyObject** out_indptr, PyObject** out_indices)

    CStatus SparseCSCMatrixToNdarray(
        const shared_ptr[CSparseCSCMatrix]& sparse_tensor, object base,
        PyObject** out_data, PyObject** out_indptr, PyObject** out_indices)

    CStatus SparseCSFTensorToNdarray(
        const shared_ptr[CSparseCSFTensor]& sparse_tensor, object base,
        PyObject** out_data, PyObject** out_indptr, PyObject** out_indices)

    # The NdarraysToSparse* functions take Python objects plus shape and
    # dimension-name vectors, and write a shared_ptr to the sparse
    # structure through `out`.
    CStatus NdarraysToSparseCOOTensor(CMemoryPool* pool, object data_ao,
                                      object coords_ao,
                                      const vector[int64_t]& shape,
                                      const vector[c_string]& dim_names,
                                      shared_ptr[CSparseCOOTensor]* out)

    CStatus NdarraysToSparseCSRMatrix(CMemoryPool* pool, object data_ao,
                                      object indptr_ao, object indices_ao,
                                      const vector[int64_t]& shape,
                                      const vector[c_string]& dim_names,
                                      shared_ptr[CSparseCSRMatrix]* out)

    CStatus NdarraysToSparseCSCMatrix(CMemoryPool* pool, object data_ao,
                                      object indptr_ao, object indices_ao,
                                      const vector[int64_t]& shape,
                                      const vector[c_string]& dim_names,
                                      shared_ptr[CSparseCSCMatrix]* out)

    # Same shape as the CSR/CSC variants, with an extra `axis_order` vector.
    CStatus NdarraysToSparseCSFTensor(CMemoryPool* pool, object data_ao,
                                      object indptr_ao, object indices_ao,
                                      const vector[int64_t]& shape,
                                      const vector[int64_t]& axis_order,
                                      const vector[c_string]& dim_names,
                                      shared_ptr[CSparseCSFTensor]* out)

    # The TensorToSparse* functions take a shared_ptr[CTensor] by value and
    # write the sparse result through `out`; no Python objects are involved
    # in their signatures.
    CStatus TensorToSparseCOOTensor(shared_ptr[CTensor],
                                    shared_ptr[CSparseCOOTensor]* out)

    CStatus TensorToSparseCSRMatrix(shared_ptr[CTensor],
                                    shared_ptr[CSparseCSRMatrix]* out)

    CStatus TensorToSparseCSCMatrix(shared_ptr[CTensor],
                                    shared_ptr[CSparseCSCMatrix]* out)

    CStatus TensorToSparseCSFTensor(shared_ptr[CTensor],
                                    shared_ptr[CSparseCSFTensor]* out)

    # The Convert*ToPandas functions take a PandasOptions (declared further
    # down in this block) and write a raw PyObject* through `out`.
    CStatus ConvertArrayToPandas(const PandasOptions& options,
                                 shared_ptr[CArray] arr,
                                 object py_ref, PyObject** out)

    CStatus ConvertChunkedArrayToPandas(const PandasOptions& options,
                                        shared_ptr[CChunkedArray] arr,
                                        object py_ref, PyObject** out)

    CStatus ConvertTableToPandas(const PandasOptions& options,
                                 shared_ptr[CTable] table,
                                 PyObject** out)

    # Cython-side names prefixed with c_, bound by explicit C name to
    # arrow::py::set_default_memory_pool and arrow::py::get_memory_pool.
    # The declaration below must not end with a line-continuation
    # backslash: that would splice it onto whatever line follows.
    void c_set_default_memory_pool \
        " arrow::py::set_default_memory_pool"(CMemoryPool* pool)

    CMemoryPool* c_get_memory_pool \
        " arrow::py::get_memory_pool"()

    # CBuffer subclass exposing a static factory that takes a Python object.
    cdef cppclass PyBuffer(CBuffer):
        @staticmethod
        CResult[shared_ptr[CBuffer]] FromPyObject(object obj)

    # CBuffer subclass whose static factory takes a raw data pointer, a
    # size and a Python object `base`, and writes the buffer through `out`.
    # NOTE(review): presumably `base` keeps the memory behind `data` alive
    # -- confirm against the C++ header.
    cdef cppclass PyForeignBuffer(CBuffer):
        @staticmethod
        CStatus Make(const uint8_t* data, int64_t size, object base,
                     shared_ptr[CBuffer]* out)

    # CRandomAccessFile subclass constructed from a Python object `fo`.
    cdef cppclass PyReadableFile(CRandomAccessFile):
        PyReadableFile(object fo)

    # COutputStream subclass constructed from a Python object `fo`.
    cdef cppclass PyOutputStream(COutputStream):
        PyOutputStream(object fo)

    # Options struct passed by const reference to the Convert*ToPandas
    # functions above. Only the fields listed here are visible to Cython.
    cdef cppclass PandasOptions:
        CMemoryPool* pool
        c_bool strings_to_categorical
        c_bool zero_copy_only
        c_bool integer_object_nulls
        c_bool date_as_object
        c_bool timestamp_as_object
        c_bool use_threads
        c_bool coerce_temporal_nanoseconds
        c_bool ignore_timezone
        c_bool deduplicate_objects
        c_bool safe_cast
        c_bool split_blocks
        c_bool self_destruct
        # The only non-boolean switch in this struct: a MapConversionType
        # enum value.
        MapConversionType maps_as_pydicts
        c_bool decode_dictionaries
        unordered_set[c_string] categorical_columns
        unordered_set[c_string] extension_columns
        c_bool to_numpy

    # Bound by explicit C name to arrow::py::SerializedPyObject.
    cdef cppclass CSerializedPyObject" arrow::py::SerializedPyObject":
        shared_ptr[CRecordBatch] batch
        vector[shared_ptr[CTensor]] tensors

        CStatus WriteTo(COutputStream* dst)
        CStatus GetComponents(CMemoryPool* pool, PyObject** dst)

    CStatus SerializeObject(object context, object sequence,
                            CSerializedPyObject* out)

    CStatus DeserializeObject(object context,
                              const CSerializedPyObject& obj,
                              PyObject* base, PyObject** out)

    CStatus ReadSerializedObject(CRandomAccessFile* src,
                                 CSerializedPyObject* out)

    # Integer counters, one per sparse format (coo, csr, csc, csf) plus
    # ndim_csf, with two const accessors returning totals.
    cdef cppclass SparseTensorCounts:
        SparseTensorCounts()
        int coo
        int csr
        int csc
        int csf
        int ndim_csf
        int num_total_tensors() const
        int num_total_buffers() const

    # Takes component counts and a Python object `buffers`, and writes a
    # CSerializedPyObject through `out`.
    CStatus GetSerializedFromComponents(
        int num_tensors,
        const SparseTensorCounts& num_sparse_tensors,
        int num_ndarrays,
        int num_buffers,
        object buffers,
        CSerializedPyObject* out)


# Declarations from "arrow/python/api.h", C++ namespace arrow::py::internal,
# marked nogil.
cdef extern from "arrow/python/api.h" namespace "arrow::py::internal" nogil:
    # Opaque binding for arrow::py::internal::TimePoint: no members are
    # exposed, so values are only passed between the functions below.
    cdef cppclass CTimePoint "arrow::py::internal::TimePoint":
        pass

    # Conversions to and from CTimePoint. The `_s` variant takes a double
    # and the `_ns` variants use int64_t.
    CTimePoint PyDateTime_to_TimePoint(PyDateTime_DateTime* pydatetime)
    int64_t TimePoint_to_ns(CTimePoint val)
    CTimePoint TimePoint_from_s(double val)
    CTimePoint TimePoint_from_ns(int64_t val)

    # Both functions deal in raw PyObject* (argument and result
    # respectively), so Cython does no reference management for them.
    CResult[c_string] TzinfoToString(PyObject* pytzinfo)
    CResult[PyObject*] StringToTzinfo(c_string)


# Declaration from "arrow/python/init.h" (no C++ namespace given).
cdef extern from "arrow/python/init.h":
    # `except -1`: a return value of -1 tells Cython that a Python exception
    # has been set and must be propagated.
    int arrow_init_numpy() except -1


# Declaration from "arrow/python/pyarrow.h", C++ namespace arrow::py.
cdef extern from "arrow/python/pyarrow.h" namespace "arrow::py":
    # `except -1`: a return value of -1 tells Cython that a Python exception
    # has been set and must be propagated.
    int import_pyarrow() except -1


# Error helpers from "arrow/python/common.h", C++ namespace arrow::py.
# The block is not marked nogil, so callers must hold the GIL.
cdef extern from "arrow/python/common.h" namespace "arrow::py":
    # Boolean predicate on a CStatus.
    c_bool IsPyError(const CStatus& status)
    # `except *`: Cython checks for a pending Python exception after every
    # call, since a void function has no return value to signal one.
    void RestorePyError(const CStatus& status) except *


# Smart-pointer wrappers from "arrow/python/common.h", C++ namespace
# arrow::py, marked nogil.
cdef extern from "arrow/python/common.h" namespace "arrow::py" nogil:
    # Declared as a subclass of shared_ptr[T].
    cdef cppclass SharedPtrNoGIL[T](shared_ptr[T]):
        # This looks like the only way to satisfy both Cython 2 and Cython 3
        # (assignment is declared with a variadic `...` parameter list).
        SharedPtrNoGIL& operator=(...)
    # Declared as a subclass of unique_ptr[T, DELETER]; `DELETER=*` makes
    # the second template parameter optional on the Cython side.
    cdef cppclass UniquePtrNoGIL[T, DELETER=*](unique_ptr[T, DELETER]):
        UniquePtrNoGIL& operator=(...)


# Type predicates from "arrow/python/inference.h", C++ namespace arrow::py.
# Each takes a Python object and returns a C boolean. The block is not
# marked nogil, so callers must hold the GIL.
cdef extern from "arrow/python/inference.h" namespace "arrow::py":
    c_bool IsPyBool(object o)
    c_bool IsPyInt(object o)
    c_bool IsPyFloat(object o)


# Declaration from "arrow/python/ipc.h", C++ namespace arrow::py.
# The block is not marked nogil, so callers must hold the GIL.
cdef extern from "arrow/python/ipc.h" namespace "arrow::py":
    # Bound by explicit C name to arrow::py::PyRecordBatchReader and
    # declared as a subclass of CRecordBatchReader.
    cdef cppclass CPyRecordBatchReader" arrow::py::PyRecordBatchReader" \
            (CRecordBatchReader):
        # Static factory taking a schema and a Python object; the result is
        # returned as the base type shared_ptr[CRecordBatchReader].
        @staticmethod
        CResult[shared_ptr[CRecordBatchReader]] Make(shared_ptr[CSchema],
                                                     object)


# Declaration from "arrow/python/ipc.h", C++ namespace arrow::py, marked
# nogil (its signature involves no Python objects).
cdef extern from "arrow/python/ipc.h" namespace "arrow::py" nogil:
    # Bound by explicit C name to arrow::py::CastingRecordBatchReader and
    # declared as a subclass of CRecordBatchReader.
    cdef cppclass CCastingRecordBatchReader" arrow::py::CastingRecordBatchReader" \
            (CRecordBatchReader):
        # Static factory taking an existing reader and a schema; the result
        # is returned as the base type shared_ptr[CRecordBatchReader].
        @staticmethod
        CResult[shared_ptr[CRecordBatchReader]] Make(shared_ptr[CRecordBatchReader],
                                                     shared_ptr[CSchema])


# Declarations from "arrow/python/extension_type.h", C++ namespace
# arrow::py. The block is not marked nogil, so callers must hold the GIL.
cdef extern from "arrow/python/extension_type.h" namespace "arrow::py":
    # Bound by explicit C name to arrow::py::PyExtensionType and declared
    # as a subclass of CExtensionType.
    cdef cppclass CPyExtensionType \
            " arrow::py::PyExtensionType"(CExtensionType):
        # Static factory taking a storage type, an extension name and a
        # Python object `typ`; writes the new type through `out`.
        @staticmethod
        CStatus FromClass(const shared_ptr[CDataType] storage_type,
                          const c_string extension_name, object typ,
                          shared_ptr[CExtensionType]* out)

        # Static factory taking a storage type and a Python object `inst`;
        # writes the new type through `out`.
        @staticmethod
        CStatus FromInstance(shared_ptr[CDataType] storage_type,
                             object inst, shared_ptr[CExtensionType]* out)

        # Accessors for a Python object associated with the type.
        object GetInstance()
        CStatus SetInstance(object)

    # Free functions in the same namespace.
    c_string PyExtensionName()
    CStatus RegisterPyExtensionType(shared_ptr[CDataType])
    CStatus UnregisterPyExtensionType(c_string type_name)


# Declaration from "arrow/python/benchmark.h", C++ namespace
# arrow::py::benchmark.
cdef extern from "arrow/python/benchmark.h" namespace "arrow::py::benchmark":
    # Takes a Python object `lst`. `except *` makes Cython check for a
    # pending Python exception after every call.
    void Benchmark_PandasObjectIsNull(object lst) except *


# Declaration from "arrow/python/gdb.h", C++ namespace arrow::gdb, marked
# nogil.
cdef extern from "arrow/python/gdb.h" namespace "arrow::gdb" nogil:
    # Exposed to Cython as GdbTestSession; bound by explicit C name to
    # arrow::gdb::TestSession.
    void GdbTestSession "arrow::gdb::TestSession"()