Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

arrow-nightlies / pyarrow   python

Repository URL to install this package:

Version: 19.0.0.dev246 

/ src / arrow / python / common.h

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <functional>
#include <memory>
#include <optional>
#include <utility>

#include "arrow/buffer.h"
#include "arrow/python/pyarrow.h"
#include "arrow/python/visibility.h"
#include "arrow/result.h"
#include "arrow/util/macros.h"

namespace arrow {

class MemoryPool;
template <class T>
class Result;

namespace py {

// Convert current Python error to a Status.  The Python error state is cleared
// and can be restored with RestorePyError().
ARROW_PYTHON_EXPORT Status ConvertPyError(StatusCode code = StatusCode::UnknownError);
// Query whether the given Status is a Python error (as wrapped by ConvertPyError()).
ARROW_PYTHON_EXPORT bool IsPyError(const Status& status);
// Restore a Python error wrapped in a Status.
ARROW_PYTHON_EXPORT void RestorePyError(const Status& status);

// Catch a pending Python exception and return the corresponding Status.
// If no exception is pending, Status::OK() is returned.
inline Status CheckPyError(StatusCode code = StatusCode::UnknownError) {
  if (ARROW_PREDICT_TRUE(!PyErr_Occurred())) {
    return Status::OK();
  } else {
    return ConvertPyError(code);
  }
}

#define RETURN_IF_PYERROR() ARROW_RETURN_NOT_OK(CheckPyError())

#define PY_RETURN_IF_ERROR(CODE) ARROW_RETURN_NOT_OK(CheckPyError(CODE))

// For Cython, as you can't define template C++ functions in Cython, only use them.
// This function can set a Python exception.  It assumes that T has a (cheap)
// default constructor.
template <class T>
T GetResultValue(Result<T> result) {
  if (ARROW_PREDICT_TRUE(result.ok())) {
    return *std::move(result);
  } else {
    int r = internal::check_status(result.status());  // takes the GIL
    assert(r == -1);                                  // should have errored out
    ARROW_UNUSED(r);
    return {};
  }
}

/// \brief Wrap a Result and return the corresponding Python object.
///
/// If the Result is successful, py_wrapper is called with its result value
/// and should return a PyObject*. If py_wrapper is successful (returns
/// a non-NULL value), its return value is returned.
///
/// If either the Result or py_wrapper fails, the associated Python exception
/// is raised and NULL is returned.
//
/// \param result The Result whose value to wrap in a Python object.
/// \param py_wrapper A function (likely defined in Cython) to convert the C++
///   value of the Result to a Python object.
/// \return A new Python reference, or NULL if an exception occurred
template <typename T, typename PyWrapper = PyObject* (*)(T)>
PyObject* WrapResult(Result<T> result, PyWrapper&& py_wrapper) {
  static_assert(std::is_same_v<PyObject*, decltype(py_wrapper(std::declval<T>()))>,
                "PyWrapper argument to WrapResult should return a PyObject* "
                "when called with a T*");
  Status st = result.status();
  if (st.ok()) {
    PyObject* py_value = py_wrapper(result.MoveValueUnsafe());
    st = CheckPyError();
    if (st.ok()) {
      return py_value;
    }
    Py_XDECREF(py_value);  // should be null, but who knows
  }
  // Status is an error, convert it to an exception.
  return internal::convert_status(st);
}

// A RAII-style helper that ensures the GIL is acquired inside a lexical block.
class ARROW_PYTHON_EXPORT PyAcquireGIL {
 public:
  PyAcquireGIL() : acquired_gil_(false) { acquire(); }

  ~PyAcquireGIL() { release(); }

  void acquire() {
    if (!acquired_gil_) {
      state_ = PyGILState_Ensure();
      acquired_gil_ = true;
    }
  }

  // idempotent
  void release() {
    if (acquired_gil_) {
      PyGILState_Release(state_);
      acquired_gil_ = false;
    }
  }

 private:
  bool acquired_gil_;
  PyGILState_STATE state_;
  ARROW_DISALLOW_COPY_AND_ASSIGN(PyAcquireGIL);
};

// A RAII-style helper that releases the GIL until the end of a lexical block
class ARROW_PYTHON_EXPORT PyReleaseGIL {
 public:
  PyReleaseGIL() : ptr_(PyEval_SaveThread(), &unique_ptr_deleter) {}

 private:
  static void unique_ptr_deleter(PyThreadState* state) {
    if (state) {
      PyEval_RestoreThread(state);
    }
  }
  std::unique_ptr<PyThreadState, decltype(&unique_ptr_deleter)> ptr_;
};

// A helper to call safely into the Python interpreter from arbitrary C++ code.
// The GIL is acquired, and the current thread's error status is preserved.
template <typename Function>
auto SafeCallIntoPython(Function&& func) -> decltype(func()) {
  PyAcquireGIL lock;
  PyObject* exc_type;
  PyObject* exc_value;
  PyObject* exc_traceback;
  PyErr_Fetch(&exc_type, &exc_value, &exc_traceback);
  auto maybe_status = std::forward<Function>(func)();
  // If the return Status is a "Python error", the current Python error status
  // describes the error and shouldn't be clobbered.
  if (!IsPyError(::arrow::internal::GenericToStatus(maybe_status)) &&
      exc_type != NULLPTR) {
    PyErr_Restore(exc_type, exc_value, exc_traceback);
  }
  return maybe_status;
}

template <typename Function>
auto SafeCallIntoPythonVoid(Function&& func) -> decltype(func()) {
  PyAcquireGIL lock;
  PyObject* exc_type;
  PyObject* exc_value;
  PyObject* exc_traceback;
  PyErr_Fetch(&exc_type, &exc_value, &exc_traceback);
  func();
  if (exc_type != NULLPTR) {
    PyErr_Restore(exc_type, exc_value, exc_traceback);
  }
}

// A RAII primitive that DECREFs the underlying PyObject* when it
// goes out of scope.
class ARROW_PYTHON_EXPORT OwnedRef {
 public:
  OwnedRef() : obj_(NULLPTR) {}
  OwnedRef(OwnedRef&& other) : OwnedRef(other.detach()) {}
  explicit OwnedRef(PyObject* obj) : obj_(obj) {}

  OwnedRef& operator=(OwnedRef&& other) {
    obj_ = other.detach();
    return *this;
  }

  ~OwnedRef() {
    // GH-38626: destructor may be called after the Python interpreter is finalized.
    if (Py_IsInitialized()) {
      reset();
    }
  }

  void reset(PyObject* obj) {
    Py_XDECREF(obj_);
    obj_ = obj;
  }

  void reset() { reset(NULLPTR); }

  PyObject* detach() {
    PyObject* result = obj_;
    obj_ = NULLPTR;
    return result;
  }

  PyObject* obj() const { return obj_; }

  PyObject** ref() { return &obj_; }

  operator bool() const { return obj_ != NULLPTR; }

 private:
  ARROW_DISALLOW_COPY_AND_ASSIGN(OwnedRef);

  PyObject* obj_;
};

// Same as OwnedRef, but ensures the GIL is taken when it goes out of scope.
// This is for situations where the GIL is not always known to be held
// (e.g. if it is released in the middle of a function for performance reasons)
class ARROW_PYTHON_EXPORT OwnedRefNoGIL : public OwnedRef {
 public:
  OwnedRefNoGIL() : OwnedRef() {}
  OwnedRefNoGIL(OwnedRefNoGIL&& other) : OwnedRef(other.detach()) {}
  explicit OwnedRefNoGIL(PyObject* obj) : OwnedRef(obj) {}

  ~OwnedRefNoGIL() {
    // GH-38626: destructor may be called after the Python interpreter is finalized.
    if (Py_IsInitialized() && obj() != NULLPTR) {
      PyAcquireGIL lock;
      reset();
    }
  }
};

template <template <typename...> typename SmartPtr, typename... Ts>
class SmartPtrNoGIL : public SmartPtr<Ts...> {
  using Base = SmartPtr<Ts...>;

 public:
  template <typename... Args>
  SmartPtrNoGIL(Args&&... args) : Base(std::forward<Args>(args)...) {}

  ~SmartPtrNoGIL() { reset(); }

  template <typename... Args>
  void reset(Args&&... args) {
    auto release_guard = optional_gil_release();
    Base::reset(std::forward<Args>(args)...);
  }

  template <typename V>
  SmartPtrNoGIL& operator=(V&& v) {
    auto release_guard = optional_gil_release();
    Base::operator=(std::forward<V>(v));
    return *this;
  }

 private:
  // Only release the GIL if we own an object *and* the Python runtime is
  // valid *and* the GIL is held.
  std::optional<PyReleaseGIL> optional_gil_release() const {
    if (this->get() != nullptr && Py_IsInitialized() && PyGILState_Check()) {
      return PyReleaseGIL();
    }
    return {};
  }
};

/// \brief A std::shared_ptr<T, ...> subclass that releases the GIL when destroying T
template <typename... Ts>
using SharedPtrNoGIL = SmartPtrNoGIL<std::shared_ptr, Ts...>;

/// \brief A std::unique_ptr<T, ...> subclass that releases the GIL when destroying T
template <typename... Ts>
using UniquePtrNoGIL = SmartPtrNoGIL<std::unique_ptr, Ts...>;

template <typename Fn>
struct BoundFunction;

template <typename... Args>
struct BoundFunction<void(PyObject*, Args...)> {
  // We bind `cdef void fn(object, ...)` to get a `Status(...)`
  // where the Status contains any Python error raised by `fn`
  using Unbound = void(PyObject*, Args...);
  using Bound = Status(Args...);

  BoundFunction(Unbound* unbound, PyObject* bound_arg)
      : unbound_(unbound), bound_arg_(bound_arg) {}

  Status Invoke(Args... args) const {
    PyAcquireGIL lock;
    unbound_(bound_arg_.obj(), std::forward<Args>(args)...);
    RETURN_IF_PYERROR();
    return Status::OK();
  }

  Unbound* unbound_;
  OwnedRefNoGIL bound_arg_;
};

template <typename Return, typename... Args>
struct BoundFunction<Return(PyObject*, Args...)> {
  // We bind `cdef Return fn(object, ...)` to get a `Result<Return>(...)`
  // where the Result contains any Python error raised by `fn` or the
  // return value from `fn`.
  using Unbound = Return(PyObject*, Args...);
  using Bound = Result<Return>(Args...);

  BoundFunction(Unbound* unbound, PyObject* bound_arg)
      : unbound_(unbound), bound_arg_(bound_arg) {}

  Result<Return> Invoke(Args... args) const {
    PyAcquireGIL lock;
    Return ret = unbound_(bound_arg_.obj(), std::forward<Args>(args)...);
    RETURN_IF_PYERROR();
    return ret;
  }

  Unbound* unbound_;
  OwnedRefNoGIL bound_arg_;
};

template <typename OutFn, typename Return, typename... Args>
std::function<OutFn> BindFunction(Return (*unbound)(PyObject*, Args...),
                                  PyObject* bound_arg) {
  using Fn = BoundFunction<Return(PyObject*, Args...)>;

  static_assert(std::is_same<typename Fn::Bound, OutFn>::value,
                "requested bound function of unsupported type");

  Py_XINCREF(bound_arg);
  auto bound_fn = std::make_shared<Fn>(unbound, bound_arg);
  return
      [bound_fn](Args... args) { return bound_fn->Invoke(std::forward<Args>(args)...); };
}

// A temporary conversion of a Python object to a bytes area.
struct PyBytesView {
  const char* bytes;
  Py_ssize_t size;
  bool is_utf8;

  static Result<PyBytesView> FromString(PyObject* obj, bool check_utf8 = false) {
    PyBytesView self;
    ARROW_RETURN_NOT_OK(self.ParseString(obj, check_utf8));
    return std::move(self);
  }

  static Result<PyBytesView> FromUnicode(PyObject* obj) {
    PyBytesView self;
    ARROW_RETURN_NOT_OK(self.ParseUnicode(obj));
    return std::move(self);
  }

  static Result<PyBytesView> FromBinary(PyObject* obj) {
    PyBytesView self;
    ARROW_RETURN_NOT_OK(self.ParseBinary(obj));
    return std::move(self);
  }

  // View the given Python object as string-like, i.e. str or (utf8) bytes
  Status ParseString(PyObject* obj, bool check_utf8 = false) {
    if (PyUnicode_Check(obj)) {
      return ParseUnicode(obj);
    } else {
      ARROW_RETURN_NOT_OK(ParseBinary(obj));
      if (check_utf8) {
        // Check the bytes are utf8 utf-8
        OwnedRef decoded(PyUnicode_FromStringAndSize(bytes, size));
        if (ARROW_PREDICT_TRUE(!PyErr_Occurred())) {
          is_utf8 = true;
        } else {
          PyErr_Clear();
          is_utf8 = false;
        }
      }
      return Status::OK();
    }
  }

  // View the given Python object as unicode string
  Status ParseUnicode(PyObject* obj) {
    // The utf-8 representation is cached on the unicode object
    bytes = PyUnicode_AsUTF8AndSize(obj, &size);
    RETURN_IF_PYERROR();
    is_utf8 = true;
    return Status::OK();
  }

  // View the given Python object as binary-like, i.e. bytes
  Status ParseBinary(PyObject* obj) {
    if (PyBytes_Check(obj)) {
      bytes = PyBytes_AS_STRING(obj);
      size = PyBytes_GET_SIZE(obj);
      is_utf8 = false;
    } else if (PyByteArray_Check(obj)) {
      bytes = PyByteArray_AS_STRING(obj);
      size = PyByteArray_GET_SIZE(obj);
      is_utf8 = false;
    } else if (PyMemoryView_Check(obj)) {
      PyObject* ref = PyMemoryView_GetContiguous(obj, PyBUF_READ, 'C');
      RETURN_IF_PYERROR();
      Py_buffer* buffer = PyMemoryView_GET_BUFFER(ref);
      bytes = reinterpret_cast<const char*>(buffer->buf);
      size = buffer->len;
      is_utf8 = false;
    } else {
      return Status::TypeError("Expected bytes, got a '", Py_TYPE(obj)->tp_name,
                               "' object");
    }
    return Status::OK();
  }

 protected:
  OwnedRef ref;
};

class ARROW_PYTHON_EXPORT PyBuffer : public Buffer {
 public:
  /// While memoryview objects support multi-dimensional buffers, PyBuffer only supports
  /// one-dimensional byte buffers.
  ~PyBuffer();

  static Result<std::shared_ptr<Buffer>> FromPyObject(PyObject* obj);

 private:
  PyBuffer();
  Status Init(PyObject*);

  Py_buffer py_buf_;
};

// Return the common PyArrow memory pool
ARROW_PYTHON_EXPORT void set_default_memory_pool(MemoryPool* pool);
ARROW_PYTHON_EXPORT MemoryPool* get_memory_pool();

// This is annoying: because C++11 does not allow implicit conversion of string
// literals to non-const char*, we need to go through some gymnastics to use
// PyObject_CallMethod without a lot of pain (its arguments are non-const
// char*)
template <typename... ArgTypes>
static inline PyObject* cpp_PyObject_CallMethod(PyObject* obj, const char* method_name,
                                                const char* argspec, ArgTypes... args) {
  return PyObject_CallMethod(obj, const_cast<char*>(method_name),
                             const_cast<char*>(argspec), args...);
}

}  // namespace py
}  // namespace arrow