include/ATen/cuda/CUDAEvent.h

neilisaac / torch python

Repository URL to install this package:
Version: 1.8.0

/ include / ATen / cuda / CUDAEvent.h

#pragma once

#include <ATen/cuda/ATenCUDAGeneral.h>
#include <ATen/cuda/CUDAContext.h>
#include <c10/cuda/CUDAStream.h>
#include <c10/cuda/CUDAGuard.h>
#include <ATen/cuda/Exceptions.h>
#include <c10/util/Exception.h>

#include <cuda_runtime_api.h>

#include <cstdint>
#include <utility>

namespace at { namespace cuda {

/*
* CUDAEvents are movable not copyable wrappers around CUDA's events.
*
* CUDAEvents are constructed lazily when first recorded unless it is
* reconstructed from a cudaIpcEventHandle_t. The event has a device, and this
* device is acquired from the first recording stream. However, if reconstructed
* from a handle, the device should be explicitly specified; or if ipc_handle() is
* called before the event is ever recorded, it will use the current device.
* Later streams that record the event must match this device.
*/
struct TORCH_CUDA_CPP_API CUDAEvent {
  // Constructors
  // Default value for `flags` is specified below - it's cudaEventDisableTiming
  CUDAEvent() {}
  CUDAEvent(unsigned int flags) : flags_{flags} {}

  CUDAEvent(
      DeviceIndex device_index, const cudaIpcEventHandle_t* handle) {
    #ifndef __HIP_PLATFORM_HCC__
      device_index_ = device_index;
      CUDAGuard guard(device_index_);

      AT_CUDA_CHECK(cudaIpcOpenEventHandle(&event_, *handle));
      is_created_ = true;
    #else
      AT_ERROR("cuIpcOpenEventHandle with HIP is not supported");
    #endif
  }

  // Note: event destruction done on creating device to avoid creating a
  // CUDA context on other devices.
  ~CUDAEvent() {
    try {
      if (is_created_) {
        CUDAGuard guard(device_index_);
        cudaEventDestroy(event_);
      }
    } catch (...) { /* No throw */ }
  }

  CUDAEvent(const CUDAEvent&) = delete;
  CUDAEvent& operator=(const CUDAEvent&) = delete;

  CUDAEvent(CUDAEvent&& other) { moveHelper(std::move(other)); }
  CUDAEvent& operator=(CUDAEvent&& other) {
    moveHelper(std::move(other));
    return *this;
  }

  operator cudaEvent_t() const { return event(); }

  // Less than operator (to allow use in sets)
  friend bool operator<(const CUDAEvent& left, const CUDAEvent& right) {
    return left.event_ < right.event_;
  }

  optional<at::Device> device() const {
    if (is_created_) {
      return at::Device(at::kCUDA, device_index_);
    } else {
      return {};
    }
  }

  bool isCreated() const { return is_created_; }
  DeviceIndex device_index() const {return device_index_;}
  cudaEvent_t event() const { return event_; }

  // Note: cudaEventQuery can be safely called from any device
  bool query() const {
    if (!is_created_) {
      return true;
    }

    cudaError_t err = cudaEventQuery(event_);
    if (err == cudaSuccess) {
      return true;
    } else if (err != cudaErrorNotReady) {
      C10_CUDA_CHECK(err);
    }

    return false;
  }

  void record() { record(getCurrentCUDAStream()); }

  void recordOnce(const CUDAStream& stream) {
    if (!was_recorded_) record(stream);
  }

  // Note: cudaEventRecord must be called on the same device as the event.
  void record(const CUDAStream& stream) {
    if (!is_created_) {
      createEvent(stream.device_index());
    }

    TORCH_CHECK(device_index_ == stream.device_index(), "Event device ", device_index_,
      " does not match recording stream's device ", stream.device_index(), ".");
    CUDAGuard guard(device_index_);
    AT_CUDA_CHECK(cudaEventRecord(event_, stream));
    was_recorded_ = true;
  }

  // Note: cudaStreamWaitEvent must be called on the same device as the stream.
  // The event has no actual GPU resources associated with it.
  void block(const CUDAStream& stream) {
    if (is_created_) {
      CUDAGuard guard(stream.device_index());
      AT_CUDA_CHECK(cudaStreamWaitEvent(stream, event_, 0));
    }
  }

  // Note: cudaEventElapsedTime can be safely called from any device
  float elapsed_time(const CUDAEvent& other) const {
    TORCH_CHECK(is_created_ && other.isCreated(),
      "Both events must be recorded before calculating elapsed time.");
    float time_ms = 0;
    // raise cudaErrorNotReady if either event is recorded but not yet completed
    AT_CUDA_CHECK(cudaEventElapsedTime(&time_ms, event_, other.event_));
    return time_ms;
  }

  // Note: cudaEventSynchronize can be safely called from any device
  void synchronize() const {
    if (is_created_) {
      AT_CUDA_CHECK(cudaEventSynchronize(event_));
    }
  }

  // Note: cudaIpcGetEventHandle must be called on the same device as the event
  void ipc_handle(cudaIpcEventHandle_t * handle) {
    #ifndef __HIP_PLATFORM_HCC__
      if (!is_created_) {
        // this CUDAEvent object was initially constructed from flags but event_
        // is not created yet.
        createEvent(getCurrentCUDAStream().device_index());
      }
      CUDAGuard guard(device_index_);
      AT_CUDA_CHECK(cudaIpcGetEventHandle(handle, event_));
    #else
      AT_ERROR("cuIpcGetEventHandle with HIP is not supported");
    #endif
  }

private:
  unsigned int flags_ = cudaEventDisableTiming;
  bool is_created_ = false;
  bool was_recorded_ = false;
  DeviceIndex device_index_ = -1;
  cudaEvent_t event_;

  void createEvent(DeviceIndex device_index) {
    device_index_ = device_index;
    CUDAGuard guard(device_index_);
    AT_CUDA_CHECK(cudaEventCreateWithFlags(&event_, flags_));
    is_created_ = true;
  }

  void moveHelper(CUDAEvent&& other) {
    std::swap(flags_, other.flags_);
    std::swap(is_created_, other.is_created_);
    std::swap(was_recorded_, other.was_recorded_);
    std::swap(device_index_, other.device_index_);
    std::swap(event_, other.event_);
  }
};

} // namespace cuda
} // namespace at
neilisaac / torch python

Version: 1.8.0

/ include / ATen / cuda / CUDAEvent.h

Products

About

Resources

Contact Gemfury