"""
Expose each GPU devices directly.
This module implements a API that is like the "CUDA runtime" context manager
for managing CUDA context stack and clean up. It relies on thread-local globals
to separate the context stack management of each thread. Contexts are also
sharable among threads. Only the main thread can destroy Contexts.
Note:
- This module must be imported by the main-thread.
"""
from __future__ import print_function, absolute_import, division
import functools
import threading
from numba import servicelib
from .driver import driver
class _DeviceList(object):
    def __getattr__(self, attr):
        # First time looking at "lst" attribute.
        if attr == "lst":
            # Device list is not initialized.
            # Query all CUDA devices.
            numdev = driver.get_device_count()
            gpus = [_DeviceContextManager(driver.get_device(devid))
                    for devid in range(numdev)]
            # Define "lst" to avoid re-initialization
            self.lst = gpus
            return gpus

        # Other attributes
        return super(_DeviceList, self).__getattr__(attr)

    def __getitem__(self, devnum):
        '''
        Returns the context manager for device *devnum*.
        '''
        return self.lst[devnum]

    def __str__(self):
        return ', '.join([str(d) for d in self.lst])

    def __iter__(self):
        return iter(self.lst)

    def __len__(self):
        return len(self.lst)

    @property
    def current(self):
        """Returns the active device, or None if there is no active device.
        """
        if _runtime.context_stack:
            return self.lst[_runtime.current_context.device.id]

class _DeviceContextManager(object):
    """
    Provides a context manager for executing in the context of the chosen
    device. The normal use of instances of this type is from
    ``numba.cuda.gpus``. For example, to execute on device 2::

       with numba.cuda.gpus[2]:
           d_a = numba.cuda.to_device(a)

    to copy the array *a* onto device 2, referred to by *d_a*.
    """

    def __init__(self, device):
        self._device = device

    def __getattr__(self, item):
        return getattr(self._device, item)

    def __enter__(self):
        _runtime.push_context(self)

    def __exit__(self, exc_type, exc_val, exc_tb):
        _runtime.pop_context()

    def __str__(self):
        return "<Managed Device {self.id}>".format(self=self)

class _Runtime(object):
    """Emulate the CUDA runtime context management.

    It owns all Devices and Contexts.
    Keeps at most one Context per Device.
    """

    def __init__(self):
        self.gpus = _DeviceList()

        # A thread-local stack
        self.context_stack = servicelib.TLStack()

        # Remember the main thread
        # Only the main thread can *actually* destroy
        self._mainthread = threading.current_thread()

        # Avoid mutation of runtime state in multithreaded programs
        self._lock = threading.RLock()

    @property
    def current_context(self):
        """Return the active GPU context.
        """
        return self.context_stack.top
    def _get_or_create_context(self, gpu):
        """Try to use an already created context for the given GPU. If none
        exists, create a new context.

        Returns the context.
        """
        with self._lock:
            ctx = gpu.get_primary_context()
            ctx.push()
            return ctx
    def push_context(self, gpu):
        """Push a context for the given GPU, or create a new one if no context
        exists for that GPU.
        """
        # Context stack is empty or the active device is not the given gpu
        if self.context_stack.is_empty or self.current_context.device != gpu:
            ctx = self._get_or_create_context(gpu)
        # Active context is from the gpu
        else:
            ctx = self.current_context

        # Always put the new context on the stack
        self.context_stack.push(ctx)
        return ctx
    def pop_context(self):
        """Pop a context from the context stack if there is more than
        one context in the stack.

        Will not remove the last context in the stack.
        """
        ctx = self.current_context
        # If there is more than one context,
        # do not pop the last context so there is always an active context
        if len(self.context_stack) > 1:
            ctx.pop()
            self.context_stack.pop()
        assert self.context_stack
    def get_or_create_context(self, devnum):
        """Return the current context, or push/create a context for the GPU
        with the given device number.
        """
        if self.context_stack:
            return self.current_context
        else:
            with self._lock:
                return self.push_context(self.gpus[devnum])
    def reset(self):
        """Clear all contexts in the thread. Destroy the contexts if and only
        if we are in the main thread.
        """
        # Clear the context stack
        while self.context_stack:
            ctx = self.context_stack.pop()
            ctx.pop()

        # If it is the main thread, destroy all contexts
        if threading.current_thread() == self._mainthread:
            self._destroy_all_contexts()

    def _destroy_all_contexts(self):
        # Reset all devices
        for gpu in self.gpus:
            gpu.reset()

_runtime = _Runtime()
# ================================ PUBLIC API ================================
gpus = _runtime.gpus
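# Illustrative usage (not part of the original module): ``gpus`` is normally
# reached as ``numba.cuda.gpus``.  A minimal sketch, assuming at least one
# CUDA-capable device is present:
#
#     from numba import cuda
#
#     print(len(cuda.gpus))          # number of detected devices
#     with cuda.gpus[0]:             # push a context for device 0
#         print(cuda.gpus.current)   # the device owning the active context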
def get_context(devnum=0):
    """Get the current device or use a device by device number, and
    return the CUDA context.
    """
    return _runtime.get_or_create_context(devnum)
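# Illustrative usage (not part of the original module): lazily obtaining a
# context before touching driver resources.  Note that, per
# ``_Runtime.get_or_create_context``, *devnum* is only consulted when no
# context is active in the current thread:
#
#     ctx = get_context()        # context for device 0, created on first use
#     print(ctx.device.id)       # id of the device owning the returned context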
def require_context(fn):
    """
    A decorator that ensures a CUDA context is available when *fn* is executed.

    Decorating *fn* is equivalent to writing::

       get_context()
       fn()

    at each call site.
    """
    @functools.wraps(fn)
    def _require_cuda_context(*args, **kws):
        get_context()
        return fn(*args, **kws)

    return _require_cuda_context
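# Illustrative usage (not part of the original module): guarding an API entry
# point so a context exists before any driver call is made.  ``copy_to_device``
# below is a hypothetical function name:
#
#     @require_context
#     def copy_to_device(ary):
#         ...  # driver calls are safe here; get_context() has already run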
def reset():
    """Reset the CUDA subsystem for the current thread.

    In the main thread:
        This removes all CUDA contexts. Only use this at shutdown or for
        cleaning up between tests.

    In non-main threads:
        This clears the CUDA context stack only.
    """
    _runtime.reset()
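# Illustrative usage (not part of the original module): cleaning up between
# tests, assuming this module is importable as ``numba.cuda.cudadrv.devices``:
#
#     import unittest
#     from numba.cuda.cudadrv import devices
#
#     class MyCUDATestCase(unittest.TestCase):
#         def tearDown(self):
#             devices.reset()   # drop this thread's CUDA context stack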