edgify / pycuda   python

Version: 2020.1 

/ compiler.py

from __future__ import absolute_import
from __future__ import print_function
from pytools import memoize
# don't import pycuda.driver here--you'll create an import loop
import os
import sys
from tempfile import mkstemp
from os import unlink

from pytools.prefork import call_capture_output
from six.moves import map


@memoize
def get_nvcc_version(nvcc):
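    '''Return the output of ``nvcc --version`` as a string (memoized per nvcc path).'''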
    cmdline = [nvcc, "--version"]
    result, stdout, stderr = call_capture_output(cmdline)

    if result != 0 or not stdout:
        from warnings import warn
        warn("NVCC version could not be determined.")
        stdout = "nvcc unknown version"

    return stdout.decode("utf-8", "replace")


def _new_md5():
    try:
        import hashlib
        return hashlib.md5()
    except ImportError:
        # hashlib is new in Python 2.5; fall back to the md5 module on older versions
        import md5
        return md5.new()


def preprocess_source(source, options, nvcc):
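    '''Run *source* through nvcc's preprocessor and return the output as a string.

    compile_plain() uses the preprocessed text (rather than the raw source) in
    its cache checksum whenever the source contains ``#include``, so that the
    cache key reflects the contents of included headers.
    '''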
    handle, source_path = mkstemp(suffix='.cu')

    outf = open(source_path, 'w')
    outf.write(source)
    outf.close()
    os.close(handle)

    cmdline = [nvcc, '--preprocess'] + options + [source_path]
    if 'win32' in sys.platform:
        cmdline.extend(['--compiler-options', '-EP'])
    else:
        cmdline.extend(['--compiler-options', '-P'])

    result, stdout, stderr = call_capture_output(cmdline, error_on_nonzero=False)

    if result != 0:
        from pycuda.driver import CompileError
        raise CompileError("nvcc preprocessing of %s failed" % source_path,
                           cmdline, stderr=stderr)

    # sanity check
    if len(stdout) < 0.5*len(source):
        from pycuda.driver import CompileError
        raise CompileError("nvcc preprocessing of %s failed with ridiculously "
                "small code output - likely unsupported compiler." % source_path,
                cmdline, stderr=stderr.decode("utf-8", "replace"))

    unlink(source_path)

    return stdout.decode("utf-8", "replace")


def compile_plain(source, options, keep, nvcc, cache_dir, target="cubin"):
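    '''Compile *source* with nvcc into *target* ("cubin", "ptx" or "fatbin")
    and return the resulting binary as a bytestring.

    If *cache_dir* is given, the result is looked up in (and written back to)
    an on-disk cache keyed on the source text, the options, the nvcc version
    and the platform bitness.
    '''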
    from os.path import join

    assert target in ["cubin", "ptx", "fatbin"]

    if cache_dir:
        checksum = _new_md5()

        if '#include' in source:
            checksum.update(preprocess_source(source, options, nvcc).encode("utf-8"))
        else:
            checksum.update(source.encode("utf-8"))

        for option in options:
            checksum.update(option.encode("utf-8"))
        checksum.update(get_nvcc_version(nvcc).encode("utf-8"))
        from pycuda.characterize import platform_bits
        checksum.update(str(platform_bits()).encode("utf-8"))

        cache_file = checksum.hexdigest()
        cache_path = join(cache_dir, cache_file + "." + target)

        try:
            cache_f = open(cache_path, "rb")
            try:
                return cache_f.read()
            finally:
                cache_f.close()

        except Exception:
            pass

    from tempfile import mkdtemp
    file_dir = mkdtemp()
    file_root = "kernel"

    cu_file_name = file_root + ".cu"
    cu_file_path = join(file_dir, cu_file_name)

    outf = open(cu_file_path, "w")
    outf.write(str(source))
    outf.close()

    if keep:
        options = options[:]
        options.append("--keep")

        print("*** compiler output in %s" % file_dir)

    cmdline = [nvcc, "--" + target] + options + [cu_file_name]
    result, stdout, stderr = call_capture_output(cmdline,
            cwd=file_dir, error_on_nonzero=False)

    try:
        result_f = open(join(file_dir, file_root + "." + target), "rb")
    except IOError:
        no_output = True
    else:
        no_output = False

    if result != 0 or (no_output and (stdout or stderr)):
        if result == 0:
            from warnings import warn
            warn("PyCUDA: nvcc exited with status 0, but appears to have "
                    "encountered an error")
        from pycuda.driver import CompileError
        raise CompileError("nvcc compilation of %s failed" % cu_file_path,
                cmdline, stdout=stdout.decode("utf-8", "replace"),
                stderr=stderr.decode("utf-8", "replace"))

    if stdout or stderr:
        lcase_err_text = (stdout+stderr).decode("utf-8", "replace").lower()
        from warnings import warn
        if "demoted" in lcase_err_text or "demoting" in lcase_err_text:
            warn("nvcc said it demoted types in source code it "
                "compiled--this is likely not what you want.",
                stacklevel=4)
        warn("The CUDA compiler succeeded, but said the following:\n"
                + (stdout+stderr).decode("utf-8", "replace"), stacklevel=4)

    result_data = result_f.read()
    result_f.close()

    if cache_dir:
        outf = open(cache_path, "wb")
        outf.write(result_data)
        outf.close()

    if not keep:
        from os import listdir, unlink, rmdir
        for name in listdir(file_dir):
            unlink(join(file_dir, name))
        rmdir(file_dir)

    return result_data


def _get_per_user_string():
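    '''Return a per-user identifier: the numeric uid where available, otherwise
    (typically on Windows) an MD5 digest of the USERNAME environment variable.
    '''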
    try:
        from os import getuid
    except ImportError:
        checksum = _new_md5()
        from os import environ
        checksum.update(environ["USERNAME"].encode("utf-8"))
        return checksum.hexdigest()
    else:
        return "uid%d" % getuid()


def _find_pycuda_include_path():
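    '''Locate the "pycuda/cuda" header directory shipped with the pycuda package.'''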
    from pkg_resources import Requirement, resource_filename
    return resource_filename(Requirement.parse("pycuda"), "pycuda/cuda")


# Extra nvcc flags applied to every compilation, read from the optional,
# whitespace-separated PYCUDA_DEFAULT_NVCC_FLAGS environment variable.
DEFAULT_NVCC_FLAGS = [
        _flag.strip() for _flag in
        os.environ.get("PYCUDA_DEFAULT_NVCC_FLAGS", "").split()
        if _flag.strip()]


def compile(source, nvcc="nvcc", options=None, keep=False,
        no_extern_c=False, arch=None, code=None, cache_dir=None,
        include_dirs=[], target="cubin"):
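    '''Prepare options and delegate to compile_plain(): wrap *source* in
    ``extern "C"`` unless *no_extern_c* is set, apply default flags, detect the
    architecture of the current context's device if *arch* is not given, pick a
    cache directory, add bitness and include-path flags, then invoke nvcc.
    '''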

    assert target in ["cubin", "ptx", "fatbin"]

    if not no_extern_c:
        source = 'extern "C" {\n%s\n}\n' % source

    if options is None:
        options = DEFAULT_NVCC_FLAGS

    options = options[:]
    if arch is None:
        from pycuda.driver import Error
        try:
            from pycuda.driver import Context
            arch = "sm_%d%d" % Context.get_device().compute_capability()
        except Error:
            pass

    from pycuda.driver import CUDA_DEBUGGING
    if CUDA_DEBUGGING:
        cache_dir = False
        keep = True
        options.extend(["-g", "-G"])

    if "PYCUDA_CACHE_DIR" in os.environ and cache_dir is None:
        cache_dir = os.environ["PYCUDA_CACHE_DIR"]

    if "PYCUDA_DISABLE_CACHE" in os.environ:
        cache_dir = False

    if cache_dir is None:
        import appdirs
        cache_dir = os.path.join(appdirs.user_cache_dir("pycuda", "pycuda"),
                "compiler-cache-v1")

        from os import makedirs
        try:
            makedirs(cache_dir)
        except OSError as e:
            from errno import EEXIST
            if e.errno != EEXIST:
                raise

    if arch is not None:
        options.extend(["-arch", arch])

    if code is not None:
        options.extend(["-code", code])

    if 'darwin' in sys.platform and sys.maxsize == 9223372036854775807:
        options.append('-m64')
    elif 'win32' in sys.platform and sys.maxsize == 9223372036854775807:
        options.append('-m64')
    elif 'win32' in sys.platform and sys.maxsize == 2147483647:
        options.append('-m32')

    include_dirs = include_dirs + [_find_pycuda_include_path()]

    for i in include_dirs:
        options.append("-I"+i)

    return compile_plain(source, options, keep, nvcc, cache_dir, target)


class CudaModule(object):
    def _check_arch(self, arch):
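        '''Warn if *arch* requests a compute capability newer than the current GPU's.'''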
        if arch is None:
            return
        try:
            from pycuda.driver import Context
            capability = Context.get_device().compute_capability()
            if tuple(map(int, tuple(arch.split("_")[1]))) > capability:
                from warnings import warn
                warn("trying to compile for a compute capability "
                        "higher than selected GPU")
        except Exception:
            pass

    def _bind_module(self):
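        '''Re-export commonly used methods of the underlying driver Module.'''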
        self.get_global = self.module.get_global
        self.get_texref = self.module.get_texref
        if hasattr(self.module, "get_surfref"):
            self.get_surfref = self.module.get_surfref

    def get_function(self, name):
        return self.module.get_function(name)


class SourceModule(CudaModule):
    '''
    Creates a Module from a single .cu source object linked against the
    static CUDA runtime.
    '''
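    # Illustrative usage (a minimal sketch; the kernel and the names used here
    # are made up for the example and are not part of this module):
    #
    #   mod = SourceModule("""
    #   __global__ void doublify(float *a) { a[threadIdx.x] *= 2.0f; }
    #   """)
    #   doublify = mod.get_function("doublify")
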
    def __init__(self, source, nvcc="nvcc", options=None, keep=False,
            no_extern_c=False, arch=None, code=None, cache_dir=None,
            include_dirs=[]):
        self._check_arch(arch)

        cubin = compile(source, nvcc, options, keep, no_extern_c,
                arch, code, cache_dir, include_dirs)

        from pycuda.driver import module_from_buffer
        self.module = module_from_buffer(cubin)

        self._bind_module()


def _search_on_path(filenames):
    """Find file on system path."""
    # http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52224

    from os.path import exists, abspath, join
    from os import pathsep, environ

    search_path = environ["PATH"]

    paths = search_path.split(pathsep)
    for path in paths:
        for filename in filenames:
            if exists(join(path, filename)):
                return abspath(join(path, filename))


@memoize
def _find_nvcc_on_path():
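    '''Return the absolute path to nvcc (or nvcc.exe) found on $PATH, or None (memoized).'''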
    return _search_on_path(["nvcc", "nvcc.exe"])


class DynamicModule(CudaModule):
    '''
    Creates a Module from multiple .cu source, library file and/or data
    objects linked against the static or dynamic CUDA runtime.
    '''
    def __init__(self, nvcc='nvcc', link_options=None, keep=False,
            no_extern_c=False, arch=None, code=None, cache_dir=None,
            include_dirs=[],  message_handler=None, log_verbose=False,
            cuda_libdir=None):
        from pycuda.driver import Context
        compute_capability = Context.get_device().compute_capability()
        if compute_capability < (3,5):
            raise Exception('Minimum compute capability for dynamic parallelism is 3.5 (found: %u.%u)!' %
                (compute_capability[0], compute_capability[1]))
        else:
            from pycuda.driver import Linker
            self.linker = Linker(message_handler, link_options, log_verbose)
        self._check_arch(arch)
        self.nvcc = nvcc
        self.keep = keep
        self.no_extern_c = no_extern_c
        self.arch = arch
        self.code = code
        self.cache_dir = cache_dir
        self.include_dirs = include_dirs
        self.cuda_libdir = cuda_libdir