Repository URL to install this package:
|
Version:
0.36.2 ▾
|
from __future__ import print_function, absolute_import, division
import operator
from functools import reduce
from llvmlite.llvmpy.core import Type
import llvmlite.llvmpy.core as lc
import llvmlite.binding as ll
from numba.targets.imputils import Registry
from numba import cgutils
from numba import types
from numba.itanium_mangler import mangle_c, mangle, mangle_type
from . import target
from . import stubs
from . import hlc
from . import enums
registry = Registry()
lower = registry.lower
_void_value = lc.Constant.null(lc.Type.pointer(lc.Type.int(8)))
# -----------------------------------------------------------------------------
def _declare_function(context, builder, name, sig, cargs,
mangler=mangle_c):
"""Insert declaration for a opencl builtin function.
Uses the Itanium mangler.
Args
----
context: target context
builder: llvm builder
name: str
symbol name
sig: signature
function signature of the symbol being declared
cargs: sequence of str
C type names for the arguments
mangler: a mangler function
function to use to mangle the symbol
"""
mod = builder.module
if sig.return_type == types.void:
llretty = lc.Type.void()
else:
llretty = context.get_value_type(sig.return_type)
llargs = [context.get_value_type(t) for t in sig.args]
fnty = Type.function(llretty, llargs)
mangled = mangler(name, cargs)
fn = mod.get_or_insert_function(fnty, mangled)
fn.calling_convention = target.CC_SPIR_FUNC
return fn
@lower(stubs.get_global_id, types.uint32)
def get_global_id_impl(context, builder, sig, args):
[dim] = args
get_global_id = _declare_function(context, builder, 'get_global_id', sig,
['unsigned int'])
res = builder.call(get_global_id, [dim])
return context.cast(builder, res, types.uintp, types.intp)
@lower(stubs.get_local_id, types.uint32)
def get_local_id_impl(context, builder, sig, args):
[dim] = args
get_local_id = _declare_function(context, builder, 'get_local_id', sig,
['unsigned int'])
res = builder.call(get_local_id, [dim])
return context.cast(builder, res, types.uintp, types.intp)
@lower(stubs.get_group_id, types.uint32)
def get_group_id_impl(context, builder, sig, args):
[dim] = args
get_group_id = _declare_function(context, builder, 'get_group_id', sig,
['unsigned int'])
res = builder.call(get_group_id, [dim])
return context.cast(builder, res, types.uintp, types.intp)
@lower(stubs.get_num_groups, types.uint32)
def get_num_groups_impl(context, builder, sig, args):
[dim] = args
get_num_groups = _declare_function(context, builder, 'get_num_groups', sig,
['unsigned int'])
res = builder.call(get_num_groups, [dim])
return context.cast(builder, res, types.uintp, types.intp)
@lower(stubs.get_work_dim)
def get_work_dim_impl(context, builder, sig, args):
get_work_dim = _declare_function(context, builder, 'get_work_dim', sig,
["void"])
res = builder.call(get_work_dim, [])
return res
@lower(stubs.get_global_size, types.uint32)
def get_global_size_impl(context, builder, sig, args):
[dim] = args
get_global_size = _declare_function(context, builder, 'get_global_size',
sig, ['unsigned int'])
res = builder.call(get_global_size, [dim])
return context.cast(builder, res, types.uintp, types.intp)
@lower(stubs.get_local_size, types.uint32)
def get_local_size_impl(context, builder, sig, args):
[dim] = args
get_local_size = _declare_function(context, builder, 'get_local_size',
sig, ['unsigned int'])
res = builder.call(get_local_size, [dim])
return context.cast(builder, res, types.uintp, types.intp)
@lower(stubs.barrier, types.uint32)
def barrier_one_arg_impl(context, builder, sig, args):
[flags] = args
barrier = _declare_function(context, builder, 'barrier', sig,
['unsigned int'])
builder.call(barrier, [flags])
return _void_value
@lower(stubs.barrier)
def barrier_no_arg_impl(context, builder, sig, args):
assert not args
sig = types.void(types.uint32)
barrier = _declare_function(context, builder, 'barrier', sig,
['unsigned int'])
flags = context.get_constant(types.uint32, enums.CLK_GLOBAL_MEM_FENCE)
builder.call(barrier, [flags])
return _void_value
@lower(stubs.mem_fence, types.uint32)
def mem_fence_impl(context, builder, sig, args):
[flags] = args
mem_fence = _declare_function(context, builder, 'mem_fence', sig,
['unsigned int'])
builder.call(mem_fence, [flags])
return _void_value
@lower(stubs.wavebarrier)
def wavebarrier_impl(context, builder, sig, args):
assert not args
fnty = Type.function(Type.void(), [])
fn = builder.module.get_or_insert_function(fnty, name="__hsail_wavebarrier")
fn.calling_convention = target.CC_SPIR_FUNC
builder.call(fn, [])
return _void_value
@lower(stubs.activelanepermute_wavewidth,
types.Any, types.uint32, types.Any, types.bool_)
def activelanepermute_wavewidth_impl(context, builder, sig, args):
[src, laneid, identity, use_ident] = args
assert sig.args[0] == sig.args[2]
elem_type = sig.args[0]
bitwidth = elem_type.bitwidth
intbitwidth = Type.int(bitwidth)
i32 = Type.int(32)
i1 = Type.int(1)
name = "__hsail_activelanepermute_wavewidth_b{0}".format(bitwidth)
fnty = Type.function(intbitwidth, [intbitwidth, i32, intbitwidth, i1])
fn = builder.module.get_or_insert_function(fnty, name=name)
fn.calling_convention = target.CC_SPIR_FUNC
def cast(val):
return builder.bitcast(val, intbitwidth)
result = builder.call(fn, [cast(src), laneid, cast(identity), use_ident])
return builder.bitcast(result, context.get_value_type(elem_type))
@lower(stubs.atomic.add, types.Array, types.intp, types.Any)
@lower(stubs.atomic.add, types.Array,
types.UniTuple, types.Any)
@lower(stubs.atomic.add, types.Array, types.Tuple,
types.Any)
def hsail_atomic_add_tuple(context, builder, sig, args):
aryty, indty, valty = sig.args
ary, inds, val = args
dtype = aryty.dtype
if indty == types.intp:
indices = [inds] # just a single integer
indty = [indty]
else:
indices = cgutils.unpack_tuple(builder, inds, count=len(indty))
indices = [context.cast(builder, i, t, types.intp)
for t, i in zip(indty, indices)]
if dtype != valty:
raise TypeError("expecting %s but got %s" % (dtype, valty))
if aryty.ndim != len(indty):
raise TypeError("indexing %d-D array with %d-D index" %
(aryty.ndim, len(indty)))
lary = context.make_array(aryty)(context, builder, ary)
ptr = cgutils.get_item_pointer(builder, aryty, lary, indices)
return builder.atomic_rmw("add", ptr, val, ordering='monotonic')
@lower('hsail.smem.alloc', types.UniTuple, types.Any)
def hsail_smem_alloc_array(context, builder, sig, args):
shape, dtype = args
return _generic_array(context, builder, shape=shape, dtype=dtype,
symbol_name='_hsapy_smem',
addrspace=target.SPIR_LOCAL_ADDRSPACE)
def _generic_array(context, builder, shape, dtype, symbol_name, addrspace):
elemcount = reduce(operator.mul, shape)
lldtype = context.get_data_type(dtype)
laryty = Type.array(lldtype, elemcount)
if addrspace == target.SPIR_LOCAL_ADDRSPACE:
lmod = builder.module
# Create global variable in the requested address-space
gvmem = lmod.add_global_variable(laryty, symbol_name, addrspace)
if elemcount <= 0:
raise ValueError("array length <= 0")
else:
gvmem.linkage = lc.LINKAGE_INTERNAL
gvmem.initializer = lc.Constant.null(laryty)
if dtype not in types.number_domain:
raise TypeError("unsupported type: %s" % dtype)
# Convert to generic address-space
dataptr = context.addrspacecast(builder, gvmem,
target.SPIR_GENERIC_ADDRSPACE)
else:
raise NotImplementedError("addrspace {addrspace}".format(**locals()))
return _make_array(context, builder, dataptr, dtype, shape)
def _make_array(context, builder, dataptr, dtype, shape, layout='C'):
ndim = len(shape)
# Create array object
aryty = types.Array(dtype=dtype, ndim=ndim, layout='C')
ary = context.make_array(aryty)(context, builder)
targetdata = _get_target_data(context)
lldtype = context.get_data_type(dtype)
itemsize = lldtype.get_abi_size(targetdata)
# Compute strides
rstrides = [itemsize]
for i, lastsize in enumerate(reversed(shape[1:])):
rstrides.append(lastsize * rstrides[-1])
strides = [s for s in reversed(rstrides)]
kshape = [context.get_constant(types.intp, s) for s in shape]
kstrides = [context.get_constant(types.intp, s) for s in strides]
context.populate_array(ary,
data=builder.bitcast(dataptr, ary.data.type),
shape=cgutils.pack_array(builder, kshape),
strides=cgutils.pack_array(builder, kstrides),
itemsize=context.get_constant(types.intp, itemsize),
meminfo=None)
return ary._getvalue()
def _get_target_data(context):
return ll.create_target_data(hlc.DATALAYOUT[context.address_size])