Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

edgify / pycuda   python

Repository URL to install this package:

/ cuda / pycuda-helpers.hpp

#include <pycuda-complex.hpp>
#include <surface_functions.h>
#ifndef _AFJKDASLFSADHF_HEADER_SEEN_PYCUDA_HELPERS_HPP
#define _AFJKDASLFSADHF_HEADER_SEEN_PYCUDA_HELPERS_HPP

extern "C++" {
  // "double-precision" textures ------------------------------------------------
  /* Thanks to Nathan Bell <nbell@nvidia.com> for help in figuring this out. */

  typedef float fp_tex_float;
  typedef int2 fp_tex_double;
  typedef uint2 fp_tex_cfloat;
  typedef int4 fp_tex_cdouble;

   template <enum cudaTextureReadMode read_mode>
  __device__ pycuda::complex<float> fp_tex1Dfetch(texture<fp_tex_cfloat, 1, read_mode> tex, int i)
  {
    fp_tex_cfloat v = tex1Dfetch(tex, i);
    pycuda::complex<float> out;
    return pycuda::complex<float>(__int_as_float(v.x), __int_as_float(v.y));
  }

  template <enum cudaTextureReadMode read_mode>
  __device__ pycuda::complex<double> fp_tex1Dfetch(texture<fp_tex_cdouble, 1, read_mode> tex, int i)
  {
    fp_tex_cdouble v = tex1Dfetch(tex, i);
    return pycuda::complex<double>(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z));
  }

  template <enum cudaTextureReadMode read_mode>
  __device__ double fp_tex1Dfetch(texture<fp_tex_double, 1, read_mode> tex, int i)
  {
    fp_tex_double v = tex1Dfetch(tex, i);
    return __hiloint2double(v.y, v.x);
  }

// 2D functionality

  template <enum cudaTextureReadMode read_mode>
  __device__ double fp_tex2D(texture<fp_tex_double, 2, read_mode> tex, int i, int j)
  {
    fp_tex_double v = tex2D(tex, i, j);
    return __hiloint2double(v.y, v.x);
  }

  template <enum cudaTextureReadMode read_mode>
  __device__ pycuda::complex<float> fp_tex2D(texture<fp_tex_cfloat, 2, read_mode> tex, int i, int j)
  {
    fp_tex_cfloat v = tex2D(tex, i, j);
    return pycuda::complex<float>(__int_as_float(v.x), __int_as_float(v.y));
  }

  template <enum cudaTextureReadMode read_mode>
  __device__ pycuda::complex<double> fp_tex2D(texture<fp_tex_cdouble, 2, read_mode> tex, int i, int j)
  {
    fp_tex_cdouble v = tex2D(tex, i, j);
    return pycuda::complex<double>(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z));
  }
  // 2D Layered extension

  template <enum cudaTextureReadMode read_mode>
  __device__ double fp_tex2DLayered(texture<fp_tex_double, cudaTextureType2DLayered, read_mode> tex, float i, float j, int layer)
  {
    fp_tex_double v = tex2DLayered(tex, i, j, layer);
    return __hiloint2double(v.y, v.x);
  }

  template <enum cudaTextureReadMode read_mode>
  __device__ pycuda::complex<float> fp_tex2DLayered(texture<fp_tex_cfloat, cudaTextureType2DLayered, read_mode> tex, float i, float j, int layer)
  {
    fp_tex_cfloat v = tex2DLayered(tex, i, j, layer);
    return pycuda::complex<float>(__int_as_float(v.x), __int_as_float(v.y));
  }

  template <enum cudaTextureReadMode read_mode>
  __device__ pycuda::complex<double> fp_tex2DLayered(texture<fp_tex_cdouble, cudaTextureType2DLayered, read_mode> tex, float i, float j, int layer)
  {
    fp_tex_cdouble v = tex2DLayered(tex, i, j, layer);
    return pycuda::complex<double>(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z));
  }

  // 3D functionality

  template <enum cudaTextureReadMode read_mode>
  __device__ double fp_tex3D(texture<fp_tex_double, 3, read_mode> tex, int i, int j, int k)
  {
    fp_tex_double v = tex3D(tex, i, j, k);
    return __hiloint2double(v.y, v.x);
  }

  template <enum cudaTextureReadMode read_mode>
  __device__ pycuda::complex<float> fp_tex3D(texture<fp_tex_cfloat, 3, read_mode> tex, int i, int j, int k)
  {
    fp_tex_cfloat v = tex3D(tex, i, j, k);
    return pycuda::complex<float>(__int_as_float(v.x), __int_as_float(v.y));
  }

  template <enum cudaTextureReadMode read_mode>
  __device__ pycuda::complex<double> fp_tex3D(texture<fp_tex_cdouble, 3, read_mode> tex, int i, int j, int k)
  {
    fp_tex_cdouble v = tex3D(tex, i, j, k);
    return pycuda::complex<double>(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z));
  }

  // FP_Surfaces with complex supprt

  __device__ void fp_surf2DLayeredwrite(double var,surface<void, cudaSurfaceType2DLayered> surf, int i, int j, int layer, enum cudaSurfaceBoundaryMode mode)
  {
    fp_tex_double auxvar;
    auxvar.x =  __double2loint(var);
    auxvar.y =  __double2hiint(var);
    surf2DLayeredwrite(auxvar, surf, i*sizeof(fp_tex_double), j, layer, mode);
  }

  __device__ void fp_surf2DLayeredwrite(pycuda::complex<float> var,surface<void, cudaSurfaceType2DLayered> surf, int i, int j, int layer, enum cudaSurfaceBoundaryMode mode)
  {
    fp_tex_cfloat auxvar;
    auxvar.x =  __float_as_int(var._M_re);
    auxvar.y =  __float_as_int(var._M_im);
    surf2DLayeredwrite(auxvar, surf, i*sizeof(fp_tex_cfloat), j, layer,mode);
  }

  __device__ void fp_surf2DLayeredwrite(pycuda::complex<double> var,surface<void, cudaSurfaceType2DLayered> surf, int i, int j, int layer, enum cudaSurfaceBoundaryMode mode)
  {
    fp_tex_cdouble auxvar;
    auxvar.x =  __double2loint(var._M_re);
    auxvar.y =  __double2hiint(var._M_re);

    auxvar.z = __double2loint(var._M_im);
    auxvar.w = __double2hiint(var._M_im);
    surf2DLayeredwrite(auxvar, surf, i*sizeof(fp_tex_cdouble), j, layer,mode);
  }

  __device__ void fp_surf3Dwrite(double var,surface<void, 3> surf, int i, int j, int k, enum cudaSurfaceBoundaryMode mode)
  {
    fp_tex_double auxvar;
    auxvar.x =  __double2loint(var);
    auxvar.y =  __double2hiint(var);
    surf3Dwrite(auxvar, surf, i*sizeof(fp_tex_double), j, k,mode);
  }

  __device__ void fp_surf3Dwrite(pycuda::complex<float> var,surface<void, 3> surf, int i, int j, int k, enum cudaSurfaceBoundaryMode mode)
  {
    fp_tex_cfloat auxvar;
    auxvar.x =  __float_as_int(var._M_re);
    auxvar.y =  __float_as_int(var._M_im);

    surf3Dwrite(auxvar, surf, i*sizeof(fp_tex_cfloat), j, k, mode);
  }

  __device__ void fp_surf3Dwrite(pycuda::complex<double> var,surface<void, 3> surf, int i, int j, int k, enum cudaSurfaceBoundaryMode mode)
  {
    fp_tex_cdouble auxvar;
    auxvar.x =  __double2loint(var._M_re);
    auxvar.y =  __double2hiint(var._M_re);

    auxvar.z = __double2loint(var._M_im);
    auxvar.w = __double2hiint(var._M_im);
    surf3Dwrite(auxvar, surf, i*sizeof(fp_tex_cdouble), j, k, mode);
  }

  __device__ void fp_surf2DLayeredread(double *var, surface<void, cudaSurfaceType2DLayered> surf, int i, int j, int layer, enum cudaSurfaceBoundaryMode mode)
  {
    fp_tex_double v;
    surf2DLayeredread(&v, surf, i*sizeof(fp_tex_double), j, layer, mode);
    *var = __hiloint2double(v.y, v.x);
  }

  __device__ void fp_surf2DLayeredread(pycuda::complex<float> *var, surface<void, cudaSurfaceType2DLayered> surf, int i, int j, int layer, enum cudaSurfaceBoundaryMode mode)
  {
    fp_tex_cfloat v;
    surf2DLayeredread(&v, surf, i*sizeof(fp_tex_cfloat), j, layer, mode);
    *var = pycuda::complex<float>(__int_as_float(v.x), __int_as_float(v.y));
  }

  __device__ void fp_surf2DLayeredread(pycuda::complex<double> *var, surface<void, cudaSurfaceType2DLayered> surf, int i, int j, int layer, enum cudaSurfaceBoundaryMode mode)
  {
    fp_tex_cdouble v;
    surf2DLayeredread(&v, surf, i*sizeof(fp_tex_cdouble), j, layer, mode);
    *var = pycuda::complex<double>(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z));
  }

  __device__ void fp_surf3Dread(double *var, surface<void, 3> surf, int i, int j, int k, enum cudaSurfaceBoundaryMode mode)
  {
    fp_tex_double v;
    surf3Dread(&v, surf, i*sizeof(fp_tex_double), j, k, mode);
    *var = __hiloint2double(v.y, v.x);
  }

  __device__ void fp_surf3Dread(pycuda::complex<float> *var, surface<void, 3> surf, int i, int j, int k, enum cudaSurfaceBoundaryMode mode)
  {
    fp_tex_cfloat v;
    surf3Dread(&v, surf, i*sizeof(fp_tex_cfloat), j, k, mode);
    *var = pycuda::complex<float>(__int_as_float(v.x), __int_as_float(v.y));
  }

  __device__ void fp_surf3Dread(pycuda::complex<double> *var, surface<void, 3> surf, int i, int j, int k, enum cudaSurfaceBoundaryMode mode)
  {
    fp_tex_cdouble v;
    surf3Dread(&v, surf, i*sizeof(fp_tex_cdouble), j, k, mode);
    *var = pycuda::complex<double>(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z));
  }
#define PYCUDA_GENERATE_FP_TEX_FUNCS(TYPE) \
  template <enum cudaTextureReadMode read_mode> \
  __device__ TYPE fp_tex1Dfetch(texture<TYPE, 1, read_mode> tex, int i) \
  { \
    return tex1Dfetch(tex, i); \
  } \
  template <enum cudaTextureReadMode read_mode> \
  __device__ TYPE fp_tex2D(texture<TYPE, 2, read_mode> tex, int i, int j) \
  { \
    return tex2D(tex, i, j); \
  } \
  template <enum cudaTextureReadMode read_mode> \
   __device__ TYPE fp_tex2DLayered(texture<TYPE, cudaTextureType2DLayered, read_mode> tex, int i, int j, int layer) \
  { \
    return tex2DLayered(tex, i, j, layer); \
  } \
  template <enum cudaTextureReadMode read_mode> \
  __device__ TYPE fp_tex3D(texture<TYPE, 3, read_mode> tex, int i, int j, int k) \
  { \
    return tex3D(tex, i, j, k); \
  } \
  __device__ void fp_surf2DLayeredwrite(TYPE var,surface<void, cudaSurfaceType2DLayered> surf, int i, int j, int layer,enum cudaSurfaceBoundaryMode mode) \
  { \
    surf2DLayeredwrite(var, surf, i*sizeof(TYPE), j, layer, mode); \
  } \
  __device__ void fp_surf2DLayeredread(TYPE *var, surface<void, cudaSurfaceType2DLayered> surf, int i, int j, int layer,enum cudaSurfaceBoundaryMode mode) \
  { \
    surf2DLayeredread(var, surf, i*sizeof(TYPE), j, layer, mode); \
  } \
  __device__ void fp_surf3Dwrite(TYPE var,surface<void, 3> surf, int i, int j, int k, enum cudaSurfaceBoundaryMode mode) \
  { \
    surf3Dwrite(var, surf, i*sizeof(TYPE), j, k, mode); \
  } \
  __device__ void fp_surf3Dread(TYPE *var, surface<void, 3> surf, int i, int j, int k, enum cudaSurfaceBoundaryMode mode) \
  { \
    surf3Dread(var, surf, i*sizeof(TYPE), j, k, mode); \
  }
  PYCUDA_GENERATE_FP_TEX_FUNCS(float)
  PYCUDA_GENERATE_FP_TEX_FUNCS(int)
  PYCUDA_GENERATE_FP_TEX_FUNCS(unsigned int)
  PYCUDA_GENERATE_FP_TEX_FUNCS(short int)
  PYCUDA_GENERATE_FP_TEX_FUNCS(unsigned short int)
  PYCUDA_GENERATE_FP_TEX_FUNCS(char)
  PYCUDA_GENERATE_FP_TEX_FUNCS(unsigned char)
}

#endif