Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Branch to test end extend Textures and Surfaces with fp_ support of c…
…omplex and doubles ...
  • Loading branch information
Roberto Zamora-Zamora committed Oct 20, 2015
1 parent d68e274 commit 97dc06a
Showing 1 changed file with 148 additions and 5 deletions.
153 changes: 148 additions & 5 deletions pycuda/cuda/pycuda-helpers.hpp
@@ -1,12 +1,12 @@
#include <pycuda-complex.hpp>

#include <surface_functions.h>
#ifndef _AFJKDASLFSADHF_HEADER_SEEN_PYCUDA_HELPERS_HPP
#define _AFJKDASLFSADHF_HEADER_SEEN_PYCUDA_HELPERS_HPP

extern "C++" {
// "double-precision" textures ------------------------------------------------
/* Thanks to Nathan Bell <nbell@nvidia.com> for help in figuring this out. */

typedef float fp_tex_float;
typedef int2 fp_tex_double;
typedef uint2 fp_tex_cfloat;
Expand Down Expand Up @@ -34,39 +34,182 @@ extern "C++" {
return __hiloint2double(v.y, v.x);
}

// 2D functionality

template <enum cudaTextureReadMode read_mode>
__device__ double fp_tex2D(texture<fp_tex_double, 2, read_mode> tex, int i, int j)
{
fp_tex_double v = tex2D(tex, i, j);
return __hiloint2double(v.y, v.x);
}

template <enum cudaTextureReadMode read_mode>
__device__ pycuda::complex<float> fp_tex2D(texture<fp_tex_cfloat, 2, read_mode> tex, int i, int j)
{
fp_tex_cfloat v = tex2D(tex, i, j);
return pycuda::complex<float>(__int_as_float(v.x), __int_as_float(v.y));
}

template <enum cudaTextureReadMode read_mode>
__device__ pycuda::complex<double> fp_tex2D(texture<fp_tex_cdouble, 2, read_mode> tex, int i, int j)
{
fp_tex_cdouble v = tex2D(tex, i, j);
return pycuda::complex<double>(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z));
}

// 3D functionality

template <enum cudaTextureReadMode read_mode>
__device__ double fp_tex3D(texture<fp_tex_double, 3, read_mode> tex, int i, int j, int k)
{
fp_tex_double v = tex3D(tex, i, j, k);
return __hiloint2double(v.y, v.x);
}

template <enum cudaTextureReadMode read_mode>
__device__ pycuda::complex<float> fp_tex3D(texture<fp_tex_cfloat, 3, read_mode> tex, int i, int j, int k)
{
fp_tex_cfloat v = tex3D(tex, i, j, k);
return pycuda::complex<float>(__int_as_float(v.x), __int_as_float(v.y));
}

template <enum cudaTextureReadMode read_mode>
__device__ pycuda::complex<double> fp_tex3D(texture<fp_tex_cdouble, 3, read_mode> tex, int i, int j, int k)
{
fp_tex_cdouble v = tex3D(tex, i, j, k);
return pycuda::complex<double>(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z));
}

// FP_Surfaces with complex supprt

__device__ void fp_surf2DLayeredwrite(double var,surface<void, cudaSurfaceType2DLayered> surf, int i, int j, int layer, enum cudaSurfaceBoundaryMode mode)
{
fp_tex_double auxvar;
auxvar.x = __double2loint(var);
auxvar.y = __double2hiint(var);
surf2DLayeredwrite(auxvar, surf, i*sizeof(fp_tex_double), j, layer, mode);
}

__device__ void fp_surf2DLayeredwrite(pycuda::complex<float> var,surface<void, cudaSurfaceType2DLayered> surf, int i, int j, int layer, enum cudaSurfaceBoundaryMode mode)
{
fp_tex_cfloat auxvar;
auxvar.x = __float_as_int(var._M_re);
auxvar.y = __float_as_int(var._M_im);
surf2DLayeredwrite(auxvar, surf, i*sizeof(fp_tex_cfloat), j, layer,mode);
}

__device__ void fp_surf2DLayeredwrite(pycuda::complex<double> var,surface<void, cudaSurfaceType2DLayered> surf, int i, int j, int layer, enum cudaSurfaceBoundaryMode mode)
{
fp_tex_cdouble auxvar;
auxvar.x = __double2loint(var._M_re);
auxvar.y = __double2hiint(var._M_re);

auxvar.z = __double2loint(var._M_im);
auxvar.w = __double2hiint(var._M_im);
surf2DLayeredwrite(auxvar, surf, i*sizeof(fp_tex_cdouble), j, layer,mode);
}

__device__ void fp_surf3Dwrite(double var,surface<void, 3> surf, int i, int j, int k, enum cudaSurfaceBoundaryMode mode)
{
fp_tex_double auxvar;
auxvar.x = __double2loint(var);
auxvar.y = __double2hiint(var);
surf3Dwrite(auxvar, surf, i*sizeof(fp_tex_double), j, k,mode);
}

__device__ void fp_surf3Dwrite(pycuda::complex<float> var,surface<void, 3> surf, int i, int j, int k, enum cudaSurfaceBoundaryMode mode)
{
fp_tex_cfloat auxvar;
auxvar.x = __float_as_int(var._M_re);
auxvar.y = __float_as_int(var._M_im);

surf3Dwrite(auxvar, surf, i*sizeof(fp_tex_cfloat), j, k, mode);
}

__device__ void fp_surf3Dwrite(pycuda::complex<double> var,surface<void, 3> surf, int i, int j, int k, enum cudaSurfaceBoundaryMode mode)
{
fp_tex_cdouble auxvar;
auxvar.x = __double2loint(var._M_re);
auxvar.y = __double2hiint(var._M_re);

auxvar.z = __double2loint(var._M_im);
auxvar.w = __double2hiint(var._M_im);
surf3Dwrite(auxvar, surf, i*sizeof(fp_tex_cdouble), j, k, mode);
}

__device__ void fp_surf2DLayeredread(double *var, surface<void, cudaSurfaceType2DLayered> surf, int i, int j, int layer, enum cudaSurfaceBoundaryMode mode)
{
fp_tex_double v;
surf2DLayeredread(&v, surf, i*sizeof(fp_tex_double), j, layer, mode);
*var = __hiloint2double(v.y, v.x);
}

__device__ void fp_surf2DLayeredread(pycuda::complex<float> *var, surface<void, cudaSurfaceType2DLayered> surf, int i, int j, int layer, enum cudaSurfaceBoundaryMode mode)
{
fp_tex_cfloat v;
surf2DLayeredread(&v, surf, i*sizeof(fp_tex_cfloat), j, layer, mode);
*var = pycuda::complex<float>(__int_as_float(v.x), __int_as_float(v.y));
}

__device__ void fp_surf2DLayeredread(pycuda::complex<double> *var, surface<void, cudaSurfaceType2DLayered> surf, int i, int j, int layer, enum cudaSurfaceBoundaryMode mode)
{
fp_tex_cdouble v;
surf2DLayeredread(&v, surf, i*sizeof(fp_tex_cdouble), j, layer, mode);
*var = pycuda::complex<double>(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z));
}

__device__ void fp_surf3Dread(double *var, surface<void, 3> surf, int i, int j, int k, enum cudaSurfaceBoundaryMode mode)
{
fp_tex_double v;
surf3Dread(&v, surf, i*sizeof(fp_tex_double), j, k, mode);
*var = __hiloint2double(v.y, v.x);
}

__device__ void fp_surf3Dread(pycuda::complex<float> *var, surface<void, 3> surf, int i, int j, int k, enum cudaSurfaceBoundaryMode mode)
{
fp_tex_cfloat v;
surf3Dread(&v, surf, i*sizeof(fp_tex_cfloat), j, k, mode);
*var = pycuda::complex<float>(__int_as_float(v.x), __int_as_float(v.y));
}

__device__ void fp_surf3Dread(pycuda::complex<double> *var, surface<void, 3> surf, int i, int j, int k, enum cudaSurfaceBoundaryMode mode)
{
fp_tex_cdouble v;
surf3Dread(&v, surf, i*sizeof(fp_tex_cdouble), j, k, mode);
*var = pycuda::complex<double>(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z));
}
#define PYCUDA_GENERATE_FP_TEX_FUNCS(TYPE) \
template <enum cudaTextureReadMode read_mode> \
__device__ TYPE fp_tex1Dfetch(texture<TYPE, 1, read_mode> tex, int i) \
{ \
return tex1Dfetch(tex, i); \
} \
\
template <enum cudaTextureReadMode read_mode> \
__device__ TYPE fp_tex2D(texture<TYPE, 2, read_mode> tex, int i, int j) \
{ \
return tex2D(tex, i, j); \
} \
\
template <enum cudaTextureReadMode read_mode> \
__device__ TYPE fp_tex3D(texture<TYPE, 3, read_mode> tex, int i, int j, int k) \
{ \
return tex3D(tex, i, j, k); \
} \
__device__ void fp_surf2DLayeredwrite(TYPE var,surface<void, cudaSurfaceType2DLayered> surf, int i, int j, int layer,enum cudaSurfaceBoundaryMode mode) \
{ \
surf2DLayeredwrite(var, surf, i*sizeof(TYPE), j, layer, mode); \
} \
__device__ void fp_surf2DLayeredread(TYPE *var, surface<void, cudaSurfaceType2DLayered> surf, int i, int j, int layer,enum cudaSurfaceBoundaryMode mode) \
{ \
surf2DLayeredread(var, surf, i*sizeof(TYPE), j, layer, mode); \
} \
__device__ void fp_surf3Dwrite(TYPE var,surface<void, 3> surf, int i, int j, int k, enum cudaSurfaceBoundaryMode mode) \
{ \
surf3Dwrite(var, surf, i*sizeof(TYPE), j, k, mode); \
} \
__device__ void fp_surf3Dread(TYPE *var, surface<void, 3> surf, int i, int j, int k, enum cudaSurfaceBoundaryMode mode) \
{ \
surf3Dread(var, surf, i*sizeof(TYPE), j, k, mode); \
}

PYCUDA_GENERATE_FP_TEX_FUNCS(float)
PYCUDA_GENERATE_FP_TEX_FUNCS(int)
PYCUDA_GENERATE_FP_TEX_FUNCS(unsigned int)
Expand Down

0 comments on commit 97dc06a

Please sign in to comment.