Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Merge pull request #91 from zamorays/surfANDtexExtensions
extFPTextureSurfaces
  • Loading branch information
inducer committed Oct 21, 2015
2 parents d68e274 + 7883a2b commit c6713a6
Show file tree
Hide file tree
Showing 3 changed files with 412 additions and 6 deletions.
153 changes: 148 additions & 5 deletions pycuda/cuda/pycuda-helpers.hpp
@@ -1,12 +1,12 @@
#include <pycuda-complex.hpp>

#include <surface_functions.h>
#ifndef _AFJKDASLFSADHF_HEADER_SEEN_PYCUDA_HELPERS_HPP
#define _AFJKDASLFSADHF_HEADER_SEEN_PYCUDA_HELPERS_HPP

extern "C++" {
// "double-precision" textures ------------------------------------------------
/* Thanks to Nathan Bell <nbell@nvidia.com> for help in figuring this out. */

typedef float fp_tex_float;
typedef int2 fp_tex_double;
typedef uint2 fp_tex_cfloat;
Expand Down Expand Up @@ -34,39 +34,182 @@ extern "C++" {
return __hiloint2double(v.y, v.x);
}

// 2D functionality

template <enum cudaTextureReadMode read_mode>
__device__ double fp_tex2D(texture<fp_tex_double, 2, read_mode> tex, int i, int j)
{
fp_tex_double v = tex2D(tex, i, j);
return __hiloint2double(v.y, v.x);
}

template <enum cudaTextureReadMode read_mode>
__device__ pycuda::complex<float> fp_tex2D(texture<fp_tex_cfloat, 2, read_mode> tex, int i, int j)
{
fp_tex_cfloat v = tex2D(tex, i, j);
return pycuda::complex<float>(__int_as_float(v.x), __int_as_float(v.y));
}

template <enum cudaTextureReadMode read_mode>
__device__ pycuda::complex<double> fp_tex2D(texture<fp_tex_cdouble, 2, read_mode> tex, int i, int j)
{
fp_tex_cdouble v = tex2D(tex, i, j);
return pycuda::complex<double>(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z));
}

// 3D functionality

template <enum cudaTextureReadMode read_mode>
__device__ double fp_tex3D(texture<fp_tex_double, 3, read_mode> tex, int i, int j, int k)
{
fp_tex_double v = tex3D(tex, i, j, k);
return __hiloint2double(v.y, v.x);
}

template <enum cudaTextureReadMode read_mode>
__device__ pycuda::complex<float> fp_tex3D(texture<fp_tex_cfloat, 3, read_mode> tex, int i, int j, int k)
{
fp_tex_cfloat v = tex3D(tex, i, j, k);
return pycuda::complex<float>(__int_as_float(v.x), __int_as_float(v.y));
}

template <enum cudaTextureReadMode read_mode>
__device__ pycuda::complex<double> fp_tex3D(texture<fp_tex_cdouble, 3, read_mode> tex, int i, int j, int k)
{
fp_tex_cdouble v = tex3D(tex, i, j, k);
return pycuda::complex<double>(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z));
}

// FP_Surfaces with complex supprt

__device__ void fp_surf2DLayeredwrite(double var,surface<void, cudaSurfaceType2DLayered> surf, int i, int j, int layer, enum cudaSurfaceBoundaryMode mode)
{
fp_tex_double auxvar;
auxvar.x = __double2loint(var);
auxvar.y = __double2hiint(var);
surf2DLayeredwrite(auxvar, surf, i*sizeof(fp_tex_double), j, layer, mode);
}

__device__ void fp_surf2DLayeredwrite(pycuda::complex<float> var,surface<void, cudaSurfaceType2DLayered> surf, int i, int j, int layer, enum cudaSurfaceBoundaryMode mode)
{
fp_tex_cfloat auxvar;
auxvar.x = __float_as_int(var._M_re);
auxvar.y = __float_as_int(var._M_im);
surf2DLayeredwrite(auxvar, surf, i*sizeof(fp_tex_cfloat), j, layer,mode);
}

__device__ void fp_surf2DLayeredwrite(pycuda::complex<double> var,surface<void, cudaSurfaceType2DLayered> surf, int i, int j, int layer, enum cudaSurfaceBoundaryMode mode)
{
fp_tex_cdouble auxvar;
auxvar.x = __double2loint(var._M_re);
auxvar.y = __double2hiint(var._M_re);

auxvar.z = __double2loint(var._M_im);
auxvar.w = __double2hiint(var._M_im);
surf2DLayeredwrite(auxvar, surf, i*sizeof(fp_tex_cdouble), j, layer,mode);
}

__device__ void fp_surf3Dwrite(double var,surface<void, 3> surf, int i, int j, int k, enum cudaSurfaceBoundaryMode mode)
{
fp_tex_double auxvar;
auxvar.x = __double2loint(var);
auxvar.y = __double2hiint(var);
surf3Dwrite(auxvar, surf, i*sizeof(fp_tex_double), j, k,mode);
}

__device__ void fp_surf3Dwrite(pycuda::complex<float> var,surface<void, 3> surf, int i, int j, int k, enum cudaSurfaceBoundaryMode mode)
{
fp_tex_cfloat auxvar;
auxvar.x = __float_as_int(var._M_re);
auxvar.y = __float_as_int(var._M_im);

surf3Dwrite(auxvar, surf, i*sizeof(fp_tex_cfloat), j, k, mode);
}

__device__ void fp_surf3Dwrite(pycuda::complex<double> var,surface<void, 3> surf, int i, int j, int k, enum cudaSurfaceBoundaryMode mode)
{
fp_tex_cdouble auxvar;
auxvar.x = __double2loint(var._M_re);
auxvar.y = __double2hiint(var._M_re);

auxvar.z = __double2loint(var._M_im);
auxvar.w = __double2hiint(var._M_im);
surf3Dwrite(auxvar, surf, i*sizeof(fp_tex_cdouble), j, k, mode);
}

__device__ void fp_surf2DLayeredread(double *var, surface<void, cudaSurfaceType2DLayered> surf, int i, int j, int layer, enum cudaSurfaceBoundaryMode mode)
{
fp_tex_double v;
surf2DLayeredread(&v, surf, i*sizeof(fp_tex_double), j, layer, mode);
*var = __hiloint2double(v.y, v.x);
}

__device__ void fp_surf2DLayeredread(pycuda::complex<float> *var, surface<void, cudaSurfaceType2DLayered> surf, int i, int j, int layer, enum cudaSurfaceBoundaryMode mode)
{
fp_tex_cfloat v;
surf2DLayeredread(&v, surf, i*sizeof(fp_tex_cfloat), j, layer, mode);
*var = pycuda::complex<float>(__int_as_float(v.x), __int_as_float(v.y));
}

__device__ void fp_surf2DLayeredread(pycuda::complex<double> *var, surface<void, cudaSurfaceType2DLayered> surf, int i, int j, int layer, enum cudaSurfaceBoundaryMode mode)
{
fp_tex_cdouble v;
surf2DLayeredread(&v, surf, i*sizeof(fp_tex_cdouble), j, layer, mode);
*var = pycuda::complex<double>(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z));
}

__device__ void fp_surf3Dread(double *var, surface<void, 3> surf, int i, int j, int k, enum cudaSurfaceBoundaryMode mode)
{
fp_tex_double v;
surf3Dread(&v, surf, i*sizeof(fp_tex_double), j, k, mode);
*var = __hiloint2double(v.y, v.x);
}

__device__ void fp_surf3Dread(pycuda::complex<float> *var, surface<void, 3> surf, int i, int j, int k, enum cudaSurfaceBoundaryMode mode)
{
fp_tex_cfloat v;
surf3Dread(&v, surf, i*sizeof(fp_tex_cfloat), j, k, mode);
*var = pycuda::complex<float>(__int_as_float(v.x), __int_as_float(v.y));
}

__device__ void fp_surf3Dread(pycuda::complex<double> *var, surface<void, 3> surf, int i, int j, int k, enum cudaSurfaceBoundaryMode mode)
{
fp_tex_cdouble v;
surf3Dread(&v, surf, i*sizeof(fp_tex_cdouble), j, k, mode);
*var = pycuda::complex<double>(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z));
}
#define PYCUDA_GENERATE_FP_TEX_FUNCS(TYPE) \
template <enum cudaTextureReadMode read_mode> \
__device__ TYPE fp_tex1Dfetch(texture<TYPE, 1, read_mode> tex, int i) \
{ \
return tex1Dfetch(tex, i); \
} \
\
template <enum cudaTextureReadMode read_mode> \
__device__ TYPE fp_tex2D(texture<TYPE, 2, read_mode> tex, int i, int j) \
{ \
return tex2D(tex, i, j); \
} \
\
template <enum cudaTextureReadMode read_mode> \
__device__ TYPE fp_tex3D(texture<TYPE, 3, read_mode> tex, int i, int j, int k) \
{ \
return tex3D(tex, i, j, k); \
} \
__device__ void fp_surf2DLayeredwrite(TYPE var,surface<void, cudaSurfaceType2DLayered> surf, int i, int j, int layer,enum cudaSurfaceBoundaryMode mode) \
{ \
surf2DLayeredwrite(var, surf, i*sizeof(TYPE), j, layer, mode); \
} \
__device__ void fp_surf2DLayeredread(TYPE *var, surface<void, cudaSurfaceType2DLayered> surf, int i, int j, int layer,enum cudaSurfaceBoundaryMode mode) \
{ \
surf2DLayeredread(var, surf, i*sizeof(TYPE), j, layer, mode); \
} \
__device__ void fp_surf3Dwrite(TYPE var,surface<void, 3> surf, int i, int j, int k, enum cudaSurfaceBoundaryMode mode) \
{ \
surf3Dwrite(var, surf, i*sizeof(TYPE), j, k, mode); \
} \
__device__ void fp_surf3Dread(TYPE *var, surface<void, 3> surf, int i, int j, int k, enum cudaSurfaceBoundaryMode mode) \
{ \
surf3Dread(var, surf, i*sizeof(TYPE), j, k, mode); \
}

PYCUDA_GENERATE_FP_TEX_FUNCS(float)
PYCUDA_GENERATE_FP_TEX_FUNCS(int)
PYCUDA_GENERATE_FP_TEX_FUNCS(unsigned int)
Expand Down
67 changes: 67 additions & 0 deletions pycuda/driver.py
Expand Up @@ -724,6 +724,73 @@ def matrix_to_array(matrix, order, allow_double_hack=False):

return ary

def np_to_array(nparray, order, allowSurfaceBind=False):

case = order in ["C","F"]
if not case:
raise LogicError("order must be either F or C")

dimension = len(nparray.shape)
if dimension == 2:
if order == "C": stride = 0
if order == "F": stride = -1
h, w = nparray.shape
d = 1
if allowSurfaceBind:
descrArr = ArrayDescriptor3D()
descrArr.width = w
descrArr.height = h
descrArr.depth = d
else:
descrArr = ArrayDescriptor()
descrArr.width = w
descrArr.height = h
elif dimension == 3:
if order == "C": stride = 1
if order == "F": stride = 1
d, h, w = nparray.shape
descrArr = ArrayDescriptor3D()
descrArr.width = w
descrArr.height = h
descrArr.depth = d
else:
raise LogicError("CUDArray dimensions 2 and 3 supported in CUDA at the moment ... ")

if nparray.dtype == np.complex64:
descrArr.format = array_format.SIGNED_INT32 # Reading data as int2 (hi=re,lo=im) structure
descrArr.num_channels = 2
elif nparray.dtype == np.float64:
descrArr.format = array_format.SIGNED_INT32 # Reading data as int2 (hi,lo) structure
descrArr.num_channels = 2
elif nparray.dtype == np.complex128:
descrArr.format = array_format.SIGNED_INT32 # Reading data as int4 (re=(hi,lo),im=(hi,lo)) structure
descrArr.num_channels = 4
else:
descrArr.format = dtype_to_array_format(nparray.dtype)
descrArr.num_channels = 1

if allowSurfaceBind:
if dimension==2: descrArr.flags |= array3d_flags.ARRAY3D_LAYERED
descrArr.flags |= array3d_flags.SURFACE_LDST

cudaArray = Array(descrArr)
if allowSurfaceBind or dimension==3:
copy3D = Memcpy3D()
copy3D.set_src_host(nparray)
copy3D.set_dst_array(cudaArray)
copy3D.width_in_bytes = copy3D.src_pitch = nparray.strides[stride]
copy3D.src_height = copy3D.height = h
copy3D.depth = d
copy3D()
return cudaArray
else:
copy2D = Memcpy2D()
copy2D.set_src_host(nparray)
copy2D.set_dst_array(cudaArray)
copy2D.width_in_bytes = copy2D.src_pitch = nparray.strides[stride]
copy2D.src_height = copy2D.height = h
copy2D(aligned=True)
return cudaArray

def make_multichannel_2d_array(ndarray, order):
"""Channel count has to be the first dimension of the C{ndarray}."""
Expand Down

0 comments on commit c6713a6

Please sign in to comment.