Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Switch to new-style buffer interface, drop support for Py<2.6
  • Loading branch information
inducer committed Aug 11, 2014
1 parent 1a676b5 commit 6be1bea
Show file tree
Hide file tree
Showing 5 changed files with 135 additions and 102 deletions.
4 changes: 1 addition & 3 deletions setup.py
Expand Up @@ -157,12 +157,10 @@ def main():
'Programming Language :: C++',
'Programming Language :: Python',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 2.4',
'Programming Language :: Python :: 2.5',
'Programming Language :: Python :: 2.6',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3.2',
'Programming Language :: Python :: 3.3',
'Programming Language :: Python :: 3.4',
'Topic :: Scientific/Engineering',
'Topic :: Scientific/Engineering :: Mathematics',
'Topic :: Scientific/Engineering :: Physics',
Expand Down
88 changes: 60 additions & 28 deletions src/cpp/cuda.hpp
Expand Up @@ -9,6 +9,8 @@



// {{{ includes, configuration

#include <cuda.h>

#ifdef CUDAPP_PRETEND_CUDA_VERSION
Expand Down Expand Up @@ -43,7 +45,7 @@
#endif

// MAYBE? cuMemcpy, cuPointerGetAttribute
// TODO: cuCtxSetCurrent, cuCtxGetCurrent
// TODO: cuCtxSetCurrent, cuCtxGetCurrent
// (use once the old, deprecated functions have been removed from CUDA)


Expand All @@ -61,16 +63,16 @@



#if (PY_VERSION_HEX < 0x02060000)
#error PyCUDA does not support Python 2 versions earlier than 2.6.
#endif
#if (PY_VERSION_HEX >= 0x03000000) && (PY_VERSION_HEX < 0x03030000)
#error PyCUDA does not support Python 3 versions earlier than 3.3.
#endif

#if PY_VERSION_HEX >= 0x02050000
typedef Py_ssize_t PYCUDA_BUFFER_SIZE_T;
#else
typedef int PYCUDA_BUFFER_SIZE_T;
#endif
typedef Py_ssize_t PYCUDA_BUFFER_SIZE_T;

// }}}


#define PYCUDA_PARSE_STREAM_PY \
Expand Down Expand Up @@ -181,11 +183,11 @@ namespace pycuda
#if CUDAPP_CUDA_VERSION >= 3020
size_t
#else
unsigned int
unsigned int
#endif
pycuda_size_t;

typedef
typedef
#if defined(_WIN32) && defined(_WIN64)
long long
#else
Expand Down Expand Up @@ -354,6 +356,38 @@ namespace pycuda

// }}}

// {{{ buffer interface helper

// RAII wrapper around the new-style Python buffer protocol
// (PyObject_GetBuffer / PyBuffer_Release).  Acquires a Py_buffer view
// on get() and guarantees its release, so callers cannot leak buffer
// references even when exceptions propagate.
class py_buffer_wrapper : public boost::noncopyable
{
  private:
    // True once m_buf holds a view that must be released.
    bool m_initialized;

  public:
    Py_buffer m_buf;

    py_buffer_wrapper()
      : m_initialized(false)
    {}

    // Acquire a buffer view on *obj* with the given PyBUF_* flags.
    // Throws py::error_already_set if the object does not support the
    // requested buffer access.
    void get(PyObject *obj, int flags)
    {
      // Release any previously acquired view first, so repeated calls
      // on the same wrapper do not leak the earlier Py_buffer.
      if (m_initialized)
      {
        PyBuffer_Release(&m_buf);
        m_initialized = false;
      }

      if (PyObject_GetBuffer(obj, &m_buf, flags))
        throw py::error_already_set();

      m_initialized = true;
    }

    virtual ~py_buffer_wrapper()
    {
      if (m_initialized)
        PyBuffer_Release(&m_buf);
    }
};

// }}}


// {{{ version query ------------------------------------------------------------
#if CUDAPP_CUDA_VERSION >= 2020
inline int get_driver_version()
Expand Down Expand Up @@ -1375,20 +1409,18 @@ namespace pycuda

PYCUDA_PARSE_STREAM_PY;

const void *par_buf;
PYCUDA_BUFFER_SIZE_T py_par_len;
if (PyObject_AsReadBuffer(parameter_buffer.ptr(), &par_buf, &py_par_len))
throw py::error_already_set();
size_t par_len = py_par_len;
py_buffer_wrapper par_buf_wrapper;
par_buf_wrapper.get(parameter_buffer.ptr(), PyBUF_ANY_CONTIGUOUS);
size_t par_len = par_buf_wrapper.m_buf.len;

void *config[] = {
CU_LAUNCH_PARAM_BUFFER_POINTER, const_cast<void *>(par_buf),
CU_LAUNCH_PARAM_BUFFER_POINTER, const_cast<void *>(par_buf_wrapper.m_buf.buf),
CU_LAUNCH_PARAM_BUFFER_SIZE, &par_len,
CU_LAUNCH_PARAM_END
};

CUDAPP_CALL_GUARDED(
cuLaunchKernel, (m_function,
cuLaunchKernel, (m_function,
grid_dim[0], grid_dim[1], grid_dim[2],
block_dim[0], block_dim[1], block_dim[2],
shared_mem_bytes, s_handle, 0, config
Expand Down Expand Up @@ -1633,9 +1665,9 @@ namespace pycuda
void set_src_host(py::object buf_py) \
{ \
srcMemoryType = CU_MEMORYTYPE_HOST; \
PYCUDA_BUFFER_SIZE_T len; \
if (PyObject_AsReadBuffer(buf_py.ptr(), &srcHost, &len)) \
throw py::error_already_set(); \
py_buffer_wrapper buf_wrapper; \
buf_wrapper.get(buf_py.ptr(), PyBUF_ANY_CONTIGUOUS); \
srcHost = buf_wrapper.m_buf.buf; \
} \
\
void set_src_array(array const &ary) \
Expand All @@ -1653,9 +1685,9 @@ namespace pycuda
void set_dst_host(py::object buf_py) \
{ \
dstMemoryType = CU_MEMORYTYPE_HOST; \
PYCUDA_BUFFER_SIZE_T len; \
if (PyObject_AsWriteBuffer(buf_py.ptr(), &dstHost, &len)) \
throw py::error_already_set(); \
py_buffer_wrapper buf_wrapper; \
buf_wrapper.get(buf_py.ptr(), PyBUF_ANY_CONTIGUOUS | PyBUF_WRITABLE); \
dstHost = buf_wrapper.m_buf.buf; \
} \
\
void set_dst_array(array const &ary) \
Expand All @@ -1675,17 +1707,17 @@ namespace pycuda
void set_src_unified(py::object buf_py) \
{ \
srcMemoryType = CU_MEMORYTYPE_UNIFIED; \
PYCUDA_BUFFER_SIZE_T len; \
if (PyObject_AsReadBuffer(buf_py.ptr(), &srcHost, &len)) \
throw py::error_already_set(); \
py_buffer_wrapper buf_wrapper; \
buf_wrapper.get(buf_py.ptr(), PyBUF_ANY_CONTIGUOUS); \
srcHost = buf_wrapper.m_buf.buf; \
} \
\
void set_dst_unified(py::object buf_py) \
{ \
dstMemoryType = CU_MEMORYTYPE_UNIFIED; \
PYCUDA_BUFFER_SIZE_T len; \
if (PyObject_AsWriteBuffer(buf_py.ptr(), &dstHost, &len)) \
throw py::error_already_set(); \
py_buffer_wrapper buf_wrapper; \
buf_wrapper.get(buf_py.ptr(), PyBUF_ANY_CONTIGUOUS | PyBUF_WRITABLE); \
dstHost = buf_wrapper.m_buf.buf; \
}
#else
#define MEMCPY_SETTERS_UNIFIED /* empty */
Expand Down Expand Up @@ -2001,7 +2033,7 @@ namespace pycuda
py::object m_base;

public:
registered_host_memory(void *p, size_t bytes, unsigned int flags=0,
registered_host_memory(void *p, size_t bytes, unsigned int flags=0,
py::object base=py::object())
: host_pointer(mem_host_register(p, bytes, flags)), m_base(base)
{
Expand Down
30 changes: 18 additions & 12 deletions src/cpp/curand.hpp
Expand Up @@ -46,12 +46,14 @@ namespace pycuda { namespace curandom {
void py_curand_get_direction_vectors(
curandDirectionVectorSet_t set, py::object dst, int count)
{
void *buf;
PYCUDA_BUFFER_SIZE_T len;
int n = 0;

if (PyObject_AsWriteBuffer(dst.ptr(), &buf, &len))
throw py::error_already_set();
py_buffer_wrapper buf_wrapper;
buf_wrapper.get(dst.ptr(), PyBUF_ANY_CONTIGUOUS | PyBUF_WRITABLE);

void *buf = buf_wrapper.m_buf.buf;
PYCUDA_BUFFER_SIZE_T len = buf_wrapper.m_buf.len;

if (CURAND_DIRECTION_VECTORS_32_JOEKUO6 == set
#if CUDAPP_CUDA_VERSION >= 4000
|| CURAND_SCRAMBLED_DIRECTION_VECTORS_32_JOEKUO6 == set
Expand Down Expand Up @@ -85,12 +87,14 @@ namespace pycuda { namespace curandom {
#if CUDAPP_CUDA_VERSION >= 4000
void py_curand_get_scramble_constants32(py::object dst, int count)
{
void *buf;
PYCUDA_BUFFER_SIZE_T len;
int n = 0;

if (PyObject_AsWriteBuffer(dst.ptr(), &buf, &len))
throw py::error_already_set();
py_buffer_wrapper buf_wrapper;
buf_wrapper.get(dst.ptr(), PyBUF_ANY_CONTIGUOUS | PyBUF_WRITABLE);

void *buf = buf_wrapper.m_buf.buf;
PYCUDA_BUFFER_SIZE_T len = buf_wrapper.m_buf.len;

unsigned int *vectors;
CURAND_CALL_GUARDED(curandGetScrambleConstants32, (&vectors));
// Documentation does not mention number of dimensions
Expand All @@ -105,12 +109,14 @@ namespace pycuda { namespace curandom {

void py_curand_get_scramble_constants64(py::object dst, int count)
{
void *buf;
PYCUDA_BUFFER_SIZE_T len;
int n = 0;

if (PyObject_AsWriteBuffer(dst.ptr(), &buf, &len))
throw py::error_already_set();
py_buffer_wrapper buf_wrapper;
buf_wrapper.get(dst.ptr(), PyBUF_ANY_CONTIGUOUS | PyBUF_WRITABLE);

void *buf = buf_wrapper.m_buf.buf;
PYCUDA_BUFFER_SIZE_T len = buf_wrapper.m_buf.len;

unsigned long long *vectors;
CURAND_CALL_GUARDED(curandGetScrambleConstants64, (&vectors));
// Documentation does not mention number of dimensions
Expand Down
66 changes: 31 additions & 35 deletions src/wrapper/wrap_cudadrv.cpp
Expand Up @@ -150,81 +150,75 @@ namespace

// Synchronous host->device copy.  *src* may be any Python object
// exposing a contiguous, readable buffer (new-style buffer protocol);
// the copy length is the buffer's length.
void py_memcpy_htod(CUdeviceptr dst, py::object src)
{
  py_buffer_wrapper buf_wrapper;
  buf_wrapper.get(src.ptr(), PyBUF_ANY_CONTIGUOUS);

  CUDAPP_CALL_GUARDED_THREADED(cuMemcpyHtoD,
      (dst, buf_wrapper.m_buf.buf, buf_wrapper.m_buf.len));
}




// Asynchronous host->device copy on the given stream (or the default
// stream when *stream_py* is None).  *src* must expose a contiguous,
// readable buffer; the caller is responsible for keeping it alive
// until the copy completes.
void py_memcpy_htod_async(CUdeviceptr dst, py::object src, py::object stream_py)
{
  py_buffer_wrapper buf_wrapper;
  buf_wrapper.get(src.ptr(), PyBUF_ANY_CONTIGUOUS);

  // Expands to the stream-handle extraction boilerplate (s_handle).
  PYCUDA_PARSE_STREAM_PY;

  CUDAPP_CALL_GUARDED_THREADED(cuMemcpyHtoDAsync,
      (dst, buf_wrapper.m_buf.buf, buf_wrapper.m_buf.len, s_handle));
}




// Synchronous device->host copy.  *dest* must expose a contiguous,
// writable buffer; PyBUF_WRITABLE rejects read-only objects such as
// bytes.
void py_memcpy_dtoh(py::object dest, CUdeviceptr src)
{
  py_buffer_wrapper buf_wrapper;
  buf_wrapper.get(dest.ptr(), PyBUF_ANY_CONTIGUOUS | PyBUF_WRITABLE);

  CUDAPP_CALL_GUARDED_THREADED(cuMemcpyDtoH,
      (buf_wrapper.m_buf.buf, src, buf_wrapper.m_buf.len));
}




// Asynchronous device->host copy on the given stream.  *dest* must
// expose a contiguous, writable buffer and stay alive until the copy
// completes.
void py_memcpy_dtoh_async(py::object dest, CUdeviceptr src, py::object stream_py)
{
  py_buffer_wrapper buf_wrapper;
  buf_wrapper.get(dest.ptr(), PyBUF_ANY_CONTIGUOUS | PyBUF_WRITABLE);

  // Expands to the stream-handle extraction boilerplate (s_handle).
  PYCUDA_PARSE_STREAM_PY;

  CUDAPP_CALL_GUARDED_THREADED(cuMemcpyDtoHAsync,
      (buf_wrapper.m_buf.buf, src, buf_wrapper.m_buf.len, s_handle));
}




// Copy from a host buffer into a CUDA array at byte offset *index*.
// *src* must expose a contiguous, readable buffer.
void py_memcpy_htoa(array const &ary, unsigned int index, py::object src)
{
  py_buffer_wrapper buf_wrapper;
  buf_wrapper.get(src.ptr(), PyBUF_ANY_CONTIGUOUS);

  CUDAPP_CALL_GUARDED_THREADED(cuMemcpyHtoA,
      (ary.handle(), index, buf_wrapper.m_buf.buf, buf_wrapper.m_buf.len));
}




// Copy from a CUDA array at byte offset *index* into a host buffer.
// *dest* must expose a contiguous, writable buffer.
void py_memcpy_atoh(py::object dest, array const &ary, unsigned int index)
{
  py_buffer_wrapper buf_wrapper;
  buf_wrapper.get(dest.ptr(), PyBUF_ANY_CONTIGUOUS | PyBUF_WRITABLE);

  CUDAPP_CALL_GUARDED_THREADED(cuMemcpyAtoH,
      (buf_wrapper.m_buf.buf, ary.handle(), index, buf_wrapper.m_buf.len));
}


Expand Down Expand Up @@ -298,11 +292,13 @@ namespace

// Set raw kernel-parameter bytes at *offset* from any Python object
// exposing a contiguous, readable buffer.
void function_param_setv(function &f, int offset, py::object buffer)
{
  py_buffer_wrapper buf_wrapper;
  buf_wrapper.get(buffer.ptr(), PyBUF_ANY_CONTIGUOUS);

  // Py_buffer::buf is a plain (non-const) void *, so no const_cast
  // is needed here.
  f.param_setv(offset, buf_wrapper.m_buf.buf, buf_wrapper.m_buf.len);
}


Expand Down

0 comments on commit 6be1bea

Please sign in to comment.