
Commit

set/to_gpu symmetrical with get
davidweichiang committed Jul 12, 2015
1 parent ac83622 commit fa7a3bb
Showing 2 changed files with 32 additions and 40 deletions.
63 changes: 25 additions & 38 deletions pycuda/gpuarray.py
@@ -220,37 +220,23 @@ def ndim(self):
     def flags(self):
         return _ArrayFlags(self)

-    def set(self, ary):
-        assert ary.size == self.size
-        assert ary.dtype == self.dtype
-        if ary.strides != self.strides:
+    def set(self, ary, async=False, stream=None):
+        if ary.size != self.size:
+            raise ValueError("ary and self must be the same size")
+        if ary.shape != self.shape:
             from warnings import warn
-            warn("Setting array from one with different strides/storage order. "
-                    "This will cease to work in 2013.x.",
+            warn("Setting array from one with different shape.",
                     stacklevel=2)
+            ary = ary.reshape(self.shape)

-        assert self.flags.forc
+        if ary.dtype != self.dtype:
+            raise ValueError("ary and self must have the same dtype")

         if self.size:
-            drv.memcpy_htod(self.gpudata, ary)
+            _memcpy_discontig(self, ary, async=async, stream=stream)

     def set_async(self, ary, stream=None):
-        assert ary.size == self.size
-        assert ary.dtype == self.dtype
-        if ary.strides != self.strides:
-            from warnings import warn
-            warn("Setting array from one with different strides/storage order. "
-                    "This will cease to work in 2013.x.",
-                    stacklevel=2)
-
-        assert self.flags.forc
-
-        if not ary.flags.forc:
-            raise RuntimeError("cannot asynchronously set from "
-                    "non-contiguous array")
-
-        if self.size:
-            drv.memcpy_htod_async(self.gpudata, ary, stream)
+        return self.set(ary, async=True, stream=stream)

     def get(self, ary=None, pagelocked=False, async=False, stream=None):
         if ary is None:
@@ -259,18 +245,8 @@ def get(self, ary=None, pagelocked=False, async=False, stream=None):
             else:
                 ary = np.empty(self.shape, self.dtype)

-            # Compute strides to have same order as self, but packed
-            info = sorted((self.strides[axis], self.shape[axis], axis) for axis in xrange(len(self.shape)))
-
-            dst_info = []
-            stride = self.dtype.itemsize
-            for _, dim, axis in info:
-                dst_info.append((axis, stride))
-                stride *= dim
-            dst_info.sort()
-            dst_strides = [stride for _, stride in dst_info]
-
-            ary = _as_strided(ary, strides=dst_strides)
+            strides = _compact_strides(self)
+            ary = _as_strided(ary, strides=strides)
         else:
             if self.size != ary.size:
                 raise ValueError("self and ary must be the same size")
@@ -972,14 +948,14 @@ def conj(self):

 def to_gpu(ary, allocator=drv.mem_alloc):
     """converts a numpy array to a GPUArray"""
-    result = GPUArray(ary.shape, ary.dtype, allocator, strides=ary.strides)
+    result = GPUArray(ary.shape, ary.dtype, allocator, strides=_compact_strides(ary))
     result.set(ary)
     return result


 def to_gpu_async(ary, allocator=drv.mem_alloc, stream=None):
     """converts a numpy array to a GPUArray"""
-    result = GPUArray(ary.shape, ary.dtype, allocator, strides=ary.strides)
+    result = GPUArray(ary.shape, ary.dtype, allocator, strides=_compact_strides(ary))
     result.set_async(ary, stream)
     return result

@@ -1098,6 +1074,17 @@ class Info(Record):

 # }}}

+def _compact_strides(a):
+    # Compute strides to have same order as self, but packed
+    info = sorted((a.strides[axis], a.shape[axis], axis) for axis in xrange(len(a.shape)))
+
+    strides = [None]*len(a.shape)
+    stride = a.dtype.itemsize
+    for _, dim, axis in info:
+        strides[axis] = stride
+        stride *= dim
+    return strides
+
 def _memcpy_discontig(dst, src, async=False, stream=None):
     """Copy the contents of src into dst.
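The new _compact_strides helper factors out the packed-stride computation that get() previously did inline, so to_gpu and set() can reuse it. Below is a minimal standalone sketch of the same idea, using plain NumPy and Python 3's range in place of xrange so it runs without a GPU; the function name and the example arrays are illustrative, not part of the commit.

    import numpy as np

    def compact_strides(a):
        # Same logic as the _compact_strides helper added above: keep the
        # axis ordering of `a`, but pack the data with no gaps between axes.
        info = sorted((a.strides[axis], a.shape[axis], axis)
                      for axis in range(len(a.shape)))
        strides = [None] * len(a.shape)
        stride = a.dtype.itemsize
        for _, dim, axis in info:
            strides[axis] = stride
            stride *= dim
        return strides

    # float64 array whose axes are permuted relative to memory order.
    a = np.empty((4, 4, 4)).transpose((1, 2, 0))
    print(a.strides)           # (32, 8, 128)
    print(compact_strides(a))  # [32, 8, 128] -- already packed, order preserved

    # A sliced view has gaps; compact strides remove them but keep the order.
    b = a[::2]
    print(b.strides)           # (64, 8, 128)
    print(compact_strides(b))  # [32, 8, 64]

get() already produced host copies with this packed layout; with this commit, to_gpu allocates the GPUArray with the same layout of the host array and set() copies through _memcpy_discontig, which is the symmetry the commit message refers to.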
9 changes: 7 additions & 2 deletions test/test_gpuarray.py
@@ -1004,14 +1004,19 @@ def test_copy(self):
         for start, stop, step in [(0,3,1), (1,2,1), (0,3,3)]:
             assert np.allclose(a_gpu[start:stop:step,:,start:stop:step].get(), a_gpu.get()[start:stop:step,:,start:stop:step])

-    def test_get(self):
+    def test_get_set(self):
         import pycuda.gpuarray as gpuarray

         a = np.random.normal(0., 1., (4,4))
         a_gpu = gpuarray.to_gpu(a)

         assert np.allclose(a_gpu.get(), a)
         assert np.allclose(a_gpu[1:3,1:3].get(), a[1:3,1:3])

+        a = np.random.normal(0., 1., (4,4,4)).transpose((1,2,0))
+        a_gpu = gpuarray.to_gpu(a)
+        assert np.allclose(a_gpu.get(), a)
+        assert np.allclose(a_gpu[1:3,1:3,1:3].get(), a[1:3,1:3,1:3])

 if __name__ == "__main__":
     # make sure that import failures get reported, instead of skipping the tests.
     import pycuda.autoinit  # noqa
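Roughly what the new test exercises, as a hedged standalone sketch: it assumes a CUDA-capable machine with PyCUDA installed, and the final set() call is not in the test, only an illustration of the set/to_gpu/get symmetry this commit aims for.

    import numpy as np
    import pycuda.autoinit  # noqa -- creates a CUDA context
    import pycuda.gpuarray as gpuarray

    # Host array whose memory layout is a permutation of C order.
    a = np.random.normal(0., 1., (4, 4, 4)).transpose((1, 2, 0))

    a_gpu = gpuarray.to_gpu(a)          # GPU copy with packed strides in a's axis order
    assert np.allclose(a_gpu.get(), a)  # get() round-trips the same layout

    b = np.random.normal(0., 1., (4, 4, 4)).transpose((1, 2, 0))
    a_gpu.set(b)                        # set() accepts the layout to_gpu/get use
    assert np.allclose(a_gpu.get(), b)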

0 comments on commit fa7a3bb
