Merge pull request #49 from untom/scalar_maximum
ENH: gpuarray.minimum/maximum accept scalar argument
inducer committed Aug 12, 2014 · 2 parents 6be1bea + fe9985b · commit 67569f9
Showing 3 changed files with 54 additions and 13 deletions.
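For orientation, a minimal sketch of what this change enables (illustrative values; assumes a working CUDA context via pycuda.autoinit): gpuarray.maximum and gpuarray.minimum previously required two GPUArray arguments; after this commit either argument may be a scalar, mirroring np.maximum/np.minimum.

    import numpy as np
    import pycuda.autoinit  # noqa: F401 -- creates a CUDA context
    import pycuda.gpuarray as gpuarray

    a_gpu = gpuarray.to_gpu(np.array([1.0, -0.5, 0.25]))

    # A scalar in either position now works:
    print(gpuarray.maximum(a_gpu, 0).get())  # [1.   0.   0.25]
    print(gpuarray.minimum(0, a_gpu).get())  # [0.  -0.5  0.  ]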
17 changes: 15 additions & 2 deletions pycuda/elementwise.py
@@ -451,16 +451,29 @@ def get_binary_func_kernel(func, dtype_x, dtype_y, dtype_z):
"z[i] = %s(x[i], y[i])" % func,
func+"_kernel")

@context_dependent_memoize
def get_binary_func_scalar_kernel(func, dtype_x, dtype_y, dtype_z):
return get_elwise_kernel(
"%(tp_x)s *x, %(tp_y)s y, %(tp_z)s *z" % {
"tp_x": dtype_to_ctype(dtype_x),
"tp_y": dtype_to_ctype(dtype_y),
"tp_z": dtype_to_ctype(dtype_z),
},
"z[i] = %s(x[i], y)" % func,
func+"_kernel")

def get_binary_minmax_kernel(func, dtype_x, dtype_y, dtype_z):
def get_binary_minmax_kernel(func, dtype_x, dtype_y, dtype_z, use_scalar):
if not np.float64 in [dtype_x, dtype_y]:
func = func + "f"

from pytools import any
if any(dt.kind == "f" for dt in [dtype_x, dtype_y, dtype_z]):
func = "f"+func

return get_binary_func_kernel(func, dtype_x, dtype_y, dtype_z)
if use_scalar:
return get_binary_func_scalar_kernel(func, dtype_x, dtype_y, dtype_z)
else:
return get_binary_func_kernel(func, dtype_x, dtype_y, dtype_z)


@context_dependent_memoize
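The two renames above pick the matching CUDA math function: min/max gain an "f" prefix when any operand is floating point, and an "f" suffix (the single-precision variants) when neither input is float64. A host-side sketch of that selection logic, where minmax_func_name is a hypothetical helper for illustration, not part of the commit:

    import numpy as np

    def minmax_func_name(func, dtype_x, dtype_y, dtype_z):
        # Mirrors the two if-statements in get_binary_minmax_kernel
        if np.float64 not in [dtype_x, dtype_y]:
            func = func + "f"   # single-precision variant, e.g. fmaxf
        if any(np.dtype(dt).kind == "f" for dt in [dtype_x, dtype_y, dtype_z]):
            func = "f" + func   # floating-point function, e.g. fmax
        return func

    print(minmax_func_name("max", np.float32, np.float32, np.float32))  # fmaxf
    print(minmax_func_name("min", np.float64, np.float64, np.float64))  # fmin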
32 changes: 23 additions & 9 deletions pycuda/gpuarray.py
@@ -1243,15 +1243,29 @@ def if_positive(criterion, then_, else_, out=None, stream=None):

 def _make_binary_minmax_func(which):
     def f(a, b, out=None, stream=None):
-        if out is None:
-            out = empty_like(a)
-
-        func = elementwise.get_binary_minmax_kernel(which,
-                a.dtype, b.dtype, out.dtype)
-
-        func.prepared_async_call(a._grid, a._block, stream,
-                a.gpudata, b.gpudata, out.gpudata, a.size)
-
+        if isinstance(a, GPUArray) and isinstance(b, GPUArray):
+            if out is None:
+                out = empty_like(a)
+            func = elementwise.get_binary_minmax_kernel(which,
+                    a.dtype, b.dtype, out.dtype, use_scalar=False)
+
+            func.prepared_async_call(a._grid, a._block, stream,
+                    a.gpudata, b.gpudata, out.gpudata, a.size)
+        elif isinstance(a, GPUArray):
+            if out is None:
+                out = empty_like(a)
+            func = elementwise.get_binary_minmax_kernel(which,
+                    a.dtype, a.dtype, out.dtype, use_scalar=True)
+            func.prepared_async_call(a._grid, a._block, stream,
+                    a.gpudata, b, out.gpudata, a.size)
+        else:  # assuming b is a GPUArray
+            if out is None:
+                out = empty_like(b)
+            func = elementwise.get_binary_minmax_kernel(which,
+                    b.dtype, b.dtype, out.dtype, use_scalar=True)
+            # NOTE: we switch the order of a and b here!
+            func.prepared_async_call(b._grid, b._block, stream,
+                    b.gpudata, a, out.gpudata, b.size)
         return out
     return f

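The final else branch handles a scalar first argument by reusing the same scalar kernel with the operands swapped, as the NOTE comment says. This is safe precisely because minimum and maximum are commutative; a quick host-side sanity check of that identity (a sketch with illustrative values):

    import numpy as np

    s = 0.5                          # scalar argument
    b = np.array([1.0, 0.25, -0.1])  # stand-in for the GPUArray operand

    # min/max are commutative, so swapping the operand order is harmless:
    assert (np.maximum(s, b) == np.maximum(b, s)).all()
    assert (np.minimum(s, b) == np.minimum(b, s)).all()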
18 changes: 16 additions & 2 deletions test/test_gpuarray.py
@@ -859,7 +859,7 @@ def test_view_and_strides(self):
     def test_scalar_comparisons(self):
         a = np.array([1.0, 0.25, 0.1, -0.1, 0.0])
         a_gpu = gpuarray.to_gpu(a)
 
         x_gpu = a_gpu > 0.25
         x = (a > 0.25).astype(a.dtype)
         assert (x == x_gpu.get()).all()
@@ -876,7 +876,21 @@ def test_scalar_comparisons(self):
         x = (a == 1).astype(a.dtype)
         assert (x == x_gpu.get()).all()
 
+    @mark_cuda_test
+    def test_minimum_maximum_scalar(self):
+        from pycuda.curandom import rand as curand
+
+        l = 20
+        a_gpu = curand((l,))
+        a = a_gpu.get()
+
+        import pycuda.gpuarray as gpuarray
+
+        max_a0_gpu = gpuarray.maximum(a_gpu, 0)
+        min_a0_gpu = gpuarray.minimum(0, a_gpu)
+
+        assert la.norm(max_a0_gpu.get() - np.maximum(a, 0)) == 0
+        assert la.norm(min_a0_gpu.get() - np.minimum(0, a)) == 0
 
 
 if __name__ == "__main__":
