Merge pull request #49 from untom/scalar_maximum
ENH: gpuarray.minimum/maximum accept scalar argument
inducer committed Aug 12, 2014 · 2 parents 6be1bea + fe9985b · commit 67569f9
Showing 3 changed files with 54 additions and 13 deletions.
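For orientation, a minimal sketch of what this change enables (illustrative values; assumes a working CUDA context via pycuda.autoinit): gpuarray.maximum and gpuarray.minimum previously required two GPUArray arguments; after this commit either argument may be a scalar, mirroring np.maximum/np.minimum.

    import numpy as np
    import pycuda.autoinit  # noqa: F401 -- creates a CUDA context
    import pycuda.gpuarray as gpuarray

    a_gpu = gpuarray.to_gpu(np.array([1.0, -0.5, 0.25]))

    # A scalar in either position now works:
    print(gpuarray.maximum(a_gpu, 0).get())  # [1.   0.   0.25]
    print(gpuarray.minimum(0, a_gpu).get())  # [0.  -0.5  0.  ]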
17 changes: 15 additions & 2 deletions pycuda/elementwise.py
@@ -451,16 +451,29 @@ def get_binary_func_kernel(func, dtype_x, dtype_y, dtype_z):
"z[i] = %s(x[i], y[i])" % func,
func+"_kernel")

@context_dependent_memoize
def get_binary_func_scalar_kernel(func, dtype_x, dtype_y, dtype_z):
return get_elwise_kernel(
"%(tp_x)s *x, %(tp_y)s y, %(tp_z)s *z" % {
"tp_x": dtype_to_ctype(dtype_x),
"tp_y": dtype_to_ctype(dtype_y),
"tp_z": dtype_to_ctype(dtype_z),
},
"z[i] = %s(x[i], y)" % func,
func+"_kernel")

def get_binary_minmax_kernel(func, dtype_x, dtype_y, dtype_z):
def get_binary_minmax_kernel(func, dtype_x, dtype_y, dtype_z, use_scalar):
if not np.float64 in [dtype_x, dtype_y]:
func = func + "f"

from pytools import any
if any(dt.kind == "f" for dt in [dtype_x, dtype_y, dtype_z]):
func = "f"+func

return get_binary_func_kernel(func, dtype_x, dtype_y, dtype_z)
if use_scalar:
return get_binary_func_scalar_kernel(func, dtype_x, dtype_y, dtype_z)
else:
return get_binary_func_kernel(func, dtype_x, dtype_y, dtype_z)


@context_dependent_memoize
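The two renames above pick the matching CUDA math function: min/max gain an "f" prefix when any operand is floating point, and an "f" suffix (the single-precision variants) when neither input is float64. A host-side sketch of that selection logic, where minmax_func_name is a hypothetical helper for illustration, not part of the commit:

    import numpy as np

    def minmax_func_name(func, dtype_x, dtype_y, dtype_z):
        # Mirrors the two if-statements in get_binary_minmax_kernel
        if np.float64 not in [dtype_x, dtype_y]:
            func = func + "f"   # single-precision variant, e.g. fmaxf
        if any(np.dtype(dt).kind == "f" for dt in [dtype_x, dtype_y, dtype_z]):
            func = "f" + func   # floating-point function, e.g. fmax
        return func

    print(minmax_func_name("max", np.float32, np.float32, np.float32))  # fmaxf
    print(minmax_func_name("min", np.float64, np.float64, np.float64))  # fmin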
32 changes: 23 additions & 9 deletions pycuda/gpuarray.py
@@ -1243,15 +1243,29 @@ def if_positive(criterion, then_, else_, out=None, stream=None):

 def _make_binary_minmax_func(which):
     def f(a, b, out=None, stream=None):
-        if out is None:
-            out = empty_like(a)
-
-        func = elementwise.get_binary_minmax_kernel(which,
-                a.dtype, b.dtype, out.dtype)
-
-        func.prepared_async_call(a._grid, a._block, stream,
-                a.gpudata, b.gpudata, out.gpudata, a.size)
-
+        if isinstance(a, GPUArray) and isinstance(b, GPUArray):
+            if out is None:
+                out = empty_like(a)
+            func = elementwise.get_binary_minmax_kernel(which,
+                    a.dtype, b.dtype, out.dtype, use_scalar=False)
+
+            func.prepared_async_call(a._grid, a._block, stream,
+                    a.gpudata, b.gpudata, out.gpudata, a.size)
+        elif isinstance(a, GPUArray):
+            if out is None:
+                out = empty_like(a)
+            func = elementwise.get_binary_minmax_kernel(which,
+                    a.dtype, a.dtype, out.dtype, use_scalar=True)
+            func.prepared_async_call(a._grid, a._block, stream,
+                    a.gpudata, b, out.gpudata, a.size)
+        else:  # assuming b is a GPUArray
+            if out is None:
+                out = empty_like(b)
+            func = elementwise.get_binary_minmax_kernel(which,
+                    b.dtype, b.dtype, out.dtype, use_scalar=True)
+            # NOTE: we switch the order of a and b here!
+            func.prepared_async_call(b._grid, b._block, stream,
+                    b.gpudata, a, out.gpudata, b.size)
         return out
     return f

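The final else branch handles a scalar first argument by reusing the same scalar kernel with the operands swapped, as the NOTE comment says. This is safe precisely because minimum and maximum are commutative; a quick host-side sanity check of that identity (a sketch with illustrative values):

    import numpy as np

    s = 0.5                          # scalar argument
    b = np.array([1.0, 0.25, -0.1])  # stand-in for the GPUArray operand

    # min/max are commutative, so swapping the operand order is harmless:
    assert (np.maximum(s, b) == np.maximum(b, s)).all()
    assert (np.minimum(s, b) == np.minimum(b, s)).all()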
18 changes: 16 additions & 2 deletions test/test_gpuarray.py
@@ -859,7 +859,7 @@ def test_view_and_strides(self):
     def test_scalar_comparisons(self):
         a = np.array([1.0, 0.25, 0.1, -0.1, 0.0])
         a_gpu = gpuarray.to_gpu(a)
 
         x_gpu = a_gpu > 0.25
         x = (a > 0.25).astype(a.dtype)
         assert (x == x_gpu.get()).all()
@@ -876,7 +876,21 @@ def test_scalar_comparisons(self):
         x = (a == 1).astype(a.dtype)
         assert (x == x_gpu.get()).all()
 
+    @mark_cuda_test
+    def test_minimum_maximum_scalar(self):
+        from pycuda.curandom import rand as curand
+
+        l = 20
+        a_gpu = curand((l,))
+        a = a_gpu.get()
+
+        import pycuda.gpuarray as gpuarray
+
+        max_a0_gpu = gpuarray.maximum(a_gpu, 0)
+        min_a0_gpu = gpuarray.minimum(0, a_gpu)
+
+        assert la.norm(max_a0_gpu.get() - np.maximum(a, 0)) == 0
+        assert la.norm(min_a0_gpu.get() - np.minimum(0, a)) == 0
 
 
 if __name__ == "__main__":
