Commit
Merge pull request #44 from untom/outparam
Add 'out' parameter to unary math functions.
inducer committed Mar 10, 2014
2 parents 56414bf + e023047 commit 9e071c6
Showing 3 changed files with 119 additions and 20 deletions.
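In short, every unary function in pycuda.cumath can now write its result into a caller-supplied GPU array via out= instead of always allocating a fresh one. A minimal usage sketch (array names and sizes are illustrative; a context set up by pycuda.autoinit is assumed):

    import numpy as np
    import pycuda.autoinit  # noqa: creates a CUDA context
    import pycuda.gpuarray as gpuarray
    import pycuda.cumath as cumath

    a = gpuarray.to_gpu(np.random.rand(1024).astype(np.float32))

    # Old behaviour (still available): a new array is allocated for the result.
    b = cumath.exp(a)

    # New behaviour: write the result into a preallocated buffer that matches
    # the input's dtype, shape, and strides.
    buf = gpuarray.empty_like(a)
    cumath.exp(a, out=buf)
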
doc/source/array.rst: 32 changes (16 additions, 16 deletions)
@@ -338,34 +338,34 @@ workalikes for the functions contained in :mod:`math`.
 Rounding and Absolute Value
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^

-.. function:: fabs(array, stream=None)
-.. function:: ceil(array, stream=None)
-.. function:: floor(array, stream=None)
+.. function:: fabs(array, *, out=None, stream=None)
+.. function:: ceil(array, *, out=None, stream=None)
+.. function:: floor(array, *, out=None, stream=None)

 Exponentials, Logarithms and Roots
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

-.. function:: exp(array, stream=None)
-.. function:: log(array, stream=None)
-.. function:: log10(array, stream=None)
-.. function:: sqrt(array, stream=None)
+.. function:: exp(array, *, out=None, stream=None)
+.. function:: log(array, *, out=None, stream=None)
+.. function:: log10(array, *, out=None, stream=None)
+.. function:: sqrt(array, *, out=None, stream=None)

 Trigonometric Functions
 ^^^^^^^^^^^^^^^^^^^^^^^

-.. function:: sin(array, stream=None)
-.. function:: cos(array, stream=None)
-.. function:: tan(array, stream=None)
-.. function:: asin(array, stream=None)
-.. function:: acos(array, stream=None)
-.. function:: atan(array, stream=None)
+.. function:: sin(array, *, out=None, stream=None)
+.. function:: cos(array, *, out=None, stream=None)
+.. function:: tan(array, *, out=None, stream=None)
+.. function:: asin(array, *, out=None, stream=None)
+.. function:: acos(array, *, out=None, stream=None)
+.. function:: atan(array, *, out=None, stream=None)

 Hyperbolic Functions
 ^^^^^^^^^^^^^^^^^^^^

-.. function:: sinh(array, stream=None)
-.. function:: cosh(array, stream=None)
-.. function:: tanh(array, stream=None)
+.. function:: sinh(array, *, out=None, stream=None)
+.. function:: cosh(array, *, out=None, stream=None)
+.. function:: tanh(array, *, out=None, stream=None)

 Floating Point Decomposition and Assembly
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
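
All of the functions documented above take the same two keyword arguments, so combining a reused output buffer with a launch on a user-created stream looks the same for any of them. A hedged sketch (stream and array names are made up; a context from pycuda.autoinit is assumed):

    import numpy as np
    import pycuda.autoinit  # noqa
    import pycuda.gpuarray as gpuarray
    import pycuda.cumath as cumath
    from pycuda.driver import Stream

    s = Stream()
    x = gpuarray.to_gpu(np.linspace(0.1, 1.0, 1 << 20).astype(np.float32))
    y = gpuarray.empty_like(x)

    # The kernel is enqueued on stream 's' and writes into 'y'; synchronize
    # before reading the result back to the host.
    cumath.log(x, out=y, stream=s)
    s.synchronize()
    host_result = y.get()
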
pycuda/cumath.py: 32 changes (28 additions, 4 deletions)
@@ -1,10 +1,27 @@
 import pycuda.gpuarray as gpuarray
 import pycuda.elementwise as elementwise
 import numpy as np
+import warnings
+from pycuda.driver import Stream


 def _make_unary_array_func(name):
-    def f(array, stream=None):
-        result = array._new_like_me()
+    def f(array, stream_or_out=None, **kwargs):
+
+        if stream_or_out is not None:
+            warnings.warn("please use 'out' or 'stream' keyword arguments", DeprecationWarning)
+            if isinstance(stream_or_out, Stream):
+                stream = stream_or_out
+                out = None
+            else:
+                stream = None
+                out = stream_or_out
+
+        out, stream = None, None
+        if 'out' in kwargs:
+            out = kwargs['out']
+        if 'stream' in kwargs:
+            stream = kwargs['stream']
+
         if array.dtype == np.float32:
             func_name = name + "f"
@@ -15,11 +32,18 @@ def f(array, stream=None):
             raise RuntimeError("only contiguous arrays may "
                     "be used as arguments to this operation")

+        if out is None:
+            out = array._new_like_me()
+        else:
+            assert out.dtype == array.dtype
+            assert out.strides == array.strides
+            assert out.shape == array.shape
+
         func = elementwise.get_unary_func_kernel(func_name, array.dtype)
         func.prepared_async_call(array._grid, array._block, stream,
-                array.gpudata, result.gpudata, array.mem_size)
+                array.gpudata, out.gpudata, array.mem_size)

-        return result
+        return out
     return f

 fabs = _make_unary_array_func("fabs")
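
A note on the contract the asserts above impose: a user-supplied out buffer must match the input array's dtype, strides, and shape, and when out is omitted the function falls back to allocating a result with array._new_like_me(). A small sketch of both cases (buffer names are illustrative):

    import numpy as np
    import pycuda.autoinit  # noqa
    import pycuda.gpuarray as gpuarray
    import pycuda.cumath as cumath

    a = gpuarray.to_gpu(np.random.rand(256).astype(np.float32))

    ok = gpuarray.empty_like(a)             # same dtype/shape/strides: accepted
    cumath.sqrt(a, out=ok)

    bad = gpuarray.zeros(256, np.float64)   # dtype mismatch: the assert fires
    try:
        cumath.sqrt(a, out=bad)
    except AssertionError:
        pass  # rejected, as enforced by the checks above
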
test/test_cumath.py: 75 changes (75 additions, 0 deletions)
@@ -54,6 +54,14 @@ def test():
                 assert (max_err <= threshold).all(), \
                         (max_err, name, dtype)

+                gpu_results2 = gpuarray.empty_like(args)
+                gr2 = gpu_func(args, out=gpu_results2)
+                assert gpu_results2 is gr2
+                gr2 = gr2.get()
+                max_err = np.max(np.abs(cpu_results - gr2))
+                assert (max_err <= threshold).all(), \
+                        (max_err, name, dtype)
+
     return mark_cuda_test(test)


@@ -157,6 +165,73 @@ def test_frexp(self):
             assert sig_true == significands[i]
             assert ex_true == exponents[i]

+    @mark_cuda_test
+    def test_unary_func_kwargs(self):
+        """tests if the kwargs to the unary functions work"""
+        from pycuda.driver import Stream
+
+        name, a, b, threshold = ("exp", -3, 3, 1e-5)
+        gpu_func = getattr(cumath, name)
+        cpu_func = getattr(np, numpy_func_names.get(name, name))
+        for s in sizes:
+            for dtype in dtypes:
+                np.random.seed(1)
+                A = (np.random.random(s)*(b-a) + a).astype(dtype)
+                if complex:
+                    A += (np.random.random(s)*(b-a) + a)*1j
+
+                np.random.seed(1)
+                A = (np.random.random(s)*(b-a) + a).astype(dtype)
+                args = gpuarray.to_gpu(A)
+
+                # 'out' kw
+                gpu_results = gpuarray.empty_like(args)
+                gpu_results = gpu_func(args, out=gpu_results).get()
+                cpu_results = cpu_func(A)
+                max_err = np.max(np.abs(cpu_results - gpu_results))
+                assert (max_err <= threshold).all(), (max_err, name, dtype)
+
+                # 'out' position
+                gpu_results = gpuarray.empty_like(args)
+                gpu_results = gpu_func(args, gpu_results).get()
+                cpu_results = cpu_func(A)
+                max_err = np.max(np.abs(cpu_results - gpu_results))
+                assert (max_err <= threshold).all(), (max_err, name, dtype)
+
+                # 'stream' kw
+                mystream = Stream()
+                np.random.seed(1)
+                A = (np.random.random(s)*(b-a) + a).astype(dtype)
+                args = gpuarray.to_gpu(A)
+                gpu_results = gpuarray.empty_like(args)
+                gpu_results = gpu_func(args, stream=mystream).get()
+                cpu_results = cpu_func(A)
+                max_err = np.max(np.abs(cpu_results - gpu_results))
+                assert (max_err <= threshold).all(), (max_err, name, dtype)
+
+                # 'stream' position
+                mystream = Stream()
+                np.random.seed(1)
+                A = (np.random.random(s)*(b-a) + a).astype(dtype)
+                args = gpuarray.to_gpu(A)
+                gpu_results = gpuarray.empty_like(args)
+                gpu_results = gpu_func(args, mystream).get()
+                cpu_results = cpu_func(A)
+                max_err = np.max(np.abs(cpu_results - gpu_results))
+                assert (max_err <= threshold).all(), (max_err, name, dtype)
+
+                # 'out' and 'stream' kw
+                mystream = Stream()
+                np.random.seed(1)
+                A = (np.random.random(s)*(b-a) + a).astype(dtype)
+                args = gpuarray.to_gpu(A)
+                gpu_results = gpuarray.empty_like(args)
+                gpu_results = gpu_func(args, stream=mystream, out=gpu_results).get()
+                cpu_results = cpu_func(A)
+                max_err = np.max(np.abs(cpu_results - gpu_results))
+                assert (max_err <= threshold).all(), (max_err, name, dtype)
+

 if __name__ == "__main__":
     # make sure that import failures get reported, instead of skipping the tests.
     import pycuda.autoinit  # noqa
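
One behavioural detail the new tests pin down (via assert gpu_results2 is gr2): when out is supplied, the call returns the very same GPUArray object rather than a copy, so the return value and the buffer are interchangeable. A tiny illustration under the same assumptions as above (names are made up):

    import numpy as np
    import pycuda.autoinit  # noqa
    import pycuda.gpuarray as gpuarray
    import pycuda.cumath as cumath

    a = gpuarray.to_gpu(np.random.rand(64).astype(np.float32))
    buf = gpuarray.empty_like(a)
    ret = cumath.exp(a, out=buf)
    assert ret is buf  # same object; no extra allocation happened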
