Commit
Merge pull request #44 from untom/outparam
Add 'out' parameter to unary math functions.
inducer committed Mar 10, 2014
2 parents 56414bf + e023047 commit 9e071c6
Showing 3 changed files with 119 additions and 20 deletions.
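In short, every unary function in pycuda.cumath can now write its result into a caller-supplied GPU array via out= instead of always allocating a fresh one. A minimal usage sketch (array names and sizes are illustrative; a context set up by pycuda.autoinit is assumed):

    import numpy as np
    import pycuda.autoinit  # noqa: creates a CUDA context
    import pycuda.gpuarray as gpuarray
    import pycuda.cumath as cumath

    a = gpuarray.to_gpu(np.random.rand(1024).astype(np.float32))

    # Old behaviour (still available): a new array is allocated for the result.
    b = cumath.exp(a)

    # New behaviour: write the result into a preallocated buffer that matches
    # the input's dtype, shape, and strides.
    buf = gpuarray.empty_like(a)
    cumath.exp(a, out=buf)
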
doc/source/array.rst: 32 changes (16 additions, 16 deletions)
@@ -338,34 +338,34 @@ workalikes for the functions contained in :mod:`math`.
 Rounding and Absolute Value
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^

-.. function:: fabs(array, stream=None)
-.. function:: ceil(array, stream=None)
-.. function:: floor(array, stream=None)
+.. function:: fabs(array, *, out=None, stream=None)
+.. function:: ceil(array, *, out=None, stream=None)
+.. function:: floor(array, *, out=None, stream=None)

 Exponentials, Logarithms and Roots
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

-.. function:: exp(array, stream=None)
-.. function:: log(array, stream=None)
-.. function:: log10(array, stream=None)
-.. function:: sqrt(array, stream=None)
+.. function:: exp(array, *, out=None, stream=None)
+.. function:: log(array, *, out=None, stream=None)
+.. function:: log10(array, *, out=None, stream=None)
+.. function:: sqrt(array, *, out=None, stream=None)

 Trigonometric Functions
 ^^^^^^^^^^^^^^^^^^^^^^^

-.. function:: sin(array, stream=None)
-.. function:: cos(array, stream=None)
-.. function:: tan(array, stream=None)
-.. function:: asin(array, stream=None)
-.. function:: acos(array, stream=None)
-.. function:: atan(array, stream=None)
+.. function:: sin(array, *, out=None, stream=None)
+.. function:: cos(array, *, out=None, stream=None)
+.. function:: tan(array, *, out=None, stream=None)
+.. function:: asin(array, *, out=None, stream=None)
+.. function:: acos(array, *, out=None, stream=None)
+.. function:: atan(array, *, out=None, stream=None)

 Hyperbolic Functions
 ^^^^^^^^^^^^^^^^^^^^

-.. function:: sinh(array, stream=None)
-.. function:: cosh(array, stream=None)
-.. function:: tanh(array, stream=None)
+.. function:: sinh(array, *, out=None, stream=None)
+.. function:: cosh(array, *, out=None, stream=None)
+.. function:: tanh(array, *, out=None, stream=None)

 Floating Point Decomposition and Assembly
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
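
All of the functions documented above take the same two keyword arguments, so combining a reused output buffer with a launch on a user-created stream looks the same for any of them. A hedged sketch (stream and array names are made up; a context from pycuda.autoinit is assumed):

    import numpy as np
    import pycuda.autoinit  # noqa
    import pycuda.gpuarray as gpuarray
    import pycuda.cumath as cumath
    from pycuda.driver import Stream

    s = Stream()
    x = gpuarray.to_gpu(np.linspace(0.1, 1.0, 1 << 20).astype(np.float32))
    y = gpuarray.empty_like(x)

    # The kernel is enqueued on stream 's' and writes into 'y'; synchronize
    # before reading the result back to the host.
    cumath.log(x, out=y, stream=s)
    s.synchronize()
    host_result = y.get()
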
pycuda/cumath.py: 32 changes (28 additions, 4 deletions)
@@ -1,10 +1,27 @@
 import pycuda.gpuarray as gpuarray
 import pycuda.elementwise as elementwise
 import numpy as np
+import warnings
+from pycuda.driver import Stream


 def _make_unary_array_func(name):
-    def f(array, stream=None):
-        result = array._new_like_me()
+    def f(array, stream_or_out=None, **kwargs):
+
+        if stream_or_out is not None:
+            warnings.warn("please use 'out' or 'stream' keyword arguments", DeprecationWarning)
+            if isinstance(stream_or_out, Stream):
+                stream = stream_or_out
+                out = None
+            else:
+                stream = None
+                out = stream_or_out
+
+        out, stream = None, None
+        if 'out' in kwargs:
+            out = kwargs['out']
+        if 'stream' in kwargs:
+            stream = kwargs['stream']
+
         if array.dtype == np.float32:
             func_name = name + "f"
@@ -15,11 +32,18 @@ def f(array, stream=None):
             raise RuntimeError("only contiguous arrays may "
                     "be used as arguments to this operation")

+        if out is None:
+            out = array._new_like_me()
+        else:
+            assert out.dtype == array.dtype
+            assert out.strides == array.strides
+            assert out.shape == array.shape
+
         func = elementwise.get_unary_func_kernel(func_name, array.dtype)
         func.prepared_async_call(array._grid, array._block, stream,
-                array.gpudata, result.gpudata, array.mem_size)
+                array.gpudata, out.gpudata, array.mem_size)

-        return result
+        return out
     return f

 fabs = _make_unary_array_func("fabs")
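
A note on the contract the asserts above impose: a user-supplied out buffer must match the input array's dtype, strides, and shape, and when out is omitted the function falls back to allocating a result with array._new_like_me(). A small sketch of both cases (buffer names are illustrative):

    import numpy as np
    import pycuda.autoinit  # noqa
    import pycuda.gpuarray as gpuarray
    import pycuda.cumath as cumath

    a = gpuarray.to_gpu(np.random.rand(256).astype(np.float32))

    ok = gpuarray.empty_like(a)             # same dtype/shape/strides: accepted
    cumath.sqrt(a, out=ok)

    bad = gpuarray.zeros(256, np.float64)   # dtype mismatch: the assert fires
    try:
        cumath.sqrt(a, out=bad)
    except AssertionError:
        pass  # rejected, as enforced by the checks above
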
test/test_cumath.py: 75 changes (75 additions, 0 deletions)
@@ -54,6 +54,14 @@ def test():
                 assert (max_err <= threshold).all(), \
                         (max_err, name, dtype)

+                gpu_results2 = gpuarray.empty_like(args)
+                gr2 = gpu_func(args, out=gpu_results2)
+                assert gpu_results2 is gr2
+                gr2 = gr2.get()
+                max_err = np.max(np.abs(cpu_results - gr2))
+                assert (max_err <= threshold).all(), \
+                        (max_err, name, dtype)
+
     return mark_cuda_test(test)


@@ -157,6 +165,73 @@ def test_frexp(self):
             assert sig_true == significands[i]
             assert ex_true == exponents[i]

+    @mark_cuda_test
+    def test_unary_func_kwargs(self):
+        """tests if the kwargs to the unary functions work"""
+        from pycuda.driver import Stream
+
+        name, a, b, threshold = ("exp", -3, 3, 1e-5)
+        gpu_func = getattr(cumath, name)
+        cpu_func = getattr(np, numpy_func_names.get(name, name))
+        for s in sizes:
+            for dtype in dtypes:
+                np.random.seed(1)
+                A = (np.random.random(s)*(b-a) + a).astype(dtype)
+                if complex:
+                    A += (np.random.random(s)*(b-a) + a)*1j
+
+                np.random.seed(1)
+                A = (np.random.random(s)*(b-a) + a).astype(dtype)
+                args = gpuarray.to_gpu(A)
+
+                # 'out' kw
+                gpu_results = gpuarray.empty_like(args)
+                gpu_results = gpu_func(args, out=gpu_results).get()
+                cpu_results = cpu_func(A)
+                max_err = np.max(np.abs(cpu_results - gpu_results))
+                assert (max_err <= threshold).all(), (max_err, name, dtype)
+
+                # 'out' position
+                gpu_results = gpuarray.empty_like(args)
+                gpu_results = gpu_func(args, gpu_results).get()
+                cpu_results = cpu_func(A)
+                max_err = np.max(np.abs(cpu_results - gpu_results))
+                assert (max_err <= threshold).all(), (max_err, name, dtype)
+
+                # 'stream' kw
+                mystream = Stream()
+                np.random.seed(1)
+                A = (np.random.random(s)*(b-a) + a).astype(dtype)
+                args = gpuarray.to_gpu(A)
+                gpu_results = gpuarray.empty_like(args)
+                gpu_results = gpu_func(args, stream=mystream).get()
+                cpu_results = cpu_func(A)
+                max_err = np.max(np.abs(cpu_results - gpu_results))
+                assert (max_err <= threshold).all(), (max_err, name, dtype)
+
+                # 'stream' position
+                mystream = Stream()
+                np.random.seed(1)
+                A = (np.random.random(s)*(b-a) + a).astype(dtype)
+                args = gpuarray.to_gpu(A)
+                gpu_results = gpuarray.empty_like(args)
+                gpu_results = gpu_func(args, mystream).get()
+                cpu_results = cpu_func(A)
+                max_err = np.max(np.abs(cpu_results - gpu_results))
+                assert (max_err <= threshold).all(), (max_err, name, dtype)
+
+                # 'out' and 'stream' kw
+                mystream = Stream()
+                np.random.seed(1)
+                A = (np.random.random(s)*(b-a) + a).astype(dtype)
+                args = gpuarray.to_gpu(A)
+                gpu_results = gpuarray.empty_like(args)
+                gpu_results = gpu_func(args, stream=mystream, out=gpu_results).get()
+                cpu_results = cpu_func(A)
+                max_err = np.max(np.abs(cpu_results - gpu_results))
+                assert (max_err <= threshold).all(), (max_err, name, dtype)
+

 if __name__ == "__main__":
     # make sure that import failures get reported, instead of skipping the tests.
     import pycuda.autoinit  # noqa
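
One behavioural detail the new tests pin down (via assert gpu_results2 is gr2): when out is supplied, the call returns the very same GPUArray object rather than a copy, so the return value and the buffer are interchangeable. A tiny illustration under the same assumptions as above (names are made up):

    import numpy as np
    import pycuda.autoinit  # noqa
    import pycuda.gpuarray as gpuarray
    import pycuda.cumath as cumath

    a = gpuarray.to_gpu(np.random.rand(64).astype(np.float32))
    buf = gpuarray.empty_like(a)
    ret = cumath.exp(a, out=buf)
    assert ret is buf  # same object; no extra allocation happened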
