Skip to content

Commit

Permalink
Add double precision tests, xfail them on POCL
Browse files: browse the repository at this point in the history
  • Loading branch information
inducer committed Jul 17, 2015
1 parent 770d6e4 commit c7ac1c2
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 7 deletions.
13 changes: 8 additions & 5 deletions pyopencl/bitonic_sort.py
Expand Up @@ -121,8 +121,10 @@ def __call__(self, arr, idx=None, queue=None, wait_for=None, axis=0):
if aux:
last_evt = knl(
queue, (nt,), wg, arr.data, idx.data,
cl.LocalMemory(wg[0]*arr.dtype.itemsize),
cl.LocalMemory(wg[0]*idx.dtype.itemsize),
cl.LocalMemory(
_tmpl.LOCAL_MEM_FACTOR*wg[0]*arr.dtype.itemsize),
cl.LocalMemory(
_tmpl.LOCAL_MEM_FACTOR*wg[0]*idx.dtype.itemsize),
wait_for=[last_evt])
for knl, nt, wg, _ in run_queue[1:]:
last_evt = knl(
Expand All @@ -133,7 +135,8 @@ def __call__(self, arr, idx=None, queue=None, wait_for=None, axis=0):
if aux:
last_evt = knl(
queue, (nt,), wg, arr.data,
cl.LocalMemory(wg[0]*4*arr.dtype.itemsize),
cl.LocalMemory(
_tmpl.LOCAL_MEM_FACTOR*wg[0]*4*arr.dtype.itemsize),
wait_for=[last_evt])
for knl, nt, wg, _ in run_queue[1:]:
last_evt = knl(queue, (nt,), wg, arr.data, wait_for=[last_evt])
Expand Down Expand Up @@ -184,9 +187,9 @@ def sort_b_prepare_wl(self, argsort, key_dtype, idx_dtype, shape, axis):

available_lmem = dev.local_mem_size
while True:
lmem_size = wg*key_dtype.itemsize
lmem_size = _tmpl.LOCAL_MEM_FACTOR*wg*key_dtype.itemsize
if argsort:
lmem_size += wg*idx_dtype.itemsize
lmem_size += _tmpl.LOCAL_MEM_FACTOR*wg*idx_dtype.itemsize

if lmem_size + 512 > available_lmem:
wg //= 2
Expand Down
9 changes: 9 additions & 0 deletions pyopencl/bitonic_sort_templates.py
Expand Up @@ -31,10 +31,17 @@
OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""

LOCAL_MEM_FACTOR = 1


# {{{ defines

defines = """//CL//
% if dtype == "double":
#pragma OPENCL EXTENSION cl_khr_fp64: enable
% endif
typedef ${dtype} data_t;
typedef ${idxtype} idx_t;
typedef ${idxtype}2 idx_t2;
Expand Down Expand Up @@ -318,6 +325,8 @@

# {{{ C4

# IF YOU RE-ENABLE THIS, YOU NEED TO SET LOCAL_MEM_FACTOR TO 4

ParallelBitonic_C4 = """//CL//
//ParallelBitonic_C4
__kernel void run\\
Expand Down
12 changes: 10 additions & 2 deletions test/test_algorithm.py
Expand Up @@ -846,13 +846,17 @@ def test_key_value_sorter(ctx_factory):
@pytest.mark.parametrize("dtype", [
np.int32,
np.float32,
# np.float64
np.float64
])
@pytest.mark.bitonic
def test_bitonic_sort(ctx_factory, size, dtype):
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

if (ctx.devices[0].platform.name == "Portable Computing Language"
and dtype == np.float64):
pytest.xfail("Double precision bitonic sort doesn't work on POCL")

import pyopencl.clrandom as clrandom
from pyopencl.bitonic_sort import BitonicSort

Expand All @@ -871,13 +875,17 @@ def test_bitonic_sort(ctx_factory, size, dtype):
@pytest.mark.parametrize("dtype", [
np.int32,
np.float32,
# np.float64
np.float64
])
@pytest.mark.bitonic
def test_bitonic_argsort(ctx_factory, size, dtype):
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

if (ctx.devices[0].platform.name == "Portable Computing Language"
and dtype == np.float64):
pytest.xfail("Double precision bitonic sort doesn't work on POCL")

import pyopencl.clrandom as clrandom
from pyopencl.bitonic_sort import BitonicSort

Expand Down

0 comments on commit c7ac1c2

Please sign in to comment.