Skip to content

Commit

Permalink
Adapt work group size based on available lmem
Browse files (browse the repository at this point in the history)
  • Loading branch information
inducer committed Jul 15, 2015
1 parent 2181288 commit 2b98b07
Showing 1 changed file with 25 additions and 1 deletion.
26 changes: 25 additions & 1 deletion pyopencl/bitonic_sort.py
Expand Up @@ -176,7 +176,31 @@ def sort_b_prepare_wl(self, argsort, key_dtype, idx_dtype, shape, axis):
allowb8 = True
allowb16 = True

wg = min(ds, self.context.devices[0].max_work_group_size)
dev = self.context.devices[0]

# {{{ find workgroup size

wg = min(ds, dev.max_work_group_size)

available_lmem = dev.local_mem_size
while True:
lmem_size = wg*4*key_dtype.itemsize
if argsort:
lmem_size += wg*4*idx_dtype.itemsize

if lmem_size + 512 > available_lmem:
wg //= 2

if not wg:
raise RuntimeError(
"too little local memory available on '%s'"
% dev)

else:
break

# }}}

length = wg >> 1
prg = self.get_program(
'BLO', argsort, (1, 1, key_ctype, idx_ctype, ds, ns))
Expand Down

0 comments on commit 2b98b07

Please sign in to comment.