Skip to content

Commit

Permalink
Add smem_alloc_granularity.
Browse files Browse the repository at this point in the history
  • Loading branch information
inducer committed Aug 13, 2011
1 parent 44007e8 commit 280d3d9
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 2 deletions.
6 changes: 5 additions & 1 deletion doc/source/util.rst
Expand Up @@ -103,10 +103,14 @@ Device Metadata and Occupancy
.. attribute:: registers
.. attribute:: shared_memory
.. attribute:: smem_granularity

The number of threads that participate in banked, simultaneous access
to shared memory.

.. attribute:: smem_alloc_granularity

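The size, in bytes, of the smallest possible (non-empty) shared memory allocation. Shared memory requests are rounded up to a multiple of this value when computing occupancy.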

.. method:: align_bytes(word_size=4)

The distance between global memory base addresses that
Expand Down
4 changes: 3 additions & 1 deletion pycuda/tools.py
Expand Up @@ -240,8 +240,10 @@ def __init__(self, dev=None):
self.shared_memory = dev.get_attribute(drv.device_attribute.MAX_SHARED_MEMORY_PER_BLOCK)

if dev.compute_capability() >= (2,0):
self.smem_alloc_granularity = 128
self.smem_granularity = 32
else:
self.smem_alloc_granularity = 512
self.smem_granularity = 16

if dev.compute_capability() >= (2,0):
Expand Down Expand Up @@ -291,7 +293,7 @@ def __init__(self, devdata, threads, shared_mem=0, registers=0):

# copied literally from occupancy calculator
alloc_warps = _int_ceiling(threads/devdata.warp_size)
alloc_smem = _int_ceiling(shared_mem, devdata.smem_granularity)
alloc_smem = _int_ceiling(shared_mem, devdata.smem_alloc_granularity)
if devdata.register_allocation_unit == "warp":
alloc_regs = alloc_warps*32*registers
elif devdata.register_allocation_unit == "block":
Expand Down

0 comments on commit 280d3d9

Please sign in to comment.