Skip to content

Commit

Permalink
Merge pull request #129 from Shane-J-Latham/feature-clinfo-cl_ext
Browse files Browse the repository at this point in the history
Ship (and optionally use) ext.h from clinfo to replace CL/cl_ext.h
  • Loading branch information
inducer committed Jun 14, 2016
2 parents 1f6d1d1 + 21b471a commit b53078a
Show file tree
Hide file tree
Showing 12 changed files with 428 additions and 10 deletions.
6 changes: 6 additions & 0 deletions cl_types.h
Expand Up @@ -105,6 +105,12 @@ typedef struct _cl_buffer_region {

/* cl_ext.h */

/* cl_device_topology_amd: two overlaid views of the same storage.
 *   raw  - a cl_uint type tag followed by five cl_uint words.
 *   pcie - the same leading type tag, 17 unused cl_char bytes, then one
 *          cl_char each for bus, device and function. */
typedef union
{
struct { cl_uint type; cl_uint data[5]; } raw;
struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie;
} cl_device_topology_amd;

/*
typedef cl_ulong cl_device_partition_property_ext;
typedef cl_uint cl_image_pitch_info_qcom;
Expand Down
22 changes: 22 additions & 0 deletions doc/make_constants.py
Expand Up @@ -27,11 +27,16 @@
# Each name below is a 2-tuple of strings that is used as a value in the
# attribute-name tables further down in this script.
# NOTE(review): the first item looks like an OpenCL extension name (or core
# CL version) and the second like the release in which the attribute became
# available -- confirm against get_extra_lines().
fission = ("cl_ext_device_fission", "2011.1")
nv_devattr = ("cl_nv_device_attribute_query", "0.92")
gl_sharing = ("cl_khr_gl_sharing", "0.92")
cl_spir_devattr = ("cl_khr_spir", "2016.2")
cl_11 = ("CL_1.1", "0.92")
cl_12 = ("CL_1.2", "2011.2")
cl_12_2015 = ("CL_1.2", "2015.2")
cl_20 = ("CL_2.0", "2015.2")
amd_devattr = ("cl_amd_device_attribute_query", "2013.2")
qcom_hp_devattr = ("cl_qcom_ext_host_ptr", "2016.2")
intel_me_devattr = ("cl_intel_advanced_motion_estimation", "2016.2")
intel_ss_devattr = ("cl_intel_simultaneous_sharing", "2016.2")
altera_temp_devattr = ("cl_altera_device_temperature", "2016.2")


def get_extra_lines(tup):
Expand Down Expand Up @@ -90,13 +95,17 @@ def get_extra_lines(tup):
"NATIVE_VECTOR_WIDTH_DOUBLE": cl_11,
"NATIVE_VECTOR_WIDTH_HALF": cl_11,
"OPENCL_C_VERSION": cl_11,
"SPIR_VERSIONS": cl_spir_devattr,
"COMPUTE_CAPABILITY_MAJOR_NV": nv_devattr,
"COMPUTE_CAPABILITY_MINOR_NV": nv_devattr,
"REGISTERS_PER_BLOCK_NV": nv_devattr,
"WARP_SIZE_NV": nv_devattr,
"GPU_OVERLAP_NV": nv_devattr,
"KERNEL_EXEC_TIMEOUT_NV": nv_devattr,
"INTEGRATED_MEMORY_NV": nv_devattr,
"ATTRIBUTE_ASYNC_ENGINE_COUNT_NV": nv_devattr,
"PCI_BUS_ID_NV": nv_devattr,
"PCI_BUS_SLOT_NV": nv_devattr,

"DOUBLE_FP_CONFIG":
("cl_khr_fp64", "2011.1"),
Expand All @@ -116,6 +125,19 @@ def get_extra_lines(tup):
"GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD": amd_devattr,
"LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD": amd_devattr,
"LOCAL_MEM_BANKS_AMD": amd_devattr,
"THREAD_TRACE_SUPPORTED_AMD": amd_devattr,
"GFXIP_MAJOR_AMD": amd_devattr,
"GFXIP_MINOR_AMD": amd_devattr,
"AVAILABLE_ASYNC_QUEUES_AMD": amd_devattr,

"ME_VERSION_INTEL": intel_me_devattr,
"SIMULTANEOUS_INTEROPS_INTEL": intel_ss_devattr,
"NUM_SIMULTANEOUS_INTEROPS_INTEL": intel_ss_devattr,

"EXT_MEM_PADDING_IN_BYTES_QCOM": qcom_hp_devattr,
"PAGE_SIZE_QCOM": qcom_hp_devattr,

"CORE_TEMPERATURE_ALTERA": altera_temp_devattr,

"MAX_ATOMIC_COUNTERS_EXT":
("cl_ext_atomic_counters_64", "2013.2"),
Expand Down
1 change: 1 addition & 0 deletions pyopencl/__init__.py
Expand Up @@ -165,6 +165,7 @@
Image,
Sampler,
GLTexture,
DeviceTopologyAmd,
)

if _cl.have_gl():
Expand Down
46 changes: 46 additions & 0 deletions pyopencl/cffi_cl.py
Expand Up @@ -169,6 +169,8 @@ def create_inst(val):

if type_ == 'char*':
ret = _ffi_pystr(value)
elif type_ == 'cl_device_topology_amd*':
ret = DeviceTopologyAmd(value.pcie.bus, value.pcie.device, value.pcie.function)
elif type_.startswith('char*['):
ret = list(map(_ffi_pystr, value))
_lib.free_pointer_array(info.value, len(value))
Expand Down Expand Up @@ -1980,4 +1982,48 @@ def __init__(self, context, flags, texture_target, miplevel, texture, dims=None)

# }}}

class DeviceTopologyAmd(object):
    """Python-side holder for a ``cl_device_topology_amd`` record.

    Allocates a new ``cl_device_topology_amd*`` (kept in ``self.ptr``) and
    exposes the ``bus``, ``device`` and ``function`` fields of its ``pcie``
    view as read/write properties.

    :arg bus: initial value of the ``pcie.bus`` field.
    :arg device: initial value of the ``pcie.device`` field.
    :arg function: initial value of the ``pcie.function`` field.
    :raises ValueError: if any of the three values does not fit into the
        signed 8-bit ``cl_char`` field it is stored in.
    """
    # Hack around fmt.__dict__ check in test_wrapper.py
    __dict__ = {}
    __slots__ = ('ptr',)

    def __init__(self, bus=0, device=0, function=0):
        self.ptr = _ffi.new("cl_device_topology_amd*")
        # Go through the property setters so every field is range-checked.
        self.bus = bus
        self.device = device
        self.function = function

    def _check_range(self, value, prop=None):
        """Raise :exc:`ValueError` unless *value* fits into a ``cl_char``.

        :arg value: the integer about to be stored.
        :arg prop: optional name of the field being set; when given it is
            included in the error message.
        """
        # cl_char is a signed 8-bit integer, so -128 is a legal value; values
        # read back from a device (e.g. a bus byte of 0x80) can be -128.
        if (value < -128) or (value > 127):
            if prop is None:
                raise ValueError(
                    "Value %s not in range [-128, 127]." % (value,))
            raise ValueError(
                "Value %s for '%s' not in range [-128, 127]."
                % (value, prop))

    # NOTE(review): _cffi_property is defined elsewhere in this module;
    # presumably it resolves to the 'pcie' member of the returned pointer --
    # confirm.
    @_cffi_property('pcie')
    def _pcie(self):
        return self.ptr

    @property
    def bus(self):
        """Value of the ``pcie.bus`` field."""
        return self._pcie.bus

    @bus.setter
    def bus(self, value):
        self._check_range(value, 'bus')
        self._pcie.bus = value

    @property
    def device(self):
        """Value of the ``pcie.device`` field."""
        return self._pcie.device

    @device.setter
    def device(self, value):
        # Same cl_char field type as `bus`, so apply the same check.
        self._check_range(value, 'device')
        self._pcie.device = value

    @property
    def function(self):
        """Value of the ``pcie.function`` field."""
        return self._pcie.function

    @function.setter
    def function(self, value):
        # Same cl_char field type as `bus`, so apply the same check.
        self._check_range(value, 'function')
        self._pcie.function = value

# vim: foldmethod=marker
8 changes: 8 additions & 0 deletions setup.py
Expand Up @@ -72,6 +72,11 @@ def get_config_schema():
return ConfigSchema([
Switch("CL_TRACE", False, "Enable OpenCL API tracing"),
Switch("CL_ENABLE_GL", False, "Enable OpenCL<->OpenGL interoperability"),
Switch("CL_USE_SHIPPED_EXT", True,
"Use the pyopencl version of CL/cl_ext.h which includes" +
" a broader range of vendor-specific OpenCL extension attributes" +
" than the standard Khronos (or vendor specific) CL/cl_ext.h."
),
Option("CL_PRETEND_VERSION", None,
"Dotted CL version (e.g. 1.2) which you'd like to use."),

Expand Down Expand Up @@ -107,6 +112,9 @@ def main():
if conf["CL_ENABLE_GL"]:
extra_defines["HAVE_GL"] = 1

if conf["CL_USE_SHIPPED_EXT"]:
extra_defines["PYOPENCL_USE_SHIPPED_EXT"] = 1

if conf["CL_PRETEND_VERSION"]:
try:
major, minor = [int(x) for x in conf["CL_PRETEND_VERSION"].split(".")]
Expand Down
11 changes: 11 additions & 0 deletions src/c_wrapper/clhelper.h
Expand Up @@ -243,4 +243,15 @@ operator<<(std::ostream &stm, const cl_image_format &fmt)
return stm;
}

#ifdef CL_DEVICE_TOPOLOGY_AMD
// Writes the pcie view of an AMD device-topology record to `stm` as
// "pcie.bus: B,\npcie.device: D,\npcie.function: F,\npcie.type: T"
// and returns `stm` so calls can be chained.
static PYOPENCL_INLINE std::ostream&
operator<<(std::ostream &stm, const cl_device_topology_amd &topol)
{
    // bus/device/function are cl_char, an 8-bit character type: streamed
    // directly they would be emitted as raw characters rather than numbers,
    // so widen them to int first. `type` is a cl_uint and prints as-is.
    stm << "pcie.bus: " << static_cast<int>(topol.pcie.bus)
        << ",\npcie.device: " << static_cast<int>(topol.pcie.device)
        << ",\npcie.function: " << static_cast<int>(topol.pcie.function)
        << ",\npcie.type: " << topol.pcie.type;
    return stm;
}
#endif
#endif
165 changes: 165 additions & 0 deletions src/c_wrapper/clinfo_ext.h
@@ -0,0 +1,165 @@
/* Include OpenCL header, and define OpenCL extensions, since what is and is not
* available in the official headers is very system-dependent */

#ifndef _EXT_H
#define _EXT_H

#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif

/* These two defines were introduced in the 1.2 headers
* on 2012-11-30, so earlier versions don't have them
* (e.g. Debian wheezy)
*/

/* Only the first name is probed; when it is absent, both tokens are
* supplied together. */
#ifndef CL_DEVICE_IMAGE_PITCH_ALIGNMENT
#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT 0x104A
#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT 0x104B
#endif

/* 2.0 headers are not very common for the time being, so
* let's copy the defines for the new CL_DEVICE_* properties
* here.
*/
/* Everything up to the matching #endif is skipped when the OpenCL header
* included above already defines CL_VERSION_2_0. */
#ifndef CL_VERSION_2_0
#define CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS 0x104C
#define CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE 0x104D
#define CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES 0x104E
#define CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE 0x104F
#define CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE 0x1050
#define CL_DEVICE_MAX_ON_DEVICE_QUEUES 0x1051
#define CL_DEVICE_MAX_ON_DEVICE_EVENTS 0x1052
#define CL_DEVICE_SVM_CAPABILITIES 0x1053
#define CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE 0x1054
#define CL_DEVICE_MAX_PIPE_ARGS 0x1055
#define CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS 0x1056
#define CL_DEVICE_PIPE_MAX_PACKET_SIZE 0x1057
#define CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT 0x1058
#define CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT 0x1059
#define CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT 0x105A

/* Single-bit flags (bits 0..3).
* NOTE(review): presumably the bits reported for CL_DEVICE_SVM_CAPABILITIES --
* confirm against the OpenCL 2.0 specification. */
#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER (1 << 0)
#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER (1 << 1)
#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM (1 << 2)
#define CL_DEVICE_SVM_ATOMICS (1 << 3)

/* Declared as a cl_bitfield, like the flags above suggest. */
typedef cl_bitfield cl_device_svm_capabilities;
#endif

/* Likewise for 2.1: these four tokens are supplied only when CL_VERSION_2_1
* is not already defined. */
#ifndef CL_VERSION_2_1
#define CL_PLATFORM_HOST_TIMER_RESOLUTION 0x0905
#define CL_DEVICE_IL_VERSION 0x105B
#define CL_DEVICE_MAX_NUM_SUB_GROUPS 0x105C
#define CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS 0x105D
#endif

/*
* Extensions
*/

/* The tokens from here through the cl_amd_device_attribute_query list are
* defined unconditionally (no #ifndef guard); each group is headed by the
* name of the extension it belongs to. */

/* cl_khr_icd */
#define CL_PLATFORM_ICD_SUFFIX_KHR 0x0920
/* Negative value, unlike the 0x.... tokens around it.
* NOTE(review): presumably an error code rather than a query token -- confirm. */
#define CL_PLATFORM_NOT_FOUND_KHR -1001


/* cl_khr_fp64 */
#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032

/* cl_khr_fp16 */
#define CL_DEVICE_HALF_FP_CONFIG 0x1033

/* cl_khr_terminate_context */
#define CL_DEVICE_TERMINATE_CAPABILITY_KHR 0x200F

/* cl_nv_device_attribute_query */
#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000
#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001
#define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002
#define CL_DEVICE_WARP_SIZE_NV 0x4003
#define CL_DEVICE_GPU_OVERLAP_NV 0x4004
#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005
#define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006
#define CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV 0x4007
#define CL_DEVICE_PCI_BUS_ID_NV 0x4008
#define CL_DEVICE_PCI_SLOT_ID_NV 0x4009

/* cl_ext_atomic_counters_{32,64} */
#define CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT 0x4032

/* cl_amd_device_attribute_query */
/* Note the numbering is not contiguous: the list jumps from 0x4039 to
* 0x4040, so 0x403A-0x403F do not appear in this group. */
#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036
#define CL_DEVICE_TOPOLOGY_AMD 0x4037
#define CL_DEVICE_BOARD_NAME_AMD 0x4038
#define CL_DEVICE_GLOBAL_FREE_MEMORY_AMD 0x4039
#define CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD 0x4040
#define CL_DEVICE_SIMD_WIDTH_AMD 0x4041
#define CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD 0x4042
#define CL_DEVICE_WAVEFRONT_WIDTH_AMD 0x4043
#define CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD 0x4044
#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD 0x4045
#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD 0x4046
#define CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD 0x4047
#define CL_DEVICE_LOCAL_MEM_BANKS_AMD 0x4048
#define CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD 0x4049
#define CL_DEVICE_GFXIP_MAJOR_AMD 0x404A
#define CL_DEVICE_GFXIP_MINOR_AMD 0x404B
#define CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD 0x404C

/* Supplies the PCIe topology type tag and the cl_device_topology_amd union
* together, but only when CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD is not already
* defined. */
#ifndef CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD
#define CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD 1

/* Two overlaid views of the same storage:
*   raw  - a cl_uint type tag followed by five cl_uint words;
*   pcie - the same leading type tag, 17 unused cl_char bytes, then one
*          cl_char each for bus, device and function.
* NOTE(review): assuming a 4-byte cl_uint and a 1-byte cl_char, both views
* are 24 bytes and bus/device/function are the final three bytes -- confirm. */
typedef union
{
struct { cl_uint type; cl_uint data[5]; } raw;
struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie;
} cl_device_topology_amd;
#endif

/* cl_amd_offline_devices */
#define CL_CONTEXT_OFFLINE_DEVICES_AMD 0x403F

/* cl_ext_device_fission */
/* Extension-name macro set to 1.
* NOTE(review): presumably signals that the fission declarations below are
* available -- confirm no other included header defines it differently. */
#define cl_ext_device_fission 1

/* Partition property values are carried as cl_ulong. */
typedef cl_ulong cl_device_partition_property_ext;

/* Partition-scheme tokens; 0x4052 is deliberately listed under two names. */
#define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050
#define CL_DEVICE_PARTITION_BY_COUNTS_EXT 0x4051
#define CL_DEVICE_PARTITION_BY_NAMES_EXT 0x4052
#define CL_DEVICE_PARTITION_BY_NAMES_INTEL 0x4052 /* cl_intel_device_partition_by_names */
#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT 0x4053

#define CL_DEVICE_PARENT_DEVICE_EXT 0x4054
#define CL_DEVICE_PARTITION_TYPES_EXT 0x4055
#define CL_DEVICE_AFFINITY_DOMAINS_EXT 0x4056
#define CL_DEVICE_REFERENCE_COUNT_EXT 0x4057
#define CL_DEVICE_PARTITION_STYLE_EXT 0x4058

/* Affinity-domain selector values. These are plain values rather than
* single-bit flags (L3 cache is 0x3). */
#define CL_AFFINITY_DOMAIN_L1_CACHE_EXT 0x1
#define CL_AFFINITY_DOMAIN_L2_CACHE_EXT 0x2
#define CL_AFFINITY_DOMAIN_L3_CACHE_EXT 0x3
#define CL_AFFINITY_DOMAIN_L4_CACHE_EXT 0x4
#define CL_AFFINITY_DOMAIN_NUMA_EXT 0x10
#define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT 0x100

/* cl_intel_advanced_motion_estimation */
#define CL_DEVICE_ME_VERSION_INTEL 0x407E

/* cl_qcom_ext_host_ptr */
#define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM 0x40A0
#define CL_DEVICE_PAGE_SIZE_QCOM 0x40A1

/* cl_khr_spir */
#define CL_DEVICE_SPIR_VERSIONS 0x40E0

/* cl_altera_device_temperature */
#define CL_DEVICE_CORE_TEMPERATURE_ALTERA 0x40F3

/* cl_intel_simultaneous_sharing */
#define CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL 0x4104
#define CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL 0x4105

#endif

0 comments on commit b53078a

Please sign in to comment.