add option to compile nvcc's fatbin

A fatbin embeds prebuilt cubins for a set of specified real architectures and PTX assemblies for a set of specified virtual architectures, allowing the driver to load a prebuilt cubin if there is one for the current real GPU, or otherwise to assemble the PTX for the closest virtual GPU architecture. This can be used to distribute CUDA-powered applications without needing to install the NVIDIA CUDA Toolkit (nvcc) and a development environment on the target platform. .cu files can be precompiled to fatbins on a build server with: fatbin = pycuda.compiler.compile( cu_file_text, options=[ "-gencode", "arch=compute_20,code=compute_20", "-gencode", "arch=compute_20,code=sm_20", "-gencode", "arch=compute_30,code=compute_30", "-gencode", "arch=compute_30,code=sm_30", ], target="fatbin") The resulting fatbins can then be distributed to machines without nvcc and loaded using: cuda_module = pycuda.driver.module_from_buffer(fatbin)
inducer · Jun 17, 2015 · 45f0608 · 45f0608
1 parent 52fe395
commit 45f0608
Showing 1 changed file with 2 additions and 2 deletions.
diff --git a/pycuda/compiler.py b/pycuda/compiler.py
@@ -69,7 +69,7 @@ def preprocess_source(source, options, nvcc):
 def compile_plain(source, options, keep, nvcc, cache_dir, target="cubin"):
     from os.path import join
 
-    assert target in ["cubin", "ptx"]
+    assert target in ["cubin", "ptx", "fatbin"]
 
     if cache_dir:
         checksum = _new_md5()
@@ -191,7 +191,7 @@ def compile(source, nvcc="nvcc", options=None, keep=False,
         no_extern_c=False, arch=None, code=None, cache_dir=None,
         include_dirs=[], target="cubin"):
 
-    assert target in ["cubin", "ptx"]
+    assert target in ["cubin", "ptx", "fatbin"]
 
     if not no_extern_c:
         source = 'extern "C" {\n%s\n}\n' % source