Skip to content

Commit 775d0d3

Browse files
authored
Merge pull request #2096 from IntelPython/update_cuda_build
Enable architecture selection for `DPCTL_TARGET_CUDA`
2 parents 35a8c26 + 5bf20fd commit 775d0d3

File tree

2 files changed

+57
-11
lines changed

2 files changed

+57
-11
lines changed

CMakeLists.txt

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,13 @@ option(DPCTL_GENERATE_COVERAGE_FOR_PYBIND11_EXTENSIONS
2525
"Build dpctl pybind11 offloading extensions with coverage instrumentation"
2626
OFF
2727
)
28-
option(DPCTL_TARGET_CUDA
29-
"Build DPCTL to target CUDA devices"
30-
OFF
28+
29+
set(DPCTL_TARGET_CUDA
30+
""
31+
CACHE STRING
32+
"Build DPCTL to target CUDA device. \
33+
Set to a truthy value (e.g., ON, TRUE) to use default architecture (sm_50), \
34+
or to a specific architecture like sm_80."
3135
)
3236
set(DPCTL_TARGET_HIP
3337
""
@@ -51,15 +55,24 @@ set(_dpctl_sycl_target_compile_options)
5155
set(_dpctl_sycl_target_link_options)
5256

5357
set(_dpctl_sycl_targets)
58+
set(_dpctl_cuda_arch)
5459
set(_dpctl_amd_targets)
60+
5561
if ("x${DPCTL_SYCL_TARGETS}" STREQUAL "x")
5662
if (DPCTL_TARGET_CUDA)
57-
set(_dpctl_sycl_targets "nvptx64-nvidia-cuda,spir64-unknown-unknown")
58-
else()
59-
if (DEFINED ENV{DPCTL_TARGET_CUDA})
60-
set(_dpctl_sycl_targets "nvptx64-nvidia-cuda,spir64-unknown-unknown")
63+
if(DPCTL_TARGET_CUDA MATCHES "^sm_")
64+
set(_dpctl_cuda_arch ${DPCTL_TARGET_CUDA})
65+
elseif(DPCTL_TARGET_CUDA MATCHES "^(ON|TRUE|YES|Y|1)$")
66+
set(_dpctl_cuda_arch "sm_50")
67+
else()
68+
message(FATAL_ERROR
69+
"Invalid value for DPCTL_TARGET_CUDA: \"${DPCTL_TARGET_CUDA}\". "
70+
"Expected 'ON', 'TRUE', 'YES', 'Y', '1', or a CUDA architecture like 'sm_80'."
71+
)
6172
endif()
73+
set(_dpctl_sycl_targets "nvidia_gpu_${_dpctl_cuda_arch},spir64-unknown-unknown")
6274
endif()
75+
6376
if (NOT "x${DPCTL_TARGET_HIP}" STREQUAL "x")
6477
set(_dpctl_amd_targets ${DPCTL_TARGET_HIP})
6578
if(_dpctl_sycl_targets)

docs/doc_sources/beginners_guides/installation.rst

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -159,13 +159,41 @@ The following plugins from CodePlay are supported:
159159
.. _codeplay_nv_plugin: https://developer.codeplay.com/products/oneapi/nvidia/
160160
.. _codeplay_amd_plugin: https://developer.codeplay.com/products/oneapi/amd/
161161

162-
``dpctl`` can be built for CUDA devices as follows:
162+
Builds for CUDA and AMD devices internally use SYCL alias targets that are passed to the compiler.
163+
A full list of SYCL alias targets is available in the
164+
`DPC++ Compiler User Manual <https://intel.github.io/llvm/UsersManual.html>`_.
165+
166+
CUDA build
167+
~~~~~~~~~~
168+
169+
``dpctl`` can be built for CUDA devices using the ``DPCTL_TARGET_CUDA`` CMake option,
170+
which accepts a specific compute architecture string:
171+
172+
.. code-block:: bash
173+
174+
python scripts/build_locally.py --verbose --cmake-opts="-DDPCTL_TARGET_CUDA=sm_80"
175+
176+
To use the default architecture (``sm_50``),
177+
set ``DPCTL_TARGET_CUDA`` to a value such as ``ON``, ``TRUE``, ``YES``, ``Y``, or ``1``:
163178

164179
.. code-block:: bash
165180
166181
python scripts/build_locally.py --verbose --cmake-opts="-DDPCTL_TARGET_CUDA=ON"
167182
168-
And for AMD devices
183+
Note that with this setting kernels are built for the default architecture (``sm_50``), allowing them to work on a
184+
wider range of architectures, but limiting the usage of more recent CUDA features.
185+
186+
For reference, compute architecture strings like ``sm_80`` correspond to specific
187+
CUDA Compute Capabilities (e.g., Compute Capability 8.0 corresponds to ``sm_80``).
188+
A complete mapping between NVIDIA GPU models and their respective
189+
Compute Capabilities can be found in the official
190+
`CUDA GPU Compute Capability <https://developer.nvidia.com/cuda-gpus>`_ documentation.
191+
192+
AMD build
193+
~~~~~~~~~
194+
195+
``dpctl`` can be built for AMD devices using the ``DPCTL_TARGET_HIP`` CMake option,
196+
which requires specifying a compute architecture string:
169197

170198
.. code-block:: bash
171199
@@ -174,8 +202,13 @@ And for AMD devices
174202
Note that the `oneAPI for AMD GPUs` plugin requires the architecture be specified and only
175203
one architecture can be specified at a time.
176204

177-
It is, however, possible to build for Intel devices, CUDA devices, and an AMD device
178-
architecture all at once:
205+
Multi-target build
206+
~~~~~~~~~~~~~~~~~~
207+
208+
By default, building ``dpctl`` from source enables support for Intel devices only.
209+
Extending the build with a custom SYCL target additionally enables support for CUDA or AMD
210+
devices in ``dpctl``. Support can also be extended to enable both CUDA and AMD
211+
devices at the same time:
179212

180213
.. code-block:: bash
181214

0 commit comments

Comments
 (0)