Skip to content

Commit ef03323

Browse files
authored
[SYCL] Improve is_compatible (#9769)
Modify `is_compatible` to check if specific target is defined with `-fsycl-targets` and change the result. Previously there was a situation when kernel is compatible with the device by aspects, but actually it fails to run on this device as it was compiled for another target device. Related spec change: KhronosGroup/SYCL-Docs#381 Resolves #7561
1 parent c6a9eee commit ef03323

11 files changed

+159
-6
lines changed

sycl/source/kernel_bundle.cpp

Lines changed: 54 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9+
#include <detail/device_binary_image.hpp>
910
#include <detail/kernel_bundle_impl.hpp>
1011
#include <detail/kernel_id_impl.hpp>
1112
#include <detail/program_manager/program_manager.hpp>
@@ -290,12 +291,59 @@ std::vector<kernel_id> get_kernel_ids() {
290291
}
291292

292293
bool is_compatible(const std::vector<kernel_id> &KernelIDs, const device &Dev) {
293-
std::set<detail::RTDeviceBinaryImage *> BinImages =
294-
detail::ProgramManager::getInstance().getRawDeviceImages(KernelIDs);
295-
return std::all_of(BinImages.begin(), BinImages.end(),
296-
[&Dev](const detail::RTDeviceBinaryImage *Img) {
297-
return doesDevSupportDeviceRequirements(Dev, *Img);
298-
});
294+
if (KernelIDs.empty())
295+
return false;
296+
// TODO: also need to check that the architecture specified by the
297+
// "-fsycl-targets" flag matches the device when we are able to get the
298+
// device's arch.
299+
auto doesImageTargetMatchDevice = [](const device &Dev,
300+
const detail::RTDeviceBinaryImage &Img) {
301+
const char *Target = Img.getRawData().DeviceTargetSpec;
302+
auto BE = Dev.get_backend();
303+
// ESIMD emulator backend is only compatible with esimd kernels.
304+
if (BE == sycl::backend::ext_intel_esimd_emulator) {
305+
pi_device_binary_property Prop = Img.getProperty("isEsimdImage");
306+
return (Prop && (detail::DeviceBinaryProperty(Prop).asUint32() != 0));
307+
}
308+
if (strcmp(Target, __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64) == 0) {
309+
return (BE == sycl::backend::opencl ||
310+
BE == sycl::backend::ext_oneapi_level_zero);
311+
} else if (strcmp(Target, __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64) ==
312+
0) {
313+
return Dev.is_cpu();
314+
} else if (strcmp(Target, __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_GEN) ==
315+
0) {
316+
return Dev.is_gpu() && (BE == sycl::backend::opencl ||
317+
BE == sycl::backend::ext_oneapi_level_zero);
318+
} else if (strcmp(Target, __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_FPGA) ==
319+
0) {
320+
return Dev.is_accelerator();
321+
} else if (strcmp(Target, __SYCL_PI_DEVICE_BINARY_TARGET_NVPTX64) == 0) {
322+
return BE == sycl::backend::ext_oneapi_cuda;
323+
} else if (strcmp(Target, __SYCL_PI_DEVICE_BINARY_TARGET_AMDGCN) == 0) {
324+
return BE == sycl::backend::ext_oneapi_hip;
325+
}
326+
327+
return false;
328+
};
329+
330+
// One kernel may be contained in several binary images depending on the
331+
// number of targets. This kernel is compatible with the device if there is
332+
// at least one image (containing this kernel) whose aspects are supported by
333+
// the device and whose target matches the device.
334+
for (const auto &KernelID : KernelIDs) {
335+
std::set<detail::RTDeviceBinaryImage *> BinImages =
336+
detail::ProgramManager::getInstance().getRawDeviceImages({KernelID});
337+
338+
if (std::none_of(BinImages.begin(), BinImages.end(),
339+
[&](const detail::RTDeviceBinaryImage *Img) {
340+
return doesDevSupportDeviceRequirements(Dev, *Img) &&
341+
doesImageTargetMatchDevice(Dev, *Img);
342+
}))
343+
return false;
344+
}
345+
346+
return true;
299347
}
300348

301349
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#include <sycl/sycl.hpp>
2+
3+
int main() {
4+
sycl::device dev;
5+
if (sycl::is_compatible<class Kernel>(dev)) {
6+
sycl::queue q(dev);
7+
q.submit([&](sycl::handler &cgh) {
8+
cgh.parallel_for<class Kernel>(sycl::range<1>{1},
9+
[=](sycl::id<1> Id) { int x = Id[0]; });
10+
}).wait_and_throw();
11+
return 0;
12+
}
13+
return 1;
14+
}
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
// REQUIRES: hip_amd, opencl, gpu, cpu
2+
3+
// RUN: %clangxx -fsycl -Xsycl-target-backend=amdgcn-amd-amdhsa --offload-arch=gfx906 -fsycl-targets=amdgcn-amd-amdhsa %S/Inputs/is_compatible_with_env.cpp -o %t.out
4+
5+
// RUN: env ONEAPI_DEVICE_SELECTOR=hip:gpu %{run} %t.out
6+
// RUN: env ONEAPI_DEVICE_SELECTOR=opencl:gpu %{run} not %t.out
7+
// RUN: env ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run} not %t.out
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
// REQUIRES: esimd_emulator
2+
3+
// RUN: %clangxx -fsycl %S/Inputs/is_compatible_with_env.cpp %t_negative_case.out
4+
// RUN: env ONEAPI_DEVICE_SELECTOR=ext_intel_esimd_emulator:gpu %{run} not %t_negative_case.out
5+
6+
// RUN: %{build} -o %t.out
7+
// RUN: %{run} %t.out
8+
9+
// Just an example from
10+
// https://github.com/intel/llvm/tree/sycl/sycl/doc/extensions/experimental/sycl_ext_intel_esimd
11+
12+
#include <sycl/ext/intel/esimd.hpp>
13+
#include <sycl/sycl.hpp>
14+
15+
int main() {
16+
sycl::device dev;
17+
if (sycl::is_compatible<class Test>(dev)) {
18+
float *A = malloc_shared<float>(Size, q);
19+
float *B = malloc_shared<float>(Size, q);
20+
float *C = malloc_shared<float>(Size, q);
21+
22+
for (unsigned i = 0; i != Size; i++) {
23+
A[i] = B[i] = i;
24+
}
25+
26+
q.submit([&](handler &cgh) {
27+
cgh.parallel_for<class Test>(Size / VL,
28+
[=](id<1> i) [[intel::sycl_explicit_simd]] {
29+
auto offset = i * VL;
30+
// pointer arithmetic, so offset is in
31+
// elements:
32+
simd<float, VL> va(A + offset);
33+
simd<float, VL> vb(B + offset);
34+
simd<float, VL> vc = va + vb;
35+
vc.copy_to(C + offset);
36+
});
37+
}).wait_and_throw();
38+
return 0;
39+
}
40+
return 1;
41+
}
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
// REQUIRES: cuda, opencl, gpu, cpu
2+
3+
// RUN: %clangxx -fsycl -fsycl-targets=nvptx64-nvidia-cuda %S/Inputs/is_compatible_with_env.cpp -o %t.out
4+
5+
// RUN: env ONEAPI_DEVICE_SELECTOR=cuda:gpu %{run} %t.out
6+
// RUN: env ONEAPI_DEVICE_SELECTOR=opencl:gpu %{run} not %t.out
7+
// RUN: env ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run} not %t.out
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
// REQUIRES: ocloc, level_zero, gpu, cpu
2+
3+
// RUN: %clangxx -fsycl -fsycl-targets=spir64_fpga,spir64_gen -Xsycl-target-backend "-device *" %S/Inputs/is_compatible_with_env.cpp -o %t.out
4+
5+
// RUN: env ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run} not %t.out
6+
// RUN: env ONEAPI_DEVICE_SELECTOR=opencl:acc %{run} %t.out
7+
// RUN: env ONEAPI_DEVICE_SELECTOR=opencl:gpu %{run} %t.out
8+
// RUN: env ONEAPI_DEVICE_SELECTOR=level_zero:gpu %{run} %t.out
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
// REQUIRES: cuda, opencl, gpu, cpu
2+
3+
// RUN: %clangxx -fsycl -fsycl-targets=spir64 %S/Inputs/is_compatible_with_env.cpp -o %t.out
4+
5+
// RUN: env ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run} %t.out
6+
// RUN: env ONEAPI_DEVICE_SELECTOR=opencl:gpu %{run} %t.out
7+
// RUN: env ONEAPI_DEVICE_SELECTOR=cuda:gpu %{run} not %t.out
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
// REQUIRES: opencl-aot, accelerator, gpu, cpu
2+
3+
// RUN: %clangxx -fsycl -fsycl-targets=spir64_fpga %S/Inputs/is_compatible_with_env.cpp -o %t.out
4+
5+
// RUN: env ONEAPI_DEVICE_SELECTOR=opencl:fpga %{run} %t.out
6+
// RUN: env ONEAPI_DEVICE_SELECTOR=*:gpu %{run} not %t.out
7+
// RUN: env ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run} not %t.out
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
// REQUIRES: ocloc, gpu, level_zero, cpu
2+
3+
// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend "-device *" %S/Inputs/is_compatible_with_env.cpp -o %t.out
4+
5+
// RUN: env ONEAPI_DEVICE_SELECTOR=opencl:gpu %{run} %t.out
6+
// RUN: env ONEAPI_DEVICE_SELECTOR=level_zero:gpu %{run} %t.out
7+
// RUN: env ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run} not %t.out
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
// REQUIRES: opencl-aot, cpu, gpu, level_zero
2+
3+
// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 %S/Inputs/is_compatible_with_env.cpp -o %t.out
4+
5+
// RUN: env ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run} %t.out
6+
// RUN: env ONEAPI_DEVICE_SELECTOR=opencl:gpu %{run} not %t.out
7+
// RUN: env ONEAPI_DEVICE_SELECTOR=level_zero:gpu %{run} not %t.out

0 commit comments

Comments
 (0)