From c9ad9abeeb7cc32fee82d180a5c71036e4e159f0 Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Wed, 30 Oct 2024 09:40:59 -0700 Subject: [PATCH 01/30] Add tests for work group memory extension --- .../{swap_test.cpp => basic_usage.cpp} | 66 ++++- .../reduction_free_function.cpp | 263 +++++++++++++++++ .../WorkGroupMemory/reduction_lambda.cpp | 268 ++++++++++++++++++ .../WorkGroupMemory/api_misuse_test.cpp | 45 +++ .../extensions/WorkGroupMemory/api_test.cpp | 104 +++++++ 5 files changed, 731 insertions(+), 15 deletions(-) rename sycl/test-e2e/WorkGroupMemory/{swap_test.cpp => basic_usage.cpp} (88%) create mode 100644 sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp create mode 100644 sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp create mode 100644 sycl/test/extensions/WorkGroupMemory/api_misuse_test.cpp create mode 100644 sycl/test/extensions/WorkGroupMemory/api_test.cpp diff --git a/sycl/test-e2e/WorkGroupMemory/swap_test.cpp b/sycl/test-e2e/WorkGroupMemory/basic_usage.cpp similarity index 88% rename from sycl/test-e2e/WorkGroupMemory/swap_test.cpp rename to sycl/test-e2e/WorkGroupMemory/basic_usage.cpp index 13fbde212a47d..3a2fa5537e924 100644 --- a/sycl/test-e2e/WorkGroupMemory/swap_test.cpp +++ b/sycl/test-e2e/WorkGroupMemory/basic_usage.cpp @@ -5,6 +5,8 @@ #include #include #include +#include + namespace syclexp = sycl::ext::oneapi::experimental; sycl::queue q; @@ -50,7 +52,9 @@ template void swap_scalar(T &a, T &b) { sycl::nd_range<1> ndr{size, wgsize}; cgh.parallel_for(ndr, [=](sycl::nd_item<1> it) { syclexp::work_group_memory temp2; - temp2 = temp; // temp and temp2 have the same underlying data + temp2 = temp; // temp and temp2 have the same underlying data + assert(&temp2 == &temp); // check that both objects return same + // underlying address after assignment temp = acc_a[0]; acc_a[0] = acc_b[0]; acc_b[0] = temp2; // safe to use temp2 @@ -86,6 +90,8 @@ template void swap_scalar(T &a, T &b) { assert(a == old_b && b == old_a && "Incorrect swap!"); // Same as above but instead of using multi_ptr, use address-of operator. + // Also verify that get_multi_ptr() returns the same address as address-of + // operator. { sycl::buffer buf_a{&a, 1}; sycl::buffer buf_b{&b, 1}; @@ -96,6 +102,7 @@ template void swap_scalar(T &a, T &b) { syclexp::work_group_memory temp2{cgh}; sycl::nd_range<1> ndr{size, wgsize}; cgh.parallel_for(ndr, [=](sycl::nd_item<> it) { + assert(&temp == temp.get_multi_ptr().get()); temp = acc_a[0]; acc_a[0] = acc_b[0]; temp2 = *(&temp); @@ -294,6 +301,8 @@ void swap_array_2d(T (&a)[N][N], T (&b)[N][N], size_t batch_size) { temp[i][j] = acc_a[i][j]; acc_a[i][j] = acc_b[i][j]; syclexp::work_group_memory temp2{temp}; + assert(&temp2 == &temp); // check both objects return same underlying + // address after copy construction. acc_b[i][j] = temp2[i][j]; }); }); @@ -342,28 +351,28 @@ void swap_array_2d(T (&a)[N][N], T (&b)[N][N], size_t batch_size) { // so we can verify that each work-item sees the value written by its leader. // The test also is a sanity check that different work groups get different // work group memory locations as otherwise we'd have data races. -void coherency(size_t size, size_t wgsize) { +template void coherency(size_t size, size_t wgsize) { q.submit([&](sycl::handler &cgh) { - syclexp::work_group_memory data{cgh}; + syclexp::work_group_memory data{cgh}; sycl::nd_range<1> ndr{size, wgsize}; cgh.parallel_for(ndr, [=](sycl::nd_item<1> it) { if (it.get_group().leader()) { - data = it.get_global_id() / wgsize; + data = T(it.get_global_id() / wgsize); } sycl::group_barrier(it.get_group()); - assert(data == it.get_global_id() / wgsize); + assert(data == T(it.get_global_id() / wgsize)); }); }); } constexpr size_t N = 32; -int main() { - int intarr1[N][N]; - int intarr2[N][N]; +template void test() { + T intarr1[N][N]; + T intarr2[N][N]; for (int i = 0; i < N; ++i) { for (int j = 0; j < N; ++j) { - intarr1[i][j] = i + j; - intarr2[i][j] = i * j; + intarr1[i][j] = T(i) + T(j); + intarr2[i][j] = T(i) * T(j); } } for (int i = 0; i < N; ++i) { @@ -373,10 +382,37 @@ int main() { swap_array_1d(intarr1[i], intarr2[i], 8); } swap_array_2d(intarr1, intarr2, 8); - coherency(N, N / 2); - coherency(N, N / 4); - coherency(N, N / 8); - coherency(N, N / 16); - coherency(N, N / 32); + coherency(N, N / 2); + coherency(N, N / 4); + coherency(N, N / 8); + coherency(N, N / 16); + coherency(N, N / 32); +} + +template void test_ptr() { + T arr1[N][N]; + T arr2[N][N]; + for (int i = 0; i < N; ++i) { + for (int j = 0; j < N; ++j) { + swap_scalar(arr1[i][j], arr2[i][j]); + } + swap_array_1d(arr1[i], arr2[i], 8); + } + swap_array_2d(arr1, arr2, 8); +} + +int main() { + test(); + test(); + test(); + if (q.get_device().has(sycl::aspect::fp16)) + test(); + test_ptr(); + test_ptr(); + test_ptr(); + test_ptr(); + if (q.get_device().has(sycl::aspect::fp16)) + test_ptr(); + test_ptr(); return 0; } diff --git a/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp b/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp new file mode 100644 index 0000000000000..325f77f60c68e --- /dev/null +++ b/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp @@ -0,0 +1,263 @@ +// REQUIRES: aspect-usm_shared_allocations +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +// The name mangling for free function kernels currently does not work with PTX. +// UNSUPPORTED: cuda + +// Usage of work group memory parameters in free function kernels is not yet +// implemented. +// TODO: Remove the following directive once +// https://github.com/intel/llvm/pull/15861 is merged. +// XFAIL: * +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/15927 + +#include +#include +#include +#include +#include +#include +#include + +using namespace sycl; + +// Basic usage reduction test using free function kernels. +// A global buffer is allocated using USM and it is passed to the kernel on the +// device. On the device, a work group memory buffer is allocated and each item +// copies the correspondng element of the global buffer to the corresponding +// element of the work group memory buffer using its global index. The leader of +// every work-group, after waiting for every work-item to complete, then sums +// these values storing the result in another work group memory object. Finally, +// each work item then verifies that the sum of the work group memory elements +// equals the sum of the global buffer elements. This is repeated for several +// data types. + +queue q; +context ctx = q.get_context(); + +constexpr size_t SIZE = 128; +constexpr size_t VEC_SIZE = 16; +constexpr float tolerance = 0.01f; + +template +void sum_helper(sycl::ext::oneapi::experimental::work_group_memory mem, + sycl::ext::oneapi::experimental::work_group_memory ret, + size_t WGSIZE) { + for (int i = 0; i < WGSIZE; ++i) { + ret = ret + mem[i]; + } +} + +template +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (ext::oneapi::experimental::nd_range_kernel<1>)) +void sum(sycl::ext::oneapi::experimental::work_group_memory mem, T *buf, + sycl::ext::oneapi::experimental::work_group_memory result, + T expected, size_t WGSIZE, bool UseHelper) { + const auto it = sycl::ext::oneapi::this_work_item::get_nd_item<1>(); + size_t local_id = it.get_local_id(); + mem[local_id] = buf[local_id]; + group_barrier(it.get_group()); + if (it.get_group().leader()) { + result = 0; + if (!UseHelper) { + for (int i = 0; i < WGSIZE; ++i) { + result = result + mem[i]; + } + } else { + sum_helper(mem, result, WGSIZE); + } + assert(result == expected); + } +} + +// Explicit instantiations for the relevant data types. +#define SUM(T) \ + template void sum( \ + sycl::ext::oneapi::experimental::work_group_memory mem, T * buf, \ + sycl::ext::oneapi::experimental::work_group_memory result, \ + T expected, size_t WGSIZE, bool UseHelper); + +SUM(int) +SUM(uint16_t) +SUM(half) +SUM(double) +SUM(float) +SUM(char) +SUM(bool) + +template +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (ext::oneapi::experimental::nd_range_kernel<1>)) +void sum_marray( + sycl::ext::oneapi::experimental::work_group_memory> mem, + T *buf, sycl::ext::oneapi::experimental::work_group_memory result, + T expected) { + const auto it = sycl::ext::oneapi::this_work_item::get_nd_item<1>(); + size_t local_id = it.get_local_id(); + constexpr float tolerance = 0.01f; + sycl::marray &data = mem; + data[local_id] = buf[local_id]; + group_barrier(it.get_group()); + if (it.get_group().leader()) { + result = 0; + for (int i = 0; i < 16; ++i) { + result = result + data[i]; + } + assert((result - expected) * (result - expected) <= tolerance); + } +} + +// Explicit instantiations for the relevant data types. +#define SUM_MARRAY(T) \ + template void sum_marray( \ + sycl::ext::oneapi::experimental::work_group_memory> \ + mem, \ + T * buf, sycl::ext::oneapi::experimental::work_group_memory result, \ + T expected); + +SUM_MARRAY(int); +SUM_MARRAY(float); +SUM_MARRAY(double); +SUM_MARRAY(char); +SUM_MARRAY(bool); +SUM_MARRAY(half); + +template +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (ext::oneapi::experimental::nd_range_kernel<1>)) +void sum_vec( + sycl::ext::oneapi::experimental::work_group_memory> mem, + T *buf, sycl::ext::oneapi::experimental::work_group_memory result, + T expected) { + const auto it = sycl::ext::oneapi::this_work_item::get_nd_item<1>(); + size_t local_id = it.get_local_id(); + constexpr float tolerance = 0.01f; + sycl::vec &data = mem; + data[local_id] = buf[local_id]; + group_barrier(it.get_group()); + if (it.get_group().leader()) { + result = 0; + for (int i = 0; i < 16; ++i) { + result = result + data[i]; + } + assert((result - expected) * (result - expected) <= tolerance); + } +} + +// Explicit instantiations for the relevant data types. +#define SUM_VEC(T) \ + template void sum_vec( \ + sycl::ext::oneapi::experimental::work_group_memory> \ + mem, \ + T * buf, sycl::ext::oneapi::experimental::work_group_memory result, \ + T expected); + +SUM_VEC(int); +SUM_VEC(float); +SUM_VEC(double); +SUM_VEC(char); +SUM_VEC(bool); +SUM_VEC(half); + +template void test_marray() { + if (std::is_same_v && !q.get_device().has(sycl::aspect::fp16)) + return; + constexpr size_t WGSIZE = VEC_SIZE; + T *buf = malloc_shared(WGSIZE, q); + assert(buf && "Shared USM allocation failed!"); + T expected = 0; + for (int i = 0; i < WGSIZE; ++i) { + buf[i] = ext::intel::math::sqrt(T(i)); + expected = expected + buf[i]; + } + nd_range ndr{{SIZE}, {WGSIZE}}; +#ifndef __SYCL_DEVICE_ONLY__ + // Get the kernel object for the "mykernel" kernel. + auto Bundle = get_kernel_bundle(ctx); + kernel_id sum_id = ext::oneapi::experimental::get_kernel_id>(); + kernel k_sum = Bundle.get_kernel(sum_id); + q.submit([&](sycl::handler &cgh) { + ext::oneapi::experimental::work_group_memory> mem{cgh}; + ext::oneapi::experimental ::work_group_memory result{cgh}; + cgh.set_args(mem, buf, result, expected); + cgh.parallel_for(ndr, k_sum); + }).wait(); +#endif // __SYCL_DEVICE_ONLY + free(buf, q); + if constexpr (sizeof...(Ts)) + test_marray(); +} + +template void test_vec() { + if (std::is_same_v && !q.get_device().has(sycl::aspect::fp16)) + return; + constexpr size_t WGSIZE = VEC_SIZE; + T *buf = malloc_shared(WGSIZE, q); + assert(buf && "Shared USM allocation failed!"); + T expected = 0; + for (int i = 0; i < WGSIZE; ++i) { + buf[i] = ext::intel::math::sqrt(T(i)); + expected = expected + buf[i]; + } + nd_range ndr{{SIZE}, {WGSIZE}}; +#ifndef __SYCL_DEVICE_ONLY__ + // Get the kernel object for the "mykernel" kernel. + auto Bundle = get_kernel_bundle(ctx); + kernel_id sum_id = ext::oneapi::experimental::get_kernel_id>(); + kernel k_sum = Bundle.get_kernel(sum_id); + q.submit([&](sycl::handler &cgh) { + ext::oneapi::experimental::work_group_memory> mem{cgh}; + ext::oneapi::experimental ::work_group_memory result{cgh}; + cgh.set_args(mem, buf, result, expected); + cgh.parallel_for(ndr, k_sum); + }).wait(); +#endif // __SYCL_DEVICE_ONLY + free(buf, q); + if constexpr (sizeof...(Ts)) + test_vec(); +} + +template +void test(size_t SIZE, size_t WGSIZE, bool UseHelper) { + if (std::is_same_v && !q.get_device().has(sycl::aspect::fp16)) + return; + T *buf = malloc_shared(WGSIZE, q); + assert(buf && "Shared USM allocation failed!"); + T expected = 0; + for (int i = 0; i < WGSIZE; ++i) { + buf[i] = T(i); + expected = expected + buf[i]; + } + nd_range ndr{{SIZE}, {WGSIZE}}; + // The following ifndef is required due to a number of limitations of free + // function kernels. See CMPLRLLVM-61498. + // TODO: Remove it once these limitations are no longer there. +#ifndef __SYCL_DEVICE_ONLY__ + // Get the kernel object for the "mykernel" kernel. + auto Bundle = get_kernel_bundle(ctx); + kernel_id sum_id = ext::oneapi::experimental::get_kernel_id>(); + kernel k_sum = Bundle.get_kernel(sum_id); + q.submit([&](sycl::handler &cgh) { + ext::oneapi::experimental::work_group_memory mem{WGSIZE, cgh}; + ext::oneapi::experimental ::work_group_memory result{cgh}; + cgh.set_args(mem, buf, result, expected, WGSIZE, UseHelper); + cgh.parallel_for(ndr, k_sum); + }).wait(); + +#endif // __SYCL_DEVICE_ONLY + free(buf, q); + if constexpr (sizeof...(Ts)) + test(SIZE, WGSIZE, UseHelper); +} + +int main() { + test(SIZE, SIZE, true /* UseHelper */); + test(SIZE, SIZE, false); + test(SIZE, SIZE / 2, false); + test(SIZE, SIZE / 4, false); + test_marray(); + test_vec(); + return 0; +} diff --git a/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp b/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp new file mode 100644 index 0000000000000..e85090c6f6ad0 --- /dev/null +++ b/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp @@ -0,0 +1,268 @@ +// REQUIRES: aspect-usm_shared_allocations +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace sycl; + +queue q; +context ctx = q.get_context(); + +constexpr size_t SIZE = 128; + +template struct S { + T val; +}; + +template struct M { + T val; +}; + +union U { + S s; + M m; +}; + +template +void test_struct(size_t SIZE, size_t WGSIZE) { + S *buf = malloc_shared>(WGSIZE, q); + assert(buf && "Shared USM allocation failed!"); + T expected = 0; + for (int i = 0; i < WGSIZE; ++i) { + buf[i].val = T(i); + expected = expected + buf[i].val; + } + nd_range ndr{{SIZE}, {WGSIZE}}; + q.submit([&](sycl::handler &cgh) { + ext::oneapi::experimental::work_group_memory[]> mem{WGSIZE, cgh}; + ext::oneapi::experimental ::work_group_memory result{cgh}; + cgh.parallel_for(ndr, [=](nd_item<> it) { + size_t local_id = it.get_local_id(); + mem[local_id] = buf[local_id]; + group_barrier(it.get_group()); + if (it.get_group().leader()) { + result = 0; + for (int i = 0; i < WGSIZE; ++i) { + result = result + mem[i].val; + } + assert(result == expected); + } + }); + }).wait(); + free(buf, q); + if constexpr (sizeof...(Ts)) + test_struct(SIZE, WGSIZE); +} + +void test_union(size_t SIZE, size_t WGSIZE) { + U *buf = malloc_shared(WGSIZE, q); + assert(buf && "Shared USM allocation failed!"); + int expected = 0; + for (int i = 0; i < WGSIZE; ++i) { + if (i % 2) + buf[i].s = S{i}; + else + buf[i].m = M{i}; + expected = expected + (i % 2) ? buf[i].s.val : buf[i].m.val; + } + nd_range ndr{{SIZE}, {WGSIZE}}; + q.submit([&](sycl::handler &cgh) { + ext::oneapi::experimental::work_group_memory mem{WGSIZE, cgh}; + ext::oneapi::experimental::work_group_memory result{cgh}; + cgh.parallel_for(ndr, [=](nd_item<> it) { + size_t local_id = it.get_local_id(); + mem[local_id] = buf[local_id]; + group_barrier(it.get_group()); + if (it.get_group().leader()) { + result = 0; + for (int i = 0; i < WGSIZE; ++i) { + result = result + (i % 2) ? mem[i].s.val : mem[i].m.val; + } + assert(result == expected); + } + }); + }).wait(); + free(buf, q); +} + +template +void sum_helper(sycl::ext::oneapi::experimental::work_group_memory mem, + sycl::ext::oneapi::experimental::work_group_memory ret, + size_t WGSIZE) { + for (int i = 0; i < WGSIZE; ++i) { + ret = ret + mem[i]; + } +} + +template +void test(size_t SIZE, size_t WGSIZE, bool UseHelper) { + if (std::is_same_v && !q.get_device().has(sycl::aspect::fp16)) + return; + T *buf = malloc_shared(WGSIZE, q); + assert(buf && "Shared USM allocation failed!"); + T expected = 0; + for (int i = 0; i < WGSIZE; ++i) { + buf[i] = T(i); + expected = expected + buf[i]; + } + nd_range ndr{{SIZE}, {WGSIZE}}; + q.submit([&](sycl::handler &cgh) { + ext::oneapi::experimental::work_group_memory mem{WGSIZE, cgh}; + ext::oneapi::experimental ::work_group_memory result{cgh}; + cgh.parallel_for(ndr, [=](nd_item<> it) { + size_t local_id = it.get_local_id(); + mem[local_id] = buf[local_id]; + group_barrier(it.get_group()); + if (it.get_group().leader()) { + result = 0; + if (!UseHelper) { + for (int i = 0; i < WGSIZE; ++i) { + result = result + mem[i]; + } + } else { + sum_helper(mem, result, WGSIZE); + } + assert(result == expected); + } + }); + }).wait(); + free(buf, q); + if constexpr (sizeof...(Ts)) + test(SIZE, WGSIZE, UseHelper); +} + +template void test_marray() { + if (std::is_same_v && !q.get_device().has(sycl::aspect::fp16)) + return; + constexpr size_t WGSIZE = SIZE; + T *buf = malloc_shared(WGSIZE, q); + assert(buf && "Shared USM allocation failed!"); + T expected = 0; + for (int i = 0; i < WGSIZE; ++i) { + buf[i] = T(i); + expected = expected + buf[i]; + } + nd_range ndr{{SIZE}, {WGSIZE}}; + q.submit([&](sycl::handler &cgh) { + ext::oneapi::experimental::work_group_memory> mem{cgh}; + ext::oneapi::experimental ::work_group_memory result{cgh}; + cgh.parallel_for(ndr, [=](nd_item<> it) { + size_t local_id = it.get_local_id(); + constexpr T tolerance = 0.0001; + // User-defined conversion from work group memory to underlying type is + // not applied during member access calls so we have to explicitly + // convert to the value_type ourselves. + marray &data = mem; + data[local_id] = buf[local_id]; + group_barrier(it.get_group()); + if (it.get_group().leader()) { + result = 0; + for (int i = 0; i < WGSIZE; ++i) { + result = result + data[i]; + } + assert((result - expected) * (result - expected) <= tolerance); + } + }); + }).wait(); + free(buf, q); + if constexpr (sizeof...(Ts)) + test_marray(); +} + +template void test_vec() { + if (std::is_same_v && !q.get_device().has(sycl::aspect::fp16)) + return; + constexpr size_t WGSIZE = 8; + T *buf = malloc_shared(WGSIZE, q); + assert(buf && "Shared USM allocation failed!"); + T expected = 0; + for (int i = 0; i < WGSIZE; ++i) { + buf[i] = ext::intel::math::sqrt(T(i)); + expected = expected + buf[i]; + } + nd_range ndr{{SIZE}, {WGSIZE}}; + q.submit([&](sycl::handler &cgh) { + ext::oneapi::experimental::work_group_memory> mem{cgh}; + ext::oneapi::experimental ::work_group_memory result{cgh}; + cgh.parallel_for(ndr, [=](nd_item<> it) { + size_t local_id = it.get_local_id(); + constexpr T tolerance = 0.0001; + vec &data = mem; + data[local_id] = buf[local_id]; + group_barrier(it.get_group()); + if (it.get_group().leader()) { + result = 0; + for (int i = 0; i < WGSIZE; ++i) { + result = result + data[i]; + } + assert((result - expected) * (result - expected) <= tolerance); + } + }); + }).wait(); + free(buf, q); + if constexpr (sizeof...(Ts)) + test_vec(); +} + +template void test_atomic_ref() { + assert(sizeof(T) == 4 || + (sizeof(T) == 8 && q.get_device().has(aspect::atomic64))); + constexpr size_t WGSIZE = 8; + T *buf = malloc_shared(WGSIZE, q); + assert(buf && "Shared USM allocation failed!"); + T expected = 0; + for (int i = 0; i < WGSIZE; ++i) { + buf[i] = T(i); + expected = expected + buf[i]; + } + nd_range ndr{{SIZE}, {WGSIZE}}; + q.submit([&](sycl::handler &cgh) { + ext::oneapi::experimental::work_group_memory mem{WGSIZE, cgh}; + ext::oneapi::experimental::work_group_memory result{cgh}; + cgh.parallel_for(ndr, [=](nd_item<> it) { + size_t local_id = it.get_local_id(); + mem[local_id] = buf[local_id]; + atomic_ref + atomic_val{result}; + if (it.get_group().leader()) { + atomic_val.store(0); + } + group_barrier(it.get_group()); + atomic_val += mem[local_id]; + group_barrier(it.get_group()); + assert(atomic_val.load() == expected); + }); + }).wait(); + free(buf, q); + if constexpr (sizeof...(Ts)) + test_atomic_ref(); +} + +int main() { + test(SIZE, SIZE /* WorkGroupSize */, + true /* UseHelper */); + test(SIZE, SIZE, false); + test(SIZE, SIZE / 2, false); + test(SIZE, SIZE / 4, false); + test(SIZE, 1, false); + test(SIZE, 2, true); + test_marray(); + test_vec(); + test_atomic_ref(); + test_struct(SIZE, 4); + test_union(SIZE, SIZE); + test_union(SIZE, SIZE / 2); + test_union(SIZE, 1); + test_union(SIZE, 2); + return 0; +} diff --git a/sycl/test/extensions/WorkGroupMemory/api_misuse_test.cpp b/sycl/test/extensions/WorkGroupMemory/api_misuse_test.cpp new file mode 100644 index 0000000000000..1075dd9865860 --- /dev/null +++ b/sycl/test/extensions/WorkGroupMemory/api_misuse_test.cpp @@ -0,0 +1,45 @@ +// RUN: %clangxx -fsycl -fsyntax-only -ferror-limit=30 -Xclang -verify -Xclang -verify-ignore-unexpected=note,warning %s + +#include + +using namespace sycl; +namespace syclexp = sycl::ext::oneapi::experimental; + +queue Q; + +// This test verifies the type restrictions on the two non-default constructors +// of work group memory. + +template void convertToDataT(DataT &data) {} + +template void test_bounded_arr() { + Q.submit([&](sycl::handler &cgh) { + nd_range<1> ndr{1, 1}; + // expected-error-re@+1 5{{no matching constructor for initialization of 'syclexp::work_group_memory<{{.*}}>'}} + syclexp::work_group_memory mem{1, cgh}; + // expected-error@+1 5{{no viable overloaded '='}} + cgh.parallel_for(ndr, [=](nd_item<1> it) { mem = {DataT{}}; }); + }); +} + +template void test_unbounded_arr() { + Q.submit([&](sycl::handler &cgh) { + nd_range<1> ndr{1, 1}; + // expected-error-re@+1 5{{no matching constructor for initialization of 'syclexp::work_group_memory<{{.*}}>'}} + syclexp::work_group_memory mem{cgh}; + // expected-error@+1 5{{no viable overloaded '='}} + cgh.parallel_for(ndr, [=](nd_item<1> it) { mem = {DataT{}}; }); + }); +} + +template void test() { + test_bounded_arr(); + test_unbounded_arr(); + if constexpr (sizeof...(DataTs)) + test(); +} + +int main() { + test(); + return 0; +} diff --git a/sycl/test/extensions/WorkGroupMemory/api_test.cpp b/sycl/test/extensions/WorkGroupMemory/api_test.cpp new file mode 100644 index 0000000000000..ce1515a8abbf5 --- /dev/null +++ b/sycl/test/extensions/WorkGroupMemory/api_test.cpp @@ -0,0 +1,104 @@ +// RUN: %clangxx -fsycl -fsyntax-only %s + +#include +#include +#include + +using namespace sycl; +namespace syclexp = sycl::ext::oneapi::experimental; + +queue Q; + +struct S { + int a; + char b; +}; + +union U { + int a; + char b; +}; + +template void convertToDataT(DataT &data) {} + +template void test_constness() { + Q.submit([&](sycl::handler &cgh) { + nd_range<1> ndr{1, 1}; + syclexp::work_group_memory + mem; // technically undefined behavior but this is a syntax only test. + cgh.parallel_for(ndr, [=](nd_item<1> it) { + const auto mem1 = mem; + // since mem1 is const, all of the following should succeed. + if constexpr (!std::is_array_v) + mem1 = DataT{}; + convertToDataT(mem1); + const auto *ptr = &mem1; + const auto &mptr = mem1.template get_multi_ptr<>(); + }); + }); +} + +template +void test_helper(syclexp::work_group_memory mem) { + static_assert( + std::is_same_v::value_type, + std::remove_all_extents_t>); + syclexp::work_group_memory dummy{mem}; + mem = dummy; + Q.submit([&](sycl::handler &cgh) { + if constexpr (sycl::detail::is_unbounded_array_v) + mem = syclexp::work_group_memory{1, cgh}; + else + mem = syclexp::work_group_memory{cgh}; + nd_range<1> ndr{1, 1}; + cgh.parallel_for(ndr, [=](nd_item<1> it) { + convertToDataT(mem); + if constexpr (!std::is_array_v) + mem = DataT{}; + static_assert( + std::is_same_v< + multi_ptr::value_type, + access::address_space::local_space, + access::decorated::no>, + decltype(mem.template get_multi_ptr())>); + static_assert( + std::is_same_v< + multi_ptr::value_type, + access::address_space::local_space, + access::decorated::no>, + decltype(mem.template get_multi_ptr<>())>); + static_assert( + std::is_same_v< + multi_ptr::value_type, + access::address_space::local_space, + access::decorated::no>, + decltype(mem.template get_multi_ptr())>); + + static_assert( + std::is_same_v< + multi_ptr::value_type, + access::address_space::local_space, + access::decorated::yes>, + decltype(mem.template get_multi_ptr())>); + }); + }); +} + +template void test() { + syclexp::work_group_memory mem; + test_constness(); + test_helper(mem); + if constexpr (sizeof...(rest)) + test(); +} + +int main() { + test(); + test, marray, marray>(); + test, vec, vec>(); + test(); + test(); + test(); + test(); + return 0; +} From bd69b8bf4390080a77df2a3f1d84a5875c0604ea Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Wed, 30 Oct 2024 09:47:16 -0700 Subject: [PATCH 02/30] Remove unused variable --- sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp b/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp index 325f77f60c68e..40878abbdc574 100644 --- a/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp +++ b/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp @@ -38,7 +38,6 @@ context ctx = q.get_context(); constexpr size_t SIZE = 128; constexpr size_t VEC_SIZE = 16; -constexpr float tolerance = 0.01f; template void sum_helper(sycl::ext::oneapi::experimental::work_group_memory mem, From 83887beab48326629673cc8ff3d607a9d6177908 Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Wed, 30 Oct 2024 13:15:10 -0400 Subject: [PATCH 03/30] Update reduction_free_function.cpp --- sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp b/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp index 40878abbdc574..f1c06268319a7 100644 --- a/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp +++ b/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp @@ -116,11 +116,8 @@ void sum_marray( T * buf, sycl::ext::oneapi::experimental::work_group_memory result, \ T expected); -SUM_MARRAY(int); SUM_MARRAY(float); SUM_MARRAY(double); -SUM_MARRAY(char); -SUM_MARRAY(bool); SUM_MARRAY(half); template @@ -153,11 +150,8 @@ void sum_vec( T * buf, sycl::ext::oneapi::experimental::work_group_memory result, \ T expected); -SUM_VEC(int); SUM_VEC(float); SUM_VEC(double); -SUM_VEC(char); -SUM_VEC(bool); SUM_VEC(half); template void test_marray() { From 3964f2759ddc8fb6db76507c4ae569f771afc3bc Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Thu, 31 Oct 2024 09:27:40 -0700 Subject: [PATCH 04/30] Fix missing aspect runtime errors in tests --- .../reduction_free_function.cpp | 17 ++++++++--- .../WorkGroupMemory/reduction_lambda.cpp | 30 +++++++++++++------ 2 files changed, 34 insertions(+), 13 deletions(-) diff --git a/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp b/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp index f1c06268319a7..294b16fc4bc86 100644 --- a/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp +++ b/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp @@ -95,7 +95,7 @@ void sum_marray( T expected) { const auto it = sycl::ext::oneapi::this_work_item::get_nd_item<1>(); size_t local_id = it.get_local_id(); - constexpr float tolerance = 0.01f; + constexpr T tolerance = 0.0001; sycl::marray &data = mem; data[local_id] = buf[local_id]; group_barrier(it.get_group()); @@ -129,7 +129,7 @@ void sum_vec( T expected) { const auto it = sycl::ext::oneapi::this_work_item::get_nd_item<1>(); size_t local_id = it.get_local_id(); - constexpr float tolerance = 0.01f; + constexpr T tolerance = 0.0001; sycl::vec &data = mem; data[local_id] = buf[local_id]; group_barrier(it.get_group()); @@ -157,12 +157,15 @@ SUM_VEC(half); template void test_marray() { if (std::is_same_v && !q.get_device().has(sycl::aspect::fp16)) return; + if (std::is_same_v && !q.get_device().has(aspect::fp64)) + return; + constexpr size_t WGSIZE = VEC_SIZE; T *buf = malloc_shared(WGSIZE, q); assert(buf && "Shared USM allocation failed!"); T expected = 0; for (int i = 0; i < WGSIZE; ++i) { - buf[i] = ext::intel::math::sqrt(T(i)); + buf[i] = T(i) / WGSIZE; expected = expected + buf[i]; } nd_range ndr{{SIZE}, {WGSIZE}}; @@ -186,12 +189,15 @@ template void test_marray() { template void test_vec() { if (std::is_same_v && !q.get_device().has(sycl::aspect::fp16)) return; + if (std::is_same_v && !q.get_device().has(aspect::fp64)) + return; + constexpr size_t WGSIZE = VEC_SIZE; T *buf = malloc_shared(WGSIZE, q); assert(buf && "Shared USM allocation failed!"); T expected = 0; for (int i = 0; i < WGSIZE; ++i) { - buf[i] = ext::intel::math::sqrt(T(i)); + buf[i] = T(i) / WGSIZE; expected = expected + buf[i]; } nd_range ndr{{SIZE}, {WGSIZE}}; @@ -216,6 +222,9 @@ template void test(size_t SIZE, size_t WGSIZE, bool UseHelper) { if (std::is_same_v && !q.get_device().has(sycl::aspect::fp16)) return; + if (std::is_same_v && !q.get_device().has(aspect::fp64)) + return; + T *buf = malloc_shared(WGSIZE, q); assert(buf && "Shared USM allocation failed!"); T expected = 0; diff --git a/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp b/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp index e85090c6f6ad0..1adab97323e9f 100644 --- a/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp +++ b/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp @@ -20,13 +20,9 @@ context ctx = q.get_context(); constexpr size_t SIZE = 128; -template struct S { - T val; -}; +template struct S { T val; }; -template struct M { - T val; -}; +template struct M { T val; }; union U { S s; @@ -35,6 +31,11 @@ union U { template void test_struct(size_t SIZE, size_t WGSIZE) { + if (std::is_same_v && !q.get_device().has(aspect::fp16)) + return; + if (std::is_same_v && !q.get_device().has(aspect::fp64)) + return; + S *buf = malloc_shared>(WGSIZE, q); assert(buf && "Shared USM allocation failed!"); T expected = 0; @@ -44,7 +45,7 @@ void test_struct(size_t SIZE, size_t WGSIZE) { } nd_range ndr{{SIZE}, {WGSIZE}}; q.submit([&](sycl::handler &cgh) { - ext::oneapi::experimental::work_group_memory[]> mem{WGSIZE, cgh}; + ext::oneapi::experimental::work_group_memory[]> mem { WGSIZE, cgh }; ext::oneapi::experimental ::work_group_memory result{cgh}; cgh.parallel_for(ndr, [=](nd_item<> it) { size_t local_id = it.get_local_id(); @@ -108,6 +109,9 @@ template void test(size_t SIZE, size_t WGSIZE, bool UseHelper) { if (std::is_same_v && !q.get_device().has(sycl::aspect::fp16)) return; + if (std::is_same_v && !q.get_device().has(aspect::fp64)) + return; + T *buf = malloc_shared(WGSIZE, q); assert(buf && "Shared USM allocation failed!"); T expected = 0; @@ -144,12 +148,15 @@ void test(size_t SIZE, size_t WGSIZE, bool UseHelper) { template void test_marray() { if (std::is_same_v && !q.get_device().has(sycl::aspect::fp16)) return; + if (std::is_same_v && !q.get_device().has(aspect::fp64)) + return; + constexpr size_t WGSIZE = SIZE; T *buf = malloc_shared(WGSIZE, q); assert(buf && "Shared USM allocation failed!"); T expected = 0; for (int i = 0; i < WGSIZE; ++i) { - buf[i] = T(i); + buf[i] = T(i) / WGSIZE; expected = expected + buf[i]; } nd_range ndr{{SIZE}, {WGSIZE}}; @@ -182,12 +189,15 @@ template void test_marray() { template void test_vec() { if (std::is_same_v && !q.get_device().has(sycl::aspect::fp16)) return; + if (std::is_same_v && !q.get_device().has(aspect::fp64)) + return; + constexpr size_t WGSIZE = 8; T *buf = malloc_shared(WGSIZE, q); assert(buf && "Shared USM allocation failed!"); T expected = 0; for (int i = 0; i < WGSIZE; ++i) { - buf[i] = ext::intel::math::sqrt(T(i)); + buf[i] = T(i) / WGSIZE; expected = expected + buf[i]; } nd_range ndr{{SIZE}, {WGSIZE}}; @@ -217,6 +227,8 @@ template void test_vec() { template void test_atomic_ref() { assert(sizeof(T) == 4 || (sizeof(T) == 8 && q.get_device().has(aspect::atomic64))); + if (std::is_same_v && !q.get_device().has(aspect::fp64)) + return; constexpr size_t WGSIZE = 8; T *buf = malloc_shared(WGSIZE, q); assert(buf && "Shared USM allocation failed!"); From 69642a1b1ce783ca42dd143d5dfec2c5dc191c46 Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Thu, 31 Oct 2024 09:28:57 -0700 Subject: [PATCH 05/30] Revert "Fix missing aspect runtime errors in tests" This reverts commit 3964f2759ddc8fb6db76507c4ae569f771afc3bc. --- .../reduction_free_function.cpp | 17 +++-------- .../WorkGroupMemory/reduction_lambda.cpp | 30 ++++++------------- 2 files changed, 13 insertions(+), 34 deletions(-) diff --git a/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp b/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp index 294b16fc4bc86..f1c06268319a7 100644 --- a/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp +++ b/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp @@ -95,7 +95,7 @@ void sum_marray( T expected) { const auto it = sycl::ext::oneapi::this_work_item::get_nd_item<1>(); size_t local_id = it.get_local_id(); - constexpr T tolerance = 0.0001; + constexpr float tolerance = 0.01f; sycl::marray &data = mem; data[local_id] = buf[local_id]; group_barrier(it.get_group()); @@ -129,7 +129,7 @@ void sum_vec( T expected) { const auto it = sycl::ext::oneapi::this_work_item::get_nd_item<1>(); size_t local_id = it.get_local_id(); - constexpr T tolerance = 0.0001; + constexpr float tolerance = 0.01f; sycl::vec &data = mem; data[local_id] = buf[local_id]; group_barrier(it.get_group()); @@ -157,15 +157,12 @@ SUM_VEC(half); template void test_marray() { if (std::is_same_v && !q.get_device().has(sycl::aspect::fp16)) return; - if (std::is_same_v && !q.get_device().has(aspect::fp64)) - return; - constexpr size_t WGSIZE = VEC_SIZE; T *buf = malloc_shared(WGSIZE, q); assert(buf && "Shared USM allocation failed!"); T expected = 0; for (int i = 0; i < WGSIZE; ++i) { - buf[i] = T(i) / WGSIZE; + buf[i] = ext::intel::math::sqrt(T(i)); expected = expected + buf[i]; } nd_range ndr{{SIZE}, {WGSIZE}}; @@ -189,15 +186,12 @@ template void test_marray() { template void test_vec() { if (std::is_same_v && !q.get_device().has(sycl::aspect::fp16)) return; - if (std::is_same_v && !q.get_device().has(aspect::fp64)) - return; - constexpr size_t WGSIZE = VEC_SIZE; T *buf = malloc_shared(WGSIZE, q); assert(buf && "Shared USM allocation failed!"); T expected = 0; for (int i = 0; i < WGSIZE; ++i) { - buf[i] = T(i) / WGSIZE; + buf[i] = ext::intel::math::sqrt(T(i)); expected = expected + buf[i]; } nd_range ndr{{SIZE}, {WGSIZE}}; @@ -222,9 +216,6 @@ template void test(size_t SIZE, size_t WGSIZE, bool UseHelper) { if (std::is_same_v && !q.get_device().has(sycl::aspect::fp16)) return; - if (std::is_same_v && !q.get_device().has(aspect::fp64)) - return; - T *buf = malloc_shared(WGSIZE, q); assert(buf && "Shared USM allocation failed!"); T expected = 0; diff --git a/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp b/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp index 1adab97323e9f..e85090c6f6ad0 100644 --- a/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp +++ b/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp @@ -20,9 +20,13 @@ context ctx = q.get_context(); constexpr size_t SIZE = 128; -template struct S { T val; }; +template struct S { + T val; +}; -template struct M { T val; }; +template struct M { + T val; +}; union U { S s; @@ -31,11 +35,6 @@ union U { template void test_struct(size_t SIZE, size_t WGSIZE) { - if (std::is_same_v && !q.get_device().has(aspect::fp16)) - return; - if (std::is_same_v && !q.get_device().has(aspect::fp64)) - return; - S *buf = malloc_shared>(WGSIZE, q); assert(buf && "Shared USM allocation failed!"); T expected = 0; @@ -45,7 +44,7 @@ void test_struct(size_t SIZE, size_t WGSIZE) { } nd_range ndr{{SIZE}, {WGSIZE}}; q.submit([&](sycl::handler &cgh) { - ext::oneapi::experimental::work_group_memory[]> mem { WGSIZE, cgh }; + ext::oneapi::experimental::work_group_memory[]> mem{WGSIZE, cgh}; ext::oneapi::experimental ::work_group_memory result{cgh}; cgh.parallel_for(ndr, [=](nd_item<> it) { size_t local_id = it.get_local_id(); @@ -109,9 +108,6 @@ template void test(size_t SIZE, size_t WGSIZE, bool UseHelper) { if (std::is_same_v && !q.get_device().has(sycl::aspect::fp16)) return; - if (std::is_same_v && !q.get_device().has(aspect::fp64)) - return; - T *buf = malloc_shared(WGSIZE, q); assert(buf && "Shared USM allocation failed!"); T expected = 0; @@ -148,15 +144,12 @@ void test(size_t SIZE, size_t WGSIZE, bool UseHelper) { template void test_marray() { if (std::is_same_v && !q.get_device().has(sycl::aspect::fp16)) return; - if (std::is_same_v && !q.get_device().has(aspect::fp64)) - return; - constexpr size_t WGSIZE = SIZE; T *buf = malloc_shared(WGSIZE, q); assert(buf && "Shared USM allocation failed!"); T expected = 0; for (int i = 0; i < WGSIZE; ++i) { - buf[i] = T(i) / WGSIZE; + buf[i] = T(i); expected = expected + buf[i]; } nd_range ndr{{SIZE}, {WGSIZE}}; @@ -189,15 +182,12 @@ template void test_marray() { template void test_vec() { if (std::is_same_v && !q.get_device().has(sycl::aspect::fp16)) return; - if (std::is_same_v && !q.get_device().has(aspect::fp64)) - return; - constexpr size_t WGSIZE = 8; T *buf = malloc_shared(WGSIZE, q); assert(buf && "Shared USM allocation failed!"); T expected = 0; for (int i = 0; i < WGSIZE; ++i) { - buf[i] = T(i) / WGSIZE; + buf[i] = ext::intel::math::sqrt(T(i)); expected = expected + buf[i]; } nd_range ndr{{SIZE}, {WGSIZE}}; @@ -227,8 +217,6 @@ template void test_vec() { template void test_atomic_ref() { assert(sizeof(T) == 4 || (sizeof(T) == 8 && q.get_device().has(aspect::atomic64))); - if (std::is_same_v && !q.get_device().has(aspect::fp64)) - return; constexpr size_t WGSIZE = 8; T *buf = malloc_shared(WGSIZE, q); assert(buf && "Shared USM allocation failed!"); From db2f720019e8347ff95ca92c112f8d3a56c596dd Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Thu, 31 Oct 2024 18:29:35 +0100 Subject: [PATCH 06/30] Fix formatting --- .../WorkGroupMemory/reduction_free_function.cpp | 2 +- sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp b/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp index 294b16fc4bc86..386f639498145 100644 --- a/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp +++ b/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp @@ -2,8 +2,8 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -// The name mangling for free function kernels currently does not work with PTX. // UNSUPPORTED: cuda +// UNSUPPORTED-INTENDED: The name mangling for free function kernels currently does not work with PTX. // Usage of work group memory parameters in free function kernels is not yet // implemented. diff --git a/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp b/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp index 1adab97323e9f..40beee16a9983 100644 --- a/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp +++ b/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp @@ -20,9 +20,13 @@ context ctx = q.get_context(); constexpr size_t SIZE = 128; -template struct S { T val; }; +template struct S { + T val; +}; -template struct M { T val; }; +template struct M { + T val; +}; union U { S s; @@ -45,7 +49,7 @@ void test_struct(size_t SIZE, size_t WGSIZE) { } nd_range ndr{{SIZE}, {WGSIZE}}; q.submit([&](sycl::handler &cgh) { - ext::oneapi::experimental::work_group_memory[]> mem { WGSIZE, cgh }; + ext::oneapi::experimental::work_group_memory[]> mem{WGSIZE, cgh}; ext::oneapi::experimental ::work_group_memory result{cgh}; cgh.parallel_for(ndr, [=](nd_item<> it) { size_t local_id = it.get_local_id(); From 2307672e51a66e27d259ca1e2462e81ada2321f0 Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Thu, 31 Oct 2024 18:35:52 +0100 Subject: [PATCH 07/30] Fix formatting --- sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp b/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp index 386f639498145..5005672787c31 100644 --- a/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp +++ b/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp @@ -3,7 +3,8 @@ // RUN: %{run} %t.out // UNSUPPORTED: cuda -// UNSUPPORTED-INTENDED: The name mangling for free function kernels currently does not work with PTX. +// UNSUPPORTED-INTENDED: The name mangling for free function kernels currently +// does not work with PTX. // Usage of work group memory parameters in free function kernels is not yet // implemented. From 553a127956ad45dbc287f8612d2100932ac83b85 Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Thu, 31 Oct 2024 14:59:39 -0400 Subject: [PATCH 08/30] Fix comment typo in free function kernel test --- sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp b/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp index 5005672787c31..442d567e1b342 100644 --- a/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp +++ b/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp @@ -27,7 +27,7 @@ using namespace sycl; // A global buffer is allocated using USM and it is passed to the kernel on the // device. On the device, a work group memory buffer is allocated and each item // copies the correspondng element of the global buffer to the corresponding -// element of the work group memory buffer using its global index. The leader of +// element of the work group memory buffer using its lcoal index. The leader of // every work-group, after waiting for every work-item to complete, then sums // these values storing the result in another work group memory object. Finally, // each work item then verifies that the sum of the work group memory elements From d8faebe8d9d2140b51b296cc0c3273967ebf2671 Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Thu, 31 Oct 2024 15:03:15 -0400 Subject: [PATCH 09/30] Remove ext/intel/math from includes --- sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp b/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp index 40beee16a9983..bcfb7bbef9bea 100644 --- a/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp +++ b/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include From adb2331921fe2f6ea4820ee27a0517a945a3929d Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Thu, 31 Oct 2024 15:03:44 -0400 Subject: [PATCH 10/30] Remove ext/intel/math from includes --- sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp b/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp index 442d567e1b342..6205164afcd4f 100644 --- a/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp +++ b/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp @@ -15,7 +15,6 @@ #include #include -#include #include #include #include From 671bea859ccbcfe4875e0efcfa47c3625fced1c5 Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Thu, 31 Oct 2024 15:10:17 -0400 Subject: [PATCH 11/30] Add comment regarding the limitations of free function kernels --- sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp b/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp index 6205164afcd4f..d11e070615049 100644 --- a/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp +++ b/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp @@ -72,6 +72,9 @@ void sum(sycl::ext::oneapi::experimental::work_group_memory mem, T *buf, } // Explicit instantiations for the relevant data types. +// These are needed because free function kernel support is not fully +// implemented yet. +// TODO: Remove these once free function kernel support is fully there. #define SUM(T) \ template void sum( \ sycl::ext::oneapi::experimental::work_group_memory mem, T * buf, \ From 82cc19da671d56dc72f9499c57c9f67dce706cb9 Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Thu, 31 Oct 2024 20:52:31 +0100 Subject: [PATCH 12/30] Fix formatting --- sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp b/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp index d11e070615049..01c848238115d 100644 --- a/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp +++ b/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp @@ -73,7 +73,7 @@ void sum(sycl::ext::oneapi::experimental::work_group_memory mem, T *buf, // Explicit instantiations for the relevant data types. // These are needed because free function kernel support is not fully -// implemented yet. +// implemented yet. // TODO: Remove these once free function kernel support is fully there. #define SUM(T) \ template void sum( \ From f231a266ec32daf5782bc507dd4935e507756c81 Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Mon, 4 Nov 2024 14:36:35 -0800 Subject: [PATCH 13/30] Add another constructor that takes an argument of indeterminate_t type and add a diagnostic test for Properties passed to work group memory --- .../oneapi/experimental/work_group_memory.hpp | 25 +++++++++++++++++-- .../WorkGroupMemory/empty_properties_test.cpp | 22 ++++++++++++++++ 2 files changed, 45 insertions(+), 2 deletions(-) create mode 100644 sycl/test/extensions/WorkGroupMemory/empty_properties_test.cpp diff --git a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp index 7870ebd3ca73e..740fcef8c2988 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp @@ -36,6 +36,9 @@ class work_group_memory_impl { } // namespace detail namespace ext::oneapi::experimental { +struct indeterminate_t {}; +inline constexpr indeterminate_t indeterminate; + template class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory : sycl::detail::work_group_memory_impl { @@ -43,22 +46,40 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory using value_type = std::remove_all_extents_t; private: + // At the moment we do not have a way to set properties nor property values to + // set for work group memory. So, we check here for diagnostic purposes that + // the property list is empty. + void check_props_empty() const { + static_assert(std::is_same_v && + "Work group memory class does not support properties yet!"); + } using decoratedPtr = typename sycl::detail::DecoratedType< value_type, access::address_space::local_space>::type *; public: +// Frontend requires special types to have a default constructor in device +// compilation mode in order to have a unified way of initializing an object and +// then calling init method on it. This is an implementation detail and not part +// of the spec. +#ifdef __SYCL_DEVICE_ONLY__ work_group_memory() = default; +#endif + work_group_memory(const indeterminate_t &) { check_props_empty(); } work_group_memory(const work_group_memory &rhs) = default; work_group_memory &operator=(const work_group_memory &rhs) = default; template >> work_group_memory(handler &) - : sycl::detail::work_group_memory_impl(sizeof(DataT)) {} + : sycl::detail::work_group_memory_impl(sizeof(DataT)) { + check_props_empty(); + } template >> work_group_memory(size_t num, handler &) : sycl::detail::work_group_memory_impl( - num * sizeof(std::remove_extent_t)) {} + num * sizeof(std::remove_extent_t)) { + check_props_empty(); + } template multi_ptr get_multi_ptr() const { diff --git a/sycl/test/extensions/WorkGroupMemory/empty_properties_test.cpp b/sycl/test/extensions/WorkGroupMemory/empty_properties_test.cpp new file mode 100644 index 0000000000000..8464b29cb3752 --- /dev/null +++ b/sycl/test/extensions/WorkGroupMemory/empty_properties_test.cpp @@ -0,0 +1,22 @@ +// RUN: %clangxx -fsycl -fsyntax-only -Xclang -verify -Xclang -verify-ignore-unexpected=note,warning %s +#include + +using namespace sycl; +namespace syclexp = sycl::ext::oneapi::experimental; + +// This test checks that a diagnostic is emitted when +// instantiating a work group memory class with the properties set to anything +// other than empty_properties_t + +template +void test_properties() { + // expected-error-re@sycl/ext/oneapi/experimental/work_group_memory.hpp:* 2{{static assertion failed due to requirement 'std::is_same_v<{{.*}}, sycl::ext::oneapi::experimental::properties>>'}} + syclexp::work_group_memory{syclexp::indeterminate}; + if constexpr (sizeof...(PropertyListTs)) + test_properties(); +} + +int main() { + test_properties(); + return 0; +} From 209a5b75b7045d0b1686755b34006c222f7a9354 Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Mon, 4 Nov 2024 14:39:35 -0800 Subject: [PATCH 14/30] Add TODOs to remove diagnostic once feature is supported --- sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp index 740fcef8c2988..9fc5b1655eb53 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp @@ -49,6 +49,8 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory // At the moment we do not have a way to set properties nor property values to // set for work group memory. So, we check here for diagnostic purposes that // the property list is empty. + // TODO: Remove this function and its occurrences in this file once properties + // have been created for work group memory. void check_props_empty() const { static_assert(std::is_same_v && "Work group memory class does not support properties yet!"); From 8c203ef4dddaa9144145a06552946f699cc1096e Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Mon, 4 Nov 2024 14:49:04 -0800 Subject: [PATCH 15/30] Fix include fails --- .../WorkGroupMemory/reduction_free_function.cpp | 4 ++-- sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp | 14 +++++--------- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp b/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp index d583f71c93bab..a6388e8115bcf 100644 --- a/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp +++ b/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp @@ -165,7 +165,7 @@ template void test_marray() { assert(buf && "Shared USM allocation failed!"); T expected = 0; for (int i = 0; i < WGSIZE; ++i) { - buf[i] = ext::intel::math::sqrt(T(i)); + buf[i] = T(i) / WGSIZE; expected = expected + buf[i]; } nd_range ndr{{SIZE}, {WGSIZE}}; @@ -194,7 +194,7 @@ template void test_vec() { assert(buf && "Shared USM allocation failed!"); T expected = 0; for (int i = 0; i < WGSIZE; ++i) { - buf[i] = ext::intel::math::sqrt(T(i)); + buf[i] = T(i) / WGSIZE; expected = expected + buf[i]; } nd_range ndr{{SIZE}, {WGSIZE}}; diff --git a/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp b/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp index 86590990d6f25..4ca632166a58b 100644 --- a/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp +++ b/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp @@ -19,13 +19,9 @@ context ctx = q.get_context(); constexpr size_t SIZE = 128; -template struct S { - T val; -}; +template struct S { T val; }; -template struct M { - T val; -}; +template struct M { T val; }; union U { S s; @@ -43,7 +39,7 @@ void test_struct(size_t SIZE, size_t WGSIZE) { } nd_range ndr{{SIZE}, {WGSIZE}}; q.submit([&](sycl::handler &cgh) { - ext::oneapi::experimental::work_group_memory[]> mem{WGSIZE, cgh}; + ext::oneapi::experimental::work_group_memory[]> mem { WGSIZE, cgh }; ext::oneapi::experimental ::work_group_memory result{cgh}; cgh.parallel_for(ndr, [=](nd_item<> it) { size_t local_id = it.get_local_id(); @@ -148,7 +144,7 @@ template void test_marray() { assert(buf && "Shared USM allocation failed!"); T expected = 0; for (int i = 0; i < WGSIZE; ++i) { - buf[i] = T(i); + buf[i] = T(i) / WGSIZE; expected = expected + buf[i]; } nd_range ndr{{SIZE}, {WGSIZE}}; @@ -186,7 +182,7 @@ template void test_vec() { assert(buf && "Shared USM allocation failed!"); T expected = 0; for (int i = 0; i < WGSIZE; ++i) { - buf[i] = ext::intel::math::sqrt(T(i)); + buf[i] = T(i) / WGSIZE; expected = expected + buf[i]; } nd_range ndr{{SIZE}, {WGSIZE}}; From 3ef139aab79f518bbbb0c45d639739ee1016df0a Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Tue, 5 Nov 2024 06:42:49 -0800 Subject: [PATCH 16/30] Update tests to conform to the spec --- sycl/test-e2e/WorkGroupMemory/basic_usage.cpp | 4 +-- .../reduction_free_function.cpp | 13 ++++++++-- .../WorkGroupMemory/reduction_lambda.cpp | 26 ++++++++++++++++--- .../WorkGroupMemory/api_misuse_test.cpp | 7 ++--- .../extensions/WorkGroupMemory/api_test.cpp | 2 +- 5 files changed, 41 insertions(+), 11 deletions(-) diff --git a/sycl/test-e2e/WorkGroupMemory/basic_usage.cpp b/sycl/test-e2e/WorkGroupMemory/basic_usage.cpp index 3a2fa5537e924..53c699e41ca8e 100644 --- a/sycl/test-e2e/WorkGroupMemory/basic_usage.cpp +++ b/sycl/test-e2e/WorkGroupMemory/basic_usage.cpp @@ -51,7 +51,7 @@ template void swap_scalar(T &a, T &b) { syclexp::work_group_memory temp{cgh}; sycl::nd_range<1> ndr{size, wgsize}; cgh.parallel_for(ndr, [=](sycl::nd_item<1> it) { - syclexp::work_group_memory temp2; + syclexp::work_group_memory temp2{ syclexp::indeterminate }; temp2 = temp; // temp and temp2 have the same underlying data assert(&temp2 == &temp); // check that both objects return same // underlying address after assignment @@ -271,7 +271,7 @@ void swap_array_2d(T (&a)[N][N], T (&b)[N][N], size_t batch_size) { const auto j = it.get_global_id()[1]; temp[i][j] = acc_a[i][j]; acc_a[i][j] = acc_b[i][j]; - syclexp::work_group_memory temp2; + syclexp::work_group_memory temp2{ syclexp::indeterminate }; temp2 = temp; acc_b[i][j] = temp2[i][j]; }); diff --git a/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp b/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp index a6388e8115bcf..01c848238115d 100644 --- a/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp +++ b/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp @@ -98,7 +98,7 @@ void sum_marray( T expected) { const auto it = sycl::ext::oneapi::this_work_item::get_nd_item<1>(); size_t local_id = it.get_local_id(); - constexpr float tolerance = 0.01f; + constexpr T tolerance = 0.0001; sycl::marray &data = mem; data[local_id] = buf[local_id]; group_barrier(it.get_group()); @@ -132,7 +132,7 @@ void sum_vec( T expected) { const auto it = sycl::ext::oneapi::this_work_item::get_nd_item<1>(); size_t local_id = it.get_local_id(); - constexpr float tolerance = 0.01f; + constexpr T tolerance = 0.0001; sycl::vec &data = mem; data[local_id] = buf[local_id]; group_barrier(it.get_group()); @@ -160,6 +160,9 @@ SUM_VEC(half); template void test_marray() { if (std::is_same_v && !q.get_device().has(sycl::aspect::fp16)) return; + if (std::is_same_v && !q.get_device().has(aspect::fp64)) + return; + constexpr size_t WGSIZE = VEC_SIZE; T *buf = malloc_shared(WGSIZE, q); assert(buf && "Shared USM allocation failed!"); @@ -189,6 +192,9 @@ template void test_marray() { template void test_vec() { if (std::is_same_v && !q.get_device().has(sycl::aspect::fp16)) return; + if (std::is_same_v && !q.get_device().has(aspect::fp64)) + return; + constexpr size_t WGSIZE = VEC_SIZE; T *buf = malloc_shared(WGSIZE, q); assert(buf && "Shared USM allocation failed!"); @@ -219,6 +225,9 @@ template void test(size_t SIZE, size_t WGSIZE, bool UseHelper) { if (std::is_same_v && !q.get_device().has(sycl::aspect::fp16)) return; + if (std::is_same_v && !q.get_device().has(aspect::fp64)) + return; + T *buf = malloc_shared(WGSIZE, q); assert(buf && "Shared USM allocation failed!"); T expected = 0; diff --git a/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp b/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp index 4ca632166a58b..bcfb7bbef9bea 100644 --- a/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp +++ b/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp @@ -19,9 +19,13 @@ context ctx = q.get_context(); constexpr size_t SIZE = 128; -template struct S { T val; }; +template struct S { + T val; +}; -template struct M { T val; }; +template struct M { + T val; +}; union U { S s; @@ -30,6 +34,11 @@ union U { template void test_struct(size_t SIZE, size_t WGSIZE) { + if (std::is_same_v && !q.get_device().has(aspect::fp16)) + return; + if (std::is_same_v && !q.get_device().has(aspect::fp64)) + return; + S *buf = malloc_shared>(WGSIZE, q); assert(buf && "Shared USM allocation failed!"); T expected = 0; @@ -39,7 +48,7 @@ void test_struct(size_t SIZE, size_t WGSIZE) { } nd_range ndr{{SIZE}, {WGSIZE}}; q.submit([&](sycl::handler &cgh) { - ext::oneapi::experimental::work_group_memory[]> mem { WGSIZE, cgh }; + ext::oneapi::experimental::work_group_memory[]> mem{WGSIZE, cgh}; ext::oneapi::experimental ::work_group_memory result{cgh}; cgh.parallel_for(ndr, [=](nd_item<> it) { size_t local_id = it.get_local_id(); @@ -103,6 +112,9 @@ template void test(size_t SIZE, size_t WGSIZE, bool UseHelper) { if (std::is_same_v && !q.get_device().has(sycl::aspect::fp16)) return; + if (std::is_same_v && !q.get_device().has(aspect::fp64)) + return; + T *buf = malloc_shared(WGSIZE, q); assert(buf && "Shared USM allocation failed!"); T expected = 0; @@ -139,6 +151,9 @@ void test(size_t SIZE, size_t WGSIZE, bool UseHelper) { template void test_marray() { if (std::is_same_v && !q.get_device().has(sycl::aspect::fp16)) return; + if (std::is_same_v && !q.get_device().has(aspect::fp64)) + return; + constexpr size_t WGSIZE = SIZE; T *buf = malloc_shared(WGSIZE, q); assert(buf && "Shared USM allocation failed!"); @@ -177,6 +192,9 @@ template void test_marray() { template void test_vec() { if (std::is_same_v && !q.get_device().has(sycl::aspect::fp16)) return; + if (std::is_same_v && !q.get_device().has(aspect::fp64)) + return; + constexpr size_t WGSIZE = 8; T *buf = malloc_shared(WGSIZE, q); assert(buf && "Shared USM allocation failed!"); @@ -212,6 +230,8 @@ template void test_vec() { template void test_atomic_ref() { assert(sizeof(T) == 4 || (sizeof(T) == 8 && q.get_device().has(aspect::atomic64))); + if (std::is_same_v && !q.get_device().has(aspect::fp64)) + return; constexpr size_t WGSIZE = 8; T *buf = malloc_shared(WGSIZE, q); assert(buf && "Shared USM allocation failed!"); diff --git a/sycl/test/extensions/WorkGroupMemory/api_misuse_test.cpp b/sycl/test/extensions/WorkGroupMemory/api_misuse_test.cpp index 1075dd9865860..0485afdc30c13 100644 --- a/sycl/test/extensions/WorkGroupMemory/api_misuse_test.cpp +++ b/sycl/test/extensions/WorkGroupMemory/api_misuse_test.cpp @@ -1,5 +1,4 @@ -// RUN: %clangxx -fsycl -fsyntax-only -ferror-limit=30 -Xclang -verify -Xclang -verify-ignore-unexpected=note,warning %s - +// RUN: %clangxx -fsycl -ferror-limit=30 -Xclang -verify -Xclang -verify-ignore-unexpected=note,warning %s #include using namespace sycl; @@ -8,7 +7,9 @@ namespace syclexp = sycl::ext::oneapi::experimental; queue Q; // This test verifies the type restrictions on the two non-default constructors -// of work group memory. +// of work group memory. It also checks that a diagnostic is emitted when +// instantiating a work group memory class with the properties set to anything +// other than empty_properties_t template void convertToDataT(DataT &data) {} diff --git a/sycl/test/extensions/WorkGroupMemory/api_test.cpp b/sycl/test/extensions/WorkGroupMemory/api_test.cpp index ce1515a8abbf5..9996b2371bb3c 100644 --- a/sycl/test/extensions/WorkGroupMemory/api_test.cpp +++ b/sycl/test/extensions/WorkGroupMemory/api_test.cpp @@ -25,7 +25,7 @@ template void test_constness() { Q.submit([&](sycl::handler &cgh) { nd_range<1> ndr{1, 1}; syclexp::work_group_memory - mem; // technically undefined behavior but this is a syntax only test. + mem{ syclexp::indeterminate }; cgh.parallel_for(ndr, [=](nd_item<1> it) { const auto mem1 = mem; // since mem1 is const, all of the following should succeed. From faa3382f1bcd5fc653254c26fb1321f4cc969751 Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Tue, 5 Nov 2024 06:47:02 -0800 Subject: [PATCH 17/30] Update tests to conform to the spec --- sycl/test/extensions/WorkGroupMemory/api_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test/extensions/WorkGroupMemory/api_test.cpp b/sycl/test/extensions/WorkGroupMemory/api_test.cpp index 9996b2371bb3c..05d69fc88d726 100644 --- a/sycl/test/extensions/WorkGroupMemory/api_test.cpp +++ b/sycl/test/extensions/WorkGroupMemory/api_test.cpp @@ -85,7 +85,7 @@ void test_helper(syclexp::work_group_memory mem) { } template void test() { - syclexp::work_group_memory mem; + syclexp::work_group_memory mem{ syclexp::indeterminate }; test_constness(); test_helper(mem); if constexpr (sizeof...(rest)) From 5c0c4b3fc0a88cb3c0ad807d1e3724b29b816ca6 Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Tue, 5 Nov 2024 07:12:44 -0800 Subject: [PATCH 18/30] Formatting changes --- sycl/test-e2e/WorkGroupMemory/basic_usage.cpp | 4 ++-- sycl/test/extensions/WorkGroupMemory/api_misuse_test.cpp | 7 +++---- sycl/test/extensions/WorkGroupMemory/api_test.cpp | 7 ++----- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/sycl/test-e2e/WorkGroupMemory/basic_usage.cpp b/sycl/test-e2e/WorkGroupMemory/basic_usage.cpp index 53c699e41ca8e..4dbc2073d7009 100644 --- a/sycl/test-e2e/WorkGroupMemory/basic_usage.cpp +++ b/sycl/test-e2e/WorkGroupMemory/basic_usage.cpp @@ -51,7 +51,7 @@ template void swap_scalar(T &a, T &b) { syclexp::work_group_memory temp{cgh}; sycl::nd_range<1> ndr{size, wgsize}; cgh.parallel_for(ndr, [=](sycl::nd_item<1> it) { - syclexp::work_group_memory temp2{ syclexp::indeterminate }; + syclexp::work_group_memory temp2{syclexp::indeterminate}; temp2 = temp; // temp and temp2 have the same underlying data assert(&temp2 == &temp); // check that both objects return same // underlying address after assignment @@ -271,7 +271,7 @@ void swap_array_2d(T (&a)[N][N], T (&b)[N][N], size_t batch_size) { const auto j = it.get_global_id()[1]; temp[i][j] = acc_a[i][j]; acc_a[i][j] = acc_b[i][j]; - syclexp::work_group_memory temp2{ syclexp::indeterminate }; + syclexp::work_group_memory temp2{syclexp::indeterminate}; temp2 = temp; acc_b[i][j] = temp2[i][j]; }); diff --git a/sycl/test/extensions/WorkGroupMemory/api_misuse_test.cpp b/sycl/test/extensions/WorkGroupMemory/api_misuse_test.cpp index 0485afdc30c13..1075dd9865860 100644 --- a/sycl/test/extensions/WorkGroupMemory/api_misuse_test.cpp +++ b/sycl/test/extensions/WorkGroupMemory/api_misuse_test.cpp @@ -1,4 +1,5 @@ -// RUN: %clangxx -fsycl -ferror-limit=30 -Xclang -verify -Xclang -verify-ignore-unexpected=note,warning %s +// RUN: %clangxx -fsycl -fsyntax-only -ferror-limit=30 -Xclang -verify -Xclang -verify-ignore-unexpected=note,warning %s + #include using namespace sycl; @@ -7,9 +8,7 @@ namespace syclexp = sycl::ext::oneapi::experimental; queue Q; // This test verifies the type restrictions on the two non-default constructors -// of work group memory. It also checks that a diagnostic is emitted when -// instantiating a work group memory class with the properties set to anything -// other than empty_properties_t +// of work group memory. template void convertToDataT(DataT &data) {} diff --git a/sycl/test/extensions/WorkGroupMemory/api_test.cpp b/sycl/test/extensions/WorkGroupMemory/api_test.cpp index 05d69fc88d726..c1adfac6c56f5 100644 --- a/sycl/test/extensions/WorkGroupMemory/api_test.cpp +++ b/sycl/test/extensions/WorkGroupMemory/api_test.cpp @@ -1,5 +1,3 @@ -// RUN: %clangxx -fsycl -fsyntax-only %s - #include #include #include @@ -24,8 +22,7 @@ template void convertToDataT(DataT &data) {} template void test_constness() { Q.submit([&](sycl::handler &cgh) { nd_range<1> ndr{1, 1}; - syclexp::work_group_memory - mem{ syclexp::indeterminate }; + syclexp::work_group_memory mem{syclexp::indeterminate}; cgh.parallel_for(ndr, [=](nd_item<1> it) { const auto mem1 = mem; // since mem1 is const, all of the following should succeed. @@ -85,7 +82,7 @@ void test_helper(syclexp::work_group_memory mem) { } template void test() { - syclexp::work_group_memory mem{ syclexp::indeterminate }; + syclexp::work_group_memory mem; test_constness(); test_helper(mem); if constexpr (sizeof...(rest)) From df3902fb9fbf48e66b73d247c8373c8b225b6989 Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Tue, 5 Nov 2024 07:17:34 -0800 Subject: [PATCH 19/30] Formatting changes --- sycl/test/extensions/WorkGroupMemory/api_test.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sycl/test/extensions/WorkGroupMemory/api_test.cpp b/sycl/test/extensions/WorkGroupMemory/api_test.cpp index c1adfac6c56f5..81f7f9d01293d 100644 --- a/sycl/test/extensions/WorkGroupMemory/api_test.cpp +++ b/sycl/test/extensions/WorkGroupMemory/api_test.cpp @@ -1,3 +1,4 @@ +// RUN: %clangxx -fsycl -fsyntax-only %s #include #include #include @@ -82,7 +83,7 @@ void test_helper(syclexp::work_group_memory mem) { } template void test() { - syclexp::work_group_memory mem; + syclexp::work_group_memory mem{syclexp::indeterminate}; test_constness(); test_helper(mem); if constexpr (sizeof...(rest)) From 61944fcff2e8447bc513e34269e5bc7a50ca3a83 Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Wed, 6 Nov 2024 11:53:43 -0500 Subject: [PATCH 20/30] Remove error limit from WorkGroupMemory test Co-authored-by: Steffen Larsen --- sycl/test/extensions/WorkGroupMemory/api_misuse_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test/extensions/WorkGroupMemory/api_misuse_test.cpp b/sycl/test/extensions/WorkGroupMemory/api_misuse_test.cpp index 1075dd9865860..bce664df72d3c 100644 --- a/sycl/test/extensions/WorkGroupMemory/api_misuse_test.cpp +++ b/sycl/test/extensions/WorkGroupMemory/api_misuse_test.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -fsyntax-only -ferror-limit=30 -Xclang -verify -Xclang -verify-ignore-unexpected=note,warning %s +// RUN: %clangxx -fsycl -fsyntax-only -ferror-limit=0 -Xclang -verify -Xclang -verify-ignore-unexpected=note,warning %s #include From bb63b54d018f6b9ff0783aa67ed0dedf79ad7dda Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Wed, 6 Nov 2024 09:01:35 -0800 Subject: [PATCH 21/30] Remove indeterminate change and add it in a separate PR --- .../sycl/ext/oneapi/experimental/work_group_memory.hpp | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp index 9fc5b1655eb53..c07523d0f3e84 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp @@ -36,9 +36,6 @@ class work_group_memory_impl { } // namespace detail namespace ext::oneapi::experimental { -struct indeterminate_t {}; -inline constexpr indeterminate_t indeterminate; - template class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory : sycl::detail::work_group_memory_impl { @@ -59,14 +56,7 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory value_type, access::address_space::local_space>::type *; public: -// Frontend requires special types to have a default constructor in device -// compilation mode in order to have a unified way of initializing an object and -// then calling init method on it. This is an implementation detail and not part -// of the spec. -#ifdef __SYCL_DEVICE_ONLY__ work_group_memory() = default; -#endif - work_group_memory(const indeterminate_t &) { check_props_empty(); } work_group_memory(const work_group_memory &rhs) = default; work_group_memory &operator=(const work_group_memory &rhs) = default; template Date: Wed, 6 Nov 2024 12:42:49 -0500 Subject: [PATCH 22/30] Apply suggested changes to api_misuse_test.cpp Co-authored-by: Steffen Larsen --- sycl/test/extensions/WorkGroupMemory/api_misuse_test.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/sycl/test/extensions/WorkGroupMemory/api_misuse_test.cpp b/sycl/test/extensions/WorkGroupMemory/api_misuse_test.cpp index bce664df72d3c..d79067d34079b 100644 --- a/sycl/test/extensions/WorkGroupMemory/api_misuse_test.cpp +++ b/sycl/test/extensions/WorkGroupMemory/api_misuse_test.cpp @@ -32,11 +32,9 @@ template void test_unbounded_arr() { }); } -template void test() { - test_bounded_arr(); - test_unbounded_arr(); - if constexpr (sizeof...(DataTs)) - test(); +template void test() { + (test_bounded_arr(), ...); + (test_unbounded_arr(), ...); } int main() { From 3068b89b80bf1be0dc8bbb42a17c37d3d7af77de Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Wed, 6 Nov 2024 09:56:25 -0800 Subject: [PATCH 23/30] Add unsupported tracker for cuda failures --- sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp b/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp index 01c848238115d..d89c37cf64a50 100644 --- a/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp +++ b/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp @@ -3,13 +3,8 @@ // RUN: %{run} %t.out // UNSUPPORTED: cuda -// UNSUPPORTED-INTENDED: The name mangling for free function kernels currently -// does not work with PTX. +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/16004 -// Usage of work group memory parameters in free function kernels is not yet -// implemented. -// TODO: Remove the following directive once -// https://github.com/intel/llvm/pull/15861 is merged. // XFAIL: * // XFAIL-TRACKER: https://github.com/intel/llvm/issues/15927 From de090f7d19abf4dc040376a4bdbeca87463e7941 Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Wed, 6 Nov 2024 21:39:54 -0800 Subject: [PATCH 24/30] Refactor tests by pulling out common functionality --- .../WorkGroupMemory/common_free_function.hpp | 128 ++++++++++++++++++ .../WorkGroupMemory/common_lambda.hpp | 31 +++++ .../reduction_free_function.cpp | 127 +---------------- .../WorkGroupMemory/reduction_lambda.cpp | 36 +---- 4 files changed, 162 insertions(+), 160 deletions(-) create mode 100644 sycl/test-e2e/WorkGroupMemory/common_free_function.hpp create mode 100644 sycl/test-e2e/WorkGroupMemory/common_lambda.hpp diff --git a/sycl/test-e2e/WorkGroupMemory/common_free_function.hpp b/sycl/test-e2e/WorkGroupMemory/common_free_function.hpp new file mode 100644 index 0000000000000..2bb314e534019 --- /dev/null +++ b/sycl/test-e2e/WorkGroupMemory/common_free_function.hpp @@ -0,0 +1,128 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +using namespace sycl; + +template +void sum_helper(sycl::ext::oneapi::experimental::work_group_memory mem, + sycl::ext::oneapi::experimental::work_group_memory ret, + size_t WGSIZE) { + for (int i = 0; i < WGSIZE; ++i) { + ret = ret + mem[i]; + } +} + +template +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (ext::oneapi::experimental::nd_range_kernel<1>)) +void sum(sycl::ext::oneapi::experimental::work_group_memory mem, T *buf, + sycl::ext::oneapi::experimental::work_group_memory result, + T expected, size_t WGSIZE, bool UseHelper) { + const auto it = sycl::ext::oneapi::this_work_item::get_nd_item<1>(); + size_t local_id = it.get_local_id(); + mem[local_id] = buf[local_id]; + group_barrier(it.get_group()); + if (it.get_group().leader()) { + result = 0; + if (!UseHelper) { + for (int i = 0; i < WGSIZE; ++i) { + result = result + mem[i]; + } + } else { + sum_helper(mem, result, WGSIZE); + } + assert(result == expected); + } +} + +// Explicit instantiations for the relevant data types. +// These are needed because free function kernel support is not fully +// implemented yet. +// TODO: Remove these once free function kernel support is fully there. +#define SUM(T) \ + template void sum( \ + sycl::ext::oneapi::experimental::work_group_memory mem, T * buf, \ + sycl::ext::oneapi::experimental::work_group_memory result, \ + T expected, size_t WGSIZE, bool UseHelper); + +SUM(int) +SUM(uint16_t) +SUM(half) +SUM(double) +SUM(float) +SUM(char) +SUM(bool) + +template +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (ext::oneapi::experimental::nd_range_kernel<1>)) +void sum_marray( + sycl::ext::oneapi::experimental::work_group_memory> mem, + T *buf, sycl::ext::oneapi::experimental::work_group_memory result, + T expected) { + const auto it = sycl::ext::oneapi::this_work_item::get_nd_item<1>(); + size_t local_id = it.get_local_id(); + constexpr T tolerance = 0.0001; + sycl::marray &data = mem; + data[local_id] = buf[local_id]; + group_barrier(it.get_group()); + if (it.get_group().leader()) { + result = 0; + for (int i = 0; i < 16; ++i) { + result = result + data[i]; + } + assert((result - expected) * (result - expected) <= tolerance); + } +} + +// Explicit instantiations for the relevant data types. +#define SUM_MARRAY(T) \ + template void sum_marray( \ + sycl::ext::oneapi::experimental::work_group_memory> \ + mem, \ + T * buf, sycl::ext::oneapi::experimental::work_group_memory result, \ + T expected); + +SUM_MARRAY(float); +SUM_MARRAY(double); +SUM_MARRAY(half); + +template +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (ext::oneapi::experimental::nd_range_kernel<1>)) +void sum_vec( + sycl::ext::oneapi::experimental::work_group_memory> mem, + T *buf, sycl::ext::oneapi::experimental::work_group_memory result, + T expected) { + const auto it = sycl::ext::oneapi::this_work_item::get_nd_item<1>(); + size_t local_id = it.get_local_id(); + constexpr T tolerance = 0.0001; + sycl::vec &data = mem; + data[local_id] = buf[local_id]; + group_barrier(it.get_group()); + if (it.get_group().leader()) { + result = 0; + for (int i = 0; i < 16; ++i) { + result = result + data[i]; + } + assert((result - expected) * (result - expected) <= tolerance); + } +} + +// Explicit instantiations for the relevant data types. +#define SUM_VEC(T) \ + template void sum_vec( \ + sycl::ext::oneapi::experimental::work_group_memory> \ + mem, \ + T * buf, sycl::ext::oneapi::experimental::work_group_memory result, \ + T expected); + +SUM_VEC(float); +SUM_VEC(double); +SUM_VEC(half); diff --git a/sycl/test-e2e/WorkGroupMemory/common_lambda.hpp b/sycl/test-e2e/WorkGroupMemory/common_lambda.hpp new file mode 100644 index 0000000000000..ee7c253960b1c --- /dev/null +++ b/sycl/test-e2e/WorkGroupMemory/common_lambda.hpp @@ -0,0 +1,31 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace sycl; + +template struct S { T val; }; + +template struct M { T val; }; + +union U { + S s; + M m; +}; + +template +void sum_helper(sycl::ext::oneapi::experimental::work_group_memory mem, + sycl::ext::oneapi::experimental::work_group_memory ret, + size_t WGSIZE) { + for (int i = 0; i < WGSIZE; ++i) { + ret = ret + mem[i]; + } +} diff --git a/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp b/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp index d89c37cf64a50..3f52e7e90da63 100644 --- a/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp +++ b/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp @@ -8,14 +8,7 @@ // XFAIL: * // XFAIL-TRACKER: https://github.com/intel/llvm/issues/15927 -#include -#include -#include -#include -#include -#include - -using namespace sycl; +#include "common_free_function.hpp" // Basic usage reduction test using free function kernels. // A global buffer is allocated using USM and it is passed to the kernel on the @@ -34,124 +27,6 @@ context ctx = q.get_context(); constexpr size_t SIZE = 128; constexpr size_t VEC_SIZE = 16; -template -void sum_helper(sycl::ext::oneapi::experimental::work_group_memory mem, - sycl::ext::oneapi::experimental::work_group_memory ret, - size_t WGSIZE) { - for (int i = 0; i < WGSIZE; ++i) { - ret = ret + mem[i]; - } -} - -template -SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( - (ext::oneapi::experimental::nd_range_kernel<1>)) -void sum(sycl::ext::oneapi::experimental::work_group_memory mem, T *buf, - sycl::ext::oneapi::experimental::work_group_memory result, - T expected, size_t WGSIZE, bool UseHelper) { - const auto it = sycl::ext::oneapi::this_work_item::get_nd_item<1>(); - size_t local_id = it.get_local_id(); - mem[local_id] = buf[local_id]; - group_barrier(it.get_group()); - if (it.get_group().leader()) { - result = 0; - if (!UseHelper) { - for (int i = 0; i < WGSIZE; ++i) { - result = result + mem[i]; - } - } else { - sum_helper(mem, result, WGSIZE); - } - assert(result == expected); - } -} - -// Explicit instantiations for the relevant data types. -// These are needed because free function kernel support is not fully -// implemented yet. -// TODO: Remove these once free function kernel support is fully there. -#define SUM(T) \ - template void sum( \ - sycl::ext::oneapi::experimental::work_group_memory mem, T * buf, \ - sycl::ext::oneapi::experimental::work_group_memory result, \ - T expected, size_t WGSIZE, bool UseHelper); - -SUM(int) -SUM(uint16_t) -SUM(half) -SUM(double) -SUM(float) -SUM(char) -SUM(bool) - -template -SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( - (ext::oneapi::experimental::nd_range_kernel<1>)) -void sum_marray( - sycl::ext::oneapi::experimental::work_group_memory> mem, - T *buf, sycl::ext::oneapi::experimental::work_group_memory result, - T expected) { - const auto it = sycl::ext::oneapi::this_work_item::get_nd_item<1>(); - size_t local_id = it.get_local_id(); - constexpr T tolerance = 0.0001; - sycl::marray &data = mem; - data[local_id] = buf[local_id]; - group_barrier(it.get_group()); - if (it.get_group().leader()) { - result = 0; - for (int i = 0; i < 16; ++i) { - result = result + data[i]; - } - assert((result - expected) * (result - expected) <= tolerance); - } -} - -// Explicit instantiations for the relevant data types. -#define SUM_MARRAY(T) \ - template void sum_marray( \ - sycl::ext::oneapi::experimental::work_group_memory> \ - mem, \ - T * buf, sycl::ext::oneapi::experimental::work_group_memory result, \ - T expected); - -SUM_MARRAY(float); -SUM_MARRAY(double); -SUM_MARRAY(half); - -template -SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( - (ext::oneapi::experimental::nd_range_kernel<1>)) -void sum_vec( - sycl::ext::oneapi::experimental::work_group_memory> mem, - T *buf, sycl::ext::oneapi::experimental::work_group_memory result, - T expected) { - const auto it = sycl::ext::oneapi::this_work_item::get_nd_item<1>(); - size_t local_id = it.get_local_id(); - constexpr T tolerance = 0.0001; - sycl::vec &data = mem; - data[local_id] = buf[local_id]; - group_barrier(it.get_group()); - if (it.get_group().leader()) { - result = 0; - for (int i = 0; i < 16; ++i) { - result = result + data[i]; - } - assert((result - expected) * (result - expected) <= tolerance); - } -} - -// Explicit instantiations for the relevant data types. -#define SUM_VEC(T) \ - template void sum_vec( \ - sycl::ext::oneapi::experimental::work_group_memory> \ - mem, \ - T * buf, sycl::ext::oneapi::experimental::work_group_memory result, \ - T expected); - -SUM_VEC(float); -SUM_VEC(double); -SUM_VEC(half); - template void test_marray() { if (std::is_same_v && !q.get_device().has(sycl::aspect::fp16)) return; diff --git a/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp b/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp index bcfb7bbef9bea..598f3507dc438 100644 --- a/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp +++ b/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp @@ -2,36 +2,13 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace sycl; +#include "common_lambda.hpp" queue q; context ctx = q.get_context(); constexpr size_t SIZE = 128; -template struct S { - T val; -}; - -template struct M { - T val; -}; - -union U { - S s; - M m; -}; - template void test_struct(size_t SIZE, size_t WGSIZE) { if (std::is_same_v && !q.get_device().has(aspect::fp16)) @@ -48,7 +25,7 @@ void test_struct(size_t SIZE, size_t WGSIZE) { } nd_range ndr{{SIZE}, {WGSIZE}}; q.submit([&](sycl::handler &cgh) { - ext::oneapi::experimental::work_group_memory[]> mem{WGSIZE, cgh}; + ext::oneapi::experimental::work_group_memory[]> mem { WGSIZE, cgh }; ext::oneapi::experimental ::work_group_memory result{cgh}; cgh.parallel_for(ndr, [=](nd_item<> it) { size_t local_id = it.get_local_id(); @@ -99,15 +76,6 @@ void test_union(size_t SIZE, size_t WGSIZE) { free(buf, q); } -template -void sum_helper(sycl::ext::oneapi::experimental::work_group_memory mem, - sycl::ext::oneapi::experimental::work_group_memory ret, - size_t WGSIZE) { - for (int i = 0; i < WGSIZE; ++i) { - ret = ret + mem[i]; - } -} - template void test(size_t SIZE, size_t WGSIZE, bool UseHelper) { if (std::is_same_v && !q.get_device().has(sycl::aspect::fp16)) From 07d1220c7ec6780b729a0ea5609a38a5762ad24e Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Wed, 6 Nov 2024 22:06:59 -0800 Subject: [PATCH 25/30] Print message about skipping tests when aspect not supported for a certain data type --- sycl/test-e2e/WorkGroupMemory/common.hpp | 53 +++++++++++++++++++ .../WorkGroupMemory/common_free_function.hpp | 10 +--- .../reduction_free_function.cpp | 12 ++--- .../WorkGroupMemory/reduction_lambda.cpp | 24 ++------- 4 files changed, 62 insertions(+), 37 deletions(-) create mode 100644 sycl/test-e2e/WorkGroupMemory/common.hpp diff --git a/sycl/test-e2e/WorkGroupMemory/common.hpp b/sycl/test-e2e/WorkGroupMemory/common.hpp new file mode 100644 index 0000000000000..069dc488fc774 --- /dev/null +++ b/sycl/test-e2e/WorkGroupMemory/common.hpp @@ -0,0 +1,53 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace sycl; + +template bool check_half_aspect(queue &q) { + if (std::is_same_v && + !q.get_device().has(sycl::aspect::fp16)) { + std::cout << "Device does not support fp16 aspect. Skipping all tests with " + "sycl::half type!" + << std::endl; + return false; + } + return true; +} + +template bool check_double_aspect(queue &q) { + if (std::is_same_v && !q.get_device().has(aspect::fp64)) { + std::cout << "Device does not support fp64 aspect. Skipping all tests with " + "double type!" + << std::endl; + return false; + } + return true; +} + +template struct S { T val; }; + +template struct M { T val; }; + +union U { + S s; + M m; +}; + +template +void sum_helper(sycl::ext::oneapi::experimental::work_group_memory mem, + sycl::ext::oneapi::experimental::work_group_memory ret, + size_t WGSIZE) { + for (int i = 0; i < WGSIZE; ++i) { + ret = ret + mem[i]; + } +} diff --git a/sycl/test-e2e/WorkGroupMemory/common_free_function.hpp b/sycl/test-e2e/WorkGroupMemory/common_free_function.hpp index 2bb314e534019..6d5e1eced6947 100644 --- a/sycl/test-e2e/WorkGroupMemory/common_free_function.hpp +++ b/sycl/test-e2e/WorkGroupMemory/common_free_function.hpp @@ -6,18 +6,10 @@ #include #include #include +#include "common.hpp" using namespace sycl; -template -void sum_helper(sycl::ext::oneapi::experimental::work_group_memory mem, - sycl::ext::oneapi::experimental::work_group_memory ret, - size_t WGSIZE) { - for (int i = 0; i < WGSIZE; ++i) { - ret = ret + mem[i]; - } -} - template SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( (ext::oneapi::experimental::nd_range_kernel<1>)) diff --git a/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp b/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp index 3f52e7e90da63..ff2aa8aa19385 100644 --- a/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp +++ b/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp @@ -28,9 +28,7 @@ constexpr size_t SIZE = 128; constexpr size_t VEC_SIZE = 16; template void test_marray() { - if (std::is_same_v && !q.get_device().has(sycl::aspect::fp16)) - return; - if (std::is_same_v && !q.get_device().has(aspect::fp64)) + if (!check_half_aspect(q) || !check_double_aspect(q)) return; constexpr size_t WGSIZE = VEC_SIZE; @@ -60,9 +58,7 @@ template void test_marray() { } template void test_vec() { - if (std::is_same_v && !q.get_device().has(sycl::aspect::fp16)) - return; - if (std::is_same_v && !q.get_device().has(aspect::fp64)) + if (!check_half_aspect(q) || !check_double_aspect(q)) return; constexpr size_t WGSIZE = VEC_SIZE; @@ -93,9 +89,7 @@ template void test_vec() { template void test(size_t SIZE, size_t WGSIZE, bool UseHelper) { - if (std::is_same_v && !q.get_device().has(sycl::aspect::fp16)) - return; - if (std::is_same_v && !q.get_device().has(aspect::fp64)) + if (!check_half_aspect(q) || !check_double_aspect(q)) return; T *buf = malloc_shared(WGSIZE, q); diff --git a/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp b/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp index 598f3507dc438..a3194eff99a5d 100644 --- a/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp +++ b/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp @@ -2,7 +2,7 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include "common_lambda.hpp" +#include "common.hpp" queue q; context ctx = q.get_context(); @@ -11,11 +11,8 @@ constexpr size_t SIZE = 128; template void test_struct(size_t SIZE, size_t WGSIZE) { - if (std::is_same_v && !q.get_device().has(aspect::fp16)) + if (!check_half_aspect(q) || !check_double_aspect(q)) return; - if (std::is_same_v && !q.get_device().has(aspect::fp64)) - return; - S *buf = malloc_shared>(WGSIZE, q); assert(buf && "Shared USM allocation failed!"); T expected = 0; @@ -78,11 +75,8 @@ void test_union(size_t SIZE, size_t WGSIZE) { template void test(size_t SIZE, size_t WGSIZE, bool UseHelper) { - if (std::is_same_v && !q.get_device().has(sycl::aspect::fp16)) - return; - if (std::is_same_v && !q.get_device().has(aspect::fp64)) + if (!check_half_aspect(q) || !check_double_aspect(q)) return; - T *buf = malloc_shared(WGSIZE, q); assert(buf && "Shared USM allocation failed!"); T expected = 0; @@ -117,11 +111,8 @@ void test(size_t SIZE, size_t WGSIZE, bool UseHelper) { } template void test_marray() { - if (std::is_same_v && !q.get_device().has(sycl::aspect::fp16)) - return; - if (std::is_same_v && !q.get_device().has(aspect::fp64)) + if (!check_half_aspect(q) || !check_double_aspect(q)) return; - constexpr size_t WGSIZE = SIZE; T *buf = malloc_shared(WGSIZE, q); assert(buf && "Shared USM allocation failed!"); @@ -158,11 +149,8 @@ template void test_marray() { } template void test_vec() { - if (std::is_same_v && !q.get_device().has(sycl::aspect::fp16)) - return; - if (std::is_same_v && !q.get_device().has(aspect::fp64)) + if (!check_half_aspect(q) || !check_double_aspect(q)) return; - constexpr size_t WGSIZE = 8; T *buf = malloc_shared(WGSIZE, q); assert(buf && "Shared USM allocation failed!"); @@ -198,8 +186,6 @@ template void test_vec() { template void test_atomic_ref() { assert(sizeof(T) == 4 || (sizeof(T) == 8 && q.get_device().has(aspect::atomic64))); - if (std::is_same_v && !q.get_device().has(aspect::fp64)) - return; constexpr size_t WGSIZE = 8; T *buf = malloc_shared(WGSIZE, q); assert(buf && "Shared USM allocation failed!"); From 7595770a1e2a01fdcc84e9bcd3da39ce3dd6e121 Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Fri, 15 Nov 2024 10:02:15 -0800 Subject: [PATCH 26/30] Add missing checks for aspect fp64 --- sycl/test-e2e/WorkGroupMemory/common_lambda.hpp | 10 +++++++--- sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp | 11 ++++++++--- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/sycl/test-e2e/WorkGroupMemory/common_lambda.hpp b/sycl/test-e2e/WorkGroupMemory/common_lambda.hpp index ee7c253960b1c..8fb10c23df011 100644 --- a/sycl/test-e2e/WorkGroupMemory/common_lambda.hpp +++ b/sycl/test-e2e/WorkGroupMemory/common_lambda.hpp @@ -12,9 +12,13 @@ using namespace sycl; -template struct S { T val; }; - -template struct M { T val; }; +template struct S { + T val; +}; + +template struct M { + T val; +}; union U { S s; diff --git a/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp b/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp index a3194eff99a5d..5759e86f17fe7 100644 --- a/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp +++ b/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp @@ -22,7 +22,7 @@ void test_struct(size_t SIZE, size_t WGSIZE) { } nd_range ndr{{SIZE}, {WGSIZE}}; q.submit([&](sycl::handler &cgh) { - ext::oneapi::experimental::work_group_memory[]> mem { WGSIZE, cgh }; + ext::oneapi::experimental::work_group_memory[]> mem{WGSIZE, cgh}; ext::oneapi::experimental ::work_group_memory result{cgh}; cgh.parallel_for(ndr, [=](nd_item<> it) { size_t local_id = it.get_local_id(); @@ -184,8 +184,13 @@ template void test_vec() { } template void test_atomic_ref() { - assert(sizeof(T) == 4 || - (sizeof(T) == 8 && q.get_device().has(aspect::atomic64))); + if (!(sizeof(T) == 4 || + (sizeof(T) == 8 && q.get_device().has(aspect::atomic64)))) { + std::cout << "Invalid type used with atomic_ref!\nSkipping the test!"; + return; + } + if (!check_half_aspect(q) || !check_double_aspect(q)) + return; constexpr size_t WGSIZE = 8; T *buf = malloc_shared(WGSIZE, q); assert(buf && "Shared USM allocation failed!"); From 67aa143770cc507cb2f99927037a6aa743853aa5 Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Fri, 15 Nov 2024 10:24:15 -0800 Subject: [PATCH 27/30] Fix formatting --- sycl/test-e2e/WorkGroupMemory/common.hpp | 8 ++++++-- .../WorkGroupMemory/common_free_function.hpp | 3 ++- sycl/test-e2e/WorkGroupMemory/common_lambda.hpp | 13 ------------- 3 files changed, 8 insertions(+), 16 deletions(-) diff --git a/sycl/test-e2e/WorkGroupMemory/common.hpp b/sycl/test-e2e/WorkGroupMemory/common.hpp index 069dc488fc774..64452745ede1b 100644 --- a/sycl/test-e2e/WorkGroupMemory/common.hpp +++ b/sycl/test-e2e/WorkGroupMemory/common.hpp @@ -34,9 +34,13 @@ template bool check_double_aspect(queue &q) { return true; } -template struct S { T val; }; +template struct S { + T val; +}; -template struct M { T val; }; +template struct M { + T val; +}; union U { S s; diff --git a/sycl/test-e2e/WorkGroupMemory/common_free_function.hpp b/sycl/test-e2e/WorkGroupMemory/common_free_function.hpp index 6d5e1eced6947..e13f50214593d 100644 --- a/sycl/test-e2e/WorkGroupMemory/common_free_function.hpp +++ b/sycl/test-e2e/WorkGroupMemory/common_free_function.hpp @@ -1,12 +1,13 @@ #pragma once +#include "common.hpp" +#include "common_lambda.hpp" #include #include #include #include #include #include -#include "common.hpp" using namespace sycl; diff --git a/sycl/test-e2e/WorkGroupMemory/common_lambda.hpp b/sycl/test-e2e/WorkGroupMemory/common_lambda.hpp index 8fb10c23df011..f5c8b6651ffcc 100644 --- a/sycl/test-e2e/WorkGroupMemory/common_lambda.hpp +++ b/sycl/test-e2e/WorkGroupMemory/common_lambda.hpp @@ -12,19 +12,6 @@ using namespace sycl; -template struct S { - T val; -}; - -template struct M { - T val; -}; - -union U { - S s; - M m; -}; - template void sum_helper(sycl::ext::oneapi::experimental::work_group_memory mem, sycl::ext::oneapi::experimental::work_group_memory ret, From f1d99fa378060d5a2b8af18a35b60cb9ac5c3442 Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Fri, 15 Nov 2024 11:28:53 -0800 Subject: [PATCH 28/30] Add check for empty properties --- sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp index 7a3b4918d5a20..c156c484f539d 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp @@ -71,7 +71,7 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory #endif public: - work_group_memory(const indeterminate_t &) {}; + work_group_memory(const indeterminate_t &) { check_props_empty(); }; work_group_memory(const work_group_memory &rhs) = default; work_group_memory &operator=(const work_group_memory &rhs) = default; template Date: Fri, 15 Nov 2024 11:52:24 -0800 Subject: [PATCH 29/30] Fix error in test logic --- sycl/test/extensions/WorkGroupMemory/empty_properties_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test/extensions/WorkGroupMemory/empty_properties_test.cpp b/sycl/test/extensions/WorkGroupMemory/empty_properties_test.cpp index 8464b29cb3752..9509140afde84 100644 --- a/sycl/test/extensions/WorkGroupMemory/empty_properties_test.cpp +++ b/sycl/test/extensions/WorkGroupMemory/empty_properties_test.cpp @@ -10,7 +10,7 @@ namespace syclexp = sycl::ext::oneapi::experimental; template void test_properties() { - // expected-error-re@sycl/ext/oneapi/experimental/work_group_memory.hpp:* 2{{static assertion failed due to requirement 'std::is_same_v<{{.*}}, sycl::ext::oneapi::experimental::properties>>'}} + // expected-error-re@sycl/ext/oneapi/experimental/work_group_memory.hpp:* 2{{static assertion failed due to requirement 'std::is_same_v<{{.*}}, sycl::ext::oneapi::experimental::properties<{{.*}}>>'}} syclexp::work_group_memory{syclexp::indeterminate}; if constexpr (sizeof...(PropertyListTs)) test_properties(); From 7ecab94d45b265458f8e42e28371b99ec557b551 Mon Sep 17 00:00:00 2001 From: Lorenc Bushi Date: Fri, 15 Nov 2024 11:54:33 -0800 Subject: [PATCH 30/30] Fix error in test logic --- sycl/test/extensions/WorkGroupMemory/empty_properties_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test/extensions/WorkGroupMemory/empty_properties_test.cpp b/sycl/test/extensions/WorkGroupMemory/empty_properties_test.cpp index 9509140afde84..3d11b6d4ecbef 100644 --- a/sycl/test/extensions/WorkGroupMemory/empty_properties_test.cpp +++ b/sycl/test/extensions/WorkGroupMemory/empty_properties_test.cpp @@ -10,7 +10,7 @@ namespace syclexp = sycl::ext::oneapi::experimental; template void test_properties() { - // expected-error-re@sycl/ext/oneapi/experimental/work_group_memory.hpp:* 2{{static assertion failed due to requirement 'std::is_same_v<{{.*}}, sycl::ext::oneapi::experimental::properties<{{.*}}>>'}} + // expected-error-re@sycl/ext/oneapi/experimental/work_group_memory.hpp:* 2{{static assertion failed due to requirement 'std::is_same_v<{{.*}}, sycl::ext::oneapi::experimental::properties>>'}} syclexp::work_group_memory{syclexp::indeterminate}; if constexpr (sizeof...(PropertyListTs)) test_properties();