diff --git a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp index 0be24c912907b..c156c484f539d 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp @@ -46,6 +46,15 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory using value_type = std::remove_all_extents_t; private: + // At the moment we do not have a way to set properties nor property values to + // set for work group memory. So, we check here for diagnostic purposes that + // the property list is empty. + // TODO: Remove this function and its occurrences in this file once properties + // have been created for work group memory. + void check_props_empty() const { + static_assert(std::is_same_v && + "Work group memory class does not support properties yet!"); + } using decoratedPtr = typename sycl::detail::DecoratedType< value_type, access::address_space::local_space>::type *; @@ -62,18 +71,22 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory #endif public: - work_group_memory(const indeterminate_t &) {}; + work_group_memory(const indeterminate_t &) { check_props_empty(); }; work_group_memory(const work_group_memory &rhs) = default; work_group_memory &operator=(const work_group_memory &rhs) = default; template >> work_group_memory(handler &) - : sycl::detail::work_group_memory_impl(sizeof(DataT)) {} + : sycl::detail::work_group_memory_impl(sizeof(DataT)) { + check_props_empty(); + } template >> work_group_memory(size_t num, handler &) : sycl::detail::work_group_memory_impl( - num * sizeof(std::remove_extent_t)) {} + num * sizeof(std::remove_extent_t)) { + check_props_empty(); + } template multi_ptr get_multi_ptr() const { diff --git a/sycl/test-e2e/WorkGroupMemory/swap_test.cpp b/sycl/test-e2e/WorkGroupMemory/basic_usage.cpp similarity index 88% rename from sycl/test-e2e/WorkGroupMemory/swap_test.cpp rename to sycl/test-e2e/WorkGroupMemory/basic_usage.cpp index 7552774edcbbf..4dbc2073d7009 100644 --- a/sycl/test-e2e/WorkGroupMemory/swap_test.cpp +++ b/sycl/test-e2e/WorkGroupMemory/basic_usage.cpp @@ -5,6 +5,8 @@ #include #include #include +#include + namespace syclexp = sycl::ext::oneapi::experimental; sycl::queue q; @@ -50,7 +52,9 @@ template void swap_scalar(T &a, T &b) { sycl::nd_range<1> ndr{size, wgsize}; cgh.parallel_for(ndr, [=](sycl::nd_item<1> it) { syclexp::work_group_memory temp2{syclexp::indeterminate}; - temp2 = temp; // temp and temp2 have the same underlying data + temp2 = temp; // temp and temp2 have the same underlying data + assert(&temp2 == &temp); // check that both objects return same + // underlying address after assignment temp = acc_a[0]; acc_a[0] = acc_b[0]; acc_b[0] = temp2; // safe to use temp2 @@ -86,6 +90,8 @@ template void swap_scalar(T &a, T &b) { assert(a == old_b && b == old_a && "Incorrect swap!"); // Same as above but instead of using multi_ptr, use address-of operator. + // Also verify that get_multi_ptr() returns the same address as address-of + // operator. { sycl::buffer buf_a{&a, 1}; sycl::buffer buf_b{&b, 1}; @@ -96,6 +102,7 @@ template void swap_scalar(T &a, T &b) { syclexp::work_group_memory temp2{cgh}; sycl::nd_range<1> ndr{size, wgsize}; cgh.parallel_for(ndr, [=](sycl::nd_item<> it) { + assert(&temp == temp.get_multi_ptr().get()); temp = acc_a[0]; acc_a[0] = acc_b[0]; temp2 = *(&temp); @@ -294,6 +301,8 @@ void swap_array_2d(T (&a)[N][N], T (&b)[N][N], size_t batch_size) { temp[i][j] = acc_a[i][j]; acc_a[i][j] = acc_b[i][j]; syclexp::work_group_memory temp2{temp}; + assert(&temp2 == &temp); // check both objects return same underlying + // address after copy construction. acc_b[i][j] = temp2[i][j]; }); }); @@ -342,28 +351,28 @@ void swap_array_2d(T (&a)[N][N], T (&b)[N][N], size_t batch_size) { // so we can verify that each work-item sees the value written by its leader. // The test also is a sanity check that different work groups get different // work group memory locations as otherwise we'd have data races. -void coherency(size_t size, size_t wgsize) { +template void coherency(size_t size, size_t wgsize) { q.submit([&](sycl::handler &cgh) { - syclexp::work_group_memory data{cgh}; + syclexp::work_group_memory data{cgh}; sycl::nd_range<1> ndr{size, wgsize}; cgh.parallel_for(ndr, [=](sycl::nd_item<1> it) { if (it.get_group().leader()) { - data = it.get_global_id() / wgsize; + data = T(it.get_global_id() / wgsize); } sycl::group_barrier(it.get_group()); - assert(data == it.get_global_id() / wgsize); + assert(data == T(it.get_global_id() / wgsize)); }); }); } constexpr size_t N = 32; -int main() { - int intarr1[N][N]; - int intarr2[N][N]; +template void test() { + T intarr1[N][N]; + T intarr2[N][N]; for (int i = 0; i < N; ++i) { for (int j = 0; j < N; ++j) { - intarr1[i][j] = i + j; - intarr2[i][j] = i * j; + intarr1[i][j] = T(i) + T(j); + intarr2[i][j] = T(i) * T(j); } } for (int i = 0; i < N; ++i) { @@ -373,10 +382,37 @@ int main() { swap_array_1d(intarr1[i], intarr2[i], 8); } swap_array_2d(intarr1, intarr2, 8); - coherency(N, N / 2); - coherency(N, N / 4); - coherency(N, N / 8); - coherency(N, N / 16); - coherency(N, N / 32); + coherency(N, N / 2); + coherency(N, N / 4); + coherency(N, N / 8); + coherency(N, N / 16); + coherency(N, N / 32); +} + +template void test_ptr() { + T arr1[N][N]; + T arr2[N][N]; + for (int i = 0; i < N; ++i) { + for (int j = 0; j < N; ++j) { + swap_scalar(arr1[i][j], arr2[i][j]); + } + swap_array_1d(arr1[i], arr2[i], 8); + } + swap_array_2d(arr1, arr2, 8); +} + +int main() { + test(); + test(); + test(); + if (q.get_device().has(sycl::aspect::fp16)) + test(); + test_ptr(); + test_ptr(); + test_ptr(); + test_ptr(); + if (q.get_device().has(sycl::aspect::fp16)) + test_ptr(); + test_ptr(); return 0; } diff --git a/sycl/test-e2e/WorkGroupMemory/common.hpp b/sycl/test-e2e/WorkGroupMemory/common.hpp new file mode 100644 index 0000000000000..64452745ede1b --- /dev/null +++ b/sycl/test-e2e/WorkGroupMemory/common.hpp @@ -0,0 +1,57 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace sycl; + +template bool check_half_aspect(queue &q) { + if (std::is_same_v && + !q.get_device().has(sycl::aspect::fp16)) { + std::cout << "Device does not support fp16 aspect. Skipping all tests with " + "sycl::half type!" + << std::endl; + return false; + } + return true; +} + +template bool check_double_aspect(queue &q) { + if (std::is_same_v && !q.get_device().has(aspect::fp64)) { + std::cout << "Device does not support fp64 aspect. Skipping all tests with " + "double type!" + << std::endl; + return false; + } + return true; +} + +template struct S { + T val; +}; + +template struct M { + T val; +}; + +union U { + S s; + M m; +}; + +template +void sum_helper(sycl::ext::oneapi::experimental::work_group_memory mem, + sycl::ext::oneapi::experimental::work_group_memory ret, + size_t WGSIZE) { + for (int i = 0; i < WGSIZE; ++i) { + ret = ret + mem[i]; + } +} diff --git a/sycl/test-e2e/WorkGroupMemory/common_free_function.hpp b/sycl/test-e2e/WorkGroupMemory/common_free_function.hpp new file mode 100644 index 0000000000000..e13f50214593d --- /dev/null +++ b/sycl/test-e2e/WorkGroupMemory/common_free_function.hpp @@ -0,0 +1,121 @@ +#pragma once + +#include "common.hpp" +#include "common_lambda.hpp" +#include +#include +#include +#include +#include +#include + +using namespace sycl; + +template +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (ext::oneapi::experimental::nd_range_kernel<1>)) +void sum(sycl::ext::oneapi::experimental::work_group_memory mem, T *buf, + sycl::ext::oneapi::experimental::work_group_memory result, + T expected, size_t WGSIZE, bool UseHelper) { + const auto it = sycl::ext::oneapi::this_work_item::get_nd_item<1>(); + size_t local_id = it.get_local_id(); + mem[local_id] = buf[local_id]; + group_barrier(it.get_group()); + if (it.get_group().leader()) { + result = 0; + if (!UseHelper) { + for (int i = 0; i < WGSIZE; ++i) { + result = result + mem[i]; + } + } else { + sum_helper(mem, result, WGSIZE); + } + assert(result == expected); + } +} + +// Explicit instantiations for the relevant data types. +// These are needed because free function kernel support is not fully +// implemented yet. +// TODO: Remove these once free function kernel support is fully there. +#define SUM(T) \ + template void sum( \ + sycl::ext::oneapi::experimental::work_group_memory mem, T * buf, \ + sycl::ext::oneapi::experimental::work_group_memory result, \ + T expected, size_t WGSIZE, bool UseHelper); + +SUM(int) +SUM(uint16_t) +SUM(half) +SUM(double) +SUM(float) +SUM(char) +SUM(bool) + +template +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (ext::oneapi::experimental::nd_range_kernel<1>)) +void sum_marray( + sycl::ext::oneapi::experimental::work_group_memory> mem, + T *buf, sycl::ext::oneapi::experimental::work_group_memory result, + T expected) { + const auto it = sycl::ext::oneapi::this_work_item::get_nd_item<1>(); + size_t local_id = it.get_local_id(); + constexpr T tolerance = 0.0001; + sycl::marray &data = mem; + data[local_id] = buf[local_id]; + group_barrier(it.get_group()); + if (it.get_group().leader()) { + result = 0; + for (int i = 0; i < 16; ++i) { + result = result + data[i]; + } + assert((result - expected) * (result - expected) <= tolerance); + } +} + +// Explicit instantiations for the relevant data types. +#define SUM_MARRAY(T) \ + template void sum_marray( \ + sycl::ext::oneapi::experimental::work_group_memory> \ + mem, \ + T * buf, sycl::ext::oneapi::experimental::work_group_memory result, \ + T expected); + +SUM_MARRAY(float); +SUM_MARRAY(double); +SUM_MARRAY(half); + +template +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (ext::oneapi::experimental::nd_range_kernel<1>)) +void sum_vec( + sycl::ext::oneapi::experimental::work_group_memory> mem, + T *buf, sycl::ext::oneapi::experimental::work_group_memory result, + T expected) { + const auto it = sycl::ext::oneapi::this_work_item::get_nd_item<1>(); + size_t local_id = it.get_local_id(); + constexpr T tolerance = 0.0001; + sycl::vec &data = mem; + data[local_id] = buf[local_id]; + group_barrier(it.get_group()); + if (it.get_group().leader()) { + result = 0; + for (int i = 0; i < 16; ++i) { + result = result + data[i]; + } + assert((result - expected) * (result - expected) <= tolerance); + } +} + +// Explicit instantiations for the relevant data types. +#define SUM_VEC(T) \ + template void sum_vec( \ + sycl::ext::oneapi::experimental::work_group_memory> \ + mem, \ + T * buf, sycl::ext::oneapi::experimental::work_group_memory result, \ + T expected); + +SUM_VEC(float); +SUM_VEC(double); +SUM_VEC(half); diff --git a/sycl/test-e2e/WorkGroupMemory/common_lambda.hpp b/sycl/test-e2e/WorkGroupMemory/common_lambda.hpp new file mode 100644 index 0000000000000..f5c8b6651ffcc --- /dev/null +++ b/sycl/test-e2e/WorkGroupMemory/common_lambda.hpp @@ -0,0 +1,22 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace sycl; + +template +void sum_helper(sycl::ext::oneapi::experimental::work_group_memory mem, + sycl::ext::oneapi::experimental::work_group_memory ret, + size_t WGSIZE) { + for (int i = 0; i < WGSIZE; ++i) { + ret = ret + mem[i]; + } +} diff --git a/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp b/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp new file mode 100644 index 0000000000000..ff2aa8aa19385 --- /dev/null +++ b/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp @@ -0,0 +1,132 @@ +// REQUIRES: aspect-usm_shared_allocations +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +// UNSUPPORTED: cuda +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/16004 + +// XFAIL: * +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/15927 + +#include "common_free_function.hpp" + +// Basic usage reduction test using free function kernels. +// A global buffer is allocated using USM and it is passed to the kernel on the +// device. On the device, a work group memory buffer is allocated and each item +// copies the correspondng element of the global buffer to the corresponding +// element of the work group memory buffer using its lcoal index. The leader of +// every work-group, after waiting for every work-item to complete, then sums +// these values storing the result in another work group memory object. Finally, +// each work item then verifies that the sum of the work group memory elements +// equals the sum of the global buffer elements. This is repeated for several +// data types. + +queue q; +context ctx = q.get_context(); + +constexpr size_t SIZE = 128; +constexpr size_t VEC_SIZE = 16; + +template void test_marray() { + if (!check_half_aspect(q) || !check_double_aspect(q)) + return; + + constexpr size_t WGSIZE = VEC_SIZE; + T *buf = malloc_shared(WGSIZE, q); + assert(buf && "Shared USM allocation failed!"); + T expected = 0; + for (int i = 0; i < WGSIZE; ++i) { + buf[i] = T(i) / WGSIZE; + expected = expected + buf[i]; + } + nd_range ndr{{SIZE}, {WGSIZE}}; +#ifndef __SYCL_DEVICE_ONLY__ + // Get the kernel object for the "mykernel" kernel. + auto Bundle = get_kernel_bundle(ctx); + kernel_id sum_id = ext::oneapi::experimental::get_kernel_id>(); + kernel k_sum = Bundle.get_kernel(sum_id); + q.submit([&](sycl::handler &cgh) { + ext::oneapi::experimental::work_group_memory> mem{cgh}; + ext::oneapi::experimental ::work_group_memory result{cgh}; + cgh.set_args(mem, buf, result, expected); + cgh.parallel_for(ndr, k_sum); + }).wait(); +#endif // __SYCL_DEVICE_ONLY + free(buf, q); + if constexpr (sizeof...(Ts)) + test_marray(); +} + +template void test_vec() { + if (!check_half_aspect(q) || !check_double_aspect(q)) + return; + + constexpr size_t WGSIZE = VEC_SIZE; + T *buf = malloc_shared(WGSIZE, q); + assert(buf && "Shared USM allocation failed!"); + T expected = 0; + for (int i = 0; i < WGSIZE; ++i) { + buf[i] = T(i) / WGSIZE; + expected = expected + buf[i]; + } + nd_range ndr{{SIZE}, {WGSIZE}}; +#ifndef __SYCL_DEVICE_ONLY__ + // Get the kernel object for the "mykernel" kernel. + auto Bundle = get_kernel_bundle(ctx); + kernel_id sum_id = ext::oneapi::experimental::get_kernel_id>(); + kernel k_sum = Bundle.get_kernel(sum_id); + q.submit([&](sycl::handler &cgh) { + ext::oneapi::experimental::work_group_memory> mem{cgh}; + ext::oneapi::experimental ::work_group_memory result{cgh}; + cgh.set_args(mem, buf, result, expected); + cgh.parallel_for(ndr, k_sum); + }).wait(); +#endif // __SYCL_DEVICE_ONLY + free(buf, q); + if constexpr (sizeof...(Ts)) + test_vec(); +} + +template +void test(size_t SIZE, size_t WGSIZE, bool UseHelper) { + if (!check_half_aspect(q) || !check_double_aspect(q)) + return; + + T *buf = malloc_shared(WGSIZE, q); + assert(buf && "Shared USM allocation failed!"); + T expected = 0; + for (int i = 0; i < WGSIZE; ++i) { + buf[i] = T(i); + expected = expected + buf[i]; + } + nd_range ndr{{SIZE}, {WGSIZE}}; + // The following ifndef is required due to a number of limitations of free + // function kernels. See CMPLRLLVM-61498. + // TODO: Remove it once these limitations are no longer there. +#ifndef __SYCL_DEVICE_ONLY__ + // Get the kernel object for the "mykernel" kernel. + auto Bundle = get_kernel_bundle(ctx); + kernel_id sum_id = ext::oneapi::experimental::get_kernel_id>(); + kernel k_sum = Bundle.get_kernel(sum_id); + q.submit([&](sycl::handler &cgh) { + ext::oneapi::experimental::work_group_memory mem{WGSIZE, cgh}; + ext::oneapi::experimental ::work_group_memory result{cgh}; + cgh.set_args(mem, buf, result, expected, WGSIZE, UseHelper); + cgh.parallel_for(ndr, k_sum); + }).wait(); + +#endif // __SYCL_DEVICE_ONLY + free(buf, q); + if constexpr (sizeof...(Ts)) + test(SIZE, WGSIZE, UseHelper); +} + +int main() { + test(SIZE, SIZE, true /* UseHelper */); + test(SIZE, SIZE, false); + test(SIZE, SIZE / 2, false); + test(SIZE, SIZE / 4, false); + test_marray(); + test_vec(); + return 0; +} diff --git a/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp b/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp new file mode 100644 index 0000000000000..5759e86f17fe7 --- /dev/null +++ b/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp @@ -0,0 +1,242 @@ +// REQUIRES: aspect-usm_shared_allocations +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include "common.hpp" + +queue q; +context ctx = q.get_context(); + +constexpr size_t SIZE = 128; + +template +void test_struct(size_t SIZE, size_t WGSIZE) { + if (!check_half_aspect(q) || !check_double_aspect(q)) + return; + S *buf = malloc_shared>(WGSIZE, q); + assert(buf && "Shared USM allocation failed!"); + T expected = 0; + for (int i = 0; i < WGSIZE; ++i) { + buf[i].val = T(i); + expected = expected + buf[i].val; + } + nd_range ndr{{SIZE}, {WGSIZE}}; + q.submit([&](sycl::handler &cgh) { + ext::oneapi::experimental::work_group_memory[]> mem{WGSIZE, cgh}; + ext::oneapi::experimental ::work_group_memory result{cgh}; + cgh.parallel_for(ndr, [=](nd_item<> it) { + size_t local_id = it.get_local_id(); + mem[local_id] = buf[local_id]; + group_barrier(it.get_group()); + if (it.get_group().leader()) { + result = 0; + for (int i = 0; i < WGSIZE; ++i) { + result = result + mem[i].val; + } + assert(result == expected); + } + }); + }).wait(); + free(buf, q); + if constexpr (sizeof...(Ts)) + test_struct(SIZE, WGSIZE); +} + +void test_union(size_t SIZE, size_t WGSIZE) { + U *buf = malloc_shared(WGSIZE, q); + assert(buf && "Shared USM allocation failed!"); + int expected = 0; + for (int i = 0; i < WGSIZE; ++i) { + if (i % 2) + buf[i].s = S{i}; + else + buf[i].m = M{i}; + expected = expected + (i % 2) ? buf[i].s.val : buf[i].m.val; + } + nd_range ndr{{SIZE}, {WGSIZE}}; + q.submit([&](sycl::handler &cgh) { + ext::oneapi::experimental::work_group_memory mem{WGSIZE, cgh}; + ext::oneapi::experimental::work_group_memory result{cgh}; + cgh.parallel_for(ndr, [=](nd_item<> it) { + size_t local_id = it.get_local_id(); + mem[local_id] = buf[local_id]; + group_barrier(it.get_group()); + if (it.get_group().leader()) { + result = 0; + for (int i = 0; i < WGSIZE; ++i) { + result = result + (i % 2) ? mem[i].s.val : mem[i].m.val; + } + assert(result == expected); + } + }); + }).wait(); + free(buf, q); +} + +template +void test(size_t SIZE, size_t WGSIZE, bool UseHelper) { + if (!check_half_aspect(q) || !check_double_aspect(q)) + return; + T *buf = malloc_shared(WGSIZE, q); + assert(buf && "Shared USM allocation failed!"); + T expected = 0; + for (int i = 0; i < WGSIZE; ++i) { + buf[i] = T(i); + expected = expected + buf[i]; + } + nd_range ndr{{SIZE}, {WGSIZE}}; + q.submit([&](sycl::handler &cgh) { + ext::oneapi::experimental::work_group_memory mem{WGSIZE, cgh}; + ext::oneapi::experimental ::work_group_memory result{cgh}; + cgh.parallel_for(ndr, [=](nd_item<> it) { + size_t local_id = it.get_local_id(); + mem[local_id] = buf[local_id]; + group_barrier(it.get_group()); + if (it.get_group().leader()) { + result = 0; + if (!UseHelper) { + for (int i = 0; i < WGSIZE; ++i) { + result = result + mem[i]; + } + } else { + sum_helper(mem, result, WGSIZE); + } + assert(result == expected); + } + }); + }).wait(); + free(buf, q); + if constexpr (sizeof...(Ts)) + test(SIZE, WGSIZE, UseHelper); +} + +template void test_marray() { + if (!check_half_aspect(q) || !check_double_aspect(q)) + return; + constexpr size_t WGSIZE = SIZE; + T *buf = malloc_shared(WGSIZE, q); + assert(buf && "Shared USM allocation failed!"); + T expected = 0; + for (int i = 0; i < WGSIZE; ++i) { + buf[i] = T(i) / WGSIZE; + expected = expected + buf[i]; + } + nd_range ndr{{SIZE}, {WGSIZE}}; + q.submit([&](sycl::handler &cgh) { + ext::oneapi::experimental::work_group_memory> mem{cgh}; + ext::oneapi::experimental ::work_group_memory result{cgh}; + cgh.parallel_for(ndr, [=](nd_item<> it) { + size_t local_id = it.get_local_id(); + constexpr T tolerance = 0.0001; + // User-defined conversion from work group memory to underlying type is + // not applied during member access calls so we have to explicitly + // convert to the value_type ourselves. + marray &data = mem; + data[local_id] = buf[local_id]; + group_barrier(it.get_group()); + if (it.get_group().leader()) { + result = 0; + for (int i = 0; i < WGSIZE; ++i) { + result = result + data[i]; + } + assert((result - expected) * (result - expected) <= tolerance); + } + }); + }).wait(); + free(buf, q); + if constexpr (sizeof...(Ts)) + test_marray(); +} + +template void test_vec() { + if (!check_half_aspect(q) || !check_double_aspect(q)) + return; + constexpr size_t WGSIZE = 8; + T *buf = malloc_shared(WGSIZE, q); + assert(buf && "Shared USM allocation failed!"); + T expected = 0; + for (int i = 0; i < WGSIZE; ++i) { + buf[i] = T(i) / WGSIZE; + expected = expected + buf[i]; + } + nd_range ndr{{SIZE}, {WGSIZE}}; + q.submit([&](sycl::handler &cgh) { + ext::oneapi::experimental::work_group_memory> mem{cgh}; + ext::oneapi::experimental ::work_group_memory result{cgh}; + cgh.parallel_for(ndr, [=](nd_item<> it) { + size_t local_id = it.get_local_id(); + constexpr T tolerance = 0.0001; + vec &data = mem; + data[local_id] = buf[local_id]; + group_barrier(it.get_group()); + if (it.get_group().leader()) { + result = 0; + for (int i = 0; i < WGSIZE; ++i) { + result = result + data[i]; + } + assert((result - expected) * (result - expected) <= tolerance); + } + }); + }).wait(); + free(buf, q); + if constexpr (sizeof...(Ts)) + test_vec(); +} + +template void test_atomic_ref() { + if (!(sizeof(T) == 4 || + (sizeof(T) == 8 && q.get_device().has(aspect::atomic64)))) { + std::cout << "Invalid type used with atomic_ref!\nSkipping the test!"; + return; + } + if (!check_half_aspect(q) || !check_double_aspect(q)) + return; + constexpr size_t WGSIZE = 8; + T *buf = malloc_shared(WGSIZE, q); + assert(buf && "Shared USM allocation failed!"); + T expected = 0; + for (int i = 0; i < WGSIZE; ++i) { + buf[i] = T(i); + expected = expected + buf[i]; + } + nd_range ndr{{SIZE}, {WGSIZE}}; + q.submit([&](sycl::handler &cgh) { + ext::oneapi::experimental::work_group_memory mem{WGSIZE, cgh}; + ext::oneapi::experimental::work_group_memory result{cgh}; + cgh.parallel_for(ndr, [=](nd_item<> it) { + size_t local_id = it.get_local_id(); + mem[local_id] = buf[local_id]; + atomic_ref + atomic_val{result}; + if (it.get_group().leader()) { + atomic_val.store(0); + } + group_barrier(it.get_group()); + atomic_val += mem[local_id]; + group_barrier(it.get_group()); + assert(atomic_val.load() == expected); + }); + }).wait(); + free(buf, q); + if constexpr (sizeof...(Ts)) + test_atomic_ref(); +} + +int main() { + test(SIZE, SIZE /* WorkGroupSize */, + true /* UseHelper */); + test(SIZE, SIZE, false); + test(SIZE, SIZE / 2, false); + test(SIZE, SIZE / 4, false); + test(SIZE, 1, false); + test(SIZE, 2, true); + test_marray(); + test_vec(); + test_atomic_ref(); + test_struct(SIZE, 4); + test_union(SIZE, SIZE); + test_union(SIZE, SIZE / 2); + test_union(SIZE, 1); + test_union(SIZE, 2); + return 0; +} diff --git a/sycl/test/extensions/WorkGroupMemory/api_misuse_test.cpp b/sycl/test/extensions/WorkGroupMemory/api_misuse_test.cpp new file mode 100644 index 0000000000000..d79067d34079b --- /dev/null +++ b/sycl/test/extensions/WorkGroupMemory/api_misuse_test.cpp @@ -0,0 +1,43 @@ +// RUN: %clangxx -fsycl -fsyntax-only -ferror-limit=0 -Xclang -verify -Xclang -verify-ignore-unexpected=note,warning %s + +#include + +using namespace sycl; +namespace syclexp = sycl::ext::oneapi::experimental; + +queue Q; + +// This test verifies the type restrictions on the two non-default constructors +// of work group memory. + +template void convertToDataT(DataT &data) {} + +template void test_bounded_arr() { + Q.submit([&](sycl::handler &cgh) { + nd_range<1> ndr{1, 1}; + // expected-error-re@+1 5{{no matching constructor for initialization of 'syclexp::work_group_memory<{{.*}}>'}} + syclexp::work_group_memory mem{1, cgh}; + // expected-error@+1 5{{no viable overloaded '='}} + cgh.parallel_for(ndr, [=](nd_item<1> it) { mem = {DataT{}}; }); + }); +} + +template void test_unbounded_arr() { + Q.submit([&](sycl::handler &cgh) { + nd_range<1> ndr{1, 1}; + // expected-error-re@+1 5{{no matching constructor for initialization of 'syclexp::work_group_memory<{{.*}}>'}} + syclexp::work_group_memory mem{cgh}; + // expected-error@+1 5{{no viable overloaded '='}} + cgh.parallel_for(ndr, [=](nd_item<1> it) { mem = {DataT{}}; }); + }); +} + +template void test() { + (test_bounded_arr(), ...); + (test_unbounded_arr(), ...); +} + +int main() { + test(); + return 0; +} diff --git a/sycl/test/extensions/WorkGroupMemory/api_test.cpp b/sycl/test/extensions/WorkGroupMemory/api_test.cpp new file mode 100644 index 0000000000000..81f7f9d01293d --- /dev/null +++ b/sycl/test/extensions/WorkGroupMemory/api_test.cpp @@ -0,0 +1,102 @@ +// RUN: %clangxx -fsycl -fsyntax-only %s +#include +#include +#include + +using namespace sycl; +namespace syclexp = sycl::ext::oneapi::experimental; + +queue Q; + +struct S { + int a; + char b; +}; + +union U { + int a; + char b; +}; + +template void convertToDataT(DataT &data) {} + +template void test_constness() { + Q.submit([&](sycl::handler &cgh) { + nd_range<1> ndr{1, 1}; + syclexp::work_group_memory mem{syclexp::indeterminate}; + cgh.parallel_for(ndr, [=](nd_item<1> it) { + const auto mem1 = mem; + // since mem1 is const, all of the following should succeed. + if constexpr (!std::is_array_v) + mem1 = DataT{}; + convertToDataT(mem1); + const auto *ptr = &mem1; + const auto &mptr = mem1.template get_multi_ptr<>(); + }); + }); +} + +template +void test_helper(syclexp::work_group_memory mem) { + static_assert( + std::is_same_v::value_type, + std::remove_all_extents_t>); + syclexp::work_group_memory dummy{mem}; + mem = dummy; + Q.submit([&](sycl::handler &cgh) { + if constexpr (sycl::detail::is_unbounded_array_v) + mem = syclexp::work_group_memory{1, cgh}; + else + mem = syclexp::work_group_memory{cgh}; + nd_range<1> ndr{1, 1}; + cgh.parallel_for(ndr, [=](nd_item<1> it) { + convertToDataT(mem); + if constexpr (!std::is_array_v) + mem = DataT{}; + static_assert( + std::is_same_v< + multi_ptr::value_type, + access::address_space::local_space, + access::decorated::no>, + decltype(mem.template get_multi_ptr())>); + static_assert( + std::is_same_v< + multi_ptr::value_type, + access::address_space::local_space, + access::decorated::no>, + decltype(mem.template get_multi_ptr<>())>); + static_assert( + std::is_same_v< + multi_ptr::value_type, + access::address_space::local_space, + access::decorated::no>, + decltype(mem.template get_multi_ptr())>); + + static_assert( + std::is_same_v< + multi_ptr::value_type, + access::address_space::local_space, + access::decorated::yes>, + decltype(mem.template get_multi_ptr())>); + }); + }); +} + +template void test() { + syclexp::work_group_memory mem{syclexp::indeterminate}; + test_constness(); + test_helper(mem); + if constexpr (sizeof...(rest)) + test(); +} + +int main() { + test(); + test, marray, marray>(); + test, vec, vec>(); + test(); + test(); + test(); + test(); + return 0; +} diff --git a/sycl/test/extensions/WorkGroupMemory/empty_properties_test.cpp b/sycl/test/extensions/WorkGroupMemory/empty_properties_test.cpp new file mode 100644 index 0000000000000..3d11b6d4ecbef --- /dev/null +++ b/sycl/test/extensions/WorkGroupMemory/empty_properties_test.cpp @@ -0,0 +1,22 @@ +// RUN: %clangxx -fsycl -fsyntax-only -Xclang -verify -Xclang -verify-ignore-unexpected=note,warning %s +#include + +using namespace sycl; +namespace syclexp = sycl::ext::oneapi::experimental; + +// This test checks that a diagnostic is emitted when +// instantiating a work group memory class with the properties set to anything +// other than empty_properties_t + +template +void test_properties() { + // expected-error-re@sycl/ext/oneapi/experimental/work_group_memory.hpp:* 2{{static assertion failed due to requirement 'std::is_same_v<{{.*}}, sycl::ext::oneapi::experimental::properties>>'}} + syclexp::work_group_memory{syclexp::indeterminate}; + if constexpr (sizeof...(PropertyListTs)) + test_properties(); +} + +int main() { + test_properties(); + return 0; +}