diff --git a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp
index 0be24c912907b..c156c484f539d 100644
--- a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp
+++ b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp
@@ -46,6 +46,15 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory
   using value_type = std::remove_all_extents_t<DataT>;
 
 private:
+  // At the moment we do not have a way to set properties nor property values to
+  // set for work group memory. So, we check here for diagnostic purposes that
+  // the property list is empty.
+  // TODO: Remove this function and its occurrences in this file once properties
+  // have been created for work group memory.
+  void check_props_empty() const {
+    static_assert(std::is_same_v<PropertyListT, empty_properties_t> &&
+                  "Work group memory class does not support properties yet!");
+  }
   using decoratedPtr = typename sycl::detail::DecoratedType<
       value_type, access::address_space::local_space>::type *;
 
@@ -62,18 +71,22 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory
 #endif
 
 public:
-  work_group_memory(const indeterminate_t &) {};
+  work_group_memory(const indeterminate_t &) { check_props_empty(); };
   work_group_memory(const work_group_memory &rhs) = default;
   work_group_memory &operator=(const work_group_memory &rhs) = default;
   template <typename T = DataT,
             typename = std::enable_if_t<!sycl::detail::is_unbounded_array_v<T>>>
   work_group_memory(handler &)
-      : sycl::detail::work_group_memory_impl(sizeof(DataT)) {}
+      : sycl::detail::work_group_memory_impl(sizeof(DataT)) {
+    check_props_empty();
+  }
   template <typename T = DataT,
             typename = std::enable_if_t<sycl::detail::is_unbounded_array_v<T>>>
   work_group_memory(size_t num, handler &)
       : sycl::detail::work_group_memory_impl(
-            num * sizeof(std::remove_extent_t<DataT>)) {}
+            num * sizeof(std::remove_extent_t<DataT>)) {
+    check_props_empty();
+  }
   template <access::decorated IsDecorated = access::decorated::no>
   multi_ptr<value_type, access::address_space::local_space, IsDecorated>
   get_multi_ptr() const {
diff --git a/sycl/test-e2e/WorkGroupMemory/swap_test.cpp b/sycl/test-e2e/WorkGroupMemory/basic_usage.cpp
similarity index 88%
rename from sycl/test-e2e/WorkGroupMemory/swap_test.cpp
rename to sycl/test-e2e/WorkGroupMemory/basic_usage.cpp
index 7552774edcbbf..4dbc2073d7009 100644
--- a/sycl/test-e2e/WorkGroupMemory/swap_test.cpp
+++ b/sycl/test-e2e/WorkGroupMemory/basic_usage.cpp
@@ -5,6 +5,8 @@
 #include <sycl/detail/core.hpp>
 #include <sycl/ext/oneapi/experimental/work_group_memory.hpp>
 #include <sycl/group_barrier.hpp>
+#include <sycl/half_type.hpp>
+
 namespace syclexp = sycl::ext::oneapi::experimental;
 
 sycl::queue q;
@@ -50,7 +52,9 @@ template <typename T> void swap_scalar(T &a, T &b) {
       sycl::nd_range<1> ndr{size, wgsize};
       cgh.parallel_for(ndr, [=](sycl::nd_item<1> it) {
         syclexp::work_group_memory<T> temp2{syclexp::indeterminate};
-        temp2 = temp; // temp and temp2 have the same underlying data
+        temp2 = temp;            // temp and temp2 have the same underlying data
+        assert(&temp2 == &temp); // check that both objects return same
+                                 // underlying address after assignment
         temp = acc_a[0];
         acc_a[0] = acc_b[0];
         acc_b[0] = temp2; // safe to use temp2
@@ -86,6 +90,8 @@ template <typename T> void swap_scalar(T &a, T &b) {
   assert(a == old_b && b == old_a && "Incorrect swap!");
 
   // Same as above but instead of using multi_ptr, use address-of operator.
+  // Also verify that get_multi_ptr() returns the same address as address-of
+  // operator.
   {
     sycl::buffer<T, 1> buf_a{&a, 1};
     sycl::buffer<T, 1> buf_b{&b, 1};
@@ -96,6 +102,7 @@ template <typename T> void swap_scalar(T &a, T &b) {
       syclexp::work_group_memory<T> temp2{cgh};
       sycl::nd_range<1> ndr{size, wgsize};
       cgh.parallel_for(ndr, [=](sycl::nd_item<> it) {
+        assert(&temp == temp.get_multi_ptr().get());
         temp = acc_a[0];
         acc_a[0] = acc_b[0];
         temp2 = *(&temp);
@@ -294,6 +301,8 @@ void swap_array_2d(T (&a)[N][N], T (&b)[N][N], size_t batch_size) {
         temp[i][j] = acc_a[i][j];
         acc_a[i][j] = acc_b[i][j];
         syclexp::work_group_memory<T[N][N]> temp2{temp};
+        assert(&temp2 == &temp); // check both objects return same underlying
+                                 // address after copy construction.
         acc_b[i][j] = temp2[i][j];
       });
     });
@@ -342,28 +351,28 @@ void swap_array_2d(T (&a)[N][N], T (&b)[N][N], size_t batch_size) {
 // so we can verify that each work-item sees the value written by its leader.
 // The test also is a sanity check that different work groups get different
 // work group memory locations as otherwise we'd have data races.
-void coherency(size_t size, size_t wgsize) {
+template <typename T> void coherency(size_t size, size_t wgsize) {
   q.submit([&](sycl::handler &cgh) {
-    syclexp::work_group_memory<int> data{cgh};
+    syclexp::work_group_memory<T> data{cgh};
     sycl::nd_range<1> ndr{size, wgsize};
     cgh.parallel_for(ndr, [=](sycl::nd_item<1> it) {
       if (it.get_group().leader()) {
-        data = it.get_global_id() / wgsize;
+        data = T(it.get_global_id() / wgsize);
       }
       sycl::group_barrier(it.get_group());
-      assert(data == it.get_global_id() / wgsize);
+      assert(data == T(it.get_global_id() / wgsize));
     });
   });
 }
 
 constexpr size_t N = 32;
-int main() {
-  int intarr1[N][N];
-  int intarr2[N][N];
+template <typename T> void test() {
+  T intarr1[N][N];
+  T intarr2[N][N];
   for (int i = 0; i < N; ++i) {
     for (int j = 0; j < N; ++j) {
-      intarr1[i][j] = i + j;
-      intarr2[i][j] = i * j;
+      intarr1[i][j] = T(i) + T(j);
+      intarr2[i][j] = T(i) * T(j);
     }
   }
   for (int i = 0; i < N; ++i) {
@@ -373,10 +382,37 @@ int main() {
     swap_array_1d(intarr1[i], intarr2[i], 8);
   }
   swap_array_2d(intarr1, intarr2, 8);
-  coherency(N, N / 2);
-  coherency(N, N / 4);
-  coherency(N, N / 8);
-  coherency(N, N / 16);
-  coherency(N, N / 32);
+  coherency<T>(N, N / 2);
+  coherency<T>(N, N / 4);
+  coherency<T>(N, N / 8);
+  coherency<T>(N, N / 16);
+  coherency<T>(N, N / 32);
+}
+
+template <typename T> void test_ptr() {
+  T arr1[N][N];
+  T arr2[N][N];
+  for (int i = 0; i < N; ++i) {
+    for (int j = 0; j < N; ++j) {
+      swap_scalar(arr1[i][j], arr2[i][j]);
+    }
+    swap_array_1d(arr1[i], arr2[i], 8);
+  }
+  swap_array_2d(arr1, arr2, 8);
+}
+
+int main() {
+  test<int>();
+  test<char>();
+  test<uint16_t>();
+  if (q.get_device().has(sycl::aspect::fp16))
+    test<sycl::half>();
+  test_ptr<float *>();
+  test_ptr<int *>();
+  test_ptr<char *>();
+  test_ptr<uint16_t *>();
+  if (q.get_device().has(sycl::aspect::fp16))
+    test_ptr<sycl::half *>();
+  test_ptr<float *>();
   return 0;
 }
diff --git a/sycl/test-e2e/WorkGroupMemory/common.hpp b/sycl/test-e2e/WorkGroupMemory/common.hpp
new file mode 100644
index 0000000000000..64452745ede1b
--- /dev/null
+++ b/sycl/test-e2e/WorkGroupMemory/common.hpp
@@ -0,0 +1,57 @@
+#pragma once
+
+#include <cassert>
+#include <iostream>
+#include <sycl/atomic_ref.hpp>
+#include <sycl/detail/core.hpp>
+#include <sycl/ext/oneapi/experimental/work_group_memory.hpp>
+#include <sycl/ext/oneapi/free_function_queries.hpp>
+#include <sycl/group_barrier.hpp>
+#include <sycl/marray.hpp>
+#include <sycl/usm.hpp>
+#include <sycl/vector.hpp>
+
+using namespace sycl;
+
+template <typename T> bool check_half_aspect(queue &q) {
+  if (std::is_same_v<sycl::half, T> &&
+      !q.get_device().has(sycl::aspect::fp16)) {
+    std::cout << "Device does not support fp16 aspect. Skipping all tests with "
+                 "sycl::half type!"
+              << std::endl;
+    return false;
+  }
+  return true;
+}
+
+template <typename T> bool check_double_aspect(queue &q) {
+  if (std::is_same_v<T, double> && !q.get_device().has(aspect::fp64)) {
+    std::cout << "Device does not support fp64 aspect. Skipping all tests with "
+                 "double type!"
+              << std::endl;
+    return false;
+  }
+  return true;
+}
+
+template <typename T> struct S {
+  T val;
+};
+
+template <typename T> struct M {
+  T val;
+};
+
+union U {
+  S<int> s;
+  M<int> m;
+};
+
+template <typename T>
+void sum_helper(sycl::ext::oneapi::experimental::work_group_memory<T[]> mem,
+                sycl::ext::oneapi::experimental::work_group_memory<T> ret,
+                size_t WGSIZE) {
+  for (int i = 0; i < WGSIZE; ++i) {
+    ret = ret + mem[i];
+  }
+}
diff --git a/sycl/test-e2e/WorkGroupMemory/common_free_function.hpp b/sycl/test-e2e/WorkGroupMemory/common_free_function.hpp
new file mode 100644
index 0000000000000..e13f50214593d
--- /dev/null
+++ b/sycl/test-e2e/WorkGroupMemory/common_free_function.hpp
@@ -0,0 +1,121 @@
+#pragma once
+
+#include "common.hpp"
+#include "common_lambda.hpp"
+#include <cassert>
+#include <sycl/detail/core.hpp>
+#include <sycl/ext/oneapi/experimental/work_group_memory.hpp>
+#include <sycl/ext/oneapi/free_function_queries.hpp>
+#include <sycl/group_barrier.hpp>
+#include <sycl/usm.hpp>
+
+using namespace sycl;
+
+template <typename T>
+SYCL_EXT_ONEAPI_FUNCTION_PROPERTY(
+    (ext::oneapi::experimental::nd_range_kernel<1>))
+void sum(sycl::ext::oneapi::experimental::work_group_memory<T[]> mem, T *buf,
+         sycl::ext::oneapi::experimental::work_group_memory<T> result,
+         T expected, size_t WGSIZE, bool UseHelper) {
+  const auto it = sycl::ext::oneapi::this_work_item::get_nd_item<1>();
+  size_t local_id = it.get_local_id();
+  mem[local_id] = buf[local_id];
+  group_barrier(it.get_group());
+  if (it.get_group().leader()) {
+    result = 0;
+    if (!UseHelper) {
+      for (int i = 0; i < WGSIZE; ++i) {
+        result = result + mem[i];
+      }
+    } else {
+      sum_helper(mem, result, WGSIZE);
+    }
+    assert(result == expected);
+  }
+}
+
+// Explicit instantiations for the relevant data types.
+// These are needed because free function kernel support is not fully
+// implemented yet.
+// TODO: Remove these once free function kernel support is fully there.
+#define SUM(T)                                                                 \
+  template void sum<T>(                                                        \
+      sycl::ext::oneapi::experimental::work_group_memory<T[]> mem, T * buf,    \
+      sycl::ext::oneapi::experimental::work_group_memory<T> result,            \
+      T expected, size_t WGSIZE, bool UseHelper);
+
+SUM(int)
+SUM(uint16_t)
+SUM(half)
+SUM(double)
+SUM(float)
+SUM(char)
+SUM(bool)
+
+template <typename T>
+SYCL_EXT_ONEAPI_FUNCTION_PROPERTY(
+    (ext::oneapi::experimental::nd_range_kernel<1>))
+void sum_marray(
+    sycl::ext::oneapi::experimental::work_group_memory<sycl::marray<T, 16>> mem,
+    T *buf, sycl::ext::oneapi::experimental::work_group_memory<T> result,
+    T expected) {
+  const auto it = sycl::ext::oneapi::this_work_item::get_nd_item<1>();
+  size_t local_id = it.get_local_id();
+  constexpr T tolerance = 0.0001;
+  sycl::marray<T, 16> &data = mem;
+  data[local_id] = buf[local_id];
+  group_barrier(it.get_group());
+  if (it.get_group().leader()) {
+    result = 0;
+    for (int i = 0; i < 16; ++i) {
+      result = result + data[i];
+    }
+    assert((result - expected) * (result - expected) <= tolerance);
+  }
+}
+
+// Explicit instantiations for the relevant data types.
+#define SUM_MARRAY(T)                                                          \
+  template void sum_marray<T>(                                                 \
+      sycl::ext::oneapi::experimental::work_group_memory<sycl::marray<T, 16>>  \
+          mem,                                                                 \
+      T * buf, sycl::ext::oneapi::experimental::work_group_memory<T> result,   \
+      T expected);
+
+SUM_MARRAY(float);
+SUM_MARRAY(double);
+SUM_MARRAY(half);
+
+template <typename T>
+SYCL_EXT_ONEAPI_FUNCTION_PROPERTY(
+    (ext::oneapi::experimental::nd_range_kernel<1>))
+void sum_vec(
+    sycl::ext::oneapi::experimental::work_group_memory<sycl::vec<T, 16>> mem,
+    T *buf, sycl::ext::oneapi::experimental::work_group_memory<T> result,
+    T expected) {
+  const auto it = sycl::ext::oneapi::this_work_item::get_nd_item<1>();
+  size_t local_id = it.get_local_id();
+  constexpr T tolerance = 0.0001;
+  sycl::vec<T, 16> &data = mem;
+  data[local_id] = buf[local_id];
+  group_barrier(it.get_group());
+  if (it.get_group().leader()) {
+    result = 0;
+    for (int i = 0; i < 16; ++i) {
+      result = result + data[i];
+    }
+    assert((result - expected) * (result - expected) <= tolerance);
+  }
+}
+
+// Explicit instantiations for the relevant data types.
+#define SUM_VEC(T)                                                             \
+  template void sum_vec<T>(                                                    \
+      sycl::ext::oneapi::experimental::work_group_memory<sycl::vec<T, 16>>     \
+          mem,                                                                 \
+      T * buf, sycl::ext::oneapi::experimental::work_group_memory<T> result,   \
+      T expected);
+
+SUM_VEC(float);
+SUM_VEC(double);
+SUM_VEC(half);
diff --git a/sycl/test-e2e/WorkGroupMemory/common_lambda.hpp b/sycl/test-e2e/WorkGroupMemory/common_lambda.hpp
new file mode 100644
index 0000000000000..f5c8b6651ffcc
--- /dev/null
+++ b/sycl/test-e2e/WorkGroupMemory/common_lambda.hpp
@@ -0,0 +1,22 @@
+#pragma once
+
+#include <cassert>
+#include <sycl/atomic_ref.hpp>
+#include <sycl/detail/core.hpp>
+#include <sycl/ext/oneapi/experimental/work_group_memory.hpp>
+#include <sycl/ext/oneapi/free_function_queries.hpp>
+#include <sycl/group_barrier.hpp>
+#include <sycl/marray.hpp>
+#include <sycl/usm.hpp>
+#include <sycl/vector.hpp>
+
+using namespace sycl;
+
+template <typename T>
+void sum_helper(sycl::ext::oneapi::experimental::work_group_memory<T[]> mem,
+                sycl::ext::oneapi::experimental::work_group_memory<T> ret,
+                size_t WGSIZE) {
+  for (int i = 0; i < WGSIZE; ++i) {
+    ret = ret + mem[i];
+  }
+}
diff --git a/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp b/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp
new file mode 100644
index 0000000000000..ff2aa8aa19385
--- /dev/null
+++ b/sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp
@@ -0,0 +1,132 @@
+// REQUIRES: aspect-usm_shared_allocations
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+
+// UNSUPPORTED: cuda
+// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/16004
+
+// XFAIL: *
+// XFAIL-TRACKER: https://github.com/intel/llvm/issues/15927
+
+#include "common_free_function.hpp"
+
+// Basic usage reduction test using free function kernels.
+// A global buffer is allocated using USM and it is passed to the kernel on the
+// device. On the device, a work group memory buffer is allocated and each item
+// copies the correspondng element of the global buffer to the corresponding
+// element of the work group memory buffer using its lcoal index. The leader of
+// every work-group, after waiting for every work-item to complete, then sums
+// these values storing the result in another work group memory object. Finally,
+// each work item then verifies that the sum of the work group memory elements
+// equals the sum of the global buffer elements. This is repeated for several
+// data types.
+
+queue q;
+context ctx = q.get_context();
+
+constexpr size_t SIZE = 128;
+constexpr size_t VEC_SIZE = 16;
+
+template <typename T, typename... Ts> void test_marray() {
+  if (!check_half_aspect<T>(q) || !check_double_aspect<T>(q))
+    return;
+
+  constexpr size_t WGSIZE = VEC_SIZE;
+  T *buf = malloc_shared<T>(WGSIZE, q);
+  assert(buf && "Shared USM allocation failed!");
+  T expected = 0;
+  for (int i = 0; i < WGSIZE; ++i) {
+    buf[i] = T(i) / WGSIZE;
+    expected = expected + buf[i];
+  }
+  nd_range ndr{{SIZE}, {WGSIZE}};
+#ifndef __SYCL_DEVICE_ONLY__
+  // Get the kernel object for the "mykernel" kernel.
+  auto Bundle = get_kernel_bundle<sycl::bundle_state::executable>(ctx);
+  kernel_id sum_id = ext::oneapi::experimental::get_kernel_id<sum_marray<T>>();
+  kernel k_sum = Bundle.get_kernel(sum_id);
+  q.submit([&](sycl::handler &cgh) {
+     ext::oneapi::experimental::work_group_memory<marray<T, WGSIZE>> mem{cgh};
+     ext::oneapi::experimental ::work_group_memory<T> result{cgh};
+     cgh.set_args(mem, buf, result, expected);
+     cgh.parallel_for(ndr, k_sum);
+   }).wait();
+#endif // __SYCL_DEVICE_ONLY
+  free(buf, q);
+  if constexpr (sizeof...(Ts))
+    test_marray<Ts...>();
+}
+
+template <typename T, typename... Ts> void test_vec() {
+  if (!check_half_aspect<T>(q) || !check_double_aspect<T>(q))
+    return;
+
+  constexpr size_t WGSIZE = VEC_SIZE;
+  T *buf = malloc_shared<T>(WGSIZE, q);
+  assert(buf && "Shared USM allocation failed!");
+  T expected = 0;
+  for (int i = 0; i < WGSIZE; ++i) {
+    buf[i] = T(i) / WGSIZE;
+    expected = expected + buf[i];
+  }
+  nd_range ndr{{SIZE}, {WGSIZE}};
+#ifndef __SYCL_DEVICE_ONLY__
+  // Get the kernel object for the "mykernel" kernel.
+  auto Bundle = get_kernel_bundle<sycl::bundle_state::executable>(ctx);
+  kernel_id sum_id = ext::oneapi::experimental::get_kernel_id<sum_vec<T>>();
+  kernel k_sum = Bundle.get_kernel(sum_id);
+  q.submit([&](sycl::handler &cgh) {
+     ext::oneapi::experimental::work_group_memory<vec<T, WGSIZE>> mem{cgh};
+     ext::oneapi::experimental ::work_group_memory<T> result{cgh};
+     cgh.set_args(mem, buf, result, expected);
+     cgh.parallel_for(ndr, k_sum);
+   }).wait();
+#endif // __SYCL_DEVICE_ONLY
+  free(buf, q);
+  if constexpr (sizeof...(Ts))
+    test_vec<Ts...>();
+}
+
+template <typename T, typename... Ts>
+void test(size_t SIZE, size_t WGSIZE, bool UseHelper) {
+  if (!check_half_aspect<T>(q) || !check_double_aspect<T>(q))
+    return;
+
+  T *buf = malloc_shared<T>(WGSIZE, q);
+  assert(buf && "Shared USM allocation failed!");
+  T expected = 0;
+  for (int i = 0; i < WGSIZE; ++i) {
+    buf[i] = T(i);
+    expected = expected + buf[i];
+  }
+  nd_range ndr{{SIZE}, {WGSIZE}};
+  // The following ifndef is required due to a number of limitations of free
+  // function kernels. See CMPLRLLVM-61498.
+  // TODO: Remove it once these limitations are no longer there.
+#ifndef __SYCL_DEVICE_ONLY__
+  // Get the kernel object for the "mykernel" kernel.
+  auto Bundle = get_kernel_bundle<sycl::bundle_state::executable>(ctx);
+  kernel_id sum_id = ext::oneapi::experimental::get_kernel_id<sum<T>>();
+  kernel k_sum = Bundle.get_kernel(sum_id);
+  q.submit([&](sycl::handler &cgh) {
+     ext::oneapi::experimental::work_group_memory<T[]> mem{WGSIZE, cgh};
+     ext::oneapi::experimental ::work_group_memory<T> result{cgh};
+     cgh.set_args(mem, buf, result, expected, WGSIZE, UseHelper);
+     cgh.parallel_for(ndr, k_sum);
+   }).wait();
+
+#endif // __SYCL_DEVICE_ONLY
+  free(buf, q);
+  if constexpr (sizeof...(Ts))
+    test<Ts...>(SIZE, WGSIZE, UseHelper);
+}
+
+int main() {
+  test<int, uint16_t, half, double, float>(SIZE, SIZE, true /* UseHelper */);
+  test<int, float, half>(SIZE, SIZE, false);
+  test<int, double, char>(SIZE, SIZE / 2, false);
+  test<int, bool, char>(SIZE, SIZE / 4, false);
+  test_marray<float, double, half>();
+  test_vec<float, double, half>();
+  return 0;
+}
diff --git a/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp b/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp
new file mode 100644
index 0000000000000..5759e86f17fe7
--- /dev/null
+++ b/sycl/test-e2e/WorkGroupMemory/reduction_lambda.cpp
@@ -0,0 +1,242 @@
+// REQUIRES: aspect-usm_shared_allocations
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+
+#include "common.hpp"
+
+queue q;
+context ctx = q.get_context();
+
+constexpr size_t SIZE = 128;
+
+template <typename T, typename... Ts>
+void test_struct(size_t SIZE, size_t WGSIZE) {
+  if (!check_half_aspect<T>(q) || !check_double_aspect<T>(q))
+    return;
+  S<T> *buf = malloc_shared<S<T>>(WGSIZE, q);
+  assert(buf && "Shared USM allocation failed!");
+  T expected = 0;
+  for (int i = 0; i < WGSIZE; ++i) {
+    buf[i].val = T(i);
+    expected = expected + buf[i].val;
+  }
+  nd_range ndr{{SIZE}, {WGSIZE}};
+  q.submit([&](sycl::handler &cgh) {
+     ext::oneapi::experimental::work_group_memory<S<T>[]> mem{WGSIZE, cgh};
+     ext::oneapi::experimental ::work_group_memory<T> result{cgh};
+     cgh.parallel_for(ndr, [=](nd_item<> it) {
+       size_t local_id = it.get_local_id();
+       mem[local_id] = buf[local_id];
+       group_barrier(it.get_group());
+       if (it.get_group().leader()) {
+         result = 0;
+         for (int i = 0; i < WGSIZE; ++i) {
+           result = result + mem[i].val;
+         }
+         assert(result == expected);
+       }
+     });
+   }).wait();
+  free(buf, q);
+  if constexpr (sizeof...(Ts))
+    test_struct<Ts...>(SIZE, WGSIZE);
+}
+
+void test_union(size_t SIZE, size_t WGSIZE) {
+  U *buf = malloc_shared<U>(WGSIZE, q);
+  assert(buf && "Shared USM allocation failed!");
+  int expected = 0;
+  for (int i = 0; i < WGSIZE; ++i) {
+    if (i % 2)
+      buf[i].s = S<int>{i};
+    else
+      buf[i].m = M<int>{i};
+    expected = expected + (i % 2) ? buf[i].s.val : buf[i].m.val;
+  }
+  nd_range ndr{{SIZE}, {WGSIZE}};
+  q.submit([&](sycl::handler &cgh) {
+     ext::oneapi::experimental::work_group_memory<U[]> mem{WGSIZE, cgh};
+     ext::oneapi::experimental::work_group_memory<int> result{cgh};
+     cgh.parallel_for(ndr, [=](nd_item<> it) {
+       size_t local_id = it.get_local_id();
+       mem[local_id] = buf[local_id];
+       group_barrier(it.get_group());
+       if (it.get_group().leader()) {
+         result = 0;
+         for (int i = 0; i < WGSIZE; ++i) {
+           result = result + (i % 2) ? mem[i].s.val : mem[i].m.val;
+         }
+         assert(result == expected);
+       }
+     });
+   }).wait();
+  free(buf, q);
+}
+
+template <typename T, typename... Ts>
+void test(size_t SIZE, size_t WGSIZE, bool UseHelper) {
+  if (!check_half_aspect<T>(q) || !check_double_aspect<T>(q))
+    return;
+  T *buf = malloc_shared<T>(WGSIZE, q);
+  assert(buf && "Shared USM allocation failed!");
+  T expected = 0;
+  for (int i = 0; i < WGSIZE; ++i) {
+    buf[i] = T(i);
+    expected = expected + buf[i];
+  }
+  nd_range ndr{{SIZE}, {WGSIZE}};
+  q.submit([&](sycl::handler &cgh) {
+     ext::oneapi::experimental::work_group_memory<T[]> mem{WGSIZE, cgh};
+     ext::oneapi::experimental ::work_group_memory<T> result{cgh};
+     cgh.parallel_for(ndr, [=](nd_item<> it) {
+       size_t local_id = it.get_local_id();
+       mem[local_id] = buf[local_id];
+       group_barrier(it.get_group());
+       if (it.get_group().leader()) {
+         result = 0;
+         if (!UseHelper) {
+           for (int i = 0; i < WGSIZE; ++i) {
+             result = result + mem[i];
+           }
+         } else {
+           sum_helper(mem, result, WGSIZE);
+         }
+         assert(result == expected);
+       }
+     });
+   }).wait();
+  free(buf, q);
+  if constexpr (sizeof...(Ts))
+    test<Ts...>(SIZE, WGSIZE, UseHelper);
+}
+
+template <typename T, typename... Ts> void test_marray() {
+  if (!check_half_aspect<T>(q) || !check_double_aspect<T>(q))
+    return;
+  constexpr size_t WGSIZE = SIZE;
+  T *buf = malloc_shared<T>(WGSIZE, q);
+  assert(buf && "Shared USM allocation failed!");
+  T expected = 0;
+  for (int i = 0; i < WGSIZE; ++i) {
+    buf[i] = T(i) / WGSIZE;
+    expected = expected + buf[i];
+  }
+  nd_range ndr{{SIZE}, {WGSIZE}};
+  q.submit([&](sycl::handler &cgh) {
+     ext::oneapi::experimental::work_group_memory<marray<T, WGSIZE>> mem{cgh};
+     ext::oneapi::experimental ::work_group_memory<T> result{cgh};
+     cgh.parallel_for(ndr, [=](nd_item<> it) {
+       size_t local_id = it.get_local_id();
+       constexpr T tolerance = 0.0001;
+       // User-defined conversion from work group memory to underlying type is
+       // not applied during member access calls so we have to explicitly
+       // convert to the value_type ourselves.
+       marray<T, WGSIZE> &data = mem;
+       data[local_id] = buf[local_id];
+       group_barrier(it.get_group());
+       if (it.get_group().leader()) {
+         result = 0;
+         for (int i = 0; i < WGSIZE; ++i) {
+           result = result + data[i];
+         }
+         assert((result - expected) * (result - expected) <= tolerance);
+       }
+     });
+   }).wait();
+  free(buf, q);
+  if constexpr (sizeof...(Ts))
+    test_marray<Ts...>();
+}
+
+template <typename T, typename... Ts> void test_vec() {
+  if (!check_half_aspect<T>(q) || !check_double_aspect<T>(q))
+    return;
+  constexpr size_t WGSIZE = 8;
+  T *buf = malloc_shared<T>(WGSIZE, q);
+  assert(buf && "Shared USM allocation failed!");
+  T expected = 0;
+  for (int i = 0; i < WGSIZE; ++i) {
+    buf[i] = T(i) / WGSIZE;
+    expected = expected + buf[i];
+  }
+  nd_range ndr{{SIZE}, {WGSIZE}};
+  q.submit([&](sycl::handler &cgh) {
+     ext::oneapi::experimental::work_group_memory<vec<T, WGSIZE>> mem{cgh};
+     ext::oneapi::experimental ::work_group_memory<T> result{cgh};
+     cgh.parallel_for(ndr, [=](nd_item<> it) {
+       size_t local_id = it.get_local_id();
+       constexpr T tolerance = 0.0001;
+       vec<T, WGSIZE> &data = mem;
+       data[local_id] = buf[local_id];
+       group_barrier(it.get_group());
+       if (it.get_group().leader()) {
+         result = 0;
+         for (int i = 0; i < WGSIZE; ++i) {
+           result = result + data[i];
+         }
+         assert((result - expected) * (result - expected) <= tolerance);
+       }
+     });
+   }).wait();
+  free(buf, q);
+  if constexpr (sizeof...(Ts))
+    test_vec<Ts...>();
+}
+
+template <typename T, typename... Ts> void test_atomic_ref() {
+  if (!(sizeof(T) == 4 ||
+        (sizeof(T) == 8 && q.get_device().has(aspect::atomic64)))) {
+    std::cout << "Invalid type used with atomic_ref!\nSkipping the test!";
+    return;
+  }
+  if (!check_half_aspect<T>(q) || !check_double_aspect<T>(q))
+    return;
+  constexpr size_t WGSIZE = 8;
+  T *buf = malloc_shared<T>(WGSIZE, q);
+  assert(buf && "Shared USM allocation failed!");
+  T expected = 0;
+  for (int i = 0; i < WGSIZE; ++i) {
+    buf[i] = T(i);
+    expected = expected + buf[i];
+  }
+  nd_range ndr{{SIZE}, {WGSIZE}};
+  q.submit([&](sycl::handler &cgh) {
+     ext::oneapi::experimental::work_group_memory<T[]> mem{WGSIZE, cgh};
+     ext::oneapi::experimental::work_group_memory<T> result{cgh};
+     cgh.parallel_for(ndr, [=](nd_item<> it) {
+       size_t local_id = it.get_local_id();
+       mem[local_id] = buf[local_id];
+       atomic_ref<T, memory_order::acq_rel, memory_scope::work_group>
+           atomic_val{result};
+       if (it.get_group().leader()) {
+         atomic_val.store(0);
+       }
+       group_barrier(it.get_group());
+       atomic_val += mem[local_id];
+       group_barrier(it.get_group());
+       assert(atomic_val.load() == expected);
+     });
+   }).wait();
+  free(buf, q);
+  if constexpr (sizeof...(Ts))
+    test_atomic_ref<Ts...>();
+}
+
+int main() {
+  test<int, uint16_t, half, double, float>(SIZE, SIZE /* WorkGroupSize */,
+                                           true /* UseHelper */);
+  test<int, float, half>(SIZE, SIZE, false);
+  test<int, double, char>(SIZE, SIZE / 2, false);
+  test<int, bool, char>(SIZE, SIZE / 4, false);
+  test<int, float>(SIZE, 1, false);
+  test<int, float>(SIZE, 2, true);
+  test_marray<float, double, half>();
+  test_vec<float, double, half>();
+  test_atomic_ref<int, long, float, double>();
+  test_struct<int, float, double>(SIZE, 4);
+  test_union(SIZE, SIZE);
+  test_union(SIZE, SIZE / 2);
+  test_union(SIZE, 1);
+  test_union(SIZE, 2);
+  return 0;
+}
diff --git a/sycl/test/extensions/WorkGroupMemory/api_misuse_test.cpp b/sycl/test/extensions/WorkGroupMemory/api_misuse_test.cpp
new file mode 100644
index 0000000000000..d79067d34079b
--- /dev/null
+++ b/sycl/test/extensions/WorkGroupMemory/api_misuse_test.cpp
@@ -0,0 +1,43 @@
+// RUN: %clangxx -fsycl -fsyntax-only -ferror-limit=0 -Xclang -verify -Xclang -verify-ignore-unexpected=note,warning %s
+
+#include <sycl/sycl.hpp>
+
+using namespace sycl;
+namespace syclexp = sycl::ext::oneapi::experimental;
+
+queue Q;
+
+// This test verifies the type restrictions on the two non-default constructors
+// of work group memory.
+
+template <typename DataT> void convertToDataT(DataT &data) {}
+
+template <typename DataT> void test_bounded_arr() {
+  Q.submit([&](sycl::handler &cgh) {
+    nd_range<1> ndr{1, 1};
+    // expected-error-re@+1 5{{no matching constructor for initialization of 'syclexp::work_group_memory<{{.*}}>'}}
+    syclexp::work_group_memory<DataT[1]> mem{1, cgh};
+    // expected-error@+1 5{{no viable overloaded '='}}
+    cgh.parallel_for(ndr, [=](nd_item<1> it) { mem = {DataT{}}; });
+  });
+}
+
+template <typename DataT> void test_unbounded_arr() {
+  Q.submit([&](sycl::handler &cgh) {
+    nd_range<1> ndr{1, 1};
+    // expected-error-re@+1 5{{no matching constructor for initialization of 'syclexp::work_group_memory<{{.*}}>'}}
+    syclexp::work_group_memory<DataT[]> mem{cgh};
+    // expected-error@+1 5{{no viable overloaded '='}}
+    cgh.parallel_for(ndr, [=](nd_item<1> it) { mem = {DataT{}}; });
+  });
+}
+
+template <typename... DataTs> void test() {
+  (test_bounded_arr<DataTs>(), ...);
+  (test_unbounded_arr<DataTs>(), ...);
+}
+
+int main() {
+  test<char, int16_t, int[1], double[2], half[3]>();
+  return 0;
+}
diff --git a/sycl/test/extensions/WorkGroupMemory/api_test.cpp b/sycl/test/extensions/WorkGroupMemory/api_test.cpp
new file mode 100644
index 0000000000000..81f7f9d01293d
--- /dev/null
+++ b/sycl/test/extensions/WorkGroupMemory/api_test.cpp
@@ -0,0 +1,102 @@
+// RUN: %clangxx -fsycl -fsyntax-only %s
+#include <cassert>
+#include <sycl/sycl.hpp>
+#include <type_traits>
+
+using namespace sycl;
+namespace syclexp = sycl::ext::oneapi::experimental;
+
+queue Q;
+
+struct S {
+  int a;
+  char b;
+};
+
+union U {
+  int a;
+  char b;
+};
+
+template <typename DataT> void convertToDataT(DataT &data) {}
+
+template <typename DataT> void test_constness() {
+  Q.submit([&](sycl::handler &cgh) {
+    nd_range<1> ndr{1, 1};
+    syclexp::work_group_memory<DataT> mem{syclexp::indeterminate};
+    cgh.parallel_for(ndr, [=](nd_item<1> it) {
+      const auto mem1 = mem;
+      // since mem1 is const, all of the following should succeed.
+      if constexpr (!std::is_array_v<DataT>)
+        mem1 = DataT{};
+      convertToDataT(mem1);
+      const auto *ptr = &mem1;
+      const auto &mptr = mem1.template get_multi_ptr<>();
+    });
+  });
+}
+
+template <typename DataT>
+void test_helper(syclexp::work_group_memory<DataT> mem) {
+  static_assert(
+      std::is_same_v<typename syclexp::work_group_memory<DataT>::value_type,
+                     std::remove_all_extents_t<DataT>>);
+  syclexp::work_group_memory<DataT> dummy{mem};
+  mem = dummy;
+  Q.submit([&](sycl::handler &cgh) {
+    if constexpr (sycl::detail::is_unbounded_array_v<DataT>)
+      mem = syclexp::work_group_memory<DataT>{1, cgh};
+    else
+      mem = syclexp::work_group_memory<DataT>{cgh};
+    nd_range<1> ndr{1, 1};
+    cgh.parallel_for(ndr, [=](nd_item<1> it) {
+      convertToDataT(mem);
+      if constexpr (!std::is_array_v<DataT>)
+        mem = DataT{};
+      static_assert(
+          std::is_same_v<
+              multi_ptr<typename syclexp::work_group_memory<DataT>::value_type,
+                        access::address_space::local_space,
+                        access::decorated::no>,
+              decltype(mem.template get_multi_ptr<access::decorated::no>())>);
+      static_assert(
+          std::is_same_v<
+              multi_ptr<typename syclexp::work_group_memory<DataT>::value_type,
+                        access::address_space::local_space,
+                        access::decorated::no>,
+              decltype(mem.template get_multi_ptr<>())>);
+      static_assert(
+          std::is_same_v<
+              multi_ptr<typename syclexp::work_group_memory<DataT>::value_type,
+                        access::address_space::local_space,
+                        access::decorated::no>,
+              decltype(mem.template get_multi_ptr<access::decorated::no>())>);
+
+      static_assert(
+          std::is_same_v<
+              multi_ptr<typename syclexp::work_group_memory<DataT>::value_type,
+                        access::address_space::local_space,
+                        access::decorated::yes>,
+              decltype(mem.template get_multi_ptr<access::decorated::yes>())>);
+    });
+  });
+}
+
+template <typename Type, typename... rest> void test() {
+  syclexp::work_group_memory<Type> mem{syclexp::indeterminate};
+  test_constness<Type>();
+  test_helper(mem);
+  if constexpr (sizeof...(rest))
+    test<rest...>();
+}
+
+int main() {
+  test<char, int16_t, int, double, half>();
+  test<marray<int, 1>, marray<int, 2>, marray<int, 8>>();
+  test<vec<int, 1>, vec<int, 2>, vec<int, 8>>();
+  test<char *, int16_t *, int *, double *, half *>();
+  test<S, U>();
+  test<char[1], char[2], int[1], int[2]>();
+  test<double[], half[]>();
+  return 0;
+}
diff --git a/sycl/test/extensions/WorkGroupMemory/empty_properties_test.cpp b/sycl/test/extensions/WorkGroupMemory/empty_properties_test.cpp
new file mode 100644
index 0000000000000..3d11b6d4ecbef
--- /dev/null
+++ b/sycl/test/extensions/WorkGroupMemory/empty_properties_test.cpp
@@ -0,0 +1,22 @@
+// RUN: %clangxx -fsycl -fsyntax-only -Xclang -verify -Xclang -verify-ignore-unexpected=note,warning %s
+#include <sycl/sycl.hpp>
+
+using namespace sycl;
+namespace syclexp = sycl::ext::oneapi::experimental;
+
+// This test checks that a diagnostic is emitted when
+// instantiating a work group memory class with the properties set to anything
+// other than empty_properties_t
+
+template <typename PropertyListT, typename... PropertyListTs>
+void test_properties() {
+  // expected-error-re@sycl/ext/oneapi/experimental/work_group_memory.hpp:* 2{{static assertion failed due to requirement 'std::is_same_v<{{.*}}, sycl::ext::oneapi::experimental::properties<sycl::ext::oneapi::experimental::detail::properties_type_list<>>>'}}
+  syclexp::work_group_memory<int, PropertyListT>{syclexp::indeterminate};
+  if constexpr (sizeof...(PropertyListTs))
+    test_properties<PropertyListTs...>();
+}
+
+int main() {
+  test_properties<int, syclexp::work_group_progress_key>();
+  return 0;
+}