diff --git a/sycl/test/check_device_code/group_store.cpp b/sycl/test/check_device_code/group_store.cpp new file mode 100644 index 0000000000000..2136d8b0f4307 --- /dev/null +++ b/sycl/test/check_device_code/group_store.cpp @@ -0,0 +1,629 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --functions "group_store" --include-generated-funcs --version 4 +// NOTE: and manually adjusted to follow the related explicit instantiation. +// RUN: %clangxx -O3 -fsycl -fsycl-device-only -fno-discard-value-names -S -emit-llvm -fno-sycl-instrument-device-code -o - %s | FileCheck %s + +// Windows/linux have some slight differences in IR generation (function +// arguments passing and long/long long differences/mangling) that could +// complicate test updates while not improving test coverage. Limiting to linux +// should be fine. +// REQUIRES: linux + +#include + +using namespace sycl; + +namespace oneapi_exp = sycl::ext::oneapi::experimental; +using namespace sycl::ext::oneapi::experimental; + +using full_group_blocked = + decltype(properties(full_group, data_placement_blocked)); + +using naive_blocked = + decltype(properties(oneapi_exp::detail::naive, data_placement_blocked)); + +using opt_blocked = + decltype(properties(full_group, contiguous_memory, data_placement_blocked)); + +using full_group_striped = + decltype(properties(full_group, data_placement_striped)); + +using naive_striped = + decltype(properties(oneapi_exp::detail::naive, data_placement_striped)); + +using opt_striped = + decltype(properties(full_group, contiguous_memory, data_placement_striped)); + +template +using plain_global_ptr = typename sycl::detail::DecoratedType< + T, access::address_space::global_space>::type *; + +// Ensure `detail::naive` always results in no block loads/stores. +template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< + sycl::sub_group, int, plain_global_ptr, naive_blocked>( + sycl::sub_group, const int &, plain_global_ptr, naive_blocked); +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESN_RKSL_SM_T2_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] comdat !srcloc [[META5:![0-9]+]] !sycl_fixed_targets [[META6:![0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2:[0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 +// CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[TMP0]] to i64 +// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM_I]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[IN]], align 4, !tbaa [[TBAA7:![0-9]+]] +// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2]] +// CHECK-NEXT: ret void + +// Check that optimized implementation is selected. +template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< + sycl::sub_group, int, plain_global_ptr, opt_blocked>( + sycl::sub_group, const int &, plain_global_ptr, opt_blocked); +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_RKSM_SN_T2_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.0") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META5]] !sycl_fixed_targets [[META6]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 +// CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[TMP0]] to i64 +// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM_I]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[IN]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2]] +// CHECK-NEXT: ret void + +// Check that contiguous_memory can be auto-detected. +template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< + sycl::sub_group, int, plain_global_ptr, full_group_blocked>( + sycl::sub_group, const int &, plain_global_ptr, full_group_blocked); +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_RKSK_SL_T2_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.1") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META5]] !sycl_fixed_targets [[META6]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 +// CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[TMP0]] to i64 +// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM_I]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[IN]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2]] +// CHECK-NEXT: ret void + +// SYCL 2020's accessor can't be statically known to be contiguous. +using accessor_iter_t = accessor::iterator; +// Can't be optimized. +template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< + sycl::sub_group, int, accessor_iter_t, full_group_blocked>( + sycl::sub_group, const int &, accessor_iter_t, full_group_blocked); +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiNS0_6detail17accessor_iteratorIiLi1EEENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESN_RKSL_SM_T2_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[IN:%.*]], ptr noundef byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.1") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META5]] !sycl_fixed_targets [[META6]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[AGG_TMP2_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr [[OUT_PTR]], align 8, !tbaa [[TBAA11:![0-9]+]] +// CHECK-NEXT: [[AGG_TMP2_SROA_2_0_OUT_PTR_ASCAST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[OUT_PTR]], i64 8 +// CHECK-NEXT: [[AGG_TMP2_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[AGG_TMP2_SROA_2_0_OUT_PTR_ASCAST_SROA_IDX]], align 8, !tbaa [[TBAA13:![0-9]+]] +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 +// CHECK-NEXT: [[CONV5_I:%.*]] = sext i32 [[TMP0]] to i64 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr addrspace(4) [[AGG_TMP2_SROA_0_0_COPYLOAD]], i64 [[AGG_TMP2_SROA_2_0_COPYLOAD]] +// CHECK-NEXT: [[ADD_PTR_I_I_I:%.*]] = getelementptr i32, ptr addrspace(4) [[TMP1]], i64 [[CONV5_I]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[ADD_PTR_I_I_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2]] +// CHECK-NEXT: ret void + +// Explicit property - optimize. +template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< + sycl::sub_group, int, accessor_iter_t, opt_blocked>(sycl::sub_group, + const int &, + accessor_iter_t, + opt_blocked); +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiNS0_6detail17accessor_iteratorIiLi1EEENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESP_RKSN_SO_T2_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[IN:%.*]], ptr noundef byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.0") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META5]] !sycl_fixed_targets [[META6]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[AGG_TMP2_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr [[OUT_PTR]], align 8, !tbaa [[TBAA11]] +// CHECK-NEXT: [[AGG_TMP2_SROA_2_0_OUT_PTR_ASCAST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[OUT_PTR]], i64 8 +// CHECK-NEXT: [[AGG_TMP2_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[AGG_TMP2_SROA_2_0_OUT_PTR_ASCAST_SROA_IDX]], align 8, !tbaa [[TBAA13]] +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 +// CHECK-NEXT: [[CONV5_I:%.*]] = sext i32 [[TMP0]] to i64 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr addrspace(4) [[AGG_TMP2_SROA_0_0_COPYLOAD]], i64 [[AGG_TMP2_SROA_2_0_COPYLOAD]] +// CHECK-NEXT: [[ADD_PTR_I_I_I:%.*]] = getelementptr i32, ptr addrspace(4) [[TMP1]], i64 [[CONV5_I]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[ADD_PTR_I_I_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2]] +// CHECK-NEXT: ret void + +// Run-time alignment check is needed if type's alignment is less than BlockRead +// requirements. +template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< + sycl::sub_group, char, plain_global_ptr, opt_blocked>( + sycl::sub_group, const char &, plain_global_ptr, opt_blocked); +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEcPU3AS1cNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_RKSM_SN_T2_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(4) noundef align 1 dereferenceable(1) [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.0") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META5]] !sycl_fixed_targets [[META6]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 +// CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[TMP0]] to i64 +// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM_I]] +// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(4) [[IN]], align 1, !tbaa [[TBAA15:![0-9]+]] +// CHECK-NEXT: store i8 [[TMP1]], ptr addrspace(1) [[ARRAYIDX_I]], align 1, !tbaa [[TBAA15]] +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2]] +// CHECK-NEXT: ret void + +// Four shorts in blocked data layout could be loaded as a single 64-bit +// integer. +template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< + sycl::sub_group, short, 4, plain_global_ptr, opt_blocked>( + sycl::sub_group, span, plain_global_ptr, opt_blocked); +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEsLm4EPU3AS1sNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.3") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.0") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16:![0-9]+]] !sycl_fixed_targets [[META6]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA17:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META19:![0-9]+]] +// CHECK-NEXT: [[MUL_I:%.*]] = shl i32 [[TMP1]], 2 +// CHECK-NEXT: br label [[FOR_COND:%.*]] +// CHECK: for.cond: +// CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +// CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_0]], 4 +// CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] +// CHECK: for.cond.cleanup: +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2]] +// CHECK-NEXT: ret void +// CHECK: for.body: +// CHECK-NEXT: [[CONV:%.*]] = zext nneg i32 [[I_0]] to i64 +// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i16, ptr addrspace(4) [[TMP0]], i64 [[CONV]] +// CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I]], align 2, !tbaa [[TBAA22:![0-9]+]] +// CHECK-NEXT: [[ADD_I:%.*]] = or disjoint i32 [[MUL_I]], [[I_0]] +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD_I]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM]] +// CHECK-NEXT: store i16 [[TMP2]], ptr addrspace(1) [[ARRAYIDX]], align 2, !tbaa [[TBAA22]] +// CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1 +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] + +// Same, but make it `const short`. +template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< + sycl::sub_group, const short, 4, plain_global_ptr, opt_blocked>( + sycl::sub_group, span, plain_global_ptr, + opt_blocked); +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEKsLm4EPU3AS1sNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESP_NS0_4spanISN_XT1_EEESO_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.4") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.0") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META6]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA26:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META28:![0-9]+]] +// CHECK-NEXT: [[MUL_I:%.*]] = shl i32 [[TMP1]], 2 +// CHECK-NEXT: br label [[FOR_COND:%.*]] +// CHECK: for.cond: +// CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +// CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_0]], 4 +// CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] +// CHECK: for.cond.cleanup: +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2]] +// CHECK-NEXT: ret void +// CHECK: for.body: +// CHECK-NEXT: [[CONV:%.*]] = zext nneg i32 [[I_0]] to i64 +// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i16, ptr addrspace(4) [[TMP0]], i64 [[CONV]] +// CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I]], align 2, !tbaa [[TBAA22]] +// CHECK-NEXT: [[ADD_I:%.*]] = or disjoint i32 [[MUL_I]], [[I_0]] +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD_I]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM]] +// CHECK-NEXT: store i16 [[TMP2]], ptr addrspace(1) [[ARRAYIDX]], align 2, !tbaa [[TBAA22]] +// CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1 +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] + +// Check for non-power-of-two size. +template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< + sycl::sub_group, int, 3, plain_global_ptr, opt_blocked>( + sycl::sub_group, span, plain_global_ptr, opt_blocked); +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm3EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.5") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.0") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META6]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA32:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META34:![0-9]+]] +// CHECK-NEXT: [[MUL_I:%.*]] = mul i32 [[TMP1]], 3 +// CHECK-NEXT: br label [[FOR_COND:%.*]] +// CHECK: for.cond: +// CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +// CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_0]], 3 +// CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] +// CHECK: for.cond.cleanup: +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2]] +// CHECK-NEXT: ret void +// CHECK: for.body: +// CHECK-NEXT: [[CONV:%.*]] = zext nneg i32 [[I_0]] to i64 +// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 [[CONV]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[MUL_I]], [[I_0]] +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD_I]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM]] +// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(1) [[ARRAYIDX]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1 +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] + +// Four int elements in blocked data layout don't map directly to any BlockRead +// API. +template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< + sycl::sub_group, int, 4, plain_global_ptr, opt_blocked>( + sycl::sub_group, span, plain_global_ptr, opt_blocked); +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm4EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.6") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.0") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META6]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA38:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META40:![0-9]+]] +// CHECK-NEXT: [[MUL_I:%.*]] = shl i32 [[TMP1]], 2 +// CHECK-NEXT: br label [[FOR_COND:%.*]] +// CHECK: for.cond: +// CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +// CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_0]], 4 +// CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] +// CHECK: for.cond.cleanup: +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2]] +// CHECK-NEXT: ret void +// CHECK: for.body: +// CHECK-NEXT: [[CONV:%.*]] = zext nneg i32 [[I_0]] to i64 +// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 [[CONV]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[ADD_I:%.*]] = or disjoint i32 [[MUL_I]], [[I_0]] +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD_I]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM]] +// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(1) [[ARRAYIDX]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1 +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] + +// Similar to four elements case but more complex to optimize. +template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< + sycl::sub_group, int, 7, plain_global_ptr, opt_blocked>( + sycl::sub_group, span, plain_global_ptr, opt_blocked); +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm7EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.7") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.0") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META6]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA44:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META46:![0-9]+]] +// CHECK-NEXT: [[MUL_I:%.*]] = mul i32 [[TMP1]], 7 +// CHECK-NEXT: br label [[FOR_COND:%.*]] +// CHECK: for.cond: +// CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +// CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_0]], 7 +// CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] +// CHECK: for.cond.cleanup: +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2]] +// CHECK-NEXT: ret void +// CHECK: for.body: +// CHECK-NEXT: [[CONV:%.*]] = zext nneg i32 [[I_0]] to i64 +// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 [[CONV]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[MUL_I]], [[I_0]] +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD_I]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM]] +// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(1) [[ARRAYIDX]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1 +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] + +// Striped data layout with one element per work item isn't different from +// blocked data layout, so use span version only in the checks below. + +// Ensure `detail::naive` always results in no block loads/stores. +template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< + sycl::sub_group, int, 2, plain_global_ptr, naive_striped>( + sycl::sub_group, span, plain_global_ptr, naive_striped); +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESN_NS0_4spanISL_XT1_EEESM_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.8") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.9") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META6]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA50:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META52:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META55:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND:%.*]] +// CHECK: for.cond: +// CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +// CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_0]], 2 +// CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] +// CHECK: for.cond.cleanup: +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2]] +// CHECK-NEXT: ret void +// CHECK: for.body: +// CHECK-NEXT: [[CONV:%.*]] = zext nneg i32 [[I_0]] to i64 +// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 [[CONV]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[MUL_I:%.*]] = mul nuw nsw i32 [[TMP2]], [[I_0]] +// CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[TMP1]], [[MUL_I]] +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD_I]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM]] +// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[ARRAYIDX]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1 +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] + +// Check that optimized implementation is selected. +template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< + sycl::sub_group, int, 2, plain_global_ptr, opt_striped>( + sycl::sub_group, span, plain_global_ptr, opt_striped); +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.8") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.10") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META6]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA50]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META59:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META62:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND:%.*]] +// CHECK: for.cond: +// CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +// CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_0]], 2 +// CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] +// CHECK: for.cond.cleanup: +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2]] +// CHECK-NEXT: ret void +// CHECK: for.body: +// CHECK-NEXT: [[CONV:%.*]] = zext nneg i32 [[I_0]] to i64 +// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 [[CONV]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[MUL_I:%.*]] = mul nuw nsw i32 [[TMP2]], [[I_0]] +// CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[TMP1]], [[MUL_I]] +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD_I]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM]] +// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[ARRAYIDX]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1 +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP65:![0-9]+]] + +// Check that contiguous_memory can be auto-detected. +template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< + sycl::sub_group, int, 2, plain_global_ptr, full_group_striped>( + sycl::sub_group, span, plain_global_ptr, full_group_striped); +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_NS0_4spanISK_XT1_EEESL_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.8") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.11") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META6]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA50]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META66:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META69:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND:%.*]] +// CHECK: for.cond: +// CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +// CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_0]], 2 +// CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] +// CHECK: for.cond.cleanup: +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2]] +// CHECK-NEXT: ret void +// CHECK: for.body: +// CHECK-NEXT: [[CONV:%.*]] = zext nneg i32 [[I_0]] to i64 +// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 [[CONV]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[MUL_I:%.*]] = mul nuw nsw i32 [[TMP2]], [[I_0]] +// CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[TMP1]], [[MUL_I]] +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD_I]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM]] +// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[ARRAYIDX]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1 +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP72:![0-9]+]] + +// SYCL 2020's accessor can't be statically known to be contiguous. +using accessor_iter_t = accessor::iterator; +// Can't be optimized. +template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< + sycl::sub_group, int, 2, accessor_iter_t, full_group_striped>( + sycl::sub_group, span, accessor_iter_t, full_group_striped); +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2ENS0_6detail17accessor_iteratorIiLi1EEENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESN_NS0_4spanISL_XT1_EEESM_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.8") align 8 [[IN:%.*]], ptr noundef byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.11") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META6]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA50]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META73:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META76:![0-9]+]] +// CHECK-NEXT: [[COPY_SROA_0_0_COPYLOAD_I:%.*]] = load ptr addrspace(4), ptr [[OUT_PTR]], align 8, !tbaa [[TBAA11]] +// CHECK-NEXT: [[COPY_SROA_4_0_THIS_SROA_IDX_I:%.*]] = getelementptr inbounds i8, ptr [[OUT_PTR]], i64 8 +// CHECK-NEXT: [[COPY_SROA_4_0_COPYLOAD_I:%.*]] = load i64, ptr [[COPY_SROA_4_0_THIS_SROA_IDX_I]], align 8, !tbaa [[TBAA13]] +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr addrspace(4) [[COPY_SROA_0_0_COPYLOAD_I]], i64 [[COPY_SROA_4_0_COPYLOAD_I]] +// CHECK-NEXT: br label [[FOR_COND:%.*]] +// CHECK: for.cond: +// CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +// CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_0]], 2 +// CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] +// CHECK: for.cond.cleanup: +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2]] +// CHECK-NEXT: ret void +// CHECK: for.body: +// CHECK-NEXT: [[CONV:%.*]] = zext nneg i32 [[I_0]] to i64 +// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 [[CONV]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[MUL_I:%.*]] = mul nuw nsw i32 [[TMP2]], [[I_0]] +// CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[TMP1]], [[MUL_I]] +// CHECK-NEXT: [[CONV5:%.*]] = sext i32 [[ADD_I]] to i64 +// CHECK-NEXT: [[ADD_PTR_I_I:%.*]] = getelementptr i32, ptr addrspace(4) [[TMP3]], i64 [[CONV5]] +// CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[ADD_PTR_I_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1 +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP79:![0-9]+]] + +// Explicit property - optimize. +template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< + sycl::sub_group, int, 2, accessor_iter_t, opt_striped>(sycl::sub_group, + span, + accessor_iter_t, + opt_striped); +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2ENS0_6detail17accessor_iteratorIiLi1EEENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESP_NS0_4spanISN_XT1_EEESO_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.8") align 8 [[IN:%.*]], ptr noundef byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.10") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META6]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA50]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META80:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META83:![0-9]+]] +// CHECK-NEXT: [[COPY_SROA_0_0_COPYLOAD_I:%.*]] = load ptr addrspace(4), ptr [[OUT_PTR]], align 8, !tbaa [[TBAA11]] +// CHECK-NEXT: [[COPY_SROA_4_0_THIS_SROA_IDX_I:%.*]] = getelementptr inbounds i8, ptr [[OUT_PTR]], i64 8 +// CHECK-NEXT: [[COPY_SROA_4_0_COPYLOAD_I:%.*]] = load i64, ptr [[COPY_SROA_4_0_THIS_SROA_IDX_I]], align 8, !tbaa [[TBAA13]] +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr addrspace(4) [[COPY_SROA_0_0_COPYLOAD_I]], i64 [[COPY_SROA_4_0_COPYLOAD_I]] +// CHECK-NEXT: br label [[FOR_COND:%.*]] +// CHECK: for.cond: +// CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +// CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_0]], 2 +// CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] +// CHECK: for.cond.cleanup: +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2]] +// CHECK-NEXT: ret void +// CHECK: for.body: +// CHECK-NEXT: [[CONV:%.*]] = zext nneg i32 [[I_0]] to i64 +// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 [[CONV]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[MUL_I:%.*]] = mul nuw nsw i32 [[TMP2]], [[I_0]] +// CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[TMP1]], [[MUL_I]] +// CHECK-NEXT: [[CONV5:%.*]] = sext i32 [[ADD_I]] to i64 +// CHECK-NEXT: [[ADD_PTR_I_I:%.*]] = getelementptr i32, ptr addrspace(4) [[TMP3]], i64 [[CONV5]] +// CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[ADD_PTR_I_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1 +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP86:![0-9]+]] + +// Run-time alignment check is needed if type's alignment is less than BlockRead +// requirements. +template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< + sycl::sub_group, char, 2, plain_global_ptr, opt_striped>( + sycl::sub_group, span, plain_global_ptr, opt_striped); +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEcLm2EPU3AS1cNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.12") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.10") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META6]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA87:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META89:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META92:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND:%.*]] +// CHECK: for.cond: +// CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +// CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_0]], 2 +// CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] +// CHECK: for.cond.cleanup: +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2]] +// CHECK-NEXT: ret void +// CHECK: for.body: +// CHECK-NEXT: [[CONV:%.*]] = zext nneg i32 [[I_0]] to i64 +// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP0]], i64 [[CONV]] +// CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr addrspace(4) [[ARRAYIDX_I]], align 1, !tbaa [[TBAA15]] +// CHECK-NEXT: [[MUL_I:%.*]] = mul nuw nsw i32 [[TMP2]], [[I_0]] +// CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[TMP1]], [[MUL_I]] +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD_I]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM]] +// CHECK-NEXT: store i8 [[TMP3]], ptr addrspace(1) [[ARRAYIDX]], align 1, !tbaa [[TBAA15]] +// CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1 +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP95:![0-9]+]] + +// Just because there is a blocked data layout testcase, nothing inherently +// useful here. +template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< + sycl::sub_group, short, 4, plain_global_ptr, opt_striped>( + sycl::sub_group, span, plain_global_ptr, opt_striped); +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEsLm4EPU3AS1sNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.3") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.10") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META6]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA17]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META96:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META99:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND:%.*]] +// CHECK: for.cond: +// CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +// CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_0]], 4 +// CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] +// CHECK: for.cond.cleanup: +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2]] +// CHECK-NEXT: ret void +// CHECK: for.body: +// CHECK-NEXT: [[CONV:%.*]] = zext nneg i32 [[I_0]] to i64 +// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i16, ptr addrspace(4) [[TMP0]], i64 [[CONV]] +// CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I]], align 2, !tbaa [[TBAA22]] +// CHECK-NEXT: [[MUL_I:%.*]] = mul i32 [[TMP2]], [[I_0]] +// CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[TMP1]], [[MUL_I]] +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD_I]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM]] +// CHECK-NEXT: store i16 [[TMP3]], ptr addrspace(1) [[ARRAYIDX]], align 2, !tbaa [[TBAA22]] +// CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1 +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP102:![0-9]+]] + +// Check for non-power-of-two size. +template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< + sycl::sub_group, int, 3, plain_global_ptr, opt_striped>( + sycl::sub_group, span, plain_global_ptr, opt_striped); +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm3EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.5") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.10") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META6]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA32]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META103:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META106:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND:%.*]] +// CHECK: for.cond: +// CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +// CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_0]], 3 +// CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] +// CHECK: for.cond.cleanup: +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2]] +// CHECK-NEXT: ret void +// CHECK: for.body: +// CHECK-NEXT: [[CONV:%.*]] = zext nneg i32 [[I_0]] to i64 +// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 [[CONV]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[MUL_I:%.*]] = mul i32 [[TMP2]], [[I_0]] +// CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[TMP1]], [[MUL_I]] +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD_I]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM]] +// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[ARRAYIDX]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1 +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP109:![0-9]+]] + +// Even though power of two, still too many to map directly onto BloadRead API. +template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< + sycl::sub_group, int, 16, plain_global_ptr, opt_striped>( + sycl::sub_group, span, plain_global_ptr, opt_striped); +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm16EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.13") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.10") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META6]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA110:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META112:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META115:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND:%.*]] +// CHECK: for.cond: +// CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +// CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_0]], 16 +// CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] +// CHECK: for.cond.cleanup: +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2]] +// CHECK-NEXT: ret void +// CHECK: for.body: +// CHECK-NEXT: [[CONV:%.*]] = zext nneg i32 [[I_0]] to i64 +// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 [[CONV]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[MUL_I:%.*]] = mul i32 [[TMP2]], [[I_0]] +// CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[TMP1]], [[MUL_I]] +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD_I]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM]] +// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[ARRAYIDX]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1 +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP118:![0-9]+]] + +// Non-power of two case bigger than max natively supported power of two case. +template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< + sycl::sub_group, int, 11, plain_global_ptr, opt_striped>( + sycl::sub_group, span, plain_global_ptr, opt_striped); +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm11EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.14") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.10") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META6]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA119:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META121:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META124:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND:%.*]] +// CHECK: for.cond: +// CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +// CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_0]], 11 +// CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] +// CHECK: for.cond.cleanup: +// CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR2]] +// CHECK-NEXT: ret void +// CHECK: for.body: +// CHECK-NEXT: [[CONV:%.*]] = zext nneg i32 [[I_0]] to i64 +// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 [[CONV]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[MUL_I:%.*]] = mul i32 [[TMP2]], [[I_0]] +// CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[TMP1]], [[MUL_I]] +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD_I]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM]] +// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[ARRAYIDX]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1 +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP127:![0-9]+]]