diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp index 2b27150112ad8..820c8e12d2449 100644 --- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp +++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp @@ -719,13 +719,12 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) { for (unsigned I = 1; I < CallType->getNumContainedTypes(); I++) { std::optional<VectorSplit> CurrVS = getVectorSplit(cast<FixedVectorType>(CallType->getContainedType(I))); - // This case does not seem to happen, but it is possible for - // VectorSplit.NumPacked >= NumElems. If that happens a VectorSplit - // is not returned and we will bailout of handling this call. - // The secondary bailout case is if NumPacked does not match. - // This can happen if ScalarizeMinBits is not set to the default. - // This means with certain ScalarizeMinBits intrinsics like frexp - // will only scalarize when the struct elements have the same bitness. + // It is possible for VectorSplit.NumPacked >= NumElems. If that happens a + // VectorSplit is not returned and we will bailout of handling this call. + // The secondary bailout case is if NumPacked does not match. This can + // happen if ScalarizeMinBits is not set to the default. This means with + // certain ScalarizeMinBits intrinsics like frexp will only scalarize when + // the struct elements have the same bitness. if (!CurrVS || CurrVS->NumPacked != VS->NumPacked) return false; if (isVectorIntrinsicWithStructReturnOverloadAtField(ID, I, TTI)) @@ -1083,6 +1082,18 @@ bool ScalarizerVisitor::visitExtractValueInst(ExtractValueInst &EVI) { std::optional<VectorSplit> VS = getVectorSplit(VecType); if (!VS) return false; + for (unsigned I = 1; I < OpTy->getNumContainedTypes(); I++) { + std::optional<VectorSplit> CurrVS = + getVectorSplit(cast<FixedVectorType>(OpTy->getContainedType(I))); + // It is possible for VectorSplit.NumPacked >= NumElems. If that happens a + // VectorSplit is not returned and we will bailout of handling this call. + // The secondary bailout case is if NumPacked does not match. 
This can + // happen if ScalarizeMinBits is not set to the default. This means with + // certain ScalarizeMinBits intrinsics like frexp will only scalarize when + // the struct elements have the same bitness. + if (!CurrVS || CurrVS->NumPacked != VS->NumPacked) + return false; + } IRBuilder<> Builder(&EVI); Scatterer Op0 = scatter(&EVI, Op, *VS); assert(!EVI.getIndices().empty() && "Make sure an index exists"); diff --git a/llvm/test/Transforms/Scalarizer/min-bits.ll b/llvm/test/Transforms/Scalarizer/min-bits.ll index 97cc71626e208..f9e6774ffff64 100644 --- a/llvm/test/Transforms/Scalarizer/min-bits.ll +++ b/llvm/test/Transforms/Scalarizer/min-bits.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt %s -passes='function(scalarizer,dce)' -S | FileCheck %s --check-prefixes=CHECK,MIN16 ; RUN: opt %s -passes='function(scalarizer,dce)' -S | FileCheck %s --check-prefixes=CHECK,MIN32 +; RUN: opt %s -passes='function(scalarizer,dce)' -S | FileCheck %s --check-prefixes=CHECK,MIN64 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" define void @load_add_store_v2i16(ptr %pa, ptr %pb) { @@ -23,6 +24,13 @@ define void @load_add_store_v2i16(ptr %pa, ptr %pb) { ; MIN32-NEXT: [[C:%.*]] = add <2 x i16> [[A]], [[B]] ; MIN32-NEXT: store <2 x i16> [[C]], ptr [[PA]], align 8 ; MIN32-NEXT: ret void +; +; MIN64-LABEL: @load_add_store_v2i16( +; MIN64-NEXT: [[A:%.*]] = load <2 x i16>, ptr [[PA:%.*]], align 8 +; MIN64-NEXT: [[B:%.*]] = load <2 x i16>, ptr [[PB:%.*]], align 8 +; MIN64-NEXT: [[C:%.*]] = add <2 x i16> [[A]], [[B]] +; MIN64-NEXT: store <2 x i16> [[C]], ptr [[PA]], align 8 +; MIN64-NEXT: ret void ; %a = load <2 x i16>, ptr %pa, align 8 %b = load <2 x i16>, ptr %pb, align 8 @@ -63,6 +71,13 @@ define void @load_add_store_v3i16(ptr %pa, ptr %pb) { ; MIN32-NEXT: store <2 x i16> [[C_I0]], ptr [[PA]], align 8 ; 
MIN32-NEXT: store i16 [[C_I1]], ptr [[PA_I1]], align 4 ; MIN32-NEXT: ret void +; +; MIN64-LABEL: @load_add_store_v3i16( +; MIN64-NEXT: [[A:%.*]] = load <3 x i16>, ptr [[PA:%.*]], align 8 +; MIN64-NEXT: [[B:%.*]] = load <3 x i16>, ptr [[PB:%.*]], align 8 +; MIN64-NEXT: [[C:%.*]] = add <3 x i16> [[A]], [[B]] +; MIN64-NEXT: store <3 x i16> [[C]], ptr [[PA]], align 8 +; MIN64-NEXT: ret void ; %a = load <3 x i16>, ptr %pa, align 8 %b = load <3 x i16>, ptr %pb, align 8 @@ -109,6 +124,13 @@ define void @load_add_store_v4i16(ptr %pa, ptr %pb) { ; MIN32-NEXT: store <2 x i16> [[C_I0]], ptr [[PA]], align 8 ; MIN32-NEXT: store <2 x i16> [[C_I1]], ptr [[PA_I1]], align 4 ; MIN32-NEXT: ret void +; +; MIN64-LABEL: @load_add_store_v4i16( +; MIN64-NEXT: [[A:%.*]] = load <4 x i16>, ptr [[PA:%.*]], align 8 +; MIN64-NEXT: [[B:%.*]] = load <4 x i16>, ptr [[PB:%.*]], align 8 +; MIN64-NEXT: [[C:%.*]] = add <4 x i16> [[A]], [[B]] +; MIN64-NEXT: store <4 x i16> [[C]], ptr [[PA]], align 8 +; MIN64-NEXT: ret void ; %a = load <4 x i16>, ptr %pa, align 8 %b = load <4 x i16>, ptr %pb, align 8 @@ -153,6 +175,13 @@ define void @load_add_store_v4i10(ptr %pa, ptr %pb) { ; MIN32-NEXT: [[C:%.*]] = insertelement <4 x i10> [[TMP1]], i10 [[C_I1]], i64 3 ; MIN32-NEXT: store <4 x i10> [[C]], ptr [[PA]], align 8 ; MIN32-NEXT: ret void +; +; MIN64-LABEL: @load_add_store_v4i10( +; MIN64-NEXT: [[A:%.*]] = load <4 x i10>, ptr [[PA:%.*]], align 8 +; MIN64-NEXT: [[B:%.*]] = load <4 x i10>, ptr [[PB:%.*]], align 8 +; MIN64-NEXT: [[C:%.*]] = add <4 x i10> [[A]], [[B]] +; MIN64-NEXT: store <4 x i10> [[C]], ptr [[PA]], align 8 +; MIN64-NEXT: ret void ; %a = load <4 x i10>, ptr %pa, align 8 %b = load <4 x i10>, ptr %pb, align 8 @@ -176,6 +205,10 @@ define <2 x half> @select_uniform_condition_v2f16(<2 x half> %a, <2 x half> %b, ; MIN32-LABEL: @select_uniform_condition_v2f16( ; MIN32-NEXT: [[R:%.*]] = select i1 [[CC:%.*]], <2 x half> [[A:%.*]], <2 x half> [[B:%.*]] ; MIN32-NEXT: ret <2 x half> [[R]] +; +; MIN64-LABEL: 
@select_uniform_condition_v2f16( +; MIN64-NEXT: [[R:%.*]] = select i1 [[CC:%.*]], <2 x half> [[A:%.*]], <2 x half> [[B:%.*]] +; MIN64-NEXT: ret <2 x half> [[R]] ; %r = select i1 %cc, <2 x half> %a, <2 x half> %b ret <2 x half> %r @@ -207,6 +240,10 @@ define <3 x half> @select_uniform_condition_v3f16(<3 x half> %a, <3 x half> %b, ; MIN32-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <3 x i32> ; MIN32-NEXT: [[R:%.*]] = insertelement <3 x half> [[TMP1]], half [[R_I1]], i64 2 ; MIN32-NEXT: ret <3 x half> [[R]] +; +; MIN64-LABEL: @select_uniform_condition_v3f16( +; MIN64-NEXT: [[R:%.*]] = select i1 [[CC:%.*]], <3 x half> [[A:%.*]], <3 x half> [[B:%.*]] +; MIN64-NEXT: ret <3 x half> [[R]] ; %r = select i1 %cc, <3 x half> %a, <3 x half> %b ret <3 x half> %r @@ -243,6 +280,10 @@ define <4 x half> @select_uniform_condition_v4f16(<4 x half> %a, <4 x half> %b, ; MIN32-NEXT: [[TMP2:%.*]] = shufflevector <2 x half> [[R_I1]], <2 x half> [[R_I1]], <4 x i32> ; MIN32-NEXT: [[R:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP2]], <4 x i32> ; MIN32-NEXT: ret <4 x half> [[R]] +; +; MIN64-LABEL: @select_uniform_condition_v4f16( +; MIN64-NEXT: [[R:%.*]] = select i1 [[CC:%.*]], <4 x half> [[A:%.*]], <4 x half> [[B:%.*]] +; MIN64-NEXT: ret <4 x half> [[R]] ; %r = select i1 %cc, <4 x half> %a, <4 x half> %b ret <4 x half> %r @@ -270,6 +311,10 @@ define <2 x half> @unary_v2f16(<2 x half> %a) { ; MIN32-LABEL: @unary_v2f16( ; MIN32-NEXT: [[R:%.*]] = fneg <2 x half> [[A:%.*]] ; MIN32-NEXT: ret <2 x half> [[R]] +; +; MIN64-LABEL: @unary_v2f16( +; MIN64-NEXT: [[R:%.*]] = fneg <2 x half> [[A:%.*]] +; MIN64-NEXT: ret <2 x half> [[R]] ; %r = fneg <2 x half> %a ret <2 x half> %r @@ -296,6 +341,10 @@ define <3 x half> @unary_v3f16(<3 x half> %a) { ; MIN32-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <3 x i32> ; MIN32-NEXT: [[R:%.*]] = insertelement <3 x half> [[TMP1]], half [[R_I1]], i64 2 ; MIN32-NEXT: ret <3 x half> [[R]] +; 
+; MIN64-LABEL: @unary_v3f16( +; MIN64-NEXT: [[R:%.*]] = fneg <3 x half> [[A:%.*]] +; MIN64-NEXT: ret <3 x half> [[R]] ; %r = fneg <3 x half> %a ret <3 x half> %r @@ -326,6 +375,10 @@ define <4 x half> @unary_v4f16(<4 x half> %a) { ; MIN32-NEXT: [[TMP2:%.*]] = shufflevector <2 x half> [[R_I1]], <2 x half> [[R_I1]], <4 x i32> ; MIN32-NEXT: [[R:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP2]], <4 x i32> ; MIN32-NEXT: ret <4 x half> [[R]] +; +; MIN64-LABEL: @unary_v4f16( +; MIN64-NEXT: [[R:%.*]] = fneg <4 x half> [[A:%.*]] +; MIN64-NEXT: ret <4 x half> [[R]] ; %r = fneg <4 x half> %a ret <4 x half> %r @@ -346,6 +399,10 @@ define <2 x half> @binary_v2f16(<2 x half> %a, <2 x half> %b) { ; MIN32-LABEL: @binary_v2f16( ; MIN32-NEXT: [[R:%.*]] = fadd <2 x half> [[A:%.*]], [[B:%.*]] ; MIN32-NEXT: ret <2 x half> [[R]] +; +; MIN64-LABEL: @binary_v2f16( +; MIN64-NEXT: [[R:%.*]] = fadd <2 x half> [[A:%.*]], [[B:%.*]] +; MIN64-NEXT: ret <2 x half> [[R]] ; %r = fadd <2 x half> %a, %b ret <2 x half> %r @@ -377,6 +434,10 @@ define <3 x half> @binary_v3f16(<3 x half> %a, <3 x half> %b) { ; MIN32-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <3 x i32> ; MIN32-NEXT: [[R:%.*]] = insertelement <3 x half> [[TMP1]], half [[R_I1]], i64 2 ; MIN32-NEXT: ret <3 x half> [[R]] +; +; MIN64-LABEL: @binary_v3f16( +; MIN64-NEXT: [[R:%.*]] = fadd <3 x half> [[A:%.*]], [[B:%.*]] +; MIN64-NEXT: ret <3 x half> [[R]] ; %r = fadd <3 x half> %a, %b ret <3 x half> %r @@ -413,6 +474,10 @@ define <4 x half> @binary_v4f16(<4 x half> %a, <4 x half> %b) { ; MIN32-NEXT: [[TMP2:%.*]] = shufflevector <2 x half> [[R_I1]], <2 x half> [[R_I1]], <4 x i32> ; MIN32-NEXT: [[R:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP2]], <4 x i32> ; MIN32-NEXT: ret <4 x half> [[R]] +; +; MIN64-LABEL: @binary_v4f16( +; MIN64-NEXT: [[R:%.*]] = fadd <4 x half> [[A:%.*]], [[B:%.*]] +; MIN64-NEXT: ret <4 x half> [[R]] ; %r = fadd <4 x half> %a, %b ret <4 x half> %r @@ -431,6 
+496,10 @@ define <2 x i16> @fptosi_v2f16(<2 x half> %a) { ; MIN32-LABEL: @fptosi_v2f16( ; MIN32-NEXT: [[R:%.*]] = fptosi <2 x half> [[A:%.*]] to <2 x i16> ; MIN32-NEXT: ret <2 x i16> [[R]] +; +; MIN64-LABEL: @fptosi_v2f16( +; MIN64-NEXT: [[R:%.*]] = fptosi <2 x half> [[A:%.*]] to <2 x i16> +; MIN64-NEXT: ret <2 x i16> [[R]] ; %r = fptosi <2 x half> %a to <2 x i16> ret <2 x i16> %r @@ -457,6 +526,10 @@ define <3 x i16> @fptosi_v3f16(<3 x half> %a) { ; MIN32-NEXT: [[TMP1:%.*]] = shufflevector <2 x i16> [[R_I0]], <2 x i16> [[R_I0]], <3 x i32> ; MIN32-NEXT: [[R:%.*]] = insertelement <3 x i16> [[TMP1]], i16 [[R_I1]], i64 2 ; MIN32-NEXT: ret <3 x i16> [[R]] +; +; MIN64-LABEL: @fptosi_v3f16( +; MIN64-NEXT: [[R:%.*]] = fptosi <3 x half> [[A:%.*]] to <3 x i16> +; MIN64-NEXT: ret <3 x i16> [[R]] ; %r = fptosi <3 x half> %a to <3 x i16> ret <3 x i16> %r @@ -487,6 +560,10 @@ define <4 x i16> @fptosi_v4f16(<4 x half> %a) { ; MIN32-NEXT: [[TMP2:%.*]] = shufflevector <2 x i16> [[R_I1]], <2 x i16> [[R_I1]], <4 x i32> ; MIN32-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP2]], <4 x i32> ; MIN32-NEXT: ret <4 x i16> [[R]] +; +; MIN64-LABEL: @fptosi_v4f16( +; MIN64-NEXT: [[R:%.*]] = fptosi <4 x half> [[A:%.*]] to <4 x i16> +; MIN64-NEXT: ret <4 x i16> [[R]] ; %r = fptosi <4 x half> %a to <4 x i16> ret <4 x i16> %r @@ -511,6 +588,10 @@ define <4 x float> @fpext_v4f16(<4 x half> %a) { ; MIN32-LABEL: @fpext_v4f16( ; MIN32-NEXT: [[R:%.*]] = fpext <4 x half> [[A:%.*]] to <4 x float> ; MIN32-NEXT: ret <4 x float> [[R]] +; +; MIN64-LABEL: @fpext_v4f16( +; MIN64-NEXT: [[R:%.*]] = fpext <4 x half> [[A:%.*]] to <4 x float> +; MIN64-NEXT: ret <4 x float> [[R]] ; %r = fpext <4 x half> %a to <4 x float> ret <4 x float> %r @@ -544,6 +625,10 @@ define <4 x ptr> @gep1_v4(ptr %base, <4 x i16> %a) { ; MIN32-LABEL: @gep1_v4( ; MIN32-NEXT: [[P:%.*]] = getelementptr i32, ptr [[BASE:%.*]], <4 x i16> [[A:%.*]] ; MIN32-NEXT: ret <4 x ptr> [[P]] +; +; MIN64-LABEL: @gep1_v4( +; MIN64-NEXT: 
[[P:%.*]] = getelementptr i32, ptr [[BASE:%.*]], <4 x i16> [[A:%.*]] +; MIN64-NEXT: ret <4 x ptr> [[P]] ; %p = getelementptr i32, ptr %base, <4 x i16> %a ret <4 x ptr> %p @@ -592,6 +677,10 @@ define <4 x ptr> @gep3_v4(<4 x ptr> %base, <4 x i16> %a) { ; MIN32-LABEL: @gep3_v4( ; MIN32-NEXT: [[P:%.*]] = getelementptr i32, <4 x ptr> [[BASE:%.*]], <4 x i16> [[A:%.*]] ; MIN32-NEXT: ret <4 x ptr> [[P]] +; +; MIN64-LABEL: @gep3_v4( +; MIN64-NEXT: [[P:%.*]] = getelementptr i32, <4 x ptr> [[BASE:%.*]], <4 x i16> [[A:%.*]] +; MIN64-NEXT: ret <4 x ptr> [[P]] ; %p = getelementptr i32, <4 x ptr> %base, <4 x i16> %a ret <4 x ptr> %p @@ -609,6 +698,11 @@ define void @insertelement_v2i16(ptr %p, <2 x i16> %a, i16 %b) { ; MIN32-NEXT: [[R:%.*]] = insertelement <2 x i16> [[A:%.*]], i16 [[B:%.*]], i64 1 ; MIN32-NEXT: store <2 x i16> [[R]], ptr [[P:%.*]], align 4 ; MIN32-NEXT: ret void +; +; MIN64-LABEL: @insertelement_v2i16( +; MIN64-NEXT: [[R:%.*]] = insertelement <2 x i16> [[A:%.*]], i16 [[B:%.*]], i64 1 +; MIN64-NEXT: store <2 x i16> [[R]], ptr [[P:%.*]], align 4 +; MIN64-NEXT: ret void ; %r = insertelement <2 x i16> %a, i16 %b, i64 1 store <2 x i16> %r, ptr %p @@ -632,6 +726,11 @@ define void @insertelement_v3i16(ptr %p, <3 x i16> %a, i16 %b) { ; MIN32-NEXT: store <2 x i16> [[A_I0]], ptr [[P]], align 8 ; MIN32-NEXT: store i16 [[B:%.*]], ptr [[P_I1]], align 4 ; MIN32-NEXT: ret void +; +; MIN64-LABEL: @insertelement_v3i16( +; MIN64-NEXT: [[R:%.*]] = insertelement <3 x i16> [[A:%.*]], i16 [[B:%.*]], i64 2 +; MIN64-NEXT: store <3 x i16> [[R]], ptr [[P:%.*]], align 8 +; MIN64-NEXT: ret void ; %r = insertelement <3 x i16> %a, i16 %b, i64 2 store <3 x i16> %r, ptr %p @@ -660,6 +759,11 @@ define void @insertelement_v4i16(ptr %p, <4 x i16> %a, i16 %b) { ; MIN32-NEXT: store <2 x i16> [[A_I0]], ptr [[P]], align 8 ; MIN32-NEXT: store <2 x i16> [[TMP1]], ptr [[P_I1]], align 4 ; MIN32-NEXT: ret void +; +; MIN64-LABEL: @insertelement_v4i16( +; MIN64-NEXT: [[R:%.*]] = insertelement <4 x i16> 
[[A:%.*]], i16 [[B:%.*]], i64 3 +; MIN64-NEXT: store <4 x i16> [[R]], ptr [[P:%.*]], align 8 +; MIN64-NEXT: ret void ; %r = insertelement <4 x i16> %a, i16 %b, i64 3 store <4 x i16> %r, ptr %p @@ -677,6 +781,11 @@ define <2 x i16> @load_insertelement_v2i16(ptr %pa, i16 %b) { ; MIN32-NEXT: [[A:%.*]] = load <2 x i16>, ptr [[PA:%.*]], align 4 ; MIN32-NEXT: [[R:%.*]] = insertelement <2 x i16> [[A]], i16 [[B:%.*]], i64 1 ; MIN32-NEXT: ret <2 x i16> [[R]] +; +; MIN64-LABEL: @load_insertelement_v2i16( +; MIN64-NEXT: [[A:%.*]] = load <2 x i16>, ptr [[PA:%.*]], align 4 +; MIN64-NEXT: [[R:%.*]] = insertelement <2 x i16> [[A]], i16 [[B:%.*]], i64 1 +; MIN64-NEXT: ret <2 x i16> [[R]] ; %a = load <2 x i16>, ptr %pa %r = insertelement <2 x i16> %a, i16 %b, i64 1 @@ -698,6 +807,11 @@ define <3 x i16> @load_insertelement_v3i16(ptr %pa, i16 %b) { ; MIN32-NEXT: [[TMP1:%.*]] = shufflevector <2 x i16> [[A_I0]], <2 x i16> [[A_I0]], <3 x i32> ; MIN32-NEXT: [[R:%.*]] = insertelement <3 x i16> [[TMP1]], i16 [[B:%.*]], i64 2 ; MIN32-NEXT: ret <3 x i16> [[R]] +; +; MIN64-LABEL: @load_insertelement_v3i16( +; MIN64-NEXT: [[A:%.*]] = load <3 x i16>, ptr [[PA:%.*]], align 8 +; MIN64-NEXT: [[R:%.*]] = insertelement <3 x i16> [[A]], i16 [[B:%.*]], i64 2 +; MIN64-NEXT: ret <3 x i16> [[R]] ; %a = load <3 x i16>, ptr %pa %r = insertelement <3 x i16> %a, i16 %b, i64 2 @@ -726,6 +840,11 @@ define <4 x i16> @load_insertelement_v4i16(ptr %pa, i16 %b) { ; MIN32-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> [[TMP1]], <4 x i32> ; MIN32-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> ; MIN32-NEXT: ret <4 x i16> [[R]] +; +; MIN64-LABEL: @load_insertelement_v4i16( +; MIN64-NEXT: [[A:%.*]] = load <4 x i16>, ptr [[PA:%.*]], align 8 +; MIN64-NEXT: [[R:%.*]] = insertelement <4 x i16> [[A]], i16 [[B:%.*]], i64 3 +; MIN64-NEXT: ret <4 x i16> [[R]] ; %a = load <4 x i16>, ptr %pa %r = insertelement <4 x i16> %a, i16 %b, i64 3 @@ -758,6 +877,13 @@ define void 
@shufflevector_grow(ptr %pa, ptr %pb) { ; MIN32-NEXT: [[R_I1:%.*]] = shufflevector <4 x i16> [[R]], <4 x i16> poison, <2 x i32> ; MIN32-NEXT: store <2 x i16> [[R_I1]], ptr [[PA_I1]], align 4 ; MIN32-NEXT: ret void +; +; MIN64-LABEL: @shufflevector_grow( +; MIN64-NEXT: [[A:%.*]] = load <2 x i16>, ptr [[PA:%.*]], align 4 +; MIN64-NEXT: [[B:%.*]] = load <2 x i16>, ptr [[PB:%.*]], align 4 +; MIN64-NEXT: [[R:%.*]] = shufflevector <2 x i16> [[A]], <2 x i16> [[B]], <4 x i32> +; MIN64-NEXT: store <4 x i16> [[R]], ptr [[PA]], align 8 +; MIN64-NEXT: ret void ; %a = load <2 x i16>, ptr %pa %b = load <2 x i16>, ptr %pb @@ -786,6 +912,12 @@ define void @shufflevector_shrink(ptr %pa) { ; MIN32-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> poison, <2 x i32> ; MIN32-NEXT: store <2 x i16> [[R]], ptr [[PA]], align 4 ; MIN32-NEXT: ret void +; +; MIN64-LABEL: @shufflevector_shrink( +; MIN64-NEXT: [[A:%.*]] = load <4 x i16>, ptr [[PA:%.*]], align 8 +; MIN64-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> poison, <2 x i32> +; MIN64-NEXT: store <2 x i16> [[R]], ptr [[PA]], align 4 +; MIN64-NEXT: ret void ; %a = load <4 x i16>, ptr %pa %r = shufflevector <4 x i16> %a, <4 x i16> poison, <2 x i32> @@ -832,6 +964,22 @@ define void @phi_v2f16(ptr %base, i64 %bound) { ; MIN32-NEXT: store <2 x half> [[X_NEXT]], ptr [[BASE]], align 4 ; MIN32-NEXT: ret void ; +; MIN64-LABEL: @phi_v2f16( +; MIN64-NEXT: entry: +; MIN64-NEXT: br label [[LOOP:%.*]] +; MIN64: loop: +; MIN64-NEXT: [[X:%.*]] = phi <2 x half> [ zeroinitializer, [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ] +; MIN64-NEXT: [[IDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IDX_NEXT:%.*]], [[LOOP]] ] +; MIN64-NEXT: [[P:%.*]] = getelementptr <2 x half>, ptr [[BASE:%.*]], i64 [[IDX]] +; MIN64-NEXT: [[A:%.*]] = load <2 x half>, ptr [[P]], align 2 +; MIN64-NEXT: [[X_NEXT]] = fadd <2 x half> [[X]], [[A]] +; MIN64-NEXT: [[IDX_NEXT]] = add i64 [[IDX]], 1 +; MIN64-NEXT: [[CC:%.*]] = icmp ult i64 [[IDX_NEXT]], [[BOUND:%.*]] +; 
MIN64-NEXT: br i1 [[CC]], label [[LOOP]], label [[END:%.*]] +; MIN64: end: +; MIN64-NEXT: store <2 x half> [[X_NEXT]], ptr [[BASE]], align 4 +; MIN64-NEXT: ret void +; entry: br label %loop @@ -901,6 +1049,22 @@ define void @phi_v3f16(ptr %base, i64 %bound) { ; MIN32-NEXT: store half [[X_NEXT_I1]], ptr [[BASE_I1]], align 4 ; MIN32-NEXT: ret void ; +; MIN64-LABEL: @phi_v3f16( +; MIN64-NEXT: entry: +; MIN64-NEXT: br label [[LOOP:%.*]] +; MIN64: loop: +; MIN64-NEXT: [[X:%.*]] = phi <3 x half> [ zeroinitializer, [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ] +; MIN64-NEXT: [[IDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IDX_NEXT:%.*]], [[LOOP]] ] +; MIN64-NEXT: [[P:%.*]] = getelementptr <3 x half>, ptr [[BASE:%.*]], i64 [[IDX]] +; MIN64-NEXT: [[A:%.*]] = load <3 x half>, ptr [[P]], align 2 +; MIN64-NEXT: [[X_NEXT]] = fadd <3 x half> [[X]], [[A]] +; MIN64-NEXT: [[IDX_NEXT]] = add i64 [[IDX]], 1 +; MIN64-NEXT: [[CC:%.*]] = icmp ult i64 [[IDX_NEXT]], [[BOUND:%.*]] +; MIN64-NEXT: br i1 [[CC]], label [[LOOP]], label [[END:%.*]] +; MIN64: end: +; MIN64-NEXT: store <3 x half> [[X_NEXT]], ptr [[BASE]], align 8 +; MIN64-NEXT: ret void +; entry: br label %loop @@ -976,6 +1140,22 @@ define void @phi_v4f16(ptr %base, i64 %bound) { ; MIN32-NEXT: store <2 x half> [[X_NEXT_I1]], ptr [[BASE_I1]], align 4 ; MIN32-NEXT: ret void ; +; MIN64-LABEL: @phi_v4f16( +; MIN64-NEXT: entry: +; MIN64-NEXT: br label [[LOOP:%.*]] +; MIN64: loop: +; MIN64-NEXT: [[X:%.*]] = phi <4 x half> [ zeroinitializer, [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ] +; MIN64-NEXT: [[IDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IDX_NEXT:%.*]], [[LOOP]] ] +; MIN64-NEXT: [[P:%.*]] = getelementptr <4 x half>, ptr [[BASE:%.*]], i64 [[IDX]] +; MIN64-NEXT: [[A:%.*]] = load <4 x half>, ptr [[P]], align 2 +; MIN64-NEXT: [[X_NEXT]] = fadd <4 x half> [[X]], [[A]] +; MIN64-NEXT: [[IDX_NEXT]] = add i64 [[IDX]], 1 +; MIN64-NEXT: [[CC:%.*]] = icmp ult i64 [[IDX_NEXT]], [[BOUND:%.*]] +; MIN64-NEXT: br i1 [[CC]], label [[LOOP]], label 
[[END:%.*]] +; MIN64: end: +; MIN64-NEXT: store <4 x half> [[X_NEXT]], ptr [[BASE]], align 8 +; MIN64-NEXT: ret void +; entry: br label %loop @@ -1009,6 +1189,10 @@ define <2 x half> @call_v2f16(<2 x half> %a, <2 x half> %b) { ; MIN32-LABEL: @call_v2f16( ; MIN32-NEXT: [[R:%.*]] = call <2 x half> @llvm.minnum.v2f16(<2 x half> [[A:%.*]], <2 x half> [[B:%.*]]) ; MIN32-NEXT: ret <2 x half> [[R]] +; +; MIN64-LABEL: @call_v2f16( +; MIN64-NEXT: [[R:%.*]] = call <2 x half> @llvm.minnum.v2f16(<2 x half> [[A:%.*]], <2 x half> [[B:%.*]]) +; MIN64-NEXT: ret <2 x half> [[R]] ; %r = call <2 x half> @llvm.minnum.v2f16(<2 x half> %a, <2 x half> %b) ret <2 x half> %r @@ -1040,6 +1224,10 @@ define <3 x half> @call_v3f16(<3 x half> %a, <3 x half> %b) { ; MIN32-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <3 x i32> ; MIN32-NEXT: [[R:%.*]] = insertelement <3 x half> [[TMP1]], half [[R_I1]], i64 2 ; MIN32-NEXT: ret <3 x half> [[R]] +; +; MIN64-LABEL: @call_v3f16( +; MIN64-NEXT: [[R:%.*]] = call <3 x half> @llvm.minnum.v3f16(<3 x half> [[A:%.*]], <3 x half> [[B:%.*]]) +; MIN64-NEXT: ret <3 x half> [[R]] ; %r = call <3 x half> @llvm.minnum.v3f16(<3 x half> %a, <3 x half> %b) ret <3 x half> %r @@ -1076,11 +1264,53 @@ define <4 x half> @call_v4f16(<4 x half> %a, <4 x half> %b) { ; MIN32-NEXT: [[TMP2:%.*]] = shufflevector <2 x half> [[R_I1]], <2 x half> [[R_I1]], <4 x i32> ; MIN32-NEXT: [[R:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP2]], <4 x i32> ; MIN32-NEXT: ret <4 x half> [[R]] +; +; MIN64-LABEL: @call_v4f16( +; MIN64-NEXT: [[R:%.*]] = call <4 x half> @llvm.minnum.v4f16(<4 x half> [[A:%.*]], <4 x half> [[B:%.*]]) +; MIN64-NEXT: ret <4 x half> [[R]] ; %r = call <4 x half> @llvm.minnum.v4f16(<4 x half> %a, <4 x half> %b) ret <4 x half> %r } +define <3 x i32> @uadd_with_overflow_v3i32(<3 x i32> %a, <3 x i32> %b) { +; CHECK-LABEL: @uadd_with_overflow_v3i32( +; CHECK-NEXT: [[T:%.*]] = call { <3 x i32>, <3 x i1> } 
@llvm.uadd.with.overflow.v3i32(<3 x i32> [[A:%.*]], <3 x i32> [[B:%.*]]) +; CHECK-NEXT: [[R:%.*]] = extractvalue { <3 x i32>, <3 x i1> } [[T]], 0 +; CHECK-NEXT: ret <3 x i32> [[R]] +; + %t = call { <3 x i32>, <3 x i1> } @llvm.uadd.with.overflow.v3i32(<3 x i32> %a, <3 x i32> %b) + %r = extractvalue { <3 x i32>, <3 x i1> } %t, 0 + ret <3 x i32> %r +} + +define noundef <2 x half> @frexp_v2f16(<2 x half> noundef %h) { +; MIN16-LABEL: @frexp_v2f16( +; MIN16-NEXT: [[H_I0:%.*]] = extractelement <2 x half> [[H:%.*]], i64 0 +; MIN16-NEXT: [[R_I0:%.*]] = call { half, i32 } @llvm.frexp.f16.i32(half [[H_I0]]) +; MIN16-NEXT: [[H_I1:%.*]] = extractelement <2 x half> [[H]], i64 1 +; MIN16-NEXT: [[R_I1:%.*]] = call { half, i32 } @llvm.frexp.f16.i32(half [[H_I1]]) +; MIN16-NEXT: [[E0_ELEM0:%.*]] = extractvalue { half, i32 } [[R_I0]], 0 +; MIN16-NEXT: [[E0_ELEM01:%.*]] = extractvalue { half, i32 } [[R_I1]], 0 +; MIN16-NEXT: [[E0_UPTO0:%.*]] = insertelement <2 x half> poison, half [[E0_ELEM0]], i64 0 +; MIN16-NEXT: [[E0:%.*]] = insertelement <2 x half> [[E0_UPTO0]], half [[E0_ELEM01]], i64 1 +; MIN16-NEXT: ret <2 x half> [[E0]] +; +; MIN32-LABEL: @frexp_v2f16( +; MIN32-NEXT: [[R:%.*]] = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> [[H:%.*]]) +; MIN32-NEXT: [[E0:%.*]] = extractvalue { <2 x half>, <2 x i32> } [[R]], 0 +; MIN32-NEXT: ret <2 x half> [[E0]] +; +; MIN64-LABEL: @frexp_v2f16( +; MIN64-NEXT: [[R:%.*]] = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> [[H:%.*]]) +; MIN64-NEXT: [[E0:%.*]] = extractvalue { <2 x half>, <2 x i32> } [[R]], 0 +; MIN64-NEXT: ret <2 x half> [[E0]] +; + %r = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> %h) + %e0 = extractvalue { <2 x half>, <2 x i32> } %r, 0 + ret <2 x half> %e0 +} + declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) declare <3 x half> @llvm.minnum.v3f16(<3 x half>, <3 x half>) declare <4 x half> @llvm.minnum.v4f16(<4 x half>, <4 x half>)