From 24233ed3ac1b10a777d03c4d8b588b354d9b507d Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Mon, 8 Jul 2024 14:12:31 +0000 Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20initia?= =?UTF-8?q?l=20version?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.5 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 51 +++++++++---------- .../SLPVectorizer/AArch64/getelementptr.ll | 31 +++++------ .../SLPVectorizer/AArch64/loadorder.ll | 20 ++++---- .../AArch64/transpose-inseltpoison.ll | 8 +-- .../SLPVectorizer/AArch64/transpose.ll | 8 +-- .../SLPVectorizer/RISCV/reductions.ll | 8 +-- .../RISCV/shuffled-gather-casted.ll | 16 +++--- .../RISCV/unsigned-icmp-signed-op.ll | 4 +- .../SystemZ/minbitwidth-trunc.ll | 2 +- .../Transforms/SLPVectorizer/X86/PR35628_2.ll | 20 ++++---- .../X86/blending-shuffle-inseltpoison.ll | 2 +- .../SLPVectorizer/X86/blending-shuffle.ll | 2 +- .../SLPVectorizer/X86/entries-different-vf.ll | 6 +-- .../X86/external-used-across-reductions.ll | 11 ++-- .../X86/extract-many-users-buildvector.ll | 12 +++-- .../X86/extract-scalar-from-undef.ll | 6 +-- .../extractelement-single-use-many-nodes.ll | 8 +-- .../gather-extractelements-different-bbs.ll | 14 ++--- .../SLPVectorizer/X86/horizontal-list.ll | 19 +++---- .../X86/icmp-altopcode-after-reordering.ll | 4 +- .../X86/int-bitcast-minbitwidth.ll | 2 +- .../X86/minbitwidth-node-with-multi-users.ll | 8 +-- .../X86/minbitwidth-transformed-operand.ll | 8 +-- .../X86/reduction-bool-logic-op-inside.ll | 8 +-- .../SLPVectorizer/X86/reduction-logical.ll | 21 ++++---- .../SLPVectorizer/X86/reduction_unrolled.ll | 6 +-- .../X86/remark_gather-load-redux-cost.ll | 2 +- .../SLPVectorizer/X86/reorder-vf-to-resize.ll | 6 +-- .../SLPVectorizer/X86/reorder_repeated_ops.ll | 12 ++--- .../slp-umax-rdx-matcher-crash.ll | 2 +- 30 files changed, 159 insertions(+), 168 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index b754950199f93..366b267ac1843 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -16758,9 +16758,8 @@ class HorizontalReduction { SmallVectorImpl &ExtraArgs, SmallVectorImpl &PossibleReducedVals, SmallVectorImpl &ReductionOps) { - for (int I = getFirstOperandIndex(TreeN), - End = getNumberOfOperands(TreeN); - I < End; ++I) { + for (int I : reverse(seq(getFirstOperandIndex(TreeN), + getNumberOfOperands(TreeN)))) { Value *EdgeVal = getRdxOperand(TreeN, I); ReducedValsToOps[EdgeVal].push_back(TreeN); auto *EdgeInst = dyn_cast(EdgeVal); @@ -16796,7 +16795,6 @@ class HorizontalReduction { initReductionOps(Root); DenseMap> LoadsMap; SmallSet LoadKeyUsed; - SmallPtrSet DoNotReverseVals; auto GenerateLoadsSubkey = [&](size_t Key, LoadInst *LI) { Value *Ptr = getUnderlyingObject(LI->getPointerOperand()); @@ -16813,14 +16811,12 @@ class HorizontalReduction { if (arePointersCompatible(RLI->getPointerOperand(), LI->getPointerOperand(), TLI)) { hash_code SubKey = hash_value(RLI->getPointerOperand()); - DoNotReverseVals.insert(RLI); return SubKey; } } if (LIt->second.size() > 2) { hash_code SubKey = hash_value(LIt->second.back()->getPointerOperand()); - DoNotReverseVals.insert(LIt->second.back()); return SubKey; } } @@ -16885,24 +16881,19 @@ class HorizontalReduction { }); int NewIdx = -1; for (ArrayRef Data : PossibleRedValsVect) { - if (isGoodForReduction(Data) || - (isa(Data.front()) && NewIdx >= 0 && - isa(ReducedVals[NewIdx].front()) && - getUnderlyingObject( - cast(Data.front())->getPointerOperand()) == - getUnderlyingObject(cast(ReducedVals[NewIdx].front()) - ->getPointerOperand()))) { - if (NewIdx < 0) { - NewIdx = ReducedVals.size(); - ReducedVals.emplace_back(); - } - if (DoNotReverseVals.contains(Data.front())) - ReducedVals[NewIdx].append(Data.begin(), Data.end()); - else - ReducedVals[NewIdx].append(Data.rbegin(), Data.rend()); - } else { - ReducedVals.emplace_back().append(Data.rbegin(), Data.rend()); + if (NewIdx < 0 || + (!isGoodForReduction(Data) && + (!isa(Data.front()) || + !isa(ReducedVals[NewIdx].front()) || + getUnderlyingObject( + cast(Data.front())->getPointerOperand()) != + getUnderlyingObject( + cast(ReducedVals[NewIdx].front()) + ->getPointerOperand())))) { + NewIdx = ReducedVals.size(); + ReducedVals.emplace_back(); } + ReducedVals[NewIdx].append(Data.rbegin(), Data.rend()); } } // Sort the reduced values by number of same/alternate opcode and/or pointer @@ -17524,10 +17515,18 @@ class HorizontalReduction { } #endif if (!Ignore->use_empty()) { - Value *Undef = UndefValue::get(Ignore->getType()); - Ignore->replaceAllUsesWith(Undef); + Value *P = PoisonValue::get(Ignore->getType()); + Ignore->replaceAllUsesWith(P); + } + auto *I = dyn_cast(Ignore); + // Clear the operands with non single use. Allows better + // vectorization. + for (unsigned Idx : seq(I->getNumOperands())) { + Value *Op = I->getOperand(Idx); + if (!Op->hasOneUse()) + I->setOperand(Idx, PoisonValue::get(Op->getType())); } - V.eraseInstruction(cast(Ignore)); + V.eraseInstruction(I); } } } else if (!CheckForReusedReductionOps) { diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll index 821abca63c29e..293c3fcbcff06 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll @@ -128,12 +128,12 @@ for.body: ; YAML-LABEL: Function: getelementptr_2x32 ; YAML: --- !Passed -; YAML-NEXT: Pass: slp-vectorizer -; YAML-NEXT: Name: VectorizedList -; YAML-NEXT: Function: getelementptr_2x32 -; YAML-NEXT: Args: -; YAML-NEXT: - String: 'SLP vectorized with cost ' -; YAML-NEXT: - Cost: '6' +; YAML: Pass: slp-vectorizer +; YAML: Name: VectorizedList +; YAML: Function: getelementptr_2x32 +; YAML: Args: +; YAML: - String: 'SLP vectorized with cost ' +; YAML: - Cost: '6' ; YAML-NEXT: - String: ' and with tree size ' ; YAML-NEXT: - TreeSize: '3' @@ -149,21 +149,15 @@ define i32 @getelementptr_2x32(ptr nocapture readonly %g, i32 %n, i32 %x, i32 %y ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD16:%.*]], [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ] +; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_RDX:%.*]], [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ] ; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[SUM_032:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[ADD16]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SUM_032:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[OP_RDX]], [[FOR_BODY]] ] ; CHECK-NEXT: [[T4:%.*]] = shl nuw nsw i32 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = zext nneg i32 [[T4]] to i64 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[G:%.*]], i64 [[TMP2]] -; CHECK-NEXT: [[T6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[T6]], [[SUM_032]] -; CHECK-NEXT: [[T7:%.*]] = or disjoint i32 [[T4]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = zext nneg i32 [[T7]] to i64 -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[G]], i64 [[TMP3]] -; CHECK-NEXT: [[T8:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4 -; CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[ADD1]], [[T8]] +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[T4]], i64 0 ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = add nsw <2 x i32> [[TMP5]], [[TMP1]] @@ -171,12 +165,15 @@ define i32 @getelementptr_2x32(ptr nocapture readonly %g, i32 %n, i32 %x, i32 %y ; CHECK-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64 ; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[G]], i64 [[TMP8]] ; CHECK-NEXT: [[T10:%.*]] = load i32, ptr [[ARRAYIDX10]], align 4 -; CHECK-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD6]], [[T10]] ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[TMP6]], i64 1 ; CHECK-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64 ; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, ptr [[G]], i64 [[TMP10]] ; CHECK-NEXT: [[T12:%.*]] = load i32, ptr [[ARRAYIDX15]], align 4 -; CHECK-NEXT: [[ADD16]] = add nsw i32 [[ADD11]], [[T12]] +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[T10]], i64 2 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[T12]], i64 3 +; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP13]]) +; CHECK-NEXT: [[OP_RDX]] = add i32 [[TMP14]], [[SUM_032]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]] diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll index f98d0ad0527c2..807d2468d4271 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll @@ -1231,26 +1231,26 @@ define dso_local i32 @full(ptr nocapture noundef readonly %p1, i32 noundef %st1, ; CHECK-NEXT: [[TMP13:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_2]], align 1 ; CHECK-NEXT: [[TMP14:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_3]], align 1 ; CHECK-NEXT: [[TMP15:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_3]], align 1 -; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i8> [[TMP12]], <4 x i8> [[TMP8]], <16 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i8> [[TMP8]], <4 x i8> [[TMP12]], <16 x i32> ; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <16 x i8> [[TMP16]], <16 x i8> [[TMP17]], <16 x i32> ; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <16 x i8> [[TMP18]], <16 x i8> [[TMP19]], <16 x i32> ; CHECK-NEXT: [[TMP21:%.*]] = zext <16 x i8> [[TMP20]] to <16 x i32> -; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i8> [[TMP13]], <4 x i8> [[TMP9]], <16 x i32> +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> [[TMP13]], <16 x i32> ; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <4 x i8> [[TMP5]], <4 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <16 x i8> [[TMP22]], <16 x i8> [[TMP23]], <16 x i32> ; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <16 x i8> [[TMP24]], <16 x i8> [[TMP25]], <16 x i32> ; CHECK-NEXT: [[TMP27:%.*]] = zext <16 x i8> [[TMP26]] to <16 x i32> ; CHECK-NEXT: [[TMP28:%.*]] = sub nsw <16 x i32> [[TMP21]], [[TMP27]] -; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <4 x i8> [[TMP14]], <4 x i8> [[TMP10]], <16 x i32> +; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <4 x i8> [[TMP10]], <4 x i8> [[TMP14]], <16 x i32> ; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <4 x i8> [[TMP6]], <4 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <16 x i8> [[TMP29]], <16 x i8> [[TMP30]], <16 x i32> ; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP33:%.*]] = shufflevector <16 x i8> [[TMP31]], <16 x i8> [[TMP32]], <16 x i32> ; CHECK-NEXT: [[TMP34:%.*]] = zext <16 x i8> [[TMP33]] to <16 x i32> -; CHECK-NEXT: [[TMP35:%.*]] = shufflevector <4 x i8> [[TMP15]], <4 x i8> [[TMP11]], <16 x i32> +; CHECK-NEXT: [[TMP35:%.*]] = shufflevector <4 x i8> [[TMP11]], <4 x i8> [[TMP15]], <16 x i32> ; CHECK-NEXT: [[TMP36:%.*]] = shufflevector <4 x i8> [[TMP7]], <4 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP37:%.*]] = shufflevector <16 x i8> [[TMP35]], <16 x i8> [[TMP36]], <16 x i32> ; CHECK-NEXT: [[TMP38:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <16 x i32> @@ -1262,16 +1262,16 @@ define dso_local i32 @full(ptr nocapture noundef readonly %p1, i32 noundef %st1, ; CHECK-NEXT: [[TMP44:%.*]] = shufflevector <16 x i32> [[TMP43]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP45:%.*]] = add nsw <16 x i32> [[TMP43]], [[TMP44]] ; CHECK-NEXT: [[TMP46:%.*]] = sub nsw <16 x i32> [[TMP43]], [[TMP44]] -; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <16 x i32> [[TMP45]], <16 x i32> [[TMP46]], <16 x i32> -; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <16 x i32> [[TMP47]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <16 x i32> [[TMP45]], <16 x i32> [[TMP46]], <16 x i32> +; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <16 x i32> [[TMP47]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP49:%.*]] = add nsw <16 x i32> [[TMP47]], [[TMP48]] ; CHECK-NEXT: [[TMP50:%.*]] = sub nsw <16 x i32> [[TMP47]], [[TMP48]] ; CHECK-NEXT: [[TMP51:%.*]] = shufflevector <16 x i32> [[TMP49]], <16 x i32> [[TMP50]], <16 x i32> ; CHECK-NEXT: [[TMP52:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP53:%.*]] = add nsw <16 x i32> [[TMP51]], [[TMP52]] -; CHECK-NEXT: [[TMP54:%.*]] = sub nsw <16 x i32> [[TMP51]], [[TMP52]] -; CHECK-NEXT: [[TMP55:%.*]] = shufflevector <16 x i32> [[TMP53]], <16 x i32> [[TMP54]], <16 x i32> -; CHECK-NEXT: [[TMP56:%.*]] = shufflevector <16 x i32> [[TMP55]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP53:%.*]] = sub nsw <16 x i32> [[TMP51]], [[TMP52]] +; CHECK-NEXT: [[TMP54:%.*]] = add nsw <16 x i32> [[TMP51]], [[TMP52]] +; CHECK-NEXT: [[TMP55:%.*]] = shufflevector <16 x i32> [[TMP53]], <16 x i32> [[TMP54]], <16 x i32> +; CHECK-NEXT: [[TMP56:%.*]] = shufflevector <16 x i32> [[TMP55]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP57:%.*]] = add nsw <16 x i32> [[TMP55]], [[TMP56]] ; CHECK-NEXT: [[TMP58:%.*]] = sub nsw <16 x i32> [[TMP55]], [[TMP56]] ; CHECK-NEXT: [[TMP59:%.*]] = shufflevector <16 x i32> [[TMP57]], <16 x i32> [[TMP58]], <16 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll index 26d3a405019bf..cd4aa9a73dba2 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll @@ -195,10 +195,10 @@ define <4 x i32> @build_vec_v4i32_3_binops(<2 x i32> %v0, <2 x i32> %v1) { define i32 @reduction_v4i32(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @reduction_v4i32( -; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[V0:%.*]], [[V1:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = sub <4 x i32> [[V0]], [[V1]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> [[V0:%.*]], [[V1:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[V0]], [[V1]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[TMP3]] ; CHECK-NEXT: [[TMP6:%.*]] = lshr <4 x i32> [[TMP5]], ; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i32> [[TMP6]], diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll index bbf56b9a86ce1..1e0245812d8d7 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll @@ -195,10 +195,10 @@ define <4 x i32> @build_vec_v4i32_3_binops(<2 x i32> %v0, <2 x i32> %v1) { define i32 @reduction_v4i32(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @reduction_v4i32( -; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[V0:%.*]], [[V1:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = sub <4 x i32> [[V0]], [[V1]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> [[V0:%.*]], [[V1:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[V0]], [[V1]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[TMP3]] ; CHECK-NEXT: [[TMP6:%.*]] = lshr <4 x i32> [[TMP5]], ; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i32> [[TMP6]], diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll index 1e7eb4a416724..0f633dba1f792 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll @@ -814,8 +814,8 @@ define i64 @red_zext_ld_4xi64(ptr %ptr) { ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 3 ; CHECK-NEXT: [[LD3:%.*]] = load i8, ptr [[GEP_2]], align 1 ; CHECK-NEXT: [[ZEXT_3:%.*]] = zext i8 [[LD3]] to i64 -; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[ADD_2]], [[ZEXT_3]] -; CHECK-NEXT: ret i64 [[TMP3]] +; CHECK-NEXT: [[ADD_3:%.*]] = add nuw nsw i64 [[ADD_2]], [[ZEXT_3]] +; CHECK-NEXT: ret i64 [[ADD_3]] ; entry: %ld0 = load i8, ptr %ptr @@ -1010,8 +1010,8 @@ define i32 @reduce_sum_2arrays_a(ptr noalias %p, ptr noalias %q) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[P:%.*]], align 1 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[Q:%.*]], align 1 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <8 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <8 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = zext <8 x i8> [[TMP4]] to <8 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP5]]) diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/shuffled-gather-casted.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/shuffled-gather-casted.ll index 7b65467d7f9ef..fc805b226d3b7 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/shuffled-gather-casted.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/shuffled-gather-casted.ll @@ -6,13 +6,13 @@ define i32 @test(ptr %p) { ; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[D_0:%.*]] = load i16, ptr [[P]], align 4 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> , i16 [[D_0]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> , i16 [[D_0]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = or <4 x i16> [[TMP0]], zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i16> [[TMP1]], zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> , <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> , <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <4 x i32> [[TMP4]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i16> [[TMP2]], <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i16> [[TMP2]], <4 x i16> ; CHECK-NEXT: [[TMP7:%.*]] = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> [[TMP6]]) ; CHECK-NEXT: [[TMP8:%.*]] = zext i16 [[TMP7]] to i32 ; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP8]], i32 1) @@ -58,13 +58,13 @@ define i32 @test1(ptr %p) { ; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[D_0:%.*]] = load i16, ptr [[P]], align 4 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> , i16 [[D_0]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> , i16 [[D_0]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP3]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i32> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> , <4 x i32> [[TMP3]], <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i32> [[TMP4]], -; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> , <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP7]]) ; CHECK-NEXT: ret i32 [[TMP8]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/unsigned-icmp-signed-op.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/unsigned-icmp-signed-op.ll index 5ec6b4f1040d8..550360da216b2 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/unsigned-icmp-signed-op.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/unsigned-icmp-signed-op.ll @@ -6,8 +6,8 @@ define i32 @test(ptr %f, i16 %0) { ; CHECK-SAME: ptr [[F:%.*]], i16 [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[F]], align 2 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> , i16 [[TMP0]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> , i16 [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> , i16 [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> , i16 [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <4 x i32> [[TMP6]], [[TMP7]] diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/minbitwidth-trunc.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/minbitwidth-trunc.ll index e1942eb326079..c9bd95f83d22d 100644 --- a/llvm/test/Transforms/SLPVectorizer/SystemZ/minbitwidth-trunc.ll +++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/minbitwidth-trunc.ll @@ -6,7 +6,7 @@ define void @test() { ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[TMP1:%.*]] = zext i8 0 to i32 ; CHECK-NEXT: [[TMP2:%.*]] = zext i8 0 to i32 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> , i32 [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> , i32 [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> zeroinitializer, <4 x i32> zeroinitializer, <4 x i32> [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = select i1 false, i32 0, i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i32 0, i32 [[TMP1]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll index f3638b5d087f4..d8845f20ecd72 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll @@ -8,18 +8,18 @@ define void @test() #0 { ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[DUMMY_PHI:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[OP_RDX1:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 2, [[ENTRY]] ], [ [[TMP3:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 2, [[ENTRY]] ], [ [[TMP4:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> poison, i64 [[TMP0]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i64> [[SHUFFLE]], -; CHECK-NEXT: [[TMP3]] = extractelement <4 x i64> [[TMP2]], i32 3 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i64> [[TMP2]], +; CHECK-NEXT: [[TMP4]] = extractelement <4 x i64> [[TMP3]], i32 3 ; CHECK-NEXT: [[DUMMY_ADD:%.*]] = add i16 0, 0 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP2]], i32 1 -; CHECK-NEXT: [[DUMMY_SHL:%.*]] = shl i64 [[TMP4]], 32 -; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i64> , [[TMP2]] -; CHECK-NEXT: [[TMP6:%.*]] = ashr exact <4 x i64> [[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP6]]) -; CHECK-NEXT: [[OP_RDX:%.*]] = add i64 [[TMP7]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0 +; CHECK-NEXT: [[DUMMY_SHL:%.*]] = shl i64 [[TMP5]], 32 +; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i64> , [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = ashr exact <4 x i64> [[TMP6]], +; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP7]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = add i64 [[TMP8]], [[TMP4]] ; CHECK-NEXT: [[OP_RDX1]] = add i64 [[OP_RDX]], 0 ; CHECK-NEXT: br label [[LOOP]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle-inseltpoison.ll index 8991edae44c97..3ee6c55e429bd 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle-inseltpoison.ll @@ -60,7 +60,7 @@ define <4 x i8> @h_undef(<4 x i8> %x, <4 x i8> %y) { define i8 @i(<4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @i( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> [[X:%.*]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i8> [[TMP1]], [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[TMP2]]) ; CHECK-NEXT: ret i8 [[TMP3]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll index 8701551f46abb..60e008d12e6e1 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll @@ -61,7 +61,7 @@ define <4 x i8> @h_undef(<4 x i8> %x, <4 x i8> %y) { define i8 @i(<4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @i( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> [[X:%.*]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i8> [[TMP1]], [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[TMP2]]) ; CHECK-NEXT: ret i8 [[TMP3]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/entries-different-vf.ll b/llvm/test/Transforms/SLPVectorizer/X86/entries-different-vf.ll index 1dfeaa1959e07..f75c8deddd9bb 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/entries-different-vf.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/entries-different-vf.ll @@ -10,9 +10,9 @@ define i1 @test(i64 %v) { ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[V]], 7 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i64> , i64 [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i64> [[TMP3]], <8 x i64> poison, <8 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i64> , i64 [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i64> [[TMP5]], i64 [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP6]], <4 x i64> poison, <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i64> , i64 [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i64> [[TMP5]], i64 [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP6]], <4 x i64> poison, <8 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i64> [[TMP4]], [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = sub <8 x i64> [[TMP4]], [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i64> [[TMP8]], <8 x i64> [[TMP9]], <8 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/external-used-across-reductions.ll b/llvm/test/Transforms/SLPVectorizer/X86/external-used-across-reductions.ll index 01aced4ab3acf..31ad629160c8d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/external-used-across-reductions.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/external-used-across-reductions.ll @@ -7,20 +7,17 @@ define void @test() { ; CHECK-NEXT: [[IDX2:%.*]] = getelementptr [1000 x i64], ptr null, i64 0, i64 7 ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[IDX2]], align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr [[IDX2]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i64> [[TMP1]], i32 7 ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr null, align 8 -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> poison, <8 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i64> [[TMP4]], i64 [[TMP3]], i32 0 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[PHI1:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[OP_RDX25:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[TMP6:%.*]] = phi <8 x i64> [ [[TMP0]], [[ENTRY]] ], [ [[TMP1]], [[LOOP]] ] ; CHECK-NEXT: [[TMP7:%.*]] = mul <8 x i64> [[TMP6]], -; CHECK-NEXT: [[TMP8:%.*]] = add <8 x i64> [[TMP1]], [[TMP5]] ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP7]]) -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP8]]) -; CHECK-NEXT: [[OP_RDX24:%.*]] = add i64 [[TMP9]], [[TMP10]] -; CHECK-NEXT: [[OP_RDX25]] = add i64 [[OP_RDX24]], [[TMP2]] +; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP1]]) +; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP8]], 2 +; CHECK-NEXT: [[OP_RDX33:%.*]] = add i64 [[TMP10]], [[TMP9]] +; CHECK-NEXT: [[OP_RDX25]] = add i64 [[OP_RDX33]], [[TMP3]] ; CHECK-NEXT: br label [[LOOP]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract-many-users-buildvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract-many-users-buildvector.ll index 7ae6793fba4cd..eb7498fea6f79 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extract-many-users-buildvector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extract-many-users-buildvector.ll @@ -8,11 +8,13 @@ define i1 @test(float %0, double %1) { ; CHECK-NEXT: [[TMP4:%.*]] = fpext <4 x float> [[TMP3]] to <4 x double> ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> , double [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> zeroinitializer, [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <4 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> , <4 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x double> [[TMP9]], double [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> , <4 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = fmul <4 x double> [[TMP10]], [[TMP12]] +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> , <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x double> [[TMP8]], double [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> [[TMP7]], <2 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x double> [[TMP10]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> [[TMP11]], <4 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = fmul <4 x double> [[TMP9]], [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = fmul <4 x double> zeroinitializer, [[TMP4]] ; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x double> [[TMP13]], <4 x double> poison, <8 x i32> ; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <8 x double> , <8 x double> [[TMP15]], <8 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll index f1580599ba127..d326c855a1091 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll @@ -11,9 +11,9 @@ define i64 @foo(i32 %tmp7) { ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 0, i32 5 ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <8 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[TMP24]], i32 6 -; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <8 x i32> [[TMP3]], [[TMP5]] -; CHECK-NEXT: [[TMP77:%.*]] = add nsw <8 x i32> [[TMP3]], [[TMP5]] -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP77]], <8 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = add nsw <8 x i32> [[TMP3]], [[TMP5]] +; CHECK-NEXT: [[TMP77:%.*]] = sub nsw <8 x i32> [[TMP3]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP77]], <8 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = add <8 x i32> zeroinitializer, [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i32> [[TMP9]], zeroinitializer ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP10]]) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-single-use-many-nodes.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-single-use-many-nodes.ll index 4e6ed4bce6588..f2b1c78ce0aac 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-single-use-many-nodes.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-single-use-many-nodes.ll @@ -10,10 +10,10 @@ define void @foo(double %i) { ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[I]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> zeroinitializer, [[TMP3]] ; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[TMP22]], <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> [[TMP5]], <8 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> , <8 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = fmul <8 x double> , [[TMP8]] +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[TMP22]], <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> [[TMP5]], <8 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> , <8 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = fmul <8 x double> , [[TMP7]] ; CHECK-NEXT: [[TMP13:%.*]] = fadd <8 x double> zeroinitializer, [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = fadd <8 x double> [[TMP13]], zeroinitializer ; CHECK-NEXT: [[TMP15:%.*]] = fcmp ult <8 x double> [[TMP14]], zeroinitializer diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gather-extractelements-different-bbs.ll b/llvm/test/Transforms/SLPVectorizer/X86/gather-extractelements-different-bbs.ll index 97189e4ef2a14..58c108b81a7ce 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/gather-extractelements-different-bbs.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/gather-extractelements-different-bbs.ll @@ -4,16 +4,16 @@ define i32 @foo(i32 %a) { ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> , i32 [[A:%.*]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> , i32 [[A:%.*]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = sub nsw <2 x i32> zeroinitializer, [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 1 ; CHECK-NEXT: br i1 false, label [[BB5:%.*]], label [[BB1:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[TMP4:%.*]] = mul <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP4:%.*]] = mul <2 x i32> [[TMP1]], ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP4]], i32 1 -; CHECK-NEXT: [[OP_RDX10:%.*]] = add i32 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[OP_RDX10:%.*]] = add i32 [[TMP6]], [[TMP5]] ; CHECK-NEXT: [[OP_RDX11:%.*]] = add i32 [[OP_RDX10]], 0 ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb2: @@ -22,8 +22,8 @@ define i32 @foo(i32 %a) { ; CHECK-NEXT: [[P1:%.*]] = phi i32 [ [[OP_RDX11]], [[BB1]] ], [ 0, [[BB2:%.*]] ] ; CHECK-NEXT: ret i32 0 ; CHECK: bb4: -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP2]], [[TMP7]] +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP7]], [[TMP2]] ; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP8]]) ; CHECK-NEXT: [[OP_RDX8:%.*]] = add i32 [[TMP9]], 0 ; CHECK-NEXT: [[OP_RDX9:%.*]] = add i32 [[OP_RDX8]], [[TMP3]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll index e3dc67558af02..7ff4a1a231c22 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll @@ -810,14 +810,9 @@ define float @extra_args_same_several_times(ptr nocapture readonly %x, i32 %a, i ; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float ; THRESHOLD-NEXT: [[TMP0:%.*]] = load <8 x float>, ptr [[X:%.*]], align 4 ; THRESHOLD-NEXT: [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP0]]) -; THRESHOLD-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0 -; THRESHOLD-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[CONV]], i32 1 -; THRESHOLD-NEXT: [[TMP4:%.*]] = fadd fast <2 x float> [[TMP3]], -; THRESHOLD-NEXT: [[TMP5:%.*]] = fmul fast <2 x float> [[TMP3]], -; THRESHOLD-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> -; THRESHOLD-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[TMP6]], i32 0 -; THRESHOLD-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP6]], i32 1 -; THRESHOLD-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[TMP7]], [[TMP8]] +; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP1]], 1.300000e+01 +; THRESHOLD-NEXT: [[TMP2:%.*]] = fmul fast float [[CONV]], 2.000000e+00 +; THRESHOLD-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[TMP2]] ; THRESHOLD-NEXT: ret float [[OP_RDX1]] ; entry: @@ -863,8 +858,8 @@ define float @extra_args_no_replace(ptr nocapture readonly %x, i32 %a, i32 %b, i ; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP0]]) ; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[CONV]], 2.000000e+00 ; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[CONVC]] -; CHECK-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], 3.000000e+00 +; CHECK-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], 3.000000e+00 +; CHECK-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], [[CONVC]] ; CHECK-NEXT: ret float [[OP_RDX2]] ; ; THRESHOLD-LABEL: @extra_args_no_replace( @@ -876,8 +871,8 @@ define float @extra_args_no_replace(ptr nocapture readonly %x, i32 %a, i32 %b, i ; THRESHOLD-NEXT: [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP0]]) ; THRESHOLD-NEXT: [[TMP2:%.*]] = fmul fast float [[CONV]], 2.000000e+00 ; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP1]], [[TMP2]] -; THRESHOLD-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[CONVC]] -; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], 3.000000e+00 +; THRESHOLD-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], 3.000000e+00 +; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], [[CONVC]] ; THRESHOLD-NEXT: ret float [[OP_RDX2]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/icmp-altopcode-after-reordering.ll b/llvm/test/Transforms/SLPVectorizer/X86/icmp-altopcode-after-reordering.ll index 6b270150985ef..17fe501391e1a 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/icmp-altopcode-after-reordering.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/icmp-altopcode-after-reordering.ll @@ -12,9 +12,9 @@ define i32 @test(ptr %sptr, i64 %0) { ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> , <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = icmp sle <4 x i32> [[TMP3]], [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <4 x i32> [[TMP3]], [[TMP5]] -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i1> [[TMP6]], <4 x i1> [[TMP7]], <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = icmp sle <4 x i32> [[TMP3]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i1> [[TMP7]], <4 x i1> [[TMP12]], <4 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <4 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = or <4 x i1> [[TMP9]], [[TMP8]] ; CHECK-NEXT: [[TMP11:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP10]]) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/int-bitcast-minbitwidth.ll b/llvm/test/Transforms/SLPVectorizer/X86/int-bitcast-minbitwidth.ll index 55da3e5f9f37c..789d73947d1c7 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/int-bitcast-minbitwidth.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/int-bitcast-minbitwidth.ll @@ -8,7 +8,7 @@ define void @t(i64 %v) { ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i64> poison, i64 [[V]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i16> -; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i16> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i16> [[TMP2]], ; CHECK-NEXT: [[TMP4:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP3]]) ; CHECK-NEXT: [[TMP5:%.*]] = sext i16 [[TMP4]] to i32 ; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], 65535 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-node-with-multi-users.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-node-with-multi-users.ll index 0ab56279fe47c..ef07e33724663 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-node-with-multi-users.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-node-with-multi-users.ll @@ -8,16 +8,16 @@ define void @test() { ; CHECK-NEXT: [[TMP1:%.*]] = and i8 0, 1 ; CHECK-NEXT: [[TMP2:%.*]] = and i32 0, 0 ; CHECK-NEXT: [[TMP3:%.*]] = select i1 false, i32 0, i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i8> , i8 [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i8> , i8 [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <4 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = trunc <4 x i8> [[TMP5]] to <4 x i1> -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <4 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i8> [[TMP7]], zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = trunc <4 x i8> [[TMP8]] to <4 x i1> ; CHECK-NEXT: [[TMP10:%.*]] = or <4 x i1> zeroinitializer, [[TMP15]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x i1> [[TMP9]], [[TMP10]] ; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i1> [[TMP15]] to <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> , <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> , <4 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP13]]) ; CHECK-NEXT: [[OP_RDX:%.*]] = and i32 0, [[TMP14]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-transformed-operand.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-transformed-operand.ll index 4af69dff179e2..032625a1199f9 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-transformed-operand.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-transformed-operand.ll @@ -6,16 +6,16 @@ define void @test(i64 %d.promoted.i) { ; CHECK-SAME: i64 [[D_PROMOTED_I:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[AND_1_I:%.*]] = and i64 0, [[D_PROMOTED_I]] -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i64> , i64 [[AND_1_I]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i64> , i64 [[AND_1_I]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[TMP0]] to <8 x i1> ; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i1> [[TMP1]], zeroinitializer ; CHECK-NEXT: [[AND_1_I_1:%.*]] = and i64 0, 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i64> , i64 [[AND_1_I_1]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i64> , i64 [[AND_1_I_1]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = trunc <8 x i64> [[TMP3]] to <8 x i1> ; CHECK-NEXT: [[TMP5:%.*]] = mul <8 x i1> [[TMP4]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP5]]) +; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP2]]) ; CHECK-NEXT: [[TMP7:%.*]] = zext i1 [[TMP6]] to i32 -; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP2]]) +; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP5]]) ; CHECK-NEXT: [[TMP9:%.*]] = zext i1 [[TMP8]] to i32 ; CHECK-NEXT: [[OP_RDX:%.*]] = or i32 [[TMP7]], [[TMP9]] ; CHECK-NEXT: [[TMP10:%.*]] = and i32 [[OP_RDX]], 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-bool-logic-op-inside.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-bool-logic-op-inside.ll index 481ff15a523f8..4c8d6b734ddc1 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-bool-logic-op-inside.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-bool-logic-op-inside.ll @@ -21,11 +21,11 @@ define i1 @test1(i32 %x, i32 %a, i32 %b, i32 %c, i32 %d) { ; CHECK-LABEL: define i1 @test1( ; CHECK-SAME: i32 [[X:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32 [[D:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[D]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A]], i32 2 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[B]], i32 3 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[B]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[C]], i32 3 ; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <4 x i32> [[TMP4]], -; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[C]], 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[D]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = freeze <4 x i1> [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) ; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP7]], i1 true, i1 [[CMP3]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll index acc04bece598a..838a75dcd29e0 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll @@ -100,8 +100,8 @@ define i1 @logical_and_icmp_diff_preds(<4 x i32> %x) { ; SSE-NEXT: [[TMP4:%.*]] = icmp ult <4 x i32> [[TMP1]], [[TMP2]] ; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> [[TMP4]], <4 x i32> ; SSE-NEXT: [[TMP6:%.*]] = freeze <4 x i1> [[TMP5]] -; SSE-NEXT: [[S3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP6]]) -; SSE-NEXT: ret i1 [[S3]] +; SSE-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP6]]) +; SSE-NEXT: ret i1 [[TMP7]] ; ; AVX-LABEL: @logical_and_icmp_diff_preds( ; AVX-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0 @@ -356,12 +356,13 @@ define i1 @logical_and_icmp_clamp_partial(<4 x i32> %x) { ; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i32> [[X]], ; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP5]]) -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 -; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP6]], i1 [[TMP7]], i1 false -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 -; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[TMP8]], i1 [[C2]], i1 false -; CHECK-NEXT: [[TMP9:%.*]] = freeze i1 [[OP_RDX]] -; CHECK-NEXT: [[OP_RDX2:%.*]] = select i1 [[TMP9]], i1 [[OP_RDX1]], i1 false +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 +; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP7]], i1 [[TMP6]], i1 false +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = freeze i1 [[TMP8]] +; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[TMP9]], i1 [[C2]], i1 false +; CHECK-NEXT: [[TMP10:%.*]] = freeze i1 [[OP_RDX]] +; CHECK-NEXT: [[OP_RDX2:%.*]] = select i1 [[TMP10]], i1 [[OP_RDX1]], i1 false ; CHECK-NEXT: ret i1 [[OP_RDX2]] ; %x0 = extractelement <4 x i32> %x, i32 0 @@ -400,9 +401,9 @@ define i1 @logical_and_icmp_clamp_pred_diff(<4 x i32> %x) { ; SSE-NEXT: ret i1 [[OP_RDX]] ; ; AVX-LABEL: @logical_and_icmp_clamp_pred_diff( -; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> +; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> ; AVX-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> , <8 x i32> -; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> , <8 x i32> +; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> , <8 x i32> ; AVX-NEXT: [[TMP4:%.*]] = icmp sgt <8 x i32> [[TMP2]], [[TMP3]] ; AVX-NEXT: [[TMP5:%.*]] = icmp ult <8 x i32> [[TMP2]], [[TMP3]] ; AVX-NEXT: [[TMP6:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> [[TMP5]], <8 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll index a61fa74e97cf9..811e2d282ac71 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll @@ -12,8 +12,8 @@ ; Vector cost is 5, Scalar cost is 7 ; AVX: Adding cost -2 for reduction of n=8 [ %0 = load i32, ptr %p, align 4, ..] (It is a splitting reduction) -; Vector cost is 6, Scalar cost is 7 -; SSE: Adding cost -1 for reduction of n=8 [ %0 = load i32, ptr %p, align 4, ..] (It is a splitting reduction) +; Vector cost is 4, Scalar cost is 7 +; SSE: Adding cost -3 for reduction of n=8 [ %0 = load i32, ptr %p, align 4, ..] (It is a splitting reduction) define i32 @test_add(ptr nocapture readonly %p) { ; CHECK-LABEL: @test_add( ; CHECK-NEXT: entry: @@ -25,7 +25,7 @@ entry: %0 = load i32, ptr %p, align 4 %arrayidx.1 = getelementptr inbounds i32, ptr %p, i64 1 %1 = load i32, ptr %arrayidx.1, align 4 - %mul.18 = add i32 %1, %0 + %mul.18 = add i32 %0, %1 %arrayidx.2 = getelementptr inbounds i32, ptr %p, i64 2 %2 = load i32, ptr %arrayidx.2, align 4 %mul.29 = add i32 %2, %mul.18 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_gather-load-redux-cost.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_gather-load-redux-cost.ll index 3bc6e64606e39..26c4d55436d22 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/remark_gather-load-redux-cost.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_gather-load-redux-cost.ll @@ -7,7 +7,7 @@ define i32 @test(ptr noalias %p, ptr noalias %addr) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x ptr> poison, ptr [[ADDR:%.*]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x ptr> [[TMP0]], <8 x ptr> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, <8 x ptr> [[TMP1]], <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, <8 x ptr> [[TMP1]], <8 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP2]], i32 8, <8 x i1> , <8 x i32> poison) ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x ptr> poison, ptr [[P:%.*]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x ptr> [[TMP4]], <8 x ptr> poison, <8 x i32> zeroinitializer diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder-vf-to-resize.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder-vf-to-resize.ll index d3c978412cdde..1805de6edf764 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder-vf-to-resize.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder-vf-to-resize.ll @@ -4,9 +4,9 @@ define void @main(ptr %0) { ; CHECK-LABEL: @main( ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[TMP0:%.*]], align 8 -; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> zeroinitializer, [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> zeroinitializer, [[TMP2]] -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP4]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> zeroinitializer, [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> zeroinitializer, [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP4]], <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = fmul <4 x double> [[TMP5]], zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = call <4 x double> @llvm.fabs.v4f64(<4 x double> [[TMP6]]) ; CHECK-NEXT: [[TMP8:%.*]] = fcmp oeq <4 x double> [[TMP7]], zeroinitializer diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll index a8e69d0a8c713..590e5a67bd4ce 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll @@ -37,14 +37,14 @@ define void @hoge() { ; CHECK-NEXT: [[T30:%.*]] = add nsw i32 [[T22]], -33 ; CHECK-NEXT: [[T35:%.*]] = add nsw i32 [[T24]], -33 ; CHECK-NEXT: [[T40:%.*]] = add nsw i32 [[T22]], -17 -; CHECK-NEXT: [[OP_RDX:%.*]] = icmp slt i32 undef, [[T40]] -; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 undef, i32 [[T40]] -; CHECK-NEXT: [[OP_RDX2:%.*]] = icmp slt i32 [[T35]], [[T30]] -; CHECK-NEXT: [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[T35]], i32 [[T30]] +; CHECK-NEXT: [[OP_RDX:%.*]] = icmp slt i32 undef, [[T25]] +; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 undef, i32 [[T25]] +; CHECK-NEXT: [[OP_RDX2:%.*]] = icmp slt i32 [[T30]], [[T35]] +; CHECK-NEXT: [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[T30]], i32 [[T35]] ; CHECK-NEXT: [[OP_RDX4:%.*]] = icmp slt i32 [[OP_RDX1]], [[OP_RDX3]] ; CHECK-NEXT: [[OP_RDX5:%.*]] = select i1 [[OP_RDX4]], i32 [[OP_RDX1]], i32 [[OP_RDX3]] -; CHECK-NEXT: [[OP_RDX6:%.*]] = icmp slt i32 [[OP_RDX5]], [[T25]] -; CHECK-NEXT: [[OP_RDX7:%.*]] = select i1 [[OP_RDX6]], i32 [[OP_RDX5]], i32 [[T25]] +; CHECK-NEXT: [[OP_RDX6:%.*]] = icmp slt i32 [[OP_RDX5]], [[T40]] +; CHECK-NEXT: [[OP_RDX7:%.*]] = select i1 [[OP_RDX6]], i32 [[OP_RDX5]], i32 [[T40]] ; CHECK-NEXT: [[T45:%.*]] = icmp sgt i32 undef, [[OP_RDX7]] ; CHECK-NEXT: unreachable ; diff --git a/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll b/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll index 16a9bf53b54a0..9d6371b13e08a 100644 --- a/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll +++ b/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll @@ -43,7 +43,7 @@ declare i32 @llvm.umin.i32(i32, i32) define void @test2() { ; CHECK-LABEL: @test2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> , <4 x i32> ) +; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> , <4 x i32> ) ; CHECK-NEXT: [[TMP1:%.*]] = sub nsw <4 x i32> undef, [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[TMP1]]) ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP2]], i32 77)