From e72140a173c1fd620e7148f2f0135be1b4045a4b Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 4 Dec 2024 21:26:23 +0800 Subject: [PATCH 01/28] [LV] Expand VPWidenIntOrFpInductionRecipe into separate recipes --- llvm/lib/Transforms/Vectorize/VPlan.cpp | 21 +-- llvm/lib/Transforms/Vectorize/VPlan.h | 157 +++++++++++++++++- .../Transforms/Vectorize/VPlanAnalysis.cpp | 19 ++- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 109 +++++++----- .../Transforms/Vectorize/VPlanTransforms.cpp | 79 +++++++++ llvm/lib/Transforms/Vectorize/VPlanValue.h | 3 + .../AArch64/clamped-trip-count.ll | 12 +- .../AArch64/conditional-branches-cost.ll | 4 +- .../AArch64/divs-with-scalable-vfs.ll | 14 +- .../AArch64/simple_early_exit.ll | 10 +- .../AArch64/sve-interleaved-accesses.ll | 46 ++--- .../sve-interleaved-masked-accesses.ll | 32 ++-- .../RISCV/blocks-with-dead-instructions.ll | 36 ++-- .../LoopVectorize/RISCV/mask-index-type.ll | 4 +- ...ruction-or-drop-poison-generating-flags.ll | 6 +- .../Transforms/LoopVectorize/RISCV/pr88802.ll | 2 +- .../LoopVectorize/RISCV/strided-accesses.ll | 10 +- .../LoopVectorize/RISCV/uniform-load-store.ll | 26 +-- ...rize-force-tail-with-evl-cond-reduction.ll | 4 +- .../X86/drop-poison-generating-flags.ll | 2 +- .../X86/epilog-vectorization-inductions.ll | 10 +- .../LoopVectorize/X86/induction-costs.ll | 2 +- .../LoopVectorize/X86/interleave-cost.ll | 4 +- ...outer_loop_test1_no_explicit_vect_width.ll | 2 +- .../LoopVectorize/X86/scatter_crash.ll | 2 +- .../LoopVectorize/create-induction-resume.ll | 10 +- .../epilog-vectorization-any-of-reductions.ll | 4 +- .../LoopVectorize/first-order-recurrence.ll | 4 +- .../LoopVectorize/float-induction.ll | 20 +-- .../LoopVectorize/induction-step.ll | 20 +-- .../Transforms/LoopVectorize/induction.ll | 8 +- .../LoopVectorize/reduction-inloop-pred.ll | 36 ++-- .../single_early_exit_live_outs.ll | 38 ++--- 33 files changed, 500 insertions(+), 256 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp 
b/llvm/lib/Transforms/Vectorize/VPlan.cpp index d3c195d4a70ea..d2c732b7230e3 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -1050,17 +1050,12 @@ void VPlan::execute(VPTransformState *State) { if (isa(&R)) continue; - if (isa(&R)) { - PHINode *Phi = nullptr; - if (isa(&R)) { - Phi = cast(State->get(R.getVPSingleValue())); - } else { - auto *WidenPhi = cast(&R); - assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) && - "recipe generating only scalars should have been replaced"); - auto *GEP = cast(State->get(WidenPhi)); - Phi = cast(GEP->getPointerOperand()); - } + if (isa(&R)) { + auto *WidenPhi = cast(&R); + assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) && + "recipe generating only scalars should have been replaced"); + auto *GEP = cast(State->get(WidenPhi)); + PHINode *Phi = cast(GEP->getPointerOperand()); Phi->setIncomingBlock(1, VectorLatchBB); @@ -1068,10 +1063,6 @@ void VPlan::execute(VPTransformState *State) { // consistent placement of all induction updates. Instruction *Inc = cast(Phi->getIncomingValue(1)); Inc->moveBefore(std::prev(VectorLatchBB->getTerminator()->getIterator())); - - // Use the steps for the last part as backedge value for the induction. - if (auto *IV = dyn_cast(&R)) - Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand())); continue; } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 1f1af7f87e554..e41ee6ce18656 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1749,7 +1749,9 @@ class VPWidenInductionRecipe : public VPHeaderPHIRecipe { }; /// A recipe for handling phi nodes of integer and floating-point inductions, -/// producing their vector values. +/// producing their vector values. This won't execute any LLVM IR and will get +/// expanded later into VPWidenIntOrFpInductionInitialRecipe, +/// VPWidenIntOrFpInductionPHIRecipe and VPWidenIntOrFpInductionBackedgeRecipe. 
class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe { TruncInst *Trunc; @@ -1782,9 +1784,10 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe { VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC) - /// Generate the vectorized and scalarized versions of the phi node as - /// needed by their users. - void execute(VPTransformState &State) override; + void execute(VPTransformState &State) override { + llvm_unreachable("cannot execute this recipe, should be expanded via " + "expandVPWidenIntOrFpInduction"); + } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Print the recipe. @@ -1817,10 +1820,152 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe { } /// Returns the VPValue representing the value of this induction at - /// the last unrolled part, if it exists. Returns itself if unrolling did not + /// the last unrolled part, if it exists. Returns nullptr if unrolling did not /// take place. VPValue *getLastUnrolledPartOperand() { - return getNumOperands() == 5 ? getOperand(4) : this; + return getNumOperands() == 5 ? getOperand(4) : nullptr; + } +}; + +/// A recipe to compute the initial value for a widened IV, expanded from +/// VPWidenIntOrFpInductionRecipe. 
+class VPWidenIntOrFpInductionInitialRecipe : public VPSingleDefRecipe { + Instruction *IV; + const InductionDescriptor &ID; + +public: + VPWidenIntOrFpInductionInitialRecipe(Instruction *IV, VPValue *Start, + VPValue *Step, + const InductionDescriptor &ID) + : VPSingleDefRecipe(VPDef::VPWidenIntOrFpInductionStartSC, {Start, Step}), + IV(IV), ID(ID) { + assert((isa(IV) || isa(IV)) && + "Expected either an induction phi-node or a truncate of it!"); + } + + ~VPWidenIntOrFpInductionInitialRecipe() override = default; + + VPWidenIntOrFpInductionInitialRecipe *clone() override { + return new VPWidenIntOrFpInductionInitialRecipe(IV, getOperand(0), + getOperand(1), ID); + } + + VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionStartSC) + + void execute(VPTransformState &State) override; + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; +#endif + + VPValue *getStartValue() { return getOperand(0); } + const VPValue *getStartValue() const { return getOperand(0); } + + VPValue *getStepValue() { return getOperand(1); } + const VPValue *getStepValue() const { return getOperand(1); } + + /// Returns the scalar type of the induction. + Type *getScalarType() const { return IV->getType(); } + + bool onlyFirstLaneUsed(const VPValue *Op) const override { + assert(is_contained(operands(), Op) && + "Op must be an operand of the recipe"); + return true; + } +}; + +/// A recipe to generate the PHI of a widened IV, expanded from +/// VPWidenIntOrFpInductionRecipe. 
+class VPWidenIntOrFpInductionPHIRecipe : public VPHeaderPHIRecipe { + Instruction *IV; + +public: + VPWidenIntOrFpInductionPHIRecipe(Instruction *IV, VPValue *Start) + : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionPHISC, IV, Start), + IV(IV) { + assert((isa(IV) || isa(IV)) && + "Expected either an induction phi-node or a truncate of it!"); + } + + ~VPWidenIntOrFpInductionPHIRecipe() override = default; + + VPWidenIntOrFpInductionPHIRecipe *clone() override { + auto *R = new VPWidenIntOrFpInductionPHIRecipe(IV, getOperand(0)); + R->addOperand(getBackedgeValue()); + return R; + } + + VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionPHISC) + + void execute(VPTransformState &State) override; + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; +#endif +}; + +/// A recipe to compute the backedge value for a widened IV, expanded from +/// VPWidenIntOrFpInductionRecipe. +class VPWidenIntOrFpInductionBackedgeRecipe : public VPSingleDefRecipe { + Instruction *IV; + const InductionDescriptor &ID; + +public: + VPWidenIntOrFpInductionBackedgeRecipe(Instruction *IV, VPValue *Step, + VPValue *VF, VPValue *Prev, + VPValue *SplatVF, + const InductionDescriptor &ID) + : VPSingleDefRecipe(VPDef::VPWidenIntOrFpInductionIncSC, {Step, VF, Prev}), + IV(IV), ID(ID) { + assert((isa(IV) || isa(IV)) && + "Expected either an induction phi-node or a truncate of it!"); + if (SplatVF) + addOperand(SplatVF); + } + + ~VPWidenIntOrFpInductionBackedgeRecipe() override = default; + + VPWidenIntOrFpInductionBackedgeRecipe *clone() override { + return new VPWidenIntOrFpInductionBackedgeRecipe( + IV, getOperand(0), getOperand(1), getOperand(2), getSplatVFValue(), ID); + } + + VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionIncSC) + + void execute(VPTransformState &State) override; + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the recipe. 
+ void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; +#endif + + VPValue *getStepValue() { return getOperand(0); } + const VPValue *getStepValue() const { return getOperand(0); } + + VPValue *getVFValue() { return getOperand(1); } + const VPValue *getVFValue() const { return getOperand(1); } + + VPValue *getPrevValue() { return getOperand(2); } + const VPValue *getPrevValue() const { return getOperand(2); } + + VPValue *getSplatVFValue() { + // If the recipe has been unrolled (4 operands), return the VPValue for the + // induction increment. + return getNumOperands() == 4 ? getOperand(3) : nullptr; + } + + /// Returns the scalar type of the induction. + Type *getScalarType() const { return IV->getType(); } + + bool onlyFirstLaneUsed(const VPValue *Op) const override { + assert(is_contained(operands(), Op) && + "Op must be an operand of the recipe"); + return Op == getOperand(0) || Op == getOperand(1); } }; diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index 8e5b37a35dda7..ef0f12524b5a7 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -236,14 +236,17 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) { .Case([this](const auto *R) { - // Handle header phi recipes, except VPWidenIntOrFpInduction - // which needs special handling due it being possibly truncated. - // TODO: consider inferring/caching type of siblings, e.g., - // backedge value, here and in cases below. - return inferScalarType(R->getStartValue()); - }) - .Case( + VPScalarPHIRecipe, VPWidenIntOrFpInductionPHIRecipe>( + [this](const auto *R) { + // Handle header phi recipes, except VPWidenIntOrFpInduction + // which needs special handling due it being possibly truncated. + // TODO: consider inferring/caching type of siblings, e.g., + // backedge value, here and in cases below. 
+ return inferScalarType(R->getStartValue()); + }) + .Case( [](const auto *R) { return R->getScalarType(); }) .CasegetLiveInIRValue(); - const InductionDescriptor &ID = getInductionDescriptor(); - TruncInst *Trunc = getTruncInst(); + Value *Start = State.get(getStartValue(), true); IRBuilderBase &Builder = State.Builder; - assert(getPHINode()->getType() == ID.getStartValue()->getType() && - "Types must match"); assert(State.VF.isVector() && "must have vector VF"); - // The value from the original loop to which we are mapping the new induction - // variable. - Instruction *EntryVal = Trunc ? cast(Trunc) : getPHINode(); - // Fast-math-flags propagate from the original induction instruction. IRBuilder<>::FastMathFlagGuard FMFG(Builder); - if (ID.getInductionBinOp() && isa(ID.getInductionBinOp())) + if (isa_and_nonnull(ID.getInductionBinOp())) Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags()); // Now do the actual transformations, and start with fetching the step value. Value *Step = State.get(getStepValue(), VPLane(0)); - assert((isa(EntryVal) || isa(EntryVal)) && - "Expected either an induction phi-node or a truncate of it!"); + // Construct the initial value of the vector IV + Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start); + Value *SteppedStart = getStepVector(SplatStart, Step, ID.getInductionOpcode(), + State.VF, State.Builder); + State.set(this, SteppedStart); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPWidenIntOrFpInductionInitialRecipe::print( + raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { + O << Indent; + printAsOperand(O, SlotTracker); + O << " = WIDEN-INDUCTION-START "; + printOperands(O, SlotTracker); +} +#endif + +void VPWidenIntOrFpInductionPHIRecipe::execute(VPTransformState &State) { + BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); + + Value *Start = State.get(getOperand(0)); + PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, "vec.ind"); + 
Phi->addIncoming(Start, VectorPH); + Phi->setDebugLoc(IV->getDebugLoc()); + State.set(this, Phi); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPWidenIntOrFpInductionPHIRecipe::print(raw_ostream &O, + const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent; + printAsOperand(O, SlotTracker); + O << " = WIDEN-INDUCTION-PHI "; + printOperands(O, SlotTracker); +} +#endif + +void VPWidenIntOrFpInductionBackedgeRecipe::execute(VPTransformState &State) { + IRBuilderBase &Builder = State.Builder; + + // Fast-math-flags propagate from the original induction instruction. + IRBuilder<>::FastMathFlagGuard FMFG(Builder); + if (isa_and_nonnull(ID.getInductionBinOp())) + Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags()); + + Value *Step = State.get(getStepValue(), VPLane(0)); - // Construct the initial value of the vector IV in the vector loop preheader auto CurrIP = Builder.saveIP(); BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); Builder.SetInsertPoint(VectorPH->getTerminator()); - if (isa(EntryVal)) { - assert(Start->getType()->isIntegerTy() && - "Truncation requires an integer type"); - auto *TruncType = cast(EntryVal->getType()); - Step = Builder.CreateTrunc(Step, TruncType); - Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType); - } - - Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start); - Value *SteppedStart = getStepVector(SplatStart, Step, ID.getInductionOpcode(), - State.VF, State.Builder); // We create vector phi nodes for both integer and floating-point induction // variables. Here, we determine the kind of arithmetic we will perform. @@ -1826,28 +1851,26 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) { } Builder.restoreIP(CurrIP); - - // We may need to add the step a number of times, depending on the unroll - // factor. The last of those goes into the PHI. 
- PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind"); - VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt()); - VecInd->setDebugLoc(getDebugLoc()); - State.set(this, VecInd); + Value *PrevVal = State.get(getPrevValue()); Instruction *LastInduction = cast( - Builder.CreateBinOp(AddOp, VecInd, SplatVF, "vec.ind.next")); - if (isa(EntryVal)) - State.addMetadata(LastInduction, EntryVal); - LastInduction->setDebugLoc(getDebugLoc()); + Builder.CreateBinOp(AddOp, PrevVal, SplatVF, "vec.ind.next")); + if (isa(IV)) + State.addMetadata(LastInduction, IV); + LastInduction->setDebugLoc(IV->getDebugLoc()); - VecInd->addIncoming(SteppedStart, VectorPH); - // Add induction update using an incorrect block temporarily. The phi node - // will be fixed after VPlan execution. Note that at this point the latch - // block cannot be used, as it does not exist yet. - // TODO: Model increment value in VPlan, by turning the recipe into a - // multi-def and a subclass of VPHeaderPHIRecipe. - VecInd->addIncoming(LastInduction, VectorPH); + State.set(this, LastInduction); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPWidenIntOrFpInductionBackedgeRecipe::print( + raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { + O << Indent; + printAsOperand(O, SlotTracker); + O << " = WIDEN-INDUCTION-INC "; + printOperands(O, SlotTracker); } +#endif #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent, diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 13ef3029023f1..950eb5a2a01b3 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2013,10 +2013,89 @@ void VPlanTransforms::createInterleaveGroups( } } +/// Expand a VPWidenIntOrFpInduction into separate recipes for the initial +/// value, phi and backedge value. 
In the following example: +/// +/// vector.ph: +/// Successor(s): vector loop +/// +/// vector loop: { +/// vector.body: +/// WIDEN-INDUCTION %i = phi %bc.resume.val, %i.next, ir<1>, ir<%5> +/// ... +/// EMIT branch-on-count vp<%index.next>, ir<%n.vec> +/// No successors +/// } +/// +/// WIDEN-INDUCTION will get expanded to: +/// +/// vector.ph: +/// vp<%0> = WIDEN-INDUCTION-START ir<0>, ir<1> +/// Successor(s): vector loop +/// +/// vector loop: { +/// vector.body: +/// ir<%i> = WIDEN-INDUCTION-PHI vp<%0>, vp<%4> +/// ... +/// vp<%4> = WIDEN-INDUCTION-INC ir<1>, ir<%5>, ir<%i> +/// EMIT branch-on-count vp<%index.next>, ir<%n.vec> +/// No successors +/// } +static void +expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR) { + VPlan *Plan = WidenIVR->getParent()->getPlan(); + VPValue *Start = WidenIVR->getStartValue(); + VPValue *Step = WidenIVR->getStepValue(); + VPValue *VF = WidenIVR->getVFValue(); + const InductionDescriptor &ID = WidenIVR->getInductionDescriptor(); + TruncInst *Trunc = WidenIVR->getTruncInst(); + DebugLoc DL = WidenIVR->getDebugLoc(); + + // The value from the original loop to which we are mapping the new induction + // variable. + Instruction *IV = Trunc ? cast(Trunc) : WidenIVR->getPHINode(); + + // If the phi is truncated, truncate the start and step values. + VPBuilder Builder(Plan->getVectorPreheader()); + if (isa(IV)) { + assert(Start->getUnderlyingValue()->getType()->isIntegerTy() && + "Truncation requires an integer type"); + auto *TruncType = cast(IV->getType()); + Step = Builder.createScalarCast(Instruction::Trunc, Step, TruncType, DL); + Start = Builder.createScalarCast(Instruction::Trunc, Start, TruncType, DL); + } + + // Construct the initial value of the vector IV in the vector loop preheader. + auto *StartR = new VPWidenIntOrFpInductionInitialRecipe(IV, Start, Step, ID); + Plan->getVectorPreheader()->insert(StartR, Builder.getInsertPoint()); + + // Create the widened phi of the vector IV. 
+ auto *PhiR = new VPWidenIntOrFpInductionPHIRecipe(IV, StartR); + PhiR->insertBefore(WidenIVR); + + // Create the backedge value for the vector IV. + VPValue *Prev = PhiR; + // If unrolled, use the last unrolled part in the increment. + if (auto *UnrolledPart = WidenIVR->getLastUnrolledPartOperand()) + Prev = UnrolledPart; + auto *IncR = new VPWidenIntOrFpInductionBackedgeRecipe( + IV, Step, VF, Prev, WidenIVR->getSplatVFValue(), ID); + VPBasicBlock *ExitingBB = Plan->getVectorLoopRegion()->getExitingBasicBlock(); + ExitingBB->insert(IncR, ExitingBB->getTerminator()->getIterator()); + PhiR->addOperand(IncR); + + WidenIVR->replaceAllUsesWith(PhiR); + WidenIVR->eraseFromParent(); +} + void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) { for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly( vp_depth_first_deep(Plan.getEntry()))) { for (VPRecipeBase &R : make_early_inc_range(VPBB->phis())) { + if (auto *WidenIVR = dyn_cast(&R)) { + expandVPWidenIntOrFpInduction(WidenIVR); + continue; + } if (!isa(&R)) continue; auto *PhiR = cast(&R); diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index a058b2a121d59..8e5148b1616bd 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -354,6 +354,8 @@ class VPDef { VPWidenSelectSC, VPBlendSC, VPHistogramSC, + VPWidenIntOrFpInductionStartSC, + VPWidenIntOrFpInductionIncSC, // START: Phi-like recipes. Need to be kept together. 
VPWidenPHISC, VPPredInstPHISC, @@ -364,6 +366,7 @@ class VPDef { VPEVLBasedIVPHISC, VPFirstOrderRecurrencePHISC, VPWidenIntOrFpInductionSC, + VPWidenIntOrFpInductionPHISC, VPWidenPointerInductionSC, VPScalarPHISC, VPReductionPHISC, diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll index 1f46073d025a9..44148aac54407 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll @@ -20,11 +20,11 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1 ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP7:%.*]] = mul [[TMP8]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[VAL]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[VAL]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -38,8 +38,8 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1 ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0 ; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0( [[TMP14]], ptr [[TMP17]], i32 1, [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add 
[[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 8) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] @@ -106,11 +106,11 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP7:%.*]] = mul [[TMP8]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[VAL]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[VAL]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -124,8 +124,8 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0 ; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0( [[TMP14]], ptr [[TMP17]], i32 1, [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 
[[WIDE_TRIP_COUNT]]) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll index cf4fc143fe8c3..a9f42ff1d1e57 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll @@ -978,8 +978,8 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) { ; DEFAULT-NEXT: store i8 [[TMP33]], ptr [[TMP32]], align 1 ; DEFAULT-NEXT: br label [[PRED_STORE_CONTINUE14]] ; DEFAULT: pred.store.continue14: -; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) ; DEFAULT-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; DEFAULT: middle.block: ; DEFAULT-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -1080,8 +1080,8 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) { ; PRED-NEXT: store i8 [[TMP33]], ptr [[TMP32]], align 1 ; PRED-NEXT: br label [[PRED_STORE_CONTINUE14]] ; PRED: pred.store.continue14: -; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) ; PRED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) ; PRED-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; PRED: middle.block: ; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] diff --git 
a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll index ecf49a31ba79f..dfcb1e561551f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll @@ -127,11 +127,11 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i ; CHECK-NEXT: [[TMP15:%.*]] = call @llvm.stepvector.nxv2i64() ; CHECK-NEXT: [[TMP17:%.*]] = mul [[TMP15]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP17]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[M]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP20]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[M]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -251,10 +251,10 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) { ; CHECK-NEXT: [[TMP15:%.*]] = call @llvm.stepvector.nxv2i64() ; CHECK-NEXT: [[TMP17:%.*]] = mul [[TMP15]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP17]] -; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP20]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[MUL_2_I]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, 
zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i64 [[MUL_2_I]], i64 0 +; CHECK-NEXT: [[TMP18:%.*]] = mul i64 1, [[TMP9]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i64 [[TMP18]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: @@ -262,7 +262,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) { ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP23:%.*]] = udiv [[VEC_IND]], [[BROADCAST_SPLAT4]] +; CHECK-NEXT: [[TMP23:%.*]] = udiv [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP24:%.*]] = urem i64 [[TMP21]], [[MUL_2_I]] ; CHECK-NEXT: [[TMP25:%.*]] = udiv i64 [[TMP24]], [[MUL_1_I]] ; CHECK-NEXT: [[TMP26:%.*]] = urem i64 [[TMP24]], [[MUL_1_I]] @@ -283,7 +283,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) { ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP14]]) ; CHECK-NEXT: [[TMP47:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT4]] ; CHECK-NEXT: [[TMP48:%.*]] = extractelement [[TMP47]], i32 0 ; CHECK-NEXT: br i1 [[TMP48]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll index 
5f926db1131f6..42600a132c2c3 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll @@ -48,8 +48,8 @@ define i64 @same_exit_block_pre_inc_use1() #1 { ; CHECK-NEXT: [[TMP16:%.*]] = xor [[TMP15]], splat (i1 true) ; CHECK-NEXT: [[TMP17:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1( [[TMP16]]) ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT3]], [[N_VEC]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP19:%.*]] = or i1 [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.split: ; CHECK-NEXT: br i1 [[TMP17]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]] @@ -131,8 +131,8 @@ define i64 @same_exit_block_pre_inc_use4() { ; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) ; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP4]]) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 64 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.split: ; CHECK-NEXT: br i1 [[TMP5]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]] @@ -209,8 +209,8 @@ define i64 @loop_contains_safe_call() #1 { ; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) ; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 64 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] +; CHECK-NEXT: 
[[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.split: ; CHECK-NEXT: br i1 [[TMP6]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]] @@ -304,8 +304,8 @@ define i64 @loop_contains_safe_div() #1 { ; CHECK-NEXT: [[TMP15:%.*]] = xor [[TMP14]], splat (i1 true) ; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP15]]) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[INDEX1]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.split: ; CHECK-NEXT: br i1 [[TMP6]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]] @@ -389,8 +389,8 @@ define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align( ; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) ; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.split: ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[WIDE_LOAD2]], i32 3 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll index eb805b1f8c571..19b1e869a914a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll +++ 
b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll @@ -108,13 +108,13 @@ define void @test_array_load2_i16_store2(i32 %C, i32 %D) #1 { ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP3:%.*]] = shl [[TMP2]], splat (i64 1) -; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP0]], 3 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[C:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, i32 [[D:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector [[BROADCAST_SPLATINSERT2]], poison, zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP0]], 3 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP4]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -197,13 +197,13 @@ define void @test_array_load2_store2_i16(i32 noundef %C, i32 noundef %D) #1 { ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP3:%.*]] = shl [[TMP2]], splat (i64 1) -; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP0]], 3 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[C:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = 
shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[D:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP0]], 3 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP4]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -637,16 +637,16 @@ define void @load_gap_reverse(ptr noalias nocapture readonly %P1, ptr noalias no ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[INDUCTION:%.*]] = sub splat (i64 1023), [[TMP2]] -; CHECK-NEXT: [[DOTNEG:%.*]] = sub nsw i64 0, [[TMP1]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[DOTNEG]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[DOTNEG:%.*]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[X:%.*]], i64 0 +; CHECK-NEXT: [[X:%.*]] = sub nsw i64 0, [[TMP1]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[X]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add nsw [[BROADCAST_SPLAT]], [[VEC_IND]] +; CHECK-NEXT: [[TMP4:%.*]] = add nsw [[DOTSPLAT]], 
[[VEC_IND]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P1:%.*]], [[VEC_IND]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P2:%.*]], [[VEC_IND]], i32 1 ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i64.nxv4p0( [[TMP6]], i32 8, splat (i1 true), poison) @@ -654,7 +654,7 @@ define void @load_gap_reverse(ptr noalias nocapture readonly %P1, ptr noalias no ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i64.nxv4p0( [[TMP4]], [[TMP5]], i32 8, splat (i1 true)) ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i64.nxv4p0( [[TMP7]], [[TMP6]], i32 8, splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: @@ -885,10 +885,10 @@ define void @PR27626_0(ptr %p, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2 ; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.stepvector.nxv4i64() -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[Z:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ 
-1063,10 +1063,10 @@ define void @PR27626_2(ptr %p, i64 %n, i32 %z) #1 { ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2 ; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.stepvector.nxv4i64() -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[Z:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1253,15 +1253,15 @@ define void @PR27626_4(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1 ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP9:%.*]] = shl [[TMP8]], splat (i64 1) -; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP6]], 3 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[X:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[Y:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i32 [[Z:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector 
[[BROADCAST_SPLATINSERT3]], poison, zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP6]], 3 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1346,15 +1346,15 @@ define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP10:%.*]] = shl [[TMP9]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add [[TMP10]], splat (i64 3) -; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i64 [[TMP7]], 3 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[X:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[Y:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i32 [[Z:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i64 [[TMP7]], 3 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1452,12 +1452,12 @@ define void @PR34743(ptr %a, 
ptr %b, i64 %n) #1 { ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw i64 [[TMP9]], 2 ; CHECK-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1 +; CHECK-NEXT: [[TMP14:%.*]] = call @llvm.stepvector.nxv4i64() +; CHECK-NEXT: [[TMP15:%.*]] = shl [[TMP14]], splat (i64 1) ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i32 [[TMP11]], 2 ; CHECK-NEXT: [[TMP13:%.*]] = add nsw i32 [[TMP12]], -1 ; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i16 [[DOTPRE]], i32 [[TMP13]] -; CHECK-NEXT: [[TMP14:%.*]] = call @llvm.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP15:%.*]] = shl [[TMP14]], splat (i64 1) ; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i64 [[TMP9]], 3 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP17]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll index a2da67c2cbb81..94e57c7ca023f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll @@ -38,10 +38,10 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() ; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 4 ; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv16i32() -; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 -; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector 
[[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 +; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; SCALAR_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALAR_TAIL_FOLDING: vector.body: ; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -114,10 +114,10 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.usub.sat.i32(i32 1024, i32 [[TMP3]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 1024) ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv16i32() -; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP1]], i64 0 -; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP1]], i64 0 +; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; PREDICATED_TAIL_FOLDING: vector.body: ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -222,10 +222,10 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() ; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 4 ; 
SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv16i32() -; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 -; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 +; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; SCALAR_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALAR_TAIL_FOLDING: vector.body: ; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -283,10 +283,10 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.usub.sat.i32(i32 1024, i32 [[TMP3]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 1024) ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv16i32() -; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP1]], i64 0 -; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP1]], i64 0 +; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: br 
label [[VECTOR_BODY:%.*]] ; PREDICATED_TAIL_FOLDING: vector.body: ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -380,12 +380,12 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() ; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 4 ; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv16i32() -; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 -; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[CONV3]], i64 0 ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer +; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 +; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; SCALAR_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALAR_TAIL_FOLDING: vector.body: ; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -450,12 +450,12 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.usub.sat.i32(i32 1024, i32 [[TMP3]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 1024) ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv16i32() -; PREDICATED_TAIL_FOLDING-NEXT: 
[[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP1]], i64 0 -; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[CONV3]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer +; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP1]], i64 0 +; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; PREDICATED_TAIL_FOLDING: vector.body: ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -567,10 +567,10 @@ define dso_local void @masked_strided_factor4(ptr noalias nocapture readonly %p, ; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() ; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 4 ; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv16i32() -; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 -; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 +; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, 
zeroinitializer ; SCALAR_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALAR_TAIL_FOLDING: vector.body: ; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -673,10 +673,10 @@ define dso_local void @masked_strided_factor4(ptr noalias nocapture readonly %p, ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.usub.sat.i32(i32 1024, i32 [[TMP3]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 1024) ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv16i32() -; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP1]], i64 0 -; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP1]], i64 0 +; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; PREDICATED_TAIL_FOLDING: vector.body: ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll index 50dfb689c2060..6e6944b904ae0 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll @@ -28,13 +28,13 @@ define void @block_with_dead_inst_1(ptr %src, i64 %N) #0 { ; CHECK-NEXT: [[TMP11:%.*]] = call 
@llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP13:%.*]] = mul [[TMP11]], splat (i64 3) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP13]] -; CHECK-NEXT: [[TMP16:%.*]] = mul i64 3, [[TMP10]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP16]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[TMP17]], 8 ; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1 ; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i16 1, i32 [[TMP19]] +; CHECK-NEXT: [[TMP26:%.*]] = mul i64 3, [[TMP10]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP26]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -122,13 +122,13 @@ define void @block_with_dead_inst_2(ptr %src) #0 { ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP10:%.*]] = mul [[TMP8]], splat (i64 3) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP10]] -; CHECK-NEXT: [[TMP13:%.*]] = mul i64 3, [[TMP7]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP13]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 4 ; CHECK-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], 1 ; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i16 0, i32 [[TMP16]] +; CHECK-NEXT: [[TMP22:%.*]] = mul i64 3, [[TMP7]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP22]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label 
%[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -216,13 +216,13 @@ define void @multiple_blocks_with_dead_insts_3(ptr %src) #0 { ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP10:%.*]] = mul [[TMP8]], splat (i64 3) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP10]] -; CHECK-NEXT: [[TMP13:%.*]] = mul i64 3, [[TMP7]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP13]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 4 ; CHECK-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], 1 ; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i16 0, i32 [[TMP16]] +; CHECK-NEXT: [[TMP22:%.*]] = mul i64 3, [[TMP7]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP22]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -320,13 +320,13 @@ define void @multiple_blocks_with_dead_insts_4(ptr %src, i64 %N) #0 { ; CHECK-NEXT: [[TMP11:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP13:%.*]] = mul [[TMP11]], splat (i64 3) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP13]] -; CHECK-NEXT: [[TMP16:%.*]] = mul i64 3, [[TMP10]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP16]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[TMP17]], 8 ; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1 ; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = 
insertelement poison, i16 1, i32 [[TMP19]] +; CHECK-NEXT: [[TMP25:%.*]] = mul i64 3, [[TMP10]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP25]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -426,13 +426,13 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_5(ptr %src) #0 { ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP10:%.*]] = mul [[TMP8]], splat (i64 3) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP10]] -; CHECK-NEXT: [[TMP13:%.*]] = mul i64 3, [[TMP7]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP13]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 4 ; CHECK-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], 1 ; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i16 1, i32 [[TMP16]] +; CHECK-NEXT: [[TMP22:%.*]] = mul i64 3, [[TMP7]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP22]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -540,13 +540,13 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_6(ptr %src, i1 % ; CHECK-NEXT: [[TMP11:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP13:%.*]] = mul [[TMP11]], splat (i64 3) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP13]] -; CHECK-NEXT: [[TMP16:%.*]] = mul i64 3, [[TMP10]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, 
i64 [[TMP16]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[TMP17]], 8 ; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1 ; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i16 1, i32 [[TMP19]] +; CHECK-NEXT: [[TMP25:%.*]] = mul i64 3, [[TMP10]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP25]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll index c00868c3e0be2..7baba88061cd4 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll @@ -25,11 +25,11 @@ define void @test(ptr noalias nocapture %a, ptr noalias nocapture %b, i32 %v) { ; VLENUNK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i64() ; VLENUNK-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i64 1) ; VLENUNK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] +; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[V:%.*]], i64 0 +; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLENUNK-NEXT: [[TMP11:%.*]] = mul i64 1, [[TMP5]] ; VLENUNK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 ; VLENUNK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[V:%.*]], i64 0 -; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLENUNK-NEXT: 
br label [[VECTOR_BODY:%.*]] ; VLENUNK: vector.body: ; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll index 09e8a760a355c..44af6c77a573c 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll @@ -23,15 +23,15 @@ define void @pr87378_vpinstruction_or_drop_poison_generating_flags(ptr %arg, i64 ; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] -; CHECK-NEXT: [[TMP11:%.*]] = mul i64 1, [[TMP5]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[A]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[B]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i64 [[C]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; 
CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll index 305d26d7f3bc1..a0c56faf913fc 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll @@ -54,8 +54,8 @@ define void @test(ptr %p, i64 %a, i8 %b) { ; CHECK-NEXT: store i8 [[TMP16]], ptr [[P]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]] ; CHECK: pred.store.continue8: -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY1]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll index f7b8758084056..9602d4cbf5df3 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll @@ -560,23 +560,23 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-NEXT: [[TMP12:%.*]] = call @llvm.stepvector.nxv4i64() ; STRIDED-NEXT: [[TMP14:%.*]] = mul [[TMP12]], splat (i64 1) ; STRIDED-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP14]] -; STRIDED-NEXT: [[TMP17:%.*]] = mul i64 1, [[TMP11]] -; STRIDED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP17]], i64 0 +; STRIDED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[STRIDE]], i64 0 ; STRIDED-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[STRIDE]], i64 0 +; STRIDED-NEXT: 
[[TMP15:%.*]] = mul i64 1, [[TMP11]] +; STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP15]], i64 0 ; STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; STRIDED: vector.body: ; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; STRIDED-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; STRIDED-NEXT: [[TMP18:%.*]] = mul nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT]] +; STRIDED-NEXT: [[TMP18:%.*]] = mul nuw nsw [[VEC_IND]], [[DOTSPLAT]] ; STRIDED-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[P]], [[TMP18]] ; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP19]], i32 4, splat (i1 true), poison), !alias.scope [[META8:![0-9]+]] ; STRIDED-NEXT: [[TMP20:%.*]] = add [[WIDE_MASKED_GATHER]], splat (i32 1) ; STRIDED-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[P2]], [[TMP18]] ; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP20]], [[TMP21]], i32 4, splat (i1 true)), !alias.scope [[META11:![0-9]+]], !noalias [[META8]] ; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] -; STRIDED-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; STRIDED-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; STRIDED-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; STRIDED-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; STRIDED: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll index 95496bb51a1bc..e3f0700cf352e 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll @@ -325,11 +325,11 @@ define void 
@conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; SCALABLE-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i64() ; SCALABLE-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i64 1) ; SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] +; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B:%.*]], i64 0 +; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]] ; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 ; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B:%.*]], i64 0 -; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -432,11 +432,11 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv2i64() ; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul [[TMP5]], splat (i64 1) ; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] +; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B:%.*]], i64 0 +; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP4]] ; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 ; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B:%.*]], i64 0 -; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector 
[[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; TF-SCALABLE: vector.body: ; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -995,13 +995,13 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv2i64() ; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul [[TMP5]], splat (i64 1) ; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] -; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP4]] -; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 -; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B:%.*]], i64 0 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer +; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP4]] +; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 +; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; TF-SCALABLE: vector.body: ; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1125,13 +1125,13 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; SCALABLE-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i64() ; SCALABLE-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i64 1) ; SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] -; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]] -; SCALABLE-NEXT: 
[[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 -; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, ptr [[B:%.*]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer +; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]] +; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 +; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1231,13 +1231,13 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv2i64() ; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul [[TMP5]], splat (i64 1) ; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] -; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP4]] -; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 -; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, ptr [[B:%.*]], i64 0 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer +; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP4]] +; 
TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 +; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; TF-SCALABLE: vector.body: ; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll index fefbdb473e2ab..7045b4e03ed8c 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll @@ -601,10 +601,10 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-OUTLOOP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-OUTLOOP-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 -; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement zeroinitializer, i32 [[START]], i32 0 ; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = call @llvm.stepvector.nxv4i32() ; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul [[TMP12]], splat (i32 1) ; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP14]] +; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement zeroinitializer, i32 [[START]], i32 0 ; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP10]] to i32 ; NO-VP-OUTLOOP-NEXT: [[TMP17:%.*]] = mul i32 1, [[TMP16]] ; NO-VP-OUTLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP17]], i64 0 @@ -793,10 +793,10 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-OUTLOOP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-OUTLOOP-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 -; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement 
zeroinitializer, i32 [[START]], i32 0 ; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = call @llvm.stepvector.nxv4i32() ; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul [[TMP12]], splat (i32 1) ; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP14]] +; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement zeroinitializer, i32 [[START]], i32 0 ; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP10]] to i32 ; NO-VP-OUTLOOP-NEXT: [[TMP17:%.*]] = mul i32 1, [[TMP16]] ; NO-VP-OUTLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP17]], i64 0 diff --git a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll index 68695a8b1282c..843afa95687c4 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll @@ -608,8 +608,8 @@ define void @pr70590_recipe_without_underlying_instr(i64 %n, ptr noalias %dst) { ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr %dst, i64 [[TMP0]] ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[TMP19]], i32 0 ; CHECK-NEXT: store <4 x i8> [[PREDPHI]], ptr [[TMP20]], align 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: br i1 true, label %middle.block, label %vector.body ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll index 7614be0bd3fce..3d47d8217b5f5 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll @@ -203,22 +203,22 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l, ; CHECK-NEXT: 
[[DOTSPLAT16:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT15]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = mul <8 x i16> , [[DOTSPLAT16]] ; CHECK-NEXT: [[INDUCTION17:%.*]] = add <8 x i16> [[DOTSPLAT14]], [[TMP14]] -; CHECK-NEXT: [[TMP15:%.*]] = mul i16 [[TMP0]], 8 -; CHECK-NEXT: [[DOTSPLATINSERT18:%.*]] = insertelement <8 x i16> poison, i16 [[TMP15]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT18:%.*]] = insertelement <8 x i16> poison, i16 [[OFF]], i64 0 ; CHECK-NEXT: [[DOTSPLAT19:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT18]], <8 x i16> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT22:%.*]] = insertelement <8 x i16> poison, i16 [[OFF]], i64 0 +; CHECK-NEXT: [[TMP15:%.*]] = mul i16 [[TMP0]], 8 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT22:%.*]] = insertelement <8 x i16> poison, i16 [[TMP15]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT23:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT22]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX12:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT24:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND20:%.*]] = phi <8 x i16> [ [[INDUCTION17]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT21:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX12]], 0 -; CHECK-NEXT: [[TMP17:%.*]] = sub <8 x i16> [[VEC_IND20]], [[BROADCAST_SPLAT23]] +; CHECK-NEXT: [[TMP17:%.*]] = sub <8 x i16> [[VEC_IND20]], [[DOTSPLAT19]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[K]], i64 [[TMP16]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i16, ptr [[TMP18]], i32 0 ; CHECK-NEXT: store <8 x i16> [[TMP17]], ptr [[TMP19]], align 2 ; CHECK-NEXT: [[INDEX_NEXT24]] = add nuw i64 [[INDEX12]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT21]] = add <8 x i16> [[VEC_IND20]], [[DOTSPLAT19]] +; CHECK-NEXT: [[VEC_IND_NEXT21]] = add <8 x i16> 
[[VEC_IND20]], [[BROADCAST_SPLAT23]] ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT24]], [[N_VEC5]] ; CHECK-NEXT: br i1 [[TMP20]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: vec.epilog.middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll index b6e9dfc2322ff..769d1e3569cda 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll @@ -418,9 +418,9 @@ define i16 @iv_and_step_trunc() { ; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i64> [[TMP0]] to <2 x i16> ; CHECK-NEXT: [[TMP2]] = mul <2 x i16> [[VEC_IND1]], [[TMP1]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], splat (i16 2) -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i16> [[TMP2]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll index b885d85a96800..576d749c22120 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll @@ -523,8 +523,8 @@ define void @interleave_store_double_i64(ptr %dst) { ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP2]], <4 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <4 x i32> ; CHECK-NEXT: store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8 -; CHECK-NEXT: 
[[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] @@ -645,8 +645,8 @@ define void @interleave_store_i64_double_2(ptr %dst) { ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> zeroinitializer, <4 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <4 x i32> ; CHECK-NEXT: store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll b/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll index 6480c0ab1099d..02d48cbda1aab 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll @@ -71,8 +71,8 @@ ; AVX: br i1 %[[InnerCond]], label %[[ForInc]], label %[[InnerLoop]] ; AVX: [[ForInc]]: -; AVX: %[[VecIndNext]] = add <8 x i64> %[[VecInd]], splat (i64 8) ; AVX: %[[IndNext]] = add nuw i64 %[[Ind]], 8 +; AVX: %[[VecIndNext]] = add <8 x i64> %[[VecInd]], splat (i64 8) ; AVX: br i1 true, label %middle.block, label %vector.body @arr2 = external global [8 x i32], align 16 diff --git 
a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll index 6bed344d4d73b..312125608c87e 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll @@ -183,7 +183,7 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[DOTSPLAT68:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT67]], <8 x i64> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION69:%.*]] = add <8 x i64> [[DOTSPLAT68]], ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY51:%.*]] -; CHECK: vec.epilog.vector.body51: +; CHECK: vec.epilog.vector.body57: ; CHECK-NEXT: [[INDEX61:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL51]], [[VEC_EPILOG_PH42]] ], [ [[INDEX_NEXT74:%.*]], [[VEC_EPILOG_VECTOR_BODY51]] ] ; CHECK-NEXT: [[VEC_IND65:%.*]] = phi <8 x i64> [ [[INDUCTION64]], [[VEC_EPILOG_PH42]] ], [ [[VEC_IND_NEXT66:%.*]], [[VEC_EPILOG_VECTOR_BODY51]] ] ; CHECK-NEXT: [[VEC_IND70:%.*]] = phi <8 x i64> [ [[INDUCTION69]], [[VEC_EPILOG_PH42]] ], [ [[VEC_IND_NEXT71:%.*]], [[VEC_EPILOG_VECTOR_BODY51]] ] diff --git a/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll b/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll index c745b4f74786c..e10b801844985 100644 --- a/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll +++ b/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll @@ -50,10 +50,10 @@ define void @test(i32 %arg, i32 %L1.limit, i32 %L2.switch, i1 %c, ptr %dst) { ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> , [[DOTSPLAT]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> splat (i32 1), [[TMP4]] -; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[INDUCTION_IV_LCSSA1]], 4 -; CHECK-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <4 x i32> poison, i32 
[[L1_EXIT_VAL]], i64 0 ; CHECK-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT3]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[L1_EXIT_VAL]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[INDUCTION_IV_LCSSA1]], 4 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -61,13 +61,13 @@ define void @test(i32 %arg, i32 %L1.limit, i32 %L2.switch, i1 %c, ptr %dst) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP7:%.*]] = sub <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP7:%.*]] = sub <4 x i32> [[VEC_IND]], [[DOTSPLAT4]] ; CHECK-NEXT: [[TMP8:%.*]] = sext <4 x i32> [[TMP7]] to <4 x i64> ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST:%.*]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP9]], i32 0 ; CHECK-NEXT: store <4 x i64> [[TMP8]], ptr [[TMP10]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], [[DOTSPLAT4]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 12 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll index a829decb5231e..6aa44533c4a15 
100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll @@ -248,11 +248,11 @@ define i1 @any_of_reduction_i1_epilog(i64 %N, i32 %a) { ; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[TMP0]], 4 ; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF2]] ; CHECK-NEXT: [[IND_END5:%.*]] = trunc i64 [[N_VEC3]] to i32 -; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP7]], i64 0 -; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x i1> [[MINMAX_IDENT_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[BC_RESUME_VAL]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], +; CHECK-NEXT: [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <4 x i1> poison, i1 [[TMP7]], i64 0 +; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT10]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT13:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT14:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT13]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll index 509b8f99e4b61..553462207386b 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -892,10 +892,10 @@ define i32 @PR27246() { ; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[I_016]], 8 ; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[I_016]], [[N_MOD_VF]] ; UNROLL-NO-IC-NEXT: 
[[IND_END:%.*]] = sub i32 [[I_016]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[E_015]], i32 3 ; UNROLL-NO-IC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[I_016]], i64 0 ; UNROLL-NO-IC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; UNROLL-NO-IC-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[E_015]], i32 3 ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -986,10 +986,10 @@ define i32 @PR27246() { ; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[I_016]], 4 ; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i32 [[I_016]], [[N_MOD_VF]] ; SINK-AFTER-NEXT: [[IND_END:%.*]] = sub i32 [[I_016]], [[N_VEC]] -; SINK-AFTER-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[E_015]], i32 3 ; SINK-AFTER-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[I_016]], i64 0 ; SINK-AFTER-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; SINK-AFTER-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], +; SINK-AFTER-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[E_015]], i32 3 ; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]] ; SINK-AFTER: vector.body: ; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll index 29b29c500c46e..b94520ce5abfe 100644 --- a/llvm/test/Transforms/LoopVectorize/float-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll @@ -770,10 +770,10 @@ define void @fp_iv_loop3(float %init, ptr 
noalias nocapture %A, ptr noalias noca ; VEC4_INTERL1-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT5]], <4 x float> poison, <4 x i32> zeroinitializer ; VEC4_INTERL1-NEXT: [[TMP4:%.*]] = fmul fast <4 x float> [[DOTSPLAT6]], ; VEC4_INTERL1-NEXT: [[INDUCTION:%.*]] = fadd fast <4 x float> [[DOTSPLAT]], [[TMP4]] -; VEC4_INTERL1-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP0]], 4.000000e+00 -; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT7:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0 +; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT7:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 ; VEC4_INTERL1-NEXT: [[DOTSPLAT8:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT7]], <4 x float> poison, <4 x i32> zeroinitializer -; VEC4_INTERL1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 +; VEC4_INTERL1-NEXT: [[TMP13:%.*]] = fmul fast float [[TMP0]], 4.000000e+00 +; VEC4_INTERL1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP13]], i64 0 ; VEC4_INTERL1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer ; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC4_INTERL1: vector.body: @@ -782,7 +782,7 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca ; VEC4_INTERL1-NEXT: [[VEC_IND9:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT10:%.*]], [[VECTOR_BODY]] ] ; VEC4_INTERL1-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] ; VEC4_INTERL1-NEXT: store <4 x float> [[VEC_IND9]], ptr [[TMP6]], align 4 -; VEC4_INTERL1-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[VEC_IND9]], [[BROADCAST_SPLAT]] +; VEC4_INTERL1-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[VEC_IND9]], [[DOTSPLAT8]] ; VEC4_INTERL1-NEXT: [[TMP8:%.*]] = fadd fast <4 x float> [[VEC_IND]], splat (float -5.000000e-01) ; VEC4_INTERL1-NEXT: [[TMP9:%.*]] = fadd 
fast <4 x float> [[TMP8]], [[TMP7]] ; VEC4_INTERL1-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]] @@ -791,7 +791,7 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca ; VEC4_INTERL1-NEXT: store <4 x float> [[TMP8]], ptr [[TMP11]], align 4 ; VEC4_INTERL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], splat (float -2.000000e+00) -; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT10]] = fadd fast <4 x float> [[VEC_IND9]], [[DOTSPLAT8]] +; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT10]] = fadd fast <4 x float> [[VEC_IND9]], [[BROADCAST_SPLAT]] ; VEC4_INTERL1-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VEC4_INTERL1-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; VEC4_INTERL1: middle.block: @@ -1012,10 +1012,10 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT5]], <2 x float> poison, <2 x i32> zeroinitializer ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = fmul fast <2 x float> [[DOTSPLAT6]], ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fadd fast <2 x float> [[DOTSPLAT]], [[TMP4]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP0]], 2.000000e+00 -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT7:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i64 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT7:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0 ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT8:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT7]], <2 x float> poison, <2 x i32> zeroinitializer -; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP13:%.*]] = fmul fast float [[TMP0]], 2.000000e+00 +; 
VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP13]], i64 0 ; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer ; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC2_INTERL1_PRED_STORE: vector.body: @@ -1024,7 +1024,7 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca ; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND9:%.*]] = phi <2 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT10:%.*]], [[VECTOR_BODY]] ] ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] ; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND9]], ptr [[TMP6]], align 4 -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP7:%.*]] = fadd fast <2 x float> [[VEC_IND9]], [[BROADCAST_SPLAT]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP7:%.*]] = fadd fast <2 x float> [[VEC_IND9]], [[DOTSPLAT8]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP8:%.*]] = fadd fast <2 x float> [[VEC_IND]], splat (float -5.000000e-01) ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP9:%.*]] = fadd fast <2 x float> [[TMP8]], [[TMP7]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]] @@ -1033,7 +1033,7 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca ; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[TMP8]], ptr [[TMP11]], align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], splat (float -1.000000e+00) -; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT10]] = fadd fast <2 x float> [[VEC_IND9]], [[DOTSPLAT8]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT10]] = fadd fast <2 x float> [[VEC_IND9]], [[BROADCAST_SPLAT]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; 
VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; VEC2_INTERL1_PRED_STORE: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/induction-step.ll b/llvm/test/Transforms/LoopVectorize/induction-step.ll index 29c8d1f36ea59..80a4754a0b63a 100644 --- a/llvm/test/Transforms/LoopVectorize/induction-step.ll +++ b/llvm/test/Transforms/LoopVectorize/induction-step.ll @@ -350,22 +350,22 @@ define void @wide_add_induction_step_live_in(ptr %dst, i64 %N, i16 %off) { ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = mul <8 x i16> , [[DOTSPLAT]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i16> zeroinitializer, [[TMP1]] -; CHECK-NEXT: [[TMP2:%.*]] = mul i16 [[O_1]], 8 -; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <8 x i16> poison, i16 [[O_1]], i64 0 ; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT1]], <8 x i16> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[O_1]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = mul i16 [[O_1]], 8 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i16> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = add <8 x i16> [[VEC_IND]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP4:%.*]] = add <8 x 
i16> [[VEC_IND]], [[DOTSPLAT2]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0 ; CHECK-NEXT: store <8 x i16> [[TMP4]], ptr [[TMP6]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i16> [[VEC_IND]], [[DOTSPLAT2]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i16> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -422,22 +422,22 @@ define void @wide_sub_induction_step_live_in(ptr %dst, i64 %N, i16 %off) { ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i16> , [[DOTSPLAT]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i16> zeroinitializer, [[TMP2]] -; CHECK-NEXT: [[TMP3:%.*]] = mul i16 [[TMP0]], 8 -; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP3]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <8 x i16> poison, i16 [[O_1]], i64 0 ; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT1]], <8 x i16> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[O_1]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = mul i16 [[TMP0]], 8 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP3]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i16> [ [[INDUCTION]], 
%[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = sub <8 x i16> [[VEC_IND]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP5:%.*]] = sub <8 x i16> [[VEC_IND]], [[DOTSPLAT2]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 0 ; CHECK-NEXT: store <8 x i16> [[TMP5]], ptr [[TMP7]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i16> [[VEC_IND]], [[DOTSPLAT2]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i16> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll index f6ca9ff2e7b2e..d2d123af4b8f7 100644 --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -6367,12 +6367,12 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; UNROLL-NO-IC-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32 ; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = mul i32 [[DOTCAST]], [[STEP]] -; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0 -; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = mul <2 x i32> splat (i32 2), [[BROADCAST_SPLAT]] ; UNROLL-NO-IC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0 ; UNROLL-NO-IC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> 
poison, <2 x i32> zeroinitializer -; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = mul <2 x i32> , [[DOTSPLAT]] +; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = mul <2 x i32> splat (i32 2), [[DOTSPLAT]] +; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0 +; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer +; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = mul <2 x i32> , [[BROADCAST_SPLAT]] ; UNROLL-NO-IC-NEXT: [[INDUCTION:%.*]] = add <2 x i32> zeroinitializer, [[TMP18]] ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll index 8e132ed8399cd..2fb840af63542 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll @@ -116,7 +116,7 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1 ; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] -; CHECK: pred.load.if3: +; CHECK: pred.load.if2: ; CHECK-NEXT: [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 @@ -125,12 +125,12 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 ; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] -; CHECK: pred.load.continue4: +; CHECK: pred.load.continue3: ; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> 
[ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF3]] ] ; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ] ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2 ; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]] -; CHECK: pred.load.if5: +; CHECK: pred.load.if4: ; CHECK-NEXT: [[TMP21:%.*]] = or disjoint i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP21]] ; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 @@ -139,12 +139,12 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 ; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] -; CHECK: pred.load.continue6: +; CHECK: pred.load.continue5: ; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP27]], [[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3 ; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8]] -; CHECK: pred.load.if7: +; CHECK: pred.load.if6: ; CHECK-NEXT: [[TMP31:%.*]] = or disjoint i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP31]] ; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 @@ -153,7 +153,7 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 ; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]] -; CHECK: pred.load.continue8: +; CHECK: pred.load.continue7: ; CHECK-NEXT: [[TMP38:%.*]] 
= phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP34]], [[PRED_LOAD_IF7]] ] ; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP37]], [[PRED_LOAD_IF7]] ] ; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_IND1]], <4 x i32> zeroinitializer @@ -321,7 +321,7 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1 ; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] -; CHECK: pred.load.if3: +; CHECK: pred.load.if2: ; CHECK-NEXT: [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 @@ -330,12 +330,12 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 ; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] -; CHECK: pred.load.continue4: +; CHECK: pred.load.continue3: ; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF3]] ] ; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ] ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2 ; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]] -; CHECK: pred.load.if5: +; CHECK: pred.load.if4: ; CHECK-NEXT: [[TMP21:%.*]] = or disjoint i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP21]] ; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 @@ -344,12 +344,12 @@ define i32 @reduction_prod(ptr 
noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 ; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] -; CHECK: pred.load.continue6: +; CHECK: pred.load.continue5: ; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP27]], [[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3 ; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8]] -; CHECK: pred.load.if7: +; CHECK: pred.load.if6: ; CHECK-NEXT: [[TMP31:%.*]] = or disjoint i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP31]] ; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 @@ -358,7 +358,7 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 ; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]] -; CHECK: pred.load.continue8: +; CHECK: pred.load.continue7: ; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP34]], [[PRED_LOAD_IF7]] ] ; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP37]], [[PRED_LOAD_IF7]] ] ; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_IND1]], <4 x i32> splat (i32 1) @@ -436,7 +436,7 @@ define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1 ; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label 
[[PRED_LOAD_CONTINUE4:%.*]] -; CHECK: pred.load.if3: +; CHECK: pred.load.if2: ; CHECK-NEXT: [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 @@ -445,12 +445,12 @@ define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 ; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] -; CHECK: pred.load.continue4: +; CHECK: pred.load.continue3: ; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF3]] ] ; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ] ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2 ; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]] -; CHECK: pred.load.if5: +; CHECK: pred.load.if4: ; CHECK-NEXT: [[TMP21:%.*]] = or disjoint i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP21]] ; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 @@ -459,12 +459,12 @@ define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 ; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] -; CHECK: pred.load.continue6: +; CHECK: pred.load.continue5: ; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP27]], [[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3 ; CHECK-NEXT: br i1 [[TMP30]], label 
[[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8]] -; CHECK: pred.load.if7: +; CHECK: pred.load.if6: ; CHECK-NEXT: [[TMP31:%.*]] = or disjoint i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP31]] ; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 @@ -473,7 +473,7 @@ define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 ; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]] -; CHECK: pred.load.continue8: +; CHECK: pred.load.continue7: ; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP34]], [[PRED_LOAD_IF7]] ] ; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP37]], [[PRED_LOAD_IF7]] ] ; CHECK-NEXT: [[TMP40:%.*]] = mul nsw <4 x i32> [[TMP39]], [[TMP38]] diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll index e24c6090b704b..561dd2cf7b11e 100644 --- a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll +++ b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll @@ -29,8 +29,8 @@ define i64 @same_exit_block_pre_inc_use1() { ; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true) ; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.split: ; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label 
[[MIDDLE_BLOCK:%.*]] @@ -114,8 +114,8 @@ define i64 @same_exit_block_pre_inc1_use_inv_cond(i1 %cond) { ; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true) ; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP7]]) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.split: ; CHECK-NEXT: br i1 [[TMP8]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]] @@ -199,8 +199,8 @@ define i64 @same_exit_block_pre_inc_use1_gep_two_indices() { ; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true) ; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.split: ; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]] @@ -282,8 +282,8 @@ define i64 @same_exit_block_pre_inc_use1_alloca_diff_type() { ; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true) ; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: br i1 [[TMP9]], 
label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.split: ; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]] @@ -365,8 +365,8 @@ define i64 @same_exit_block_pre_inc_use2() { ; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true) ; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.split: ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 3 @@ -447,8 +447,8 @@ define i64 @same_exit_block_pre_inc_use3() { ; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true) ; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.split: ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 3 @@ -529,8 +529,8 @@ define i64 @same_exit_block_pre_inc_use4() { ; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) ; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]]) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 64 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] +; CHECK-NEXT: 
[[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.split: ; CHECK-NEXT: br i1 [[TMP5]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]] @@ -615,8 +615,8 @@ define i64 @same_exit_block_post_inc_use() { ; CHECK-NEXT: [[TMP13:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true) ; CHECK-NEXT: [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP13]]) ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP16:%.*]] = or i1 [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.split: ; CHECK-NEXT: br i1 [[TMP14]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]] @@ -709,8 +709,8 @@ define i64 @same_exit_block_post_inc_use2() { ; CHECK-NEXT: [[TMP17:%.*]] = xor <4 x i1> [[TMP16]], splat (i1 true) ; CHECK-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP17]]) ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP20:%.*]] = or i1 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.split: ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 3 @@ -793,8 +793,8 @@ define i64 @diff_exit_block_pre_inc_use1() { ; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true) ; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 -; CHECK-NEXT: 
[[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.split: ; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]] @@ -883,8 +883,8 @@ define i64 @diff_exit_block_pre_inc_use2() { ; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true) ; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.split: ; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]] @@ -972,8 +972,8 @@ define i64 @diff_exit_block_pre_inc_use3() { ; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true) ; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT4]], 64 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK: middle.split: ; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]] @@ -1068,8 +1068,8 @@ define i64 @diff_exit_block_post_inc_use1() { ; CHECK-NEXT: [[TMP13:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true) ; CHECK-NEXT: [[TMP14:%.*]] = call i1 
@llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP13]]) ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP16:%.*]] = or i1 [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK: middle.split: ; CHECK-NEXT: br i1 [[TMP14]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]] @@ -1169,8 +1169,8 @@ define i64 @diff_exit_block_post_inc_use2() { ; CHECK-NEXT: [[TMP17:%.*]] = xor <4 x i1> [[TMP16]], splat (i1 true) ; CHECK-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP17]]) ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP20:%.*]] = or i1 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; CHECK: middle.split: ; CHECK-NEXT: br i1 [[TMP18]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]] @@ -1258,8 +1258,8 @@ define i64 @loop_contains_safe_call() { ; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) ; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 64 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; CHECK: middle.split: ; CHECK-NEXT: br i1 [[TMP6]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]] @@ -1337,8 +1337,8 @@ define i64 
@loop_contains_safe_div() { ; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) ; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 64 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] ; CHECK: middle.split: ; CHECK-NEXT: br i1 [[TMP6]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]] @@ -1417,8 +1417,8 @@ define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align( ; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) ; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; CHECK: middle.split: ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[WIDE_LOAD2]], i32 3 @@ -1503,8 +1503,8 @@ define i64 @same_exit_block_pre_inc_use1_reverse() { ; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true) ; CHECK-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP8]]) ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT4]], 1020 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 -4) ; CHECK-NEXT: [[TMP11:%.*]] = or i1 [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 -4) ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop 
[[LOOP36:![0-9]+]] ; CHECK: middle.split: ; CHECK-NEXT: br i1 [[TMP9]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]] @@ -1583,8 +1583,8 @@ define i64 @same_exit_block_pre_inc_use1_deref_ptrs(ptr dereferenceable(1024) %p ; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true) ; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] ; CHECK: middle.split: ; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]] From 6fc4a7c59bf4199906ddae52ac8a3bde0290191f Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Fri, 13 Dec 2024 01:06:37 +0800 Subject: [PATCH 02/28] Remove initial and backedge recipes, expand out to regular VPInstructions + splat and step vector recipes --- .../Vectorize/LoopVectorizationPlanner.h | 8 + .../Transforms/Vectorize/LoopVectorize.cpp | 3 +- llvm/lib/Transforms/Vectorize/VPlan.h | 182 +++++++---------- .../Transforms/Vectorize/VPlanAnalysis.cpp | 3 +- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 183 ++++-------------- .../Transforms/Vectorize/VPlanTransforms.cpp | 101 +++++++--- .../Transforms/Vectorize/VPlanTransforms.h | 2 +- llvm/lib/Transforms/Vectorize/VPlanValue.h | 4 +- .../AArch64/clamped-trip-count.ll | 8 +- .../AArch64/divs-with-scalable-vfs.ll | 14 +- .../AArch64/scalable-avoid-scalarization.ll | 4 +- .../AArch64/sve-interleaved-accesses.ll | 48 ++--- .../sve-interleaved-masked-accesses.ll | 32 +-- .../RISCV/blocks-with-dead-instructions.ll | 36 ++-- .../LoopVectorize/RISCV/dead-ops-cost.ll | 4 +- .../LoopVectorize/RISCV/induction-costs.ll | 4 +- 
.../LoopVectorize/RISCV/mask-index-type.ll | 4 +- ...ruction-or-drop-poison-generating-flags.ll | 6 +- .../LoopVectorize/RISCV/strided-accesses.ll | 10 +- .../LoopVectorize/RISCV/uniform-load-store.ll | 58 +++--- ...rize-force-tail-with-evl-cond-reduction.ll | 4 +- .../X86/epilog-vectorization-inductions.ll | 18 +- .../LoopVectorize/X86/induction-costs.ll | 12 +- .../LoopVectorize/create-induction-resume.ll | 10 +- .../LoopVectorize/float-induction.ll | 92 ++++----- .../LoopVectorize/induction-step.ll | 34 ++-- .../LoopVectorize/iv_outside_user.ll | 12 +- .../optimal-epilog-vectorization.ll | 16 +- 28 files changed, 404 insertions(+), 508 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index ed3e45dd2c6c8..7f1ec987ad038 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -265,6 +265,14 @@ class VPBuilder { FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags())); } + VPSplatRecipe *createSplat(VPValue *Val) { + return tryInsertInstruction(new VPSplatRecipe(Val)); + } + + VPStepVectorRecipe *createStepVector(Type *Ty) { + return tryInsertInstruction(new VPStepVectorRecipe(Ty)); + } + //===--------------------------------------------------------------------===// // RAII helpers. 
//===--------------------------------------------------------------------===// diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 226fc23888f02..b8b958ddaa9ae 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7705,7 +7705,8 @@ DenseMap LoopVectorizationPlanner::executePlan( VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE); VPlanTransforms::simplifyRecipes(BestVPlan, *Legal->getWidestInductionType()); VPlanTransforms::removeDeadRecipes(BestVPlan); - VPlanTransforms::convertToConcreteRecipes(BestVPlan); + VPlanTransforms::convertToConcreteRecipes(BestVPlan, + Legal->getWidestInductionType()); // Perform the actual loop transformation. VPTransformState State(&TTI, BestVF, BestUF, LI, DT, ILV.Builder, &ILV, diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index e41ee6ce18656..bd860c177e6ec 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1236,6 +1236,74 @@ class VPScalarCastRecipe : public VPSingleDefRecipe { } }; +/// A recipe for splatting a scalar value to a vector. +class VPSplatRecipe : public VPSingleDefRecipe { +public: + VPSplatRecipe(VPValue *Op) : VPSingleDefRecipe(VPDef::VPSplatSC, {Op}) {} + + ~VPSplatRecipe() override = default; + + VPSplatRecipe *clone() override { return new VPSplatRecipe(getOperand(0)); } + + VP_CLASSOF_IMPL(VPDef::VPSplatSC) + + void execute(VPTransformState &State) override; + + /// Return the cost of this VPSplatRecipe. + InstructionCost computeCost(ElementCount VF, + VPCostContext &Ctx) const override { + // TODO: Compute accurate cost after retiring the legacy cost model. 
+ return 0; + } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; +#endif + + /// Returns true if the recipe only uses the first lane of operand \p Op. + bool onlyFirstLaneUsed(const VPValue *Op) const override { + assert(is_contained(operands(), Op) && + "Op must be an operand of the recipe"); + return true; + } +}; + +/// A recipe for generating a step vector. +class VPStepVectorRecipe : public VPSingleDefRecipe { + /// Scalar return type of the intrinsic. + Type *ScalarTy; + +public: + VPStepVectorRecipe(Type *Ty) + : VPSingleDefRecipe(VPDef::VPStepVectorSC, {}), ScalarTy(Ty) {} + + ~VPStepVectorRecipe() override = default; + + VPStepVectorRecipe *clone() override { + return new VPStepVectorRecipe(ScalarTy); + } + + VP_CLASSOF_IMPL(VPDef::VPStepVectorSC) + + void execute(VPTransformState &State) override; + + /// Return the cost of this VPStepVectorRecipe. + InstructionCost computeCost(ElementCount VF, + VPCostContext &Ctx) const override { + // TODO: Compute accurate cost after retiring the legacy cost model. + return 0; + } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; +#endif + + /// Return the scalar return type of the intrinsic. + Type *getScalarType() const { return ScalarTy; } +}; + /// A recipe for widening vector intrinsics. class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags { /// ID of the vector intrinsic to widen. @@ -1820,59 +1888,10 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe { } /// Returns the VPValue representing the value of this induction at - /// the last unrolled part, if it exists. Returns nullptr if unrolling did not + /// the last unrolled part, if it exists. Returns itself if unrolling did not /// take place. VPValue *getLastUnrolledPartOperand() { - return getNumOperands() == 5 ? 
getOperand(4) : nullptr; - } -}; - -/// A recipe to compute the initial value for a widened IV, expanded from -/// VPWidenIntOrFpInductionRecipe. -class VPWidenIntOrFpInductionInitialRecipe : public VPSingleDefRecipe { - Instruction *IV; - const InductionDescriptor &ID; - -public: - VPWidenIntOrFpInductionInitialRecipe(Instruction *IV, VPValue *Start, - VPValue *Step, - const InductionDescriptor &ID) - : VPSingleDefRecipe(VPDef::VPWidenIntOrFpInductionStartSC, {Start, Step}), - IV(IV), ID(ID) { - assert((isa(IV) || isa(IV)) && - "Expected either an induction phi-node or a truncate of it!"); - } - - ~VPWidenIntOrFpInductionInitialRecipe() override = default; - - VPWidenIntOrFpInductionInitialRecipe *clone() override { - return new VPWidenIntOrFpInductionInitialRecipe(IV, getOperand(0), - getOperand(1), ID); - } - - VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionStartSC) - - void execute(VPTransformState &State) override; - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - /// Print the recipe. - void print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const override; -#endif - - VPValue *getStartValue() { return getOperand(0); } - const VPValue *getStartValue() const { return getOperand(0); } - - VPValue *getStepValue() { return getOperand(1); } - const VPValue *getStepValue() const { return getOperand(1); } - - /// Returns the scalar type of the induction. - Type *getScalarType() const { return IV->getType(); } - - bool onlyFirstLaneUsed(const VPValue *Op) const override { - assert(is_contained(operands(), Op) && - "Op must be an operand of the recipe"); - return true; + return getNumOperands() == 5 ? getOperand(4) : this; } }; @@ -1908,67 +1927,6 @@ class VPWidenIntOrFpInductionPHIRecipe : public VPHeaderPHIRecipe { #endif }; -/// A recipe to compute the backedge value for a widened IV, expanded from -/// VPWidenIntOrFpInductionRecipe. 
-class VPWidenIntOrFpInductionBackedgeRecipe : public VPSingleDefRecipe { - Instruction *IV; - const InductionDescriptor &ID; - -public: - VPWidenIntOrFpInductionBackedgeRecipe(Instruction *IV, VPValue *Step, - VPValue *VF, VPValue *Prev, - VPValue *SplatVF, - const InductionDescriptor &ID) - : VPSingleDefRecipe(VPDef::VPWidenIntOrFpInductionSC, {Step, VF, Prev}), - IV(IV), ID(ID) { - assert((isa(IV) || isa(IV)) && - "Expected either an induction phi-node or a truncate of it!"); - if (SplatVF) - addOperand(SplatVF); - } - - ~VPWidenIntOrFpInductionBackedgeRecipe() override = default; - - VPWidenIntOrFpInductionBackedgeRecipe *clone() override { - return new VPWidenIntOrFpInductionBackedgeRecipe( - IV, getOperand(0), getOperand(1), getOperand(2), getOperand(3), ID); - } - - VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionIncSC) - - void execute(VPTransformState &State) override; - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - /// Print the recipe. - void print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const override; -#endif - - VPValue *getStepValue() { return getOperand(0); } - const VPValue *getStepValue() const { return getOperand(0); } - - VPValue *getVFValue() { return getOperand(1); } - const VPValue *getVFValue() const { return getOperand(1); } - - VPValue *getPrevValue() { return getOperand(2); } - const VPValue *getPrevValue() const { return getOperand(2); } - - VPValue *getSplatVFValue() { - // If the recipe has been unrolled (4 operands), return the VPValue for the - // induction increment. - return getNumOperands() == 4 ? getOperand(3) : nullptr; - } - - /// Returns the scalar type of the induction. 
- Type *getScalarType() const { return IV->getType(); } - - bool onlyFirstLaneUsed(const VPValue *Op) const override { - assert(is_contained(operands(), Op) && - "Op must be an operand of the recipe"); - return Op == getOperand(0) || Op == getOperand(1); - } -}; - class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe, public VPUnrollPartAccessor<3> { bool IsScalarAfterVectorization; diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index ef0f12524b5a7..6108f2f4e4513 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -245,8 +245,7 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) { return inferScalarType(R->getStartValue()); }) .Case( + VPStepVectorRecipe>( [](const auto *R) { return R->getScalarType(); }) .Case(Val->getType()); - ElementCount VLen = ValVTy->getElementCount(); - - Type *STy = Val->getType()->getScalarType(); - assert((STy->isIntegerTy() || STy->isFloatingPointTy()) && - "Induction Step must be an integer or FP"); - assert(Step->getType() == STy && "Step has wrong type"); - - SmallVector Indices; - - // Create a vector of consecutive numbers from zero to VF. - VectorType *InitVecValVTy = ValVTy; - if (STy->isFloatingPointTy()) { - Type *InitVecValSTy = - IntegerType::get(STy->getContext(), STy->getScalarSizeInBits()); - InitVecValVTy = VectorType::get(InitVecValSTy, VLen); - } - Value *InitVec = Builder.CreateStepVector(InitVecValVTy); - - if (STy->isIntegerTy()) { - Step = Builder.CreateVectorSplat(VLen, Step); - assert(Step->getType() == Val->getType() && "Invalid step vec"); - // FIXME: The newly created binary instructions should contain nsw/nuw - // flags, which can be found from the original scalar operations. - Step = Builder.CreateMul(InitVec, Step); - return Builder.CreateAdd(Val, Step, "induction"); - } - - // Floating point induction. 
- assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) && - "Binary Opcode should be specified for FP induction"); - InitVec = Builder.CreateUIToFP(InitVec, ValVTy); - - Step = Builder.CreateVectorSplat(VLen, Step); - Value *MulOp = Builder.CreateFMul(InitVec, Step); - return Builder.CreateBinOp(BinOp, Val, MulOp, "induction"); -} - /// A helper function that returns an integer or floating-point constant with /// value C. static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) { @@ -1751,38 +1703,6 @@ static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) { : ConstantFP::get(Ty, C); } -void VPWidenIntOrFpInductionInitialRecipe::execute(VPTransformState &State) { - assert(!State.Lane && "Int or FP induction being replicated."); - - Value *Start = State.get(getStartValue(), true); - IRBuilderBase &Builder = State.Builder; - assert(State.VF.isVector() && "must have vector VF"); - - // Fast-math-flags propagate from the original induction instruction. - IRBuilder<>::FastMathFlagGuard FMFG(Builder); - if (isa_and_nonnull(ID.getInductionBinOp())) - Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags()); - - // Now do the actual transformations, and start with fetching the step value. 
- Value *Step = State.get(getStepValue(), VPLane(0)); - - // Construct the initial value of the vector IV - Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start); - Value *SteppedStart = getStepVector(SplatStart, Step, ID.getInductionOpcode(), - State.VF, State.Builder); - State.set(this, SteppedStart); -} - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void VPWidenIntOrFpInductionInitialRecipe::print( - raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { - O << Indent; - printAsOperand(O, SlotTracker); - O << " = WIDEN-INDUCTION-START "; - printOperands(O, SlotTracker); -} -#endif - void VPWidenIntOrFpInductionPHIRecipe::execute(VPTransformState &State) { BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); @@ -1804,74 +1724,6 @@ void VPWidenIntOrFpInductionPHIRecipe::print(raw_ostream &O, } #endif -void VPWidenIntOrFpInductionBackedgeRecipe::execute(VPTransformState &State) { - IRBuilderBase &Builder = State.Builder; - - // Fast-math-flags propagate from the original induction instruction. - IRBuilder<>::FastMathFlagGuard FMFG(Builder); - if (isa_and_nonnull(ID.getInductionBinOp())) - Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags()); - - Value *Step = State.get(getStepValue(), VPLane(0)); - - auto CurrIP = Builder.saveIP(); - BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); - Builder.SetInsertPoint(VectorPH->getTerminator()); - - // We create vector phi nodes for both integer and floating-point induction - // variables. Here, we determine the kind of arithmetic we will perform. - Instruction::BinaryOps AddOp; - Instruction::BinaryOps MulOp; - if (Step->getType()->isIntegerTy()) { - AddOp = Instruction::Add; - MulOp = Instruction::Mul; - } else { - AddOp = ID.getInductionOpcode(); - MulOp = Instruction::FMul; - } - - Value *SplatVF; - if (VPValue *SplatVFOperand = getSplatVFValue()) { - // The recipe has been unrolled. In that case, fetch the splat value for the - // induction increment. 
- SplatVF = State.get(SplatVFOperand); - } else { - // Multiply the vectorization factor by the step using integer or - // floating-point arithmetic as appropriate. - Type *StepType = Step->getType(); - Value *RuntimeVF = State.get(getVFValue(), VPLane(0)); - if (Step->getType()->isFloatingPointTy()) - RuntimeVF = Builder.CreateUIToFP(RuntimeVF, StepType); - else - RuntimeVF = Builder.CreateZExtOrTrunc(RuntimeVF, StepType); - Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF); - - // Create a vector splat to use in the induction update. - SplatVF = Builder.CreateVectorSplat(State.VF, Mul); - } - - Builder.restoreIP(CurrIP); - Value *PrevVal = State.get(getPrevValue()); - - Instruction *LastInduction = cast( - Builder.CreateBinOp(AddOp, PrevVal, SplatVF, "vec.ind.next")); - if (isa(IV)) - State.addMetadata(LastInduction, IV); - LastInduction->setDebugLoc(IV->getDebugLoc()); - - State.set(this, LastInduction); -} - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void VPWidenIntOrFpInductionBackedgeRecipe::print( - raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { - O << Indent; - printAsOperand(O, SlotTracker); - O << " = WIDEN-INDUCTION-INC "; - printOperands(O, SlotTracker); -} -#endif - #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { @@ -2456,8 +2308,9 @@ Value *VPScalarCastRecipe ::generate(VPTransformState &State) { switch (Opcode) { case Instruction::SExt: case Instruction::ZExt: - case Instruction::Trunc: { - // Note: SExt/ZExt not used yet. + case Instruction::Trunc: + case Instruction::UIToFP: { + // Note: SExt not used yet. 
Value *Op = State.get(getOperand(0), VPLane(0)); return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy); } @@ -2481,6 +2334,36 @@ void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent, } #endif +void VPSplatRecipe::execute(VPTransformState &State) { + Value *Splat = + State.Builder.CreateVectorSplat(State.VF, State.get(getOperand(0), true)); + State.set(this, Splat); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPSplatRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent; + printAsOperand(O, SlotTracker); + O << " = SPLAT "; + printOperands(O, SlotTracker); +} +#endif + +void VPStepVectorRecipe::execute(VPTransformState &State) { + VectorType *Ty = VectorType::get(ScalarTy, State.VF); + State.set(this, State.Builder.CreateStepVector(Ty)); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPStepVectorRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent; + printAsOperand(O, SlotTracker); + O << " = STEP-VECTOR"; +} +#endif + void VPBranchOnMaskRecipe::execute(VPTransformState &State) { assert(State.Lane && "Branch on Mask works only on single instance."); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 950eb5a2a01b3..4945288e058fb 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2013,7 +2013,7 @@ void VPlanTransforms::createInterleaveGroups( } } -/// Expand a VPWidenIntOrFpInduction into separate recipes for the initial +/// Expand a VPWidenIntOrFpInduction into executable recipes for the initial /// value, phi and backedge value. 
In the followng example: /// /// vector.ph: @@ -2021,79 +2021,126 @@ void VPlanTransforms::createInterleaveGroups( /// /// vector loop: { /// vector.body: -/// WIDEN-INDUCTION %i = phi %bc.resume.val, %i.next, ir<1>, ir<%5> +/// WIDEN-INDUCTION %i = phi %start, %step, %vf /// ... -/// EMIT branch-on-count vp<%index.next>, ir<%n.vec> +/// EMIT branch-on-count ... /// No successors /// } /// /// WIDEN-INDUCTION will get expanded to: /// /// vector.ph: -/// vp<%0> = WIDEN-INDUCTION-START ir<0>, ir<1> +/// ... +/// vp<%induction> = ... +/// vp<%inc> = ... +/// /// Successor(s): vector loop /// /// vector loop: { /// vector.body: -/// ir<%i> = WIDEN-INDUCTION-PHI vp<%0>, vp<%4> +/// ir<%i> = WIDEN-INDUCTION-PHI vp<%induction>, vp<%vec.ind.next> /// ... -/// vp<%4> = WIDEN-INDUCTION-INC ir<1>, ir<%5>, ir<%i> -/// EMIT branch-on-count vp<%index.next>, ir<%n.vec> +/// vp<%vec.ind.next> = add ir<%i>, vp<%inc> +/// EMIT branch-on-count ... /// No successors /// } static void -expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR) { +expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR, + VPTypeAnalysis &TypeInfo) { VPlan *Plan = WidenIVR->getParent()->getPlan(); VPValue *Start = WidenIVR->getStartValue(); VPValue *Step = WidenIVR->getStepValue(); VPValue *VF = WidenIVR->getVFValue(); - const InductionDescriptor &ID = WidenIVR->getInductionDescriptor(); TruncInst *Trunc = WidenIVR->getTruncInst(); DebugLoc DL = WidenIVR->getDebugLoc(); // The value from the original loop to which we are mapping the new induction // variable. Instruction *IV = Trunc ? 
cast(Trunc) : WidenIVR->getPHINode(); + Type *Ty = IV->getType(); + + const InductionDescriptor &ID = WidenIVR->getInductionDescriptor(); + Instruction::BinaryOps AddOp; + Instruction::BinaryOps MulOp; + std::optional FMFs; + if (ID.getKind() == InductionDescriptor::IK_IntInduction) { + AddOp = Instruction::Add; + MulOp = Instruction::Mul; + } else { + AddOp = ID.getInductionOpcode(); + MulOp = Instruction::FMul; + FMFs = ID.getInductionBinOp()->getFastMathFlags(); + } // If the phi is truncated, truncate the start and step values. VPBuilder Builder(Plan->getVectorPreheader()); if (isa(IV)) { assert(Start->getUnderlyingValue()->getType()->isIntegerTy() && "Truncation requires an integer type"); - auto *TruncType = cast(IV->getType()); - Step = Builder.createScalarCast(Instruction::Trunc, Step, TruncType, DL); - Start = Builder.createScalarCast(Instruction::Trunc, Start, TruncType, DL); + Step = Builder.createScalarCast(Instruction::Trunc, Step, Ty, DL); + Start = Builder.createScalarCast(Instruction::Trunc, Start, Ty, DL); } // Construct the initial value of the vector IV in the vector loop preheader. - auto *StartR = new VPWidenIntOrFpInductionInitialRecipe(IV, Start, Step, ID); - Plan->getVectorPreheader()->insert(StartR, Builder.getInsertPoint()); + Type *IVIntTy = IntegerType::get(IV->getContext(), Ty->getScalarSizeInBits()); + VPValue *Init = Builder.createStepVector(IVIntTy); + if (Ty->isFloatingPointTy()) + Init = Builder.createWidenCast(Instruction::UIToFP, Init, Ty); + + // FIXME: The newly created binary instructions should contain nsw/nuw + // flags, which can be found from the original scalar operations. + Init = Builder.createNaryOp(MulOp, {Init, Builder.createSplat(Step)}, FMFs); + Init = Builder.createNaryOp(AddOp, {Builder.createSplat(Start), Init}, FMFs, + {}, "induction"); // Create the widened phi of the vector IV. 
- auto *PhiR = new VPWidenIntOrFpInductionPHIRecipe(IV, StartR); - PhiR->insertBefore(WidenIVR); + auto *WidePHI = new VPWidenIntOrFpInductionPHIRecipe(IV, Init); + WidePHI->insertBefore(WidenIVR); // Create the backedge value for the vector IV. - VPValue *Prev = PhiR; - // If unrolled, use the last unrolled part in the increment. - if (auto *UnrolledPart = WidenIVR->getLastUnrolledPartOperand()) - Prev = UnrolledPart; - auto *IncR = new VPWidenIntOrFpInductionBackedgeRecipe( - IV, Step, VF, Prev, WidenIVR->getSplatVFValue(), ID); + VPValue *Inc; + VPValue *Prev; + // If unrolled, use the increment and prev value from the operands. + if (WidenIVR->getNumOperands() == 5) { + Inc = WidenIVR->getSplatVFValue(); + Prev = WidenIVR->getLastUnrolledPartOperand(); + assert(Inc && Prev); + } else { + unsigned VFTySize = TypeInfo.inferScalarType(VF)->getScalarSizeInBits(); + + // Multiply the vectorization factor by the step using integer or + // floating-point arithmetic as appropriate. + if (Ty->isFloatingPointTy()) + VF = Builder.createScalarCast(Instruction::CastOps::UIToFP, VF, Ty, DL); + else if (VFTySize < Ty->getScalarSizeInBits()) + VF = Builder.createScalarCast(Instruction::CastOps::ZExt, VF, Ty, DL); + else if (VFTySize > Ty->getScalarSizeInBits()) + VF = Builder.createScalarCast(Instruction::CastOps::Trunc, VF, Ty, DL); + + Inc = Builder.createSplat(Builder.createNaryOp(MulOp, {Step, VF}, FMFs)); + Prev = WidePHI; + } + VPBasicBlock *ExitingBB = Plan->getVectorLoopRegion()->getExitingBasicBlock(); - ExitingBB->insert(IncR, ExitingBB->getTerminator()->getIterator()); - PhiR->addOperand(IncR); + Builder.setInsertPoint(ExitingBB, ExitingBB->getTerminator()->getIterator()); + auto *Next = Builder.createNaryOp(AddOp, {Prev, Inc}, FMFs, IV->getDebugLoc(), + "vec.ind.next"); + + WidePHI->addOperand(Next); - WidenIVR->replaceAllUsesWith(PhiR); + WidenIVR->replaceAllUsesWith(WidePHI); WidenIVR->eraseFromParent(); } -void VPlanTransforms::convertToConcreteRecipes(VPlan 
&Plan) { +void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan, + Type *CanonicalIVTy) { + VPTypeAnalysis TypeInfo(CanonicalIVTy); + for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly( vp_depth_first_deep(Plan.getEntry()))) { for (VPRecipeBase &R : make_early_inc_range(VPBB->phis())) { if (auto *WidenIVR = dyn_cast(&R)) { - expandVPWidenIntOrFpInduction(WidenIVR); + expandVPWidenIntOrFpInduction(WidenIVR, TypeInfo); continue; } if (!isa(&R)) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index 3dd476a8526d6..f77a3e1e370dc 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -161,7 +161,7 @@ struct VPlanTransforms { VPRecipeBuilder &RecipeBuilder); /// Lower abstract recipes to concrete ones, that can be codegen'd. - static void convertToConcreteRecipes(VPlan &Plan); + static void convertToConcreteRecipes(VPlan &Plan, Type *CanonicalIVTy); /// Perform instcombine-like simplifications on recipes in \p Plan. Use \p /// CanonicalIVTy as type for all un-typed live-ins in VPTypeAnalysis. diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index 8e5148b1616bd..622ebf211e8e6 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -354,8 +354,8 @@ class VPDef { VPWidenSelectSC, VPBlendSC, VPHistogramSC, - VPWidenIntOrFpInductionStartSC, - VPWidenIntOrFpInductionIncSC, + VPSplatSC, + VPStepVectorSC, // START: Phi-like recipes. Need to be kept together. 
VPWidenPHISC, VPPredInstPHISC, diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll index 44148aac54407..8a372d134eebb 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll @@ -20,11 +20,11 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1 ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP7:%.*]] = mul [[TMP8]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[VAL]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[VAL]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -106,11 +106,11 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP7:%.*]] = mul [[TMP8]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[VAL]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = 
insertelement poison, i64 [[TMP12]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[VAL]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll index dfcb1e561551f..96283637b34bf 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll @@ -127,11 +127,11 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i ; CHECK-NEXT: [[TMP15:%.*]] = call @llvm.stepvector.nxv2i64() ; CHECK-NEXT: [[TMP17:%.*]] = mul [[TMP15]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP17]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[M]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP20]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[M]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -251,10 +251,10 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) { ; 
CHECK-NEXT: [[TMP15:%.*]] = call @llvm.stepvector.nxv2i64() ; CHECK-NEXT: [[TMP17:%.*]] = mul [[TMP15]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP17]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[MUL_2_I]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP18:%.*]] = mul i64 1, [[TMP9]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i64 [[TMP18]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP18]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i64 [[MUL_2_I]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: @@ -262,7 +262,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) { ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP23:%.*]] = udiv [[VEC_IND]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP23:%.*]] = udiv [[VEC_IND]], [[BROADCAST_SPLAT4]] ; CHECK-NEXT: [[TMP24:%.*]] = urem i64 [[TMP21]], [[MUL_2_I]] ; CHECK-NEXT: [[TMP25:%.*]] = udiv i64 [[TMP24]], [[MUL_1_I]] ; CHECK-NEXT: [[TMP26:%.*]] = urem i64 [[TMP24]], [[MUL_1_I]] @@ -283,7 +283,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) { ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP14]]) ; CHECK-NEXT: [[TMP47:%.*]] = xor 
[[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT4]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP48:%.*]] = extractelement [[TMP47]], i32 0 ; CHECK-NEXT: br i1 [[TMP48]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll index a83c62b04afc7..4074210a95b59 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll @@ -26,10 +26,10 @@ define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0 ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP6]], 2 ; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[IDX]], [[N_VEC]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[IDX]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv2i32() ; CHECK-NEXT: [[TMP10:%.*]] = mul [[TMP8]], splat (i32 1) +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[IDX]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add [[DOTSPLAT]], [[TMP10]] ; CHECK-NEXT: [[TMP13:%.*]] = mul i32 1, [[TMP7]] ; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP13]], i64 0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll index 19b1e869a914a..08146a737eb1b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll +++ 
b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll @@ -108,13 +108,13 @@ define void @test_array_load2_i16_store2(i32 %C, i32 %D) #1 { ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP3:%.*]] = shl [[TMP2]], splat (i64 1) +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP0]], 3 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP4]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[C:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, i32 [[D:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector [[BROADCAST_SPLATINSERT2]], poison, zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP0]], 3 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP4]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -197,13 +197,13 @@ define void @test_array_load2_store2_i16(i32 noundef %C, i32 noundef %D) #1 { ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP3:%.*]] = shl [[TMP2]], splat (i64 1) +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP0]], 3 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP4]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[C:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = 
shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[D:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP0]], 3 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP4]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -637,16 +637,16 @@ define void @load_gap_reverse(ptr noalias nocapture readonly %P1, ptr noalias no ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[INDUCTION:%.*]] = sub splat (i64 1023), [[TMP2]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[DOTNEG:%.*]], i64 0 +; CHECK-NEXT: [[DOTNEG:%.*]] = sub nsw i64 0, [[TMP1]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[DOTNEG]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[X:%.*]] = sub nsw i64 0, [[TMP1]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[X]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[X:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add nsw [[DOTSPLAT]], [[VEC_IND]] +; CHECK-NEXT: [[TMP4:%.*]] = add nsw [[BROADCAST_SPLAT]], 
[[VEC_IND]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P1:%.*]], [[VEC_IND]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P2:%.*]], [[VEC_IND]], i32 1 ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i64.nxv4p0( [[TMP6]], i32 8, splat (i1 true), poison) @@ -654,7 +654,7 @@ define void @load_gap_reverse(ptr noalias nocapture readonly %P1, ptr noalias no ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i64.nxv4p0( [[TMP4]], [[TMP5]], i32 8, splat (i1 true)) ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i64.nxv4p0( [[TMP7]], [[TMP6]], i32 8, splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: @@ -885,10 +885,10 @@ define void @PR27626_0(ptr %p, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2 ; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.stepvector.nxv4i64() -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[Z:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[Z:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ 
[[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1063,10 +1063,10 @@ define void @PR27626_2(ptr %p, i64 %n, i32 %z) #1 { ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2 ; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.stepvector.nxv4i64() -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[Z:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[Z:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1253,15 +1253,15 @@ define void @PR27626_4(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1 ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP9:%.*]] = shl [[TMP8]], splat (i64 1) +; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP6]], 3 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[X:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[Y:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i32 [[Z:%.*]], i64 0 
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP6]], 3 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1346,15 +1346,15 @@ define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP10:%.*]] = shl [[TMP9]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add [[TMP10]], splat (i64 3) +; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i64 [[TMP7]], 3 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[X:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[Y:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i32 [[Z:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i64 [[TMP7]], 3 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] 
] @@ -1454,13 +1454,13 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 { ; CHECK-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1 ; CHECK-NEXT: [[TMP14:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP15:%.*]] = shl [[TMP14]], splat (i64 1) +; CHECK-NEXT: [[TMP16:%.*]] = shl nuw nsw i64 [[TMP9]], 3 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP16]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i32 [[TMP11]], 2 ; CHECK-NEXT: [[TMP13:%.*]] = add nsw i32 [[TMP12]], -1 ; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i16 [[DOTPRE]], i32 [[TMP13]] -; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i64 [[TMP9]], 3 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP17]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll index 94e57c7ca023f..a2da67c2cbb81 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll @@ -38,10 +38,10 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() ; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 4 ; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv16i32() -; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 -; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = 
shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 ; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 +; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALAR_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALAR_TAIL_FOLDING: vector.body: ; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -114,10 +114,10 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.usub.sat.i32(i32 1024, i32 [[TMP3]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 1024) ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv16i32() -; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 -; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP1]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 +; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; PREDICATED_TAIL_FOLDING: vector.body: ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ 
-222,10 +222,10 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() ; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 4 ; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv16i32() -; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 -; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 ; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 +; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALAR_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALAR_TAIL_FOLDING: vector.body: ; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -283,10 +283,10 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.usub.sat.i32(i32 1024, i32 [[TMP3]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 1024) ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv16i32() -; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 -; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP1]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, 
zeroinitializer +; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 +; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; PREDICATED_TAIL_FOLDING: vector.body: ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -380,12 +380,12 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() ; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 4 ; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv16i32() +; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 +; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[CONV3]], i64 0 ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer -; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 -; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; SCALAR_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALAR_TAIL_FOLDING: vector.body: ; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -450,12 +450,12 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 
@llvm.usub.sat.i32(i32 1024, i32 [[TMP3]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 1024) ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv16i32() +; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP1]], i64 0 +; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[CONV3]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer -; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP1]], i64 0 -; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; PREDICATED_TAIL_FOLDING: vector.body: ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -567,10 +567,10 @@ define dso_local void @masked_strided_factor4(ptr noalias nocapture readonly %p, ; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() ; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 4 ; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv16i32() -; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 -; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 ; 
SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 +; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALAR_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALAR_TAIL_FOLDING: vector.body: ; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -673,10 +673,10 @@ define dso_local void @masked_strided_factor4(ptr noalias nocapture readonly %p, ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.usub.sat.i32(i32 1024, i32 [[TMP3]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 1024) ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv16i32() -; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 -; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP1]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 +; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; PREDICATED_TAIL_FOLDING: vector.body: ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll index 
6e6944b904ae0..109de3e14da0b 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll @@ -28,13 +28,13 @@ define void @block_with_dead_inst_1(ptr %src, i64 %N) #0 { ; CHECK-NEXT: [[TMP11:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP13:%.*]] = mul [[TMP11]], splat (i64 3) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = mul i64 3, [[TMP10]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP14]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[TMP17]], 8 ; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1 ; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i16 1, i32 [[TMP19]] -; CHECK-NEXT: [[TMP26:%.*]] = mul i64 3, [[TMP10]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP26]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -122,13 +122,13 @@ define void @block_with_dead_inst_2(ptr %src) #0 { ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP10:%.*]] = mul [[TMP8]], splat (i64 3) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = mul i64 3, [[TMP7]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 4 ; CHECK-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], 1 ; CHECK-NEXT: 
[[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i16 0, i32 [[TMP16]] -; CHECK-NEXT: [[TMP22:%.*]] = mul i64 3, [[TMP7]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP22]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -216,13 +216,13 @@ define void @multiple_blocks_with_dead_insts_3(ptr %src) #0 { ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP10:%.*]] = mul [[TMP8]], splat (i64 3) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = mul i64 3, [[TMP7]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 4 ; CHECK-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], 1 ; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i16 0, i32 [[TMP16]] -; CHECK-NEXT: [[TMP22:%.*]] = mul i64 3, [[TMP7]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP22]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -320,13 +320,13 @@ define void @multiple_blocks_with_dead_insts_4(ptr %src, i64 %N) #0 { ; CHECK-NEXT: [[TMP11:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP13:%.*]] = mul [[TMP11]], splat (i64 3) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = mul i64 3, [[TMP10]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 
[[TMP14]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[TMP17]], 8 ; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1 ; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i16 1, i32 [[TMP19]] -; CHECK-NEXT: [[TMP25:%.*]] = mul i64 3, [[TMP10]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP25]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -426,13 +426,13 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_5(ptr %src) #0 { ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP10:%.*]] = mul [[TMP8]], splat (i64 3) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = mul i64 3, [[TMP7]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 4 ; CHECK-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], 1 ; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i16 1, i32 [[TMP16]] -; CHECK-NEXT: [[TMP22:%.*]] = mul i64 3, [[TMP7]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP22]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -540,13 +540,13 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_6(ptr 
%src, i1 % ; CHECK-NEXT: [[TMP11:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP13:%.*]] = mul [[TMP11]], splat (i64 3) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = mul i64 3, [[TMP10]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP14]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[TMP17]], 8 ; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1 ; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i16 1, i32 [[TMP19]] -; CHECK-NEXT: [[TMP25:%.*]] = mul i64 3, [[TMP10]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP25]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll index 3839b367ae08c..8475004091f3f 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll @@ -33,10 +33,10 @@ define void @dead_load(ptr %p, i16 %start) { ; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 8 ; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[N_VEC]], 3 ; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[START_EXT]], [[TMP18]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[START_EXT]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP15:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP17:%.*]] = mul [[TMP15]], splat (i64 3) +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[START_EXT]], i64 0 +; 
CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add [[DOTSPLAT]], [[TMP17]] ; CHECK-NEXT: [[TMP20:%.*]] = mul i64 3, [[TMP14]] ; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement poison, i64 [[TMP20]], i64 0 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll index 2582882baba00..396b54995fe2a 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll @@ -70,10 +70,10 @@ define void @skip_free_iv_truncate(i16 %x, ptr %A) #0 { ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32 ; CHECK-NEXT: [[TMP50:%.*]] = mul i32 [[DOTCAST]], 3 ; CHECK-NEXT: [[IND_END22:%.*]] = add i32 [[X_I32]], [[TMP50]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[X_I64]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP53:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP55:%.*]] = mul [[TMP53]], splat (i64 3) +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[X_I64]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add [[DOTSPLAT]], [[TMP55]] ; CHECK-NEXT: [[TMP58:%.*]] = mul i64 3, [[TMP52]] ; CHECK-NEXT: [[DOTSPLATINSERT24:%.*]] = insertelement poison, i64 [[TMP58]], i64 0 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll index 7baba88061cd4..c00868c3e0be2 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll @@ -25,11 +25,11 @@ define void @test(ptr noalias nocapture %a, ptr noalias nocapture %b, i32 %v) { ; VLENUNK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i64() ; 
VLENUNK-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i64 1) ; VLENUNK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] -; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[V:%.*]], i64 0 -; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLENUNK-NEXT: [[TMP11:%.*]] = mul i64 1, [[TMP5]] ; VLENUNK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 ; VLENUNK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[V:%.*]], i64 0 +; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]] ; VLENUNK: vector.body: ; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll index 44af6c77a573c..ae6e833da01cf 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll @@ -23,15 +23,15 @@ define void @pr87378_vpinstruction_or_drop_poison_generating_flags(ptr %arg, i64 ; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[A]], i64 0 ; CHECK-NEXT: 
[[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[B]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i64 [[C]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll index 9602d4cbf5df3..6a5a0eb3e54b0 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll @@ -560,23 +560,23 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-NEXT: [[TMP12:%.*]] = call @llvm.stepvector.nxv4i64() ; STRIDED-NEXT: [[TMP14:%.*]] = mul [[TMP12]], splat (i64 1) ; STRIDED-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP14]] -; STRIDED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[STRIDE]], i64 0 -; STRIDED-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; STRIDED-NEXT: [[TMP15:%.*]] = mul i64 1, [[TMP11]] -; STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP15]], i64 0 +; STRIDED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP15]], i64 0 +; STRIDED-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; STRIDED-NEXT: 
[[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[STRIDE]], i64 0 ; STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; STRIDED: vector.body: ; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; STRIDED-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; STRIDED-NEXT: [[TMP18:%.*]] = mul nuw nsw [[VEC_IND]], [[DOTSPLAT]] +; STRIDED-NEXT: [[TMP18:%.*]] = mul nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT]] ; STRIDED-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[P]], [[TMP18]] ; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP19]], i32 4, splat (i1 true), poison), !alias.scope [[META8:![0-9]+]] ; STRIDED-NEXT: [[TMP20:%.*]] = add [[WIDE_MASKED_GATHER]], splat (i32 1) ; STRIDED-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[P2]], [[TMP18]] ; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP20]], [[TMP21]], i32 4, splat (i1 true)), !alias.scope [[META11:![0-9]+]], !noalias [[META8]] ; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] -; STRIDED-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] +; STRIDED-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; STRIDED-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; STRIDED-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; STRIDED: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll index e3f0700cf352e..219e1fe05ebf7 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll @@ -325,11 +325,11 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; 
SCALABLE-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i64() ; SCALABLE-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i64 1) ; SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] -; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B:%.*]], i64 0 -; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]] ; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 ; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B:%.*]], i64 0 +; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -432,11 +432,11 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv2i64() ; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul [[TMP5]], splat (i64 1) ; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] -; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B:%.*]], i64 0 -; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP4]] ; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 ; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B:%.*]], i64 0 +; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; TF-SCALABLE-NEXT: br label 
[[VECTOR_BODY:%.*]] ; TF-SCALABLE: vector.body: ; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -995,12 +995,12 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv2i64() ; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul [[TMP5]], splat (i64 1) ; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] +; TF-SCALABLE-NEXT: [[V:%.*]] = mul i64 1, [[TMP4]] +; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[V]], i64 0 +; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B:%.*]], i64 0 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer -; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP4]] -; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 +; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8:%.*]], i64 0 ; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; TF-SCALABLE: vector.body: @@ -1011,9 +1011,9 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; TF-SCALABLE-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[VEC_IND]], [[BROADCAST_SPLAT]], i32 8, [[ACTIVE_LANE_MASK]]) ; TF-SCALABLE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP9]] ; TF-SCALABLE-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 0 -; TF-SCALABLE-NEXT: call void 
@llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT2]], ptr [[TMP11]], i32 8, [[ACTIVE_LANE_MASK]]) +; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[DOTSPLAT]], ptr [[TMP11]], i32 8, [[ACTIVE_LANE_MASK]]) ; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] -; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] ; TF-SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TF-SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; TF-SCALABLE: middle.block: @@ -1025,7 +1025,7 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] ; TF-SCALABLE-NEXT: store i64 [[IV]], ptr [[B]], align 8 ; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 +; TF-SCALABLE-NEXT: store i64 [[TMP8]], ptr [[ARRAYIDX]], align 8 ; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 ; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] @@ -1125,25 +1125,25 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; SCALABLE-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i64() ; SCALABLE-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i64 1) ; SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] -; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; SCALABLE-NEXT: [[V:%.*]] = mul i64 1, [[TMP5]] +; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector 
[[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9:%.*]], i64 0 +; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, ptr [[B:%.*]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer -; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]] -; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 -; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 ; SCALABLE-NEXT: [[TMP11:%.*]] = icmp ugt [[VEC_IND]], splat (i64 10) -; SCALABLE-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]], i32 8, [[TMP11]]) +; SCALABLE-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[DOTSPLAT]], [[BROADCAST_SPLAT2]], i32 8, [[TMP11]]) ; SCALABLE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP10]] ; SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0 -; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP13]], align 8 +; SCALABLE-NEXT: store [[DOTSPLAT]], ptr [[TMP13]], align 8 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], 
!llvm.loop [[LOOP14:![0-9]+]] ; SCALABLE: middle.block: @@ -1157,11 +1157,11 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; SCALABLE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[IV]], 10 ; SCALABLE-NEXT: br i1 [[CMP]], label [[DO_STORE:%.*]], label [[LATCH]] ; SCALABLE: do_store: -; SCALABLE-NEXT: store i64 [[V]], ptr [[B]], align 8 +; SCALABLE-NEXT: store i64 [[TMP9]], ptr [[B]], align 8 ; SCALABLE-NEXT: br label [[LATCH]] ; SCALABLE: latch: ; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 +; SCALABLE-NEXT: store i64 [[TMP9]], ptr [[ARRAYIDX]], align 8 ; SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 ; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] @@ -1231,13 +1231,13 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv2i64() ; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul [[TMP5]], splat (i64 1) ; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] -; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; TF-SCALABLE-NEXT: [[V:%.*]] = mul i64 1, [[TMP4]] +; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V]], i64 0 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8:%.*]], i64 0 +; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, ptr [[B:%.*]], i64 0 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer -; 
TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP4]] -; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 -; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; TF-SCALABLE: vector.body: ; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1246,12 +1246,12 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP9]], i64 1025) ; TF-SCALABLE-NEXT: [[TMP10:%.*]] = icmp ugt [[VEC_IND]], splat (i64 10) ; TF-SCALABLE-NEXT: [[TMP11:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP10]], zeroinitializer -; TF-SCALABLE-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]], i32 8, [[TMP11]]) +; TF-SCALABLE-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[DOTSPLAT]], [[BROADCAST_SPLAT2]], i32 8, [[TMP11]]) ; TF-SCALABLE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP9]] ; TF-SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0 -; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP13]], i32 8, [[ACTIVE_LANE_MASK]]) +; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[DOTSPLAT]], ptr [[TMP13]], i32 8, [[ACTIVE_LANE_MASK]]) ; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] -; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; TF-SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TF-SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; TF-SCALABLE: middle.block: @@ -1264,11 +1264,11 @@ define void @conditional_uniform_store(ptr noalias 
nocapture %a, ptr noalias noc ; TF-SCALABLE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[IV]], 10 ; TF-SCALABLE-NEXT: br i1 [[CMP]], label [[DO_STORE:%.*]], label [[LATCH]] ; TF-SCALABLE: do_store: -; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[B]], align 8 +; TF-SCALABLE-NEXT: store i64 [[TMP8]], ptr [[B]], align 8 ; TF-SCALABLE-NEXT: br label [[LATCH]] ; TF-SCALABLE: latch: ; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 +; TF-SCALABLE-NEXT: store i64 [[TMP8]], ptr [[ARRAYIDX]], align 8 ; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 ; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll index 7045b4e03ed8c..8893956bb3384 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll @@ -604,11 +604,11 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = call @llvm.stepvector.nxv4i32() ; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul [[TMP12]], splat (i32 1) ; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP14]] -; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement zeroinitializer, i32 [[START]], i32 0 ; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP10]] to i32 ; NO-VP-OUTLOOP-NEXT: [[TMP17:%.*]] = mul i32 1, [[TMP16]] ; NO-VP-OUTLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP17]], i64 0 ; NO-VP-OUTLOOP-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = 
insertelement zeroinitializer, i32 [[START]], i32 0 ; NO-VP-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP-OUTLOOP: vector.body: ; NO-VP-OUTLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -796,11 +796,11 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = call @llvm.stepvector.nxv4i32() ; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul [[TMP12]], splat (i32 1) ; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP14]] -; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement zeroinitializer, i32 [[START]], i32 0 ; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP10]] to i32 ; NO-VP-OUTLOOP-NEXT: [[TMP17:%.*]] = mul i32 1, [[TMP16]] ; NO-VP-OUTLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP17]], i64 0 ; NO-VP-OUTLOOP-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement zeroinitializer, i32 [[START]], i32 0 ; NO-VP-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP-OUTLOOP: vector.body: ; NO-VP-OUTLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll index 3d47d8217b5f5..5c1cb1da120c0 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll @@ -197,28 +197,28 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l, ; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[L]], [[N_MOD_VF4]] ; CHECK-NEXT: [[DOTCAST7:%.*]] = trunc i64 [[N_VEC5]] to i16 ; CHECK-NEXT: [[IND_END8:%.*]] = mul i16 [[DOTCAST7]], [[TMP0]] -; CHECK-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement <8 x i16> poison, i16 [[BC_RESUME_VAL]], i64 0 +; CHECK-NEXT: 
[[DOTSPLATINSERT13:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i64 0 ; CHECK-NEXT: [[DOTSPLAT14:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT13]], <8 x i16> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[DOTSPLATINSERT15:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i64 0 +; CHECK-NEXT: [[TMP14:%.*]] = mul <8 x i16> , [[DOTSPLAT14]] +; CHECK-NEXT: [[DOTSPLATINSERT15:%.*]] = insertelement <8 x i16> poison, i16 [[BC_RESUME_VAL]], i64 0 ; CHECK-NEXT: [[DOTSPLAT16:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT15]], <8 x i16> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP14:%.*]] = mul <8 x i16> , [[DOTSPLAT16]] -; CHECK-NEXT: [[INDUCTION17:%.*]] = add <8 x i16> [[DOTSPLAT14]], [[TMP14]] -; CHECK-NEXT: [[DOTSPLATINSERT18:%.*]] = insertelement <8 x i16> poison, i16 [[OFF]], i64 0 -; CHECK-NEXT: [[DOTSPLAT19:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT18]], <8 x i16> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[INDUCTION17:%.*]] = add <8 x i16> [[DOTSPLAT16]], [[TMP14]] ; CHECK-NEXT: [[TMP15:%.*]] = mul i16 [[TMP0]], 8 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT22:%.*]] = insertelement <8 x i16> poison, i16 [[TMP15]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT18:%.*]] = insertelement <8 x i16> poison, i16 [[TMP15]], i64 0 +; CHECK-NEXT: [[DOTSPLAT19:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT18]], <8 x i16> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT22:%.*]] = insertelement <8 x i16> poison, i16 [[OFF]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT23:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT22]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX12:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT24:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND20:%.*]] = phi <8 x i16> [ [[INDUCTION17]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT21:%.*]], [[VEC_EPILOG_VECTOR_BODY]] 
] ; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX12]], 0 -; CHECK-NEXT: [[TMP17:%.*]] = sub <8 x i16> [[VEC_IND20]], [[DOTSPLAT19]] +; CHECK-NEXT: [[TMP17:%.*]] = sub <8 x i16> [[VEC_IND20]], [[BROADCAST_SPLAT23]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[K]], i64 [[TMP16]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i16, ptr [[TMP18]], i32 0 ; CHECK-NEXT: store <8 x i16> [[TMP17]], ptr [[TMP19]], align 2 ; CHECK-NEXT: [[INDEX_NEXT24]] = add nuw i64 [[INDEX12]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT21]] = add <8 x i16> [[VEC_IND20]], [[BROADCAST_SPLAT23]] +; CHECK-NEXT: [[VEC_IND_NEXT21]] = add <8 x i16> [[VEC_IND20]], [[DOTSPLAT19]] ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT24]], [[N_VEC5]] ; CHECK-NEXT: br i1 [[TMP20]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: vec.epilog.middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll index 769d1e3569cda..45d1385fd6fa9 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll @@ -713,11 +713,11 @@ define void @wombat(i32 %arg, ptr %dst) #1 { ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = mul i32 56, [[ARG]] ; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[MUL]], [[TMP0]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[MUL]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[ARG]], i64 0 ; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = mul <8 x i32> , [[DOTSPLAT2]] +; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> poison, i32 [[MUL]], i64 0 +; CHECK-NEXT: 
[[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP1]] ; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[ARG]], 8 ; CHECK-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <8 x i32> poison, i32 [[TMP2]], i64 0 @@ -787,11 +787,11 @@ define void @wombat2(i32 %arg, ptr %dst) #1 { ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = mul i32 56, [[ARG]] ; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[MUL]], [[TMP0]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[MUL]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[ARG]], i64 0 ; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = mul <8 x i32> , [[DOTSPLAT2]] +; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> poison, i32 [[MUL]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP1]] ; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[ARG]], 8 ; CHECK-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <8 x i32> poison, i32 [[TMP2]], i64 0 @@ -864,11 +864,11 @@ define void @with_dead_use(i32 %arg, ptr %dst) #1 { ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = mul i32 56, [[ARG]] ; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[MUL]], [[TMP0]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[MUL]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[ARG]], i64 0 ; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <8 x i32> 
[[DOTSPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = mul <8 x i32> , [[DOTSPLAT2]] +; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> poison, i32 [[MUL]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP1]] ; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[ARG]], 8 ; CHECK-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <8 x i32> poison, i32 [[TMP2]], i64 0 diff --git a/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll b/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll index e10b801844985..c745b4f74786c 100644 --- a/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll +++ b/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll @@ -50,10 +50,10 @@ define void @test(i32 %arg, i32 %L1.limit, i32 %L2.switch, i1 %c, ptr %dst) { ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> , [[DOTSPLAT]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> splat (i32 1), [[TMP4]] -; CHECK-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <4 x i32> poison, i32 [[L1_EXIT_VAL]], i64 0 -; CHECK-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT3]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[INDUCTION_IV_LCSSA1]], 4 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i64 0 +; CHECK-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT3]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[L1_EXIT_VAL]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> 
[[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -61,13 +61,13 @@ define void @test(i32 %arg, i32 %L1.limit, i32 %L2.switch, i1 %c, ptr %dst) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP7:%.*]] = sub <4 x i32> [[VEC_IND]], [[DOTSPLAT4]] +; CHECK-NEXT: [[TMP7:%.*]] = sub <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP8:%.*]] = sext <4 x i32> [[TMP7]] to <4 x i64> ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST:%.*]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP9]], i32 0 ; CHECK-NEXT: store <4 x i64> [[TMP8]], ptr [[TMP10]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], [[DOTSPLAT4]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 12 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll index b94520ce5abfe..9aff603ef5fce 100644 --- a/llvm/test/Transforms/LoopVectorize/float-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll @@ -32,12 +32,12 @@ define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N) ; VEC4_INTERL1-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]] ; VEC4_INTERL1-NEXT: [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP1]] -; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x 
float> poison, float [[INIT]], i64 0 +; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0 ; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer -; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0 +; VEC4_INTERL1-NEXT: [[TMP6:%.*]] = fmul fast <4 x float> [[DOTSPLAT]], +; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 ; VEC4_INTERL1-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer -; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = fmul fast <4 x float> [[DOTSPLAT3]], -; VEC4_INTERL1-NEXT: [[INDUCTION:%.*]] = fsub fast <4 x float> [[DOTSPLAT]], [[TMP2]] +; VEC4_INTERL1-NEXT: [[INDUCTION:%.*]] = fsub fast <4 x float> [[DOTSPLAT3]], [[TMP6]] ; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], 4.000000e+00 ; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i64 0 ; VEC4_INTERL1-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT4]], <4 x float> poison, <4 x i32> zeroinitializer @@ -90,12 +90,12 @@ define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N) ; VEC4_INTERL2-NEXT: [[FPINC_INS:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0 ; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[FPINC_INS]], ; VEC4_INTERL2-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <4 x i32> zeroinitializer -; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 +; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0 ; VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer -; 
VEC4_INTERL2-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0 +; VEC4_INTERL2-NEXT: [[TMP7:%.*]] = fmul fast <4 x float> [[DOTSPLAT]], +; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 ; VEC4_INTERL2-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer -; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = fmul fast <4 x float> [[DOTSPLAT3]], -; VEC4_INTERL2-NEXT: [[INDUCTION:%.*]] = fsub fast <4 x float> [[DOTSPLAT]], [[TMP2]] +; VEC4_INTERL2-NEXT: [[INDUCTION:%.*]] = fsub fast <4 x float> [[DOTSPLAT3]], [[TMP7]] ; VEC4_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC4_INTERL2: vector.body: ; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -196,12 +196,12 @@ define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N) ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP1]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[FPINC]], i64 0 ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[FPINC]], i64 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP6:%.*]] = fmul fast <2 x float> [[DOTSPLAT]], +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT2]], <2 
x float> poison, <2 x i32> zeroinitializer -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> [[DOTSPLAT3]], -; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fsub fast <2 x float> [[DOTSPLAT]], [[TMP2]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fsub fast <2 x float> [[DOTSPLAT3]], [[TMP6]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], 2.000000e+00 ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0 ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT4]], <2 x float> poison, <2 x i32> zeroinitializer @@ -279,12 +279,12 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32 ; VEC4_INTERL1-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = fmul reassoc float [[FPINC]], [[DOTCAST]] ; VEC4_INTERL1-NEXT: [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP1]] -; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 +; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0 ; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer -; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0 +; VEC4_INTERL1-NEXT: [[TMP6:%.*]] = fmul reassoc <4 x float> [[DOTSPLAT]], +; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 ; VEC4_INTERL1-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer -; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = fmul reassoc <4 x float> [[DOTSPLAT3]], -; VEC4_INTERL1-NEXT: [[INDUCTION:%.*]] = fsub reassoc <4 x float> [[DOTSPLAT]], [[TMP2]] +; VEC4_INTERL1-NEXT: [[INDUCTION:%.*]] = fsub reassoc <4 x float> 
[[DOTSPLAT3]], [[TMP6]] ; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = fmul reassoc float [[FPINC]], 4.000000e+00 ; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i64 0 ; VEC4_INTERL1-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT4]], <4 x float> poison, <4 x i32> zeroinitializer @@ -337,12 +337,12 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32 ; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0 ; VEC4_INTERL2-NEXT: [[MUL:%.*]] = fmul reassoc <4 x float> [[DOTSPLATINSERT2]], ; VEC4_INTERL2-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[MUL]], <4 x float> poison, <4 x i32> zeroinitializer -; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 +; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0 ; VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer -; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0 +; VEC4_INTERL2-NEXT: [[TMP7:%.*]] = fmul reassoc <4 x float> [[DOTSPLAT]], +; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 ; VEC4_INTERL2-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT1]], <4 x float> poison, <4 x i32> zeroinitializer -; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = fmul reassoc <4 x float> [[DOTSPLAT3]], -; VEC4_INTERL2-NEXT: [[INDUCTION:%.*]] = fsub reassoc <4 x float> [[DOTSPLAT]], [[TMP2]] +; VEC4_INTERL2-NEXT: [[INDUCTION:%.*]] = fsub reassoc <4 x float> [[DOTSPLAT3]], [[TMP7]] ; VEC4_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC4_INTERL2: vector.body: ; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -445,12 +445,12 @@ define void 
@fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32 ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = fmul reassoc float [[FPINC]], [[DOTCAST]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP1]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[FPINC]], i64 0 ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[FPINC]], i64 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP6:%.*]] = fmul reassoc <2 x float> [[DOTSPLAT]], +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT2]], <2 x float> poison, <2 x i32> zeroinitializer -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = fmul reassoc <2 x float> [[DOTSPLAT3]], -; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fsub reassoc <2 x float> [[DOTSPLAT]], [[TMP2]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fsub reassoc <2 x float> [[DOTSPLAT3]], [[TMP6]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul reassoc float [[FPINC]], 2.000000e+00 ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0 ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT4]], <2 x float> poison, <2 x i32> zeroinitializer @@ -764,16 +764,16 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca ; VEC4_INTERL1-NEXT: [[DOTCAST2:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; 
VEC4_INTERL1-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP0]], [[DOTCAST2]] ; VEC4_INTERL1-NEXT: [[IND_END3:%.*]] = fadd fast float [[INIT:%.*]], [[TMP3]] -; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 +; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 ; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer -; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 +; VEC4_INTERL1-NEXT: [[TMP13:%.*]] = fmul fast <4 x float> [[DOTSPLAT]], +; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 ; VEC4_INTERL1-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT5]], <4 x float> poison, <4 x i32> zeroinitializer -; VEC4_INTERL1-NEXT: [[TMP4:%.*]] = fmul fast <4 x float> [[DOTSPLAT6]], -; VEC4_INTERL1-NEXT: [[INDUCTION:%.*]] = fadd fast <4 x float> [[DOTSPLAT]], [[TMP4]] -; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT7:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 +; VEC4_INTERL1-NEXT: [[INDUCTION:%.*]] = fadd fast <4 x float> [[DOTSPLAT6]], [[TMP13]] +; VEC4_INTERL1-NEXT: [[TMP14:%.*]] = fmul fast float [[TMP0]], 4.000000e+00 +; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT7:%.*]] = insertelement <4 x float> poison, float [[TMP14]], i64 0 ; VEC4_INTERL1-NEXT: [[DOTSPLAT8:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT7]], <4 x float> poison, <4 x i32> zeroinitializer -; VEC4_INTERL1-NEXT: [[TMP13:%.*]] = fmul fast float [[TMP0]], 4.000000e+00 -; VEC4_INTERL1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP13]], i64 0 +; VEC4_INTERL1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 ; VEC4_INTERL1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x 
i32> zeroinitializer ; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC4_INTERL1: vector.body: @@ -782,7 +782,7 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca ; VEC4_INTERL1-NEXT: [[VEC_IND9:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT10:%.*]], [[VECTOR_BODY]] ] ; VEC4_INTERL1-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] ; VEC4_INTERL1-NEXT: store <4 x float> [[VEC_IND9]], ptr [[TMP6]], align 4 -; VEC4_INTERL1-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[VEC_IND9]], [[DOTSPLAT8]] +; VEC4_INTERL1-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[VEC_IND9]], [[BROADCAST_SPLAT]] ; VEC4_INTERL1-NEXT: [[TMP8:%.*]] = fadd fast <4 x float> [[VEC_IND]], splat (float -5.000000e-01) ; VEC4_INTERL1-NEXT: [[TMP9:%.*]] = fadd fast <4 x float> [[TMP8]], [[TMP7]] ; VEC4_INTERL1-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]] @@ -791,7 +791,7 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca ; VEC4_INTERL1-NEXT: store <4 x float> [[TMP8]], ptr [[TMP11]], align 4 ; VEC4_INTERL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], splat (float -2.000000e+00) -; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT10]] = fadd fast <4 x float> [[VEC_IND9]], [[BROADCAST_SPLAT]] +; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT10]] = fadd fast <4 x float> [[VEC_IND9]], [[DOTSPLAT8]] ; VEC4_INTERL1-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VEC4_INTERL1-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; VEC4_INTERL1: middle.block: @@ -844,12 +844,12 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca ; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 ; VEC4_INTERL2-NEXT: [[TMP19:%.*]] = fmul fast <4 x float> [[DOTSPLATINSERT2]], ; 
VEC4_INTERL2-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[TMP19]], <4 x float> poison, <4 x i32> zeroinitializer -; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 +; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 ; VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer -; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT6:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 +; VEC4_INTERL2-NEXT: [[TMP20:%.*]] = fmul fast <4 x float> [[DOTSPLAT]], +; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT6:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 ; VEC4_INTERL2-NEXT: [[DOTSPLAT7:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT6]], <4 x float> poison, <4 x i32> zeroinitializer -; VEC4_INTERL2-NEXT: [[TMP4:%.*]] = fmul fast <4 x float> [[DOTSPLAT7]], -; VEC4_INTERL2-NEXT: [[INDUCTION:%.*]] = fadd fast <4 x float> [[DOTSPLAT]], [[TMP4]] +; VEC4_INTERL2-NEXT: [[INDUCTION:%.*]] = fadd fast <4 x float> [[DOTSPLAT7]], [[TMP20]] ; VEC4_INTERL2-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 ; VEC4_INTERL2-NEXT: [[BROADCAST:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT7]], <4 x float> poison, <4 x i32> zeroinitializer ; VEC4_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1006,16 +1006,16 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST2:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP0]], [[DOTCAST2]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END3:%.*]] = fadd fast float [[INIT:%.*]], [[TMP3]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, 
float [[TMP0]], i64 0 ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP13:%.*]] = fmul fast <2 x float> [[DOTSPLAT]], +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT5]], <2 x float> poison, <2 x i32> zeroinitializer -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = fmul fast <2 x float> [[DOTSPLAT6]], -; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fadd fast <2 x float> [[DOTSPLAT]], [[TMP4]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT7:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fadd fast <2 x float> [[DOTSPLAT6]], [[TMP13]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP14:%.*]] = fmul fast float [[TMP0]], 2.000000e+00 +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT7:%.*]] = insertelement <2 x float> poison, float [[TMP14]], i64 0 ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT8:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT7]], <2 x float> poison, <2 x i32> zeroinitializer -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP13:%.*]] = fmul fast float [[TMP0]], 2.000000e+00 -; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP13]], i64 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0 ; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer ; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC2_INTERL1_PRED_STORE: vector.body: @@ -1024,7 +1024,7 @@ define void 
@fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca ; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND9:%.*]] = phi <2 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT10:%.*]], [[VECTOR_BODY]] ] ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] ; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND9]], ptr [[TMP6]], align 4 -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP7:%.*]] = fadd fast <2 x float> [[VEC_IND9]], [[DOTSPLAT8]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP7:%.*]] = fadd fast <2 x float> [[VEC_IND9]], [[BROADCAST_SPLAT]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP8:%.*]] = fadd fast <2 x float> [[VEC_IND]], splat (float -5.000000e-01) ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP9:%.*]] = fadd fast <2 x float> [[TMP8]], [[TMP7]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]] @@ -1033,7 +1033,7 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca ; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[TMP8]], ptr [[TMP11]], align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], splat (float -1.000000e+00) -; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT10]] = fadd fast <2 x float> [[VEC_IND9]], [[BROADCAST_SPLAT]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT10]] = fadd fast <2 x float> [[VEC_IND9]], [[DOTSPLAT8]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; VEC2_INTERL1_PRED_STORE: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/induction-step.ll b/llvm/test/Transforms/LoopVectorize/induction-step.ll index 80a4754a0b63a..343c5ee538790 100644 --- a/llvm/test/Transforms/LoopVectorize/induction-step.ll +++ 
b/llvm/test/Transforms/LoopVectorize/induction-step.ll @@ -33,11 +33,11 @@ define void @induction_with_global(i32 %init, ptr noalias nocapture %A, i32 %N) ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32 ; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[DOTCAST]], [[TMP0]] ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INIT]], [[TMP3]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[INIT]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> poison, i32 [[TMP0]], i64 0 ; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = mul <8 x i32> , [[DOTSPLAT3]] +; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[INIT]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION4:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP6]] ; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP0]], 8 ; CHECK-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <8 x i32> poison, i32 [[TMP7]], i64 0 @@ -124,19 +124,19 @@ define i32 @induction_with_loop_inv(i32 %init, ptr noalias nocapture %A, i32 %N, ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32 ; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[DOTCAST]], [[J_012]] ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[X_011]], [[TMP1]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[X_011]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[J_012]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> poison, i32 [[J_012]], i64 0 +; CHECK-NEXT: [[TMP12:%.*]] = mul <8 x i32> , [[DOTSPLAT]] +; 
CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> poison, i32 [[X_011]], i64 0 ; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = mul <8 x i32> , [[DOTSPLAT3]] -; CHECK-NEXT: [[INDUCTION4:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP4]] +; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i32> [[DOTSPLAT3]], [[TMP12]] ; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[J_012]], 8 ; CHECK-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <8 x i32> poison, i32 [[TMP5]], i64 0 ; CHECK-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT5]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ [[INDUCTION4]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 @@ -350,22 +350,22 @@ define void @wide_add_induction_step_live_in(ptr %dst, i64 %N, i16 %off) { ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = mul <8 x i16> , [[DOTSPLAT]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i16> zeroinitializer, [[TMP1]] -; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <8 x i16> poison, i16 [[O_1]], i64 0 -; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT1]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = mul i16 [[O_1]], 8 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x 
i16> poison, i16 [[TMP2]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i64 0 +; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT1]], <8 x i16> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[O_1]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i16> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = add <8 x i16> [[VEC_IND]], [[DOTSPLAT2]] +; CHECK-NEXT: [[TMP4:%.*]] = add <8 x i16> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0 ; CHECK-NEXT: store <8 x i16> [[TMP4]], ptr [[TMP6]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i16> [[VEC_IND]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i16> [[VEC_IND]], [[DOTSPLAT2]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -422,22 +422,22 @@ define void @wide_sub_induction_step_live_in(ptr %dst, i64 %N, i16 %off) { ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i16> , [[DOTSPLAT]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i16> zeroinitializer, [[TMP2]] -; CHECK-NEXT: 
[[DOTSPLATINSERT1:%.*]] = insertelement <8 x i16> poison, i16 [[O_1]], i64 0 -; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT1]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = mul i16 [[TMP0]], 8 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP3]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP3]], i64 0 +; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT1]], <8 x i16> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[O_1]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i16> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = sub <8 x i16> [[VEC_IND]], [[DOTSPLAT2]] +; CHECK-NEXT: [[TMP5:%.*]] = sub <8 x i16> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 0 ; CHECK-NEXT: store <8 x i16> [[TMP5]], ptr [[TMP7]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i16> [[VEC_IND]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i16> [[VEC_IND]], [[DOTSPLAT2]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll 
b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll index 848831eab8165..82a17297f6072 100644 --- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll +++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll @@ -752,11 +752,11 @@ define float @fp_postinc_use_fadd(float %init, ptr noalias nocapture %A, i64 %N, ; VEC-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float ; VEC-NEXT: [[TMP0:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]] ; VEC-NEXT: [[TMP1:%.*]] = fadd fast float [[INIT]], [[TMP0]] -; VEC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 -; VEC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer ; VEC-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <2 x float> poison, float [[FPINC]], i64 0 ; VEC-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT1]], <2 x float> poison, <2 x i32> zeroinitializer ; VEC-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> , [[DOTSPLAT2]] +; VEC-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 +; VEC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT2]], <2 x float> poison, <2 x i32> zeroinitializer ; VEC-NEXT: [[INDUCTION:%.*]] = fadd fast <2 x float> [[DOTSPLAT]], [[TMP2]] ; VEC-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], 2.000000e+00 ; VEC-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0 @@ -872,11 +872,11 @@ define float @fp_postinc_use_fadd_ops_swapped(float %init, ptr noalias nocapture ; VEC-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float ; VEC-NEXT: [[TMP0:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]] ; VEC-NEXT: [[TMP1:%.*]] = fadd fast float [[INIT]], [[TMP0]] -; VEC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 -; VEC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer ; VEC-NEXT: 
[[DOTSPLATINSERT1:%.*]] = insertelement <2 x float> poison, float [[FPINC]], i64 0 ; VEC-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT1]], <2 x float> poison, <2 x i32> zeroinitializer ; VEC-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> , [[DOTSPLAT2]] +; VEC-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 +; VEC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT2]], <2 x float> poison, <2 x i32> zeroinitializer ; VEC-NEXT: [[INDUCTION:%.*]] = fadd fast <2 x float> [[DOTSPLAT]], [[TMP2]] ; VEC-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], 2.000000e+00 ; VEC-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0 @@ -992,11 +992,11 @@ define float @fp_postinc_use_fsub(float %init, ptr noalias nocapture %A, i64 %N, ; VEC-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float ; VEC-NEXT: [[TMP0:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]] ; VEC-NEXT: [[TMP1:%.*]] = fsub fast float [[INIT]], [[TMP0]] -; VEC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 -; VEC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer ; VEC-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <2 x float> poison, float [[FPINC]], i64 0 ; VEC-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT1]], <2 x float> poison, <2 x i32> zeroinitializer ; VEC-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> , [[DOTSPLAT2]] +; VEC-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 +; VEC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT2]], <2 x float> poison, <2 x i32> zeroinitializer ; VEC-NEXT: [[INDUCTION:%.*]] = fsub fast <2 x float> [[DOTSPLAT]], [[TMP2]] ; VEC-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], 2.000000e+00 ; VEC-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0 diff --git 
a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll index 4db0153fe8491..6411a3031d1af 100644 --- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll @@ -503,12 +503,12 @@ define void @induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 84, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[IND_END4:%.*]] = mul i8 84, [[INDUCTION_IV]] -; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <4 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <4 x i8> poison, i8 [[INDUCTION_IV]], i64 0 ; CHECK-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT8]], <4 x i8> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[DOTSPLATINSERT10:%.*]] = insertelement <4 x i8> poison, i8 [[INDUCTION_IV]], i64 0 +; CHECK-NEXT: [[TMP14:%.*]] = mul <4 x i8> , [[DOTSPLAT9]] +; CHECK-NEXT: [[DOTSPLATINSERT10:%.*]] = insertelement <4 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0 ; CHECK-NEXT: [[DOTSPLAT11:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT10]], <4 x i8> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = mul <4 x i8> , [[DOTSPLAT11]] -; CHECK-NEXT: [[INDUCTION12:%.*]] = add <4 x i8> [[DOTSPLAT9]], [[TMP8]] +; CHECK-NEXT: [[INDUCTION12:%.*]] = add <4 x i8> [[DOTSPLAT11]], [[TMP14]] ; CHECK-NEXT: [[TMP9:%.*]] = mul i8 [[INDUCTION_IV]], 4 ; CHECK-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement <4 x i8> poison, i8 [[TMP9]], i64 0 ; CHECK-NEXT: [[DOTSPLAT14:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT13]], <4 x i8> poison, <4 x i32> zeroinitializer @@ -593,12 +593,12 @@ define void 
@induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 84, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[IND_END4:%.*]] = mul i8 84, [[INDUCTION_IV]] -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <2 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0 +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <2 x i8> poison, i8 [[INDUCTION_IV]], i64 0 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT8]], <2 x i8> poison, <2 x i32> zeroinitializer -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLATINSERT10:%.*]] = insertelement <2 x i8> poison, i8 [[INDUCTION_IV]], i64 0 +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP14:%.*]] = mul <2 x i8> , [[DOTSPLAT9]] +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLATINSERT10:%.*]] = insertelement <2 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT11:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT10]], <2 x i8> poison, <2 x i32> zeroinitializer -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP8:%.*]] = mul <2 x i8> , [[DOTSPLAT11]] -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDUCTION12:%.*]] = add <2 x i8> [[DOTSPLAT9]], [[TMP8]] +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDUCTION12:%.*]] = add <2 x i8> [[DOTSPLAT11]], [[TMP14]] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP9:%.*]] = mul i8 [[INDUCTION_IV]], 2 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement <2 x i8> poison, i8 [[TMP9]], i64 0 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT14:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT13]], <2 x i8> poison, <2 x i32> zeroinitializer From e7f5a4eb653687585c765e9e8488313dfd0d548c Mon Sep 17 00:00:00 2001 From: 
Luke Lau Date: Fri, 13 Dec 2024 01:43:23 +0800 Subject: [PATCH 03/28] Only truncate the IV step, don't widen it --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 4945288e058fb..3438b547a7a15 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2106,15 +2106,11 @@ expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR, Prev = WidenIVR->getLastUnrolledPartOperand(); assert(Inc && Prev); } else { - unsigned VFTySize = TypeInfo.inferScalarType(VF)->getScalarSizeInBits(); - // Multiply the vectorization factor by the step using integer or // floating-point arithmetic as appropriate. if (Ty->isFloatingPointTy()) VF = Builder.createScalarCast(Instruction::CastOps::UIToFP, VF, Ty, DL); - else if (VFTySize < Ty->getScalarSizeInBits()) - VF = Builder.createScalarCast(Instruction::CastOps::ZExt, VF, Ty, DL); - else if (VFTySize > Ty->getScalarSizeInBits()) + else if (Ty != TypeInfo.inferScalarType(VF)) VF = Builder.createScalarCast(Instruction::CastOps::Trunc, VF, Ty, DL); Inc = Builder.createSplat(Builder.createNaryOp(MulOp, {Step, VF}, FMFs)); From 31bdc7c794ebb8d8a0d895cc782b7338b7ac080a Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 16 Dec 2024 12:23:19 +0800 Subject: [PATCH 04/28] Use recipe debugloc after 734a204fbd4b790048c57f79351ad8beeb1000ce --- llvm/lib/Transforms/Vectorize/VPlan.h | 7 ++++--- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 2 +- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 7 ++++--- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index bd860c177e6ec..52459dcac2943 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1901,8 +1901,8 @@ 
class VPWidenIntOrFpInductionPHIRecipe : public VPHeaderPHIRecipe { Instruction *IV; public: - VPWidenIntOrFpInductionPHIRecipe(Instruction *IV, VPValue *Start) - : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionPHISC, IV, Start), + VPWidenIntOrFpInductionPHIRecipe(Instruction *IV, VPValue *Start, DebugLoc DL) + : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionPHISC, IV, Start, DL), IV(IV) { assert((isa(IV) || isa(IV)) && "Expected either an induction phi-node or a truncate of it!"); @@ -1911,7 +1911,8 @@ class VPWidenIntOrFpInductionPHIRecipe : public VPHeaderPHIRecipe { ~VPWidenIntOrFpInductionPHIRecipe() override = default; VPWidenIntOrFpInductionPHIRecipe *clone() override { - auto *R = new VPWidenIntOrFpInductionPHIRecipe(IV, getOperand(0)); + auto *R = + new VPWidenIntOrFpInductionPHIRecipe(IV, getOperand(0), getDebugLoc()); R->addOperand(getBackedgeValue()); return R; } diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 1056e3d68f191..31607c11df988 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1709,7 +1709,7 @@ void VPWidenIntOrFpInductionPHIRecipe::execute(VPTransformState &State) { Value *Start = State.get(getOperand(0)); PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, "vec.ind"); Phi->addIncoming(Start, VectorPH); - Phi->setDebugLoc(IV->getDebugLoc()); + Phi->setDebugLoc(getDebugLoc()); State.set(this, Phi); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 3438b547a7a15..23970b4b8764b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2094,7 +2094,8 @@ expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR, {}, "induction"); // Create the widened phi of the vector IV. 
- auto *WidePHI = new VPWidenIntOrFpInductionPHIRecipe(IV, Init); + auto *WidePHI = + new VPWidenIntOrFpInductionPHIRecipe(IV, Init, WidenIVR->getDebugLoc()); WidePHI->insertBefore(WidenIVR); // Create the backedge value for the vector IV. @@ -2119,8 +2120,8 @@ expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR, VPBasicBlock *ExitingBB = Plan->getVectorLoopRegion()->getExitingBasicBlock(); Builder.setInsertPoint(ExitingBB, ExitingBB->getTerminator()->getIterator()); - auto *Next = Builder.createNaryOp(AddOp, {Prev, Inc}, FMFs, IV->getDebugLoc(), - "vec.ind.next"); + auto *Next = Builder.createNaryOp(AddOp, {Prev, Inc}, FMFs, + WidenIVR->getDebugLoc(), "vec.ind.next"); WidePHI->addOperand(Next); From 42dc912da4a94e9c27b16b7f31879b49ac1080c0 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 16 Dec 2024 18:01:42 +0800 Subject: [PATCH 05/28] Mark splat and step vector recipes as free from side effects/reads/writes --- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 31607c11df988..394d6ee8a5d14 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -73,6 +73,8 @@ bool VPRecipeBase::mayWriteToMemory() const { case VPBranchOnMaskSC: case VPScalarIVStepsSC: case VPPredInstPHISC: + case VPSplatSC: + case VPStepVectorSC: return false; case VPBlendSC: case VPReductionEVLSC: @@ -120,6 +122,8 @@ bool VPRecipeBase::mayReadFromMemory() const { case VPScalarIVStepsSC: case VPWidenStoreEVLSC: case VPWidenStoreSC: + case VPSplatSC: + case VPStepVectorSC: return false; case VPBlendSC: case VPReductionEVLSC: @@ -150,6 +154,8 @@ bool VPRecipeBase::mayHaveSideEffects() const { case VPPredInstPHISC: case VPScalarCastSC: case VPReverseVectorPointerSC: + case VPSplatSC: + case VPStepVectorSC: return false; case VPInstructionSC: return mayWriteToMemory(); 
From 6839647322dd8e3d8b2955e8f9c563aa23dfc1e1 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Tue, 17 Dec 2024 12:16:29 +0800 Subject: [PATCH 06/28] Fix crash when pointer IV comes after intorfp IV --- .../Transforms/Vectorize/VPlanTransforms.cpp | 14 ++ .../LoopVectorize/pointer-induction.ll | 139 ++++++++++++++++++ 2 files changed, 153 insertions(+) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 23970b4b8764b..64e2d7b43463a 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2135,6 +2135,20 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan, for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly( vp_depth_first_deep(Plan.getEntry()))) { + + // Move VPWidenPointerInductionRecipes to the back of the phis + // since it may insert non-phi instructions in place, which will + // interfere with other header phis if they come after. + // + // TODO: Expand out VPWidenPointerInductionRecipe into multiple + // recipes here and remove this + SmallVector PointerIVs; + for (VPRecipeBase &R : VPBB->phis()) + if (isa(R)) + PointerIVs.push_back(&R); + for (VPRecipeBase *R : PointerIVs) + R->moveBefore(*VPBB, VPBB->getFirstNonPhi()); + for (VPRecipeBase &R : make_early_inc_range(VPBB->phis())) { if (auto *WidenIVR = dyn_cast(&R)) { expandVPWidenIntOrFpInduction(WidenIVR, TypeInfo); diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll index 531164a2c5dd0..6a297fb21a58b 100644 --- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll @@ -284,3 +284,142 @@ for.cond: ; preds = %for.body, %entry for.end: ; preds = %for.cond ret void } + +; Test that WidenPointerInductionRecipes are ordered after the other header phis +define void @outside_lattice(ptr noalias %p, ptr noalias %q, i32 %n) { +; 
DEFAULT-LABEL: @outside_lattice( +; DEFAULT-NEXT: entry: +; DEFAULT-NEXT: [[TMP0:%.*]] = zext i32 [[N:%.*]] to i64 +; DEFAULT-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP0]], i64 1) +; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], 4 +; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] +; DEFAULT: vector.scevcheck: +; DEFAULT-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1) +; DEFAULT-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1 +; DEFAULT-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0 +; DEFAULT-NEXT: br i1 [[TMP2]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; DEFAULT: vector.ph: +; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 4 +; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]] +; DEFAULT-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], 4 +; DEFAULT-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr null, i64 [[TMP3]] +; DEFAULT-NEXT: [[IND_END2:%.*]] = trunc i64 [[N_VEC]] to i32 +; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]] +; DEFAULT: vector.body: +; DEFAULT-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ null, [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] +; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; DEFAULT-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> +; DEFAULT-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32 +; DEFAULT-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], 0 +; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[P:%.*]], i32 [[TMP4]] +; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i32 0 +; DEFAULT-NEXT: store <4 x ptr> [[VECTOR_GEP]], ptr [[TMP6]], align 8 +; DEFAULT-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[Q:%.*]], i32 [[TMP4]] +; DEFAULT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], 
i32 0 +; DEFAULT-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP8]], align 4 +; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; DEFAULT-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 16 +; DEFAULT-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; DEFAULT-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; DEFAULT: middle.block: +; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]] +; DEFAULT-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; DEFAULT: scalar.ph: +; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ null, [[VECTOR_SCEVCHECK]] ], [ null, [[ENTRY:%.*]] ] +; DEFAULT-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] +; DEFAULT-NEXT: br label [[FOR_BODY:%.*]] +; DEFAULT: for.body: +; DEFAULT-NEXT: [[IV_PTR:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_PTR_NEXT:%.*]], [[FOR_BODY]] ] +; DEFAULT-NEXT: [[IV_INT:%.*]] = phi i32 [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ], [ [[IV_INT_NEXT:%.*]], [[FOR_BODY]] ] +; DEFAULT-NEXT: [[P_GEP:%.*]] = getelementptr inbounds ptr, ptr [[P]], i32 [[IV_INT]] +; DEFAULT-NEXT: store ptr [[IV_PTR]], ptr [[P_GEP]], align 8 +; DEFAULT-NEXT: [[Q_GEP:%.*]] = getelementptr inbounds i32, ptr [[Q]], i32 [[IV_INT]] +; DEFAULT-NEXT: store i32 [[IV_INT]], ptr [[Q_GEP]], align 4 +; DEFAULT-NEXT: [[IV_INT_NEXT]] = add i32 [[IV_INT]], 1 +; DEFAULT-NEXT: [[IV_PTR_NEXT]] = getelementptr inbounds i32, ptr [[IV_PTR]], i32 1 +; DEFAULT-NEXT: [[DONE:%.*]] = icmp ult i32 [[IV_INT_NEXT]], [[N]] +; DEFAULT-NEXT: br i1 [[DONE]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP7:![0-9]+]] +; DEFAULT: for.end: +; DEFAULT-NEXT: ret void +; +; STRIDED-LABEL: @outside_lattice( +; STRIDED-NEXT: entry: +; STRIDED-NEXT: [[TMP0:%.*]] = zext i32 
[[N:%.*]] to i64 +; STRIDED-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP0]], i64 1) +; STRIDED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], 4 +; STRIDED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] +; STRIDED: vector.scevcheck: +; STRIDED-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1) +; STRIDED-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1 +; STRIDED-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0 +; STRIDED-NEXT: br i1 [[TMP2]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; STRIDED: vector.ph: +; STRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 4 +; STRIDED-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]] +; STRIDED-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], 4 +; STRIDED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr null, i64 [[TMP3]] +; STRIDED-NEXT: [[IND_END2:%.*]] = trunc i64 [[N_VEC]] to i32 +; STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] +; STRIDED: vector.body: +; STRIDED-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ null, [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] +; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; STRIDED-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; STRIDED-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> +; STRIDED-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32 +; STRIDED-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], 0 +; STRIDED-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[P:%.*]], i32 [[TMP4]] +; STRIDED-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i32 0 +; STRIDED-NEXT: store <4 x ptr> [[VECTOR_GEP]], ptr [[TMP6]], align 8 +; STRIDED-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[Q:%.*]], i32 [[TMP4]] +; STRIDED-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 +; STRIDED-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP8]], align 4 +; STRIDED-NEXT: 
[[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; STRIDED-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; STRIDED-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 16 +; STRIDED-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; STRIDED-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; STRIDED: middle.block: +; STRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]] +; STRIDED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; STRIDED: scalar.ph: +; STRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ null, [[VECTOR_SCEVCHECK]] ], [ null, [[ENTRY:%.*]] ] +; STRIDED-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] +; STRIDED-NEXT: br label [[FOR_BODY:%.*]] +; STRIDED: for.body: +; STRIDED-NEXT: [[IV_PTR:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_PTR_NEXT:%.*]], [[FOR_BODY]] ] +; STRIDED-NEXT: [[IV_INT:%.*]] = phi i32 [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ], [ [[IV_INT_NEXT:%.*]], [[FOR_BODY]] ] +; STRIDED-NEXT: [[P_GEP:%.*]] = getelementptr inbounds ptr, ptr [[P]], i32 [[IV_INT]] +; STRIDED-NEXT: store ptr [[IV_PTR]], ptr [[P_GEP]], align 8 +; STRIDED-NEXT: [[Q_GEP:%.*]] = getelementptr inbounds i32, ptr [[Q]], i32 [[IV_INT]] +; STRIDED-NEXT: store i32 [[IV_INT]], ptr [[Q_GEP]], align 4 +; STRIDED-NEXT: [[IV_INT_NEXT]] = add i32 [[IV_INT]], 1 +; STRIDED-NEXT: [[IV_PTR_NEXT]] = getelementptr inbounds i32, ptr [[IV_PTR]], i32 1 +; STRIDED-NEXT: [[DONE:%.*]] = icmp ult i32 [[IV_INT_NEXT]], [[N]] +; STRIDED-NEXT: br i1 [[DONE]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP9:![0-9]+]] +; STRIDED: for.end: +; STRIDED-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %iv.ptr = phi ptr [ null, %entry ], [ %iv.ptr.next, %for.body ] + %iv.int = phi i32 [ 0, %entry ], [ %iv.int.next, 
%for.body ] + + %p.gep = getelementptr inbounds ptr, ptr %p, i32 %iv.int + store ptr %iv.ptr, ptr %p.gep + + %q.gep = getelementptr inbounds i32, ptr %q, i32 %iv.int + store i32 %iv.int, ptr %q.gep + + %iv.int.next = add i32 %iv.int, 1 + %iv.ptr.next = getelementptr inbounds i32, ptr %iv.ptr, i32 1 + + %done = icmp ult i32 %iv.int.next, %n + br i1 %done, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} From 6c659cacc793b0cb1cc57b56c1de4b5d16e8c66b Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Thu, 5 Dec 2024 00:09:46 +0800 Subject: [PATCH 07/28] Remove VPWidenIntOrFpInductionPHIRecipe, use VPWidenPHIRecipe instead --- .../Transforms/Vectorize/LoopVectorize.cpp | 3 +- llvm/lib/Transforms/Vectorize/VPlan.h | 44 ++++--------------- .../Transforms/Vectorize/VPlanAnalysis.cpp | 15 +++---- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 28 +----------- .../Transforms/Vectorize/VPlanTransforms.cpp | 3 +- llvm/lib/Transforms/Vectorize/VPlanValue.h | 1 - .../LoopVectorize/pointer-induction.ll | 8 ++-- 7 files changed, 24 insertions(+), 78 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index b8b958ddaa9ae..3d69226a4c453 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2932,8 +2932,7 @@ LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI, void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) { // Fix widened non-induction PHIs by setting up the PHI operands. - if (EnableVPlanNativePath) - fixNonInductionPHIs(State); + fixNonInductionPHIs(State); // After vectorization, the exit blocks of the original loop will have // additional predecessors. 
Invalidate SCEVs for the exit phis in case SE diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 52459dcac2943..f8420e0a87948 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1895,39 +1895,6 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe { } }; -/// A recipe to generate the PHI of a widened IV, expanded from -/// VPWidenIntOrFpInductionRecipe. -class VPWidenIntOrFpInductionPHIRecipe : public VPHeaderPHIRecipe { - Instruction *IV; - -public: - VPWidenIntOrFpInductionPHIRecipe(Instruction *IV, VPValue *Start, DebugLoc DL) - : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionPHISC, IV, Start, DL), - IV(IV) { - assert((isa(IV) || isa(IV)) && - "Expected either an induction phi-node or a truncate of it!"); - } - - ~VPWidenIntOrFpInductionPHIRecipe() override = default; - - VPWidenIntOrFpInductionPHIRecipe *clone() override { - auto *R = - new VPWidenIntOrFpInductionPHIRecipe(IV, getOperand(0), getDebugLoc()); - R->addOperand(getBackedgeValue()); - return R; - } - - VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionPHISC) - - void execute(VPTransformState &State) override; - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - /// Print the recipe. - void print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const override; -#endif -}; - class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe, public VPUnrollPartAccessor<3> { bool IsScalarAfterVectorization; @@ -2017,11 +1984,16 @@ class VPScalarPHIRecipe : public VPHeaderPHIRecipe { /// exactly 2 incoming values, the first from the predecessor of the region and /// the second from the exiting block of the region. class VPWidenPHIRecipe : public VPSingleDefRecipe { + /// Name to use for the generated IR instruction for the widened phi. + std::string Name; + public: /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start and - /// debug location \p DL. 
- VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr, DebugLoc DL = {}) - : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef(), Phi, DL) { + /// debug location \p DL.. + VPWidenPHIRecipe(Instruction *Phi, VPValue *Start = nullptr, DebugLoc DL = {}, + const Twine &Name = "vec.phi") + : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef(), Phi, DL), + Name(Name.str()) { if (Start) addOperand(Start); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index 6108f2f4e4513..8414b36f3ae22 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -236,14 +236,13 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) { .Case( - [this](const auto *R) { - // Handle header phi recipes, except VPWidenIntOrFpInduction - // which needs special handling due it being possibly truncated. - // TODO: consider inferring/caching type of siblings, e.g., - // backedge value, here and in cases below. - return inferScalarType(R->getStartValue()); - }) + VPScalarPHIRecipe>([this](const auto *R) { + // Handle header phi recipes, except VPWidenIntOrFpInduction + // which needs special handling due it being possibly truncated. + // TODO: consider inferring/caching type of siblings, e.g., + // backedge value, here and in cases below. 
+ return inferScalarType(R->getStartValue()); + }) .Case( [](const auto *R) { return R->getScalarType(); }) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 394d6ee8a5d14..c2bb975d538cf 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1709,27 +1709,6 @@ static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) { : ConstantFP::get(Ty, C); } -void VPWidenIntOrFpInductionPHIRecipe::execute(VPTransformState &State) { - BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); - - Value *Start = State.get(getOperand(0)); - PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, "vec.ind"); - Phi->addIncoming(Start, VectorPH); - Phi->setDebugLoc(getDebugLoc()); - State.set(this, Phi); -} - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void VPWidenIntOrFpInductionPHIRecipe::print(raw_ostream &O, - const Twine &Indent, - VPSlotTracker &SlotTracker) const { - O << Indent; - printAsOperand(O, SlotTracker); - O << " = WIDEN-INDUCTION-PHI "; - printOperands(O, SlotTracker); -} -#endif - #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { @@ -3507,13 +3486,10 @@ VPBasicBlock *VPWidenPHIRecipe::getIncomingBlock(unsigned I) { } void VPWidenPHIRecipe::execute(VPTransformState &State) { - assert(EnableVPlanNativePath && - "Non-native vplans are not expected to have VPWidenPHIRecipes."); - State.setDebugLocFrom(getDebugLoc()); Value *Op0 = State.get(getOperand(0)); Type *VecTy = Op0->getType(); - Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi"); + Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, Name); State.set(this, VecPhi); } @@ -3522,7 +3498,7 @@ void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << Indent << "WIDEN-PHI "; - auto *OriginalPhi = 
cast(getUnderlyingValue()); + auto *OriginalPhi = cast(getUnderlyingValue()); // Unless all incoming values are modeled in VPlan print the original PHI // directly. // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 64e2d7b43463a..6e3cce6cac627 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2095,7 +2095,8 @@ expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR, // Create the widened phi of the vector IV. auto *WidePHI = - new VPWidenIntOrFpInductionPHIRecipe(IV, Init, WidenIVR->getDebugLoc()); + new VPWidenPHIRecipe(IV, nullptr, WidenIVR->getDebugLoc(), "vec.ind"); + WidePHI->addOperand(Init); WidePHI->insertBefore(WidenIVR); // Create the backedge value for the vector IV. diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index 622ebf211e8e6..cb0ae78c0877e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -366,7 +366,6 @@ class VPDef { VPEVLBasedIVPHISC, VPFirstOrderRecurrencePHISC, VPWidenIntOrFpInductionSC, - VPWidenIntOrFpInductionPHISC, VPWidenPointerInductionSC, VPScalarPHISC, VPReductionPHISC, diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll index 6a297fb21a58b..eab3a1a904371 100644 --- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll @@ -327,8 +327,8 @@ define void @outside_lattice(ptr noalias %p, ptr noalias %q, i32 %n) { ; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; DEFAULT: scalar.ph: -; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 
null, [[VECTOR_SCEVCHECK]] ], [ null, [[ENTRY:%.*]] ] -; DEFAULT-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] +; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ null, [[ENTRY:%.*]] ], [ null, [[VECTOR_SCEVCHECK]] ] +; DEFAULT-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] ; DEFAULT-NEXT: br label [[FOR_BODY:%.*]] ; DEFAULT: for.body: ; DEFAULT-NEXT: [[IV_PTR:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_PTR_NEXT:%.*]], [[FOR_BODY]] ] @@ -384,8 +384,8 @@ define void @outside_lattice(ptr noalias %p, ptr noalias %q, i32 %n) { ; STRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]] ; STRIDED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; STRIDED: scalar.ph: -; STRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ null, [[VECTOR_SCEVCHECK]] ], [ null, [[ENTRY:%.*]] ] -; STRIDED-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] +; STRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ null, [[ENTRY:%.*]] ], [ null, [[VECTOR_SCEVCHECK]] ] +; STRIDED-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] ; STRIDED-NEXT: br label [[FOR_BODY:%.*]] ; STRIDED: for.body: ; STRIDED-NEXT: [[IV_PTR:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_PTR_NEXT:%.*]], [[FOR_BODY]] ] From 1cd6c81f78a7171156026167de34b62ea287988b Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 6 Jan 2025 19:17:08 +0800 Subject: [PATCH 08/28] Use dyn_cast --- llvm/lib/Transforms/Vectorize/VPlan.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 
d2c732b7230e3..5931ca2df9efe 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -1050,8 +1050,7 @@ void VPlan::execute(VPTransformState *State) { if (isa(&R)) continue; - if (isa(&R)) { - auto *WidenPhi = cast(&R); + if (auto *WidenPhi = dyn_cast(&R)) { assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) && "recipe generating only scalars should have been replaced"); auto *GEP = cast(State->get(WidenPhi)); From dcc6640e92611f48187f8bdc798ea205a0601d27 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 6 Jan 2025 23:42:01 +0800 Subject: [PATCH 09/28] Remove VPSplatRecipe, replace with VPInstruction::Splat --- .../Vectorize/LoopVectorizationPlanner.h | 4 +-- llvm/lib/Transforms/Vectorize/VPlan.h | 35 ++----------------- .../Transforms/Vectorize/VPlanAnalysis.cpp | 2 ++ .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 28 +++++---------- llvm/lib/Transforms/Vectorize/VPlanValue.h | 1 - 5 files changed, 15 insertions(+), 55 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index 7f1ec987ad038..ae3ca6b0df914 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -265,8 +265,8 @@ class VPBuilder { FPBinOp ? 
FPBinOp->getFastMathFlags() : FastMathFlags())); } - VPSplatRecipe *createSplat(VPValue *Val) { - return tryInsertInstruction(new VPSplatRecipe(Val)); + VPInstruction *createSplat(VPValue *Val) { + return tryInsertInstruction(new VPInstruction(VPInstruction::Splat, {Val})); } VPStepVectorRecipe *createStepVector(Type *Ty) { diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index f8420e0a87948..c6390a69edd1f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -880,6 +880,8 @@ class VPInstruction : public VPRecipeWithIRFlags, // Extracts the first active lane of a vector, where the first operand is // the predicate, and the second operand is the vector to extract. ExtractFirstActive, + // Splats a scalar value across all lanes. + Splat, }; private: @@ -1236,39 +1238,6 @@ class VPScalarCastRecipe : public VPSingleDefRecipe { } }; -/// A for splatting a scalar value to a vector. -class VPSplatRecipe : public VPSingleDefRecipe { -public: - VPSplatRecipe(VPValue *Op) : VPSingleDefRecipe(VPDef::VPSplatSC, {Op}) {} - - ~VPSplatRecipe() override = default; - - VPSplatRecipe *clone() override { return new VPSplatRecipe(getOperand(0)); } - - VP_CLASSOF_IMPL(VPDef::VPSplatSC) - - void execute(VPTransformState &State) override; - - /// Return the cost of this VPSplatRecipe. - InstructionCost computeCost(ElementCount VF, - VPCostContext &Ctx) const override { - // TODO: Compute accurate cost after retiring the legacy cost model. - return 0; - } - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - void print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const override; -#endif - - /// Returns true if the recipe only uses the first lane of operand \p Op. - bool onlyFirstLaneUsed(const VPValue *Op) const override { - assert(is_contained(operands(), Op) && - "Op must be an operand of the recipe"); - return true; - } -}; - /// A recipe for generating a step vector. 
class VPStepVectorRecipe : public VPSingleDefRecipe { /// Scalar return type of the intrinsic. diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index 8414b36f3ae22..ec97774ce37f2 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -96,6 +96,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) { case VPInstruction::BranchOnCond: case VPInstruction::BranchOnCount: return Type::getVoidTy(Ctx); + case VPInstruction::Splat: + return inferScalarType(R->getOperand(0)); default: break; } diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index c2bb975d538cf..8ddd0e048e3ed 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -73,7 +73,6 @@ bool VPRecipeBase::mayWriteToMemory() const { case VPBranchOnMaskSC: case VPScalarIVStepsSC: case VPPredInstPHISC: - case VPSplatSC: case VPStepVectorSC: return false; case VPBlendSC: @@ -122,7 +121,6 @@ bool VPRecipeBase::mayReadFromMemory() const { case VPScalarIVStepsSC: case VPWidenStoreEVLSC: case VPWidenStoreSC: - case VPSplatSC: case VPStepVectorSC: return false; case VPBlendSC: @@ -154,7 +152,6 @@ bool VPRecipeBase::mayHaveSideEffects() const { case VPPredInstPHISC: case VPScalarCastSC: case VPReverseVectorPointerSC: - case VPSplatSC: case VPStepVectorSC: return false; case VPInstructionSC: @@ -715,6 +712,10 @@ Value *VPInstruction::generate(VPTransformState &State) { Builder.getInt64Ty(), Mask, true, "first.active.lane"); return Builder.CreateExtractElement(Vec, Ctz, "early.exit.value"); } + case VPInstruction::Splat: + return State.Builder.CreateVectorSplat(State.VF, + State.get(getOperand(0), true)); + default: llvm_unreachable("Unsupported opcode for instruction"); } @@ -826,6 +827,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const { case 
VPInstruction::LogicalAnd: case VPInstruction::Not: case VPInstruction::PtrAdd: + case VPInstruction::Splat: return false; default: return true; @@ -853,6 +855,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { case VPInstruction::BranchOnCount: case VPInstruction::BranchOnCond: case VPInstruction::ResumePhi: + case VPInstruction::Splat: return true; }; llvm_unreachable("switch should return"); @@ -944,6 +947,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, case VPInstruction::ExtractFirstActive: O << "extract-first-active"; break; + case VPInstruction::Splat: + O << "splat"; + break; default: O << Instruction::getOpcodeName(getOpcode()); } @@ -2319,22 +2325,6 @@ void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent, } #endif -void VPSplatRecipe::execute(VPTransformState &State) { - Value *Splat = - State.Builder.CreateVectorSplat(State.VF, State.get(getOperand(0), true)); - State.set(this, Splat); -} - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void VPSplatRecipe::print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const { - O << Indent; - printAsOperand(O, SlotTracker); - O << " = SPLAT "; - printOperands(O, SlotTracker); -} -#endif - void VPStepVectorRecipe::execute(VPTransformState &State) { VectorType *Ty = VectorType::get(ScalarTy, State.VF); State.set(this, State.Builder.CreateStepVector(Ty)); diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index cb0ae78c0877e..61422bfe6c0b0 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -354,7 +354,6 @@ class VPDef { VPWidenSelectSC, VPBlendSC, VPHistogramSC, - VPSplatSC, VPStepVectorSC, // START: Phi-like recipes. Need to be kept together. 
VPWidenPHISC, From ca81015f3f2c0a561f585b2da6ee50e37379fb5d Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 6 Jan 2025 23:42:26 +0800 Subject: [PATCH 10/28] Update comment --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 6e3cce6cac627..8861f499d7e2f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2038,7 +2038,7 @@ void VPlanTransforms::createInterleaveGroups( /// /// vector loop: { /// vector.body: -/// ir<%i> = WIDEN-INDUCTION-PHI vp<%induction>, vp<%vec.ind.next> +/// ir<%i> = WIDEN-PHI vp<%induction>, vp<%vec.ind.next> /// ... /// vp<%vec.ind.next> = add ir<%i>, vp<%inc> /// EMIT branch-on-count ... From 8b77af442a5ccd9b1e1f5e98d2b51c7d0cf5a12c Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 8 Jan 2025 15:49:24 +0800 Subject: [PATCH 11/28] Pass Name when generating VPInstruction::Splat --- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 8ddd0e048e3ed..3fc424a111dd9 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -713,8 +713,8 @@ Value *VPInstruction::generate(VPTransformState &State) { return Builder.CreateExtractElement(Vec, Ctz, "early.exit.value"); } case VPInstruction::Splat: - return State.Builder.CreateVectorSplat(State.VF, - State.get(getOperand(0), true)); + return State.Builder.CreateVectorSplat( + State.VF, State.get(getOperand(0), true), Name); default: llvm_unreachable("Unsupported opcode for instruction"); From 5ad4cc1a2b8f5ca45b7ba265cb1da1b728d2fe4a Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 3 Feb 2025 17:55:55 +0800 Subject: [PATCH 12/28] Allow 
splatting VPScalarCastRecipe directly, remove VPInstruction::splat --- .../Vectorize/LoopVectorizationPlanner.h | 4 - llvm/lib/Transforms/Vectorize/VPlan.cpp | 20 +- llvm/lib/Transforms/Vectorize/VPlan.h | 2 - .../Transforms/Vectorize/VPlanAnalysis.cpp | 2 - .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 10 - .../Transforms/Vectorize/VPlanTransforms.cpp | 7 +- .../AArch64/clamped-trip-count.ll | 12 +- .../AArch64/divs-with-scalable-vfs.ll | 12 +- .../AArch64/outer_loop_prefer_scalable.ll | 8 +- .../AArch64/scalable-avoid-scalarization.ll | 6 +- .../AArch64/simple_early_exit.ll | 12 +- .../AArch64/sve-interleaved-accesses.ll | 47 ++-- .../LoopVectorize/AArch64/sve-tail-folding.ll | 6 +- .../RISCV/blocks-with-dead-instructions.ll | 42 ++-- .../LoopVectorize/RISCV/dead-ops-cost.ll | 24 +- .../LoopVectorize/RISCV/induction-costs.ll | 6 +- .../LoopVectorize/RISCV/mask-index-type.ll | 6 +- .../RISCV/masked_gather_scatter.ll | 12 +- ...ruction-or-drop-poison-generating-flags.ll | 6 +- .../LoopVectorize/RISCV/strided-accesses.ll | 18 +- .../LoopVectorize/RISCV/uniform-load-store.ll | 30 +-- ...rize-force-tail-with-evl-cond-reduction.ll | 24 +- .../X86/epilog-vectorization-inductions.ll | 8 +- .../LoopVectorize/X86/induction-costs.ll | 12 +- .../LoopVectorize/X86/induction-step.ll | 8 +- .../LoopVectorize/X86/scatter_crash.ll | 16 +- .../LoopVectorize/create-induction-resume.ll | 4 +- .../LoopVectorize/float-induction.ll | 36 +-- .../LoopVectorize/induction-step.ll | 42 ++-- .../Transforms/LoopVectorize/induction.ll | 238 +++++++++--------- .../LoopVectorize/iv_outside_user.ll | 24 +- .../optimal-epilog-vectorization.ll | 16 +- .../LoopVectorize/outer_loop_scalable.ll | 6 +- .../pr30654-phiscev-sext-trunc.ll | 12 +- .../LoopVectorize/scalable-inductions.ll | 12 +- 35 files changed, 327 insertions(+), 423 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index 
ae3ca6b0df914..f7c068eaa4d9d 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -265,10 +265,6 @@ class VPBuilder { FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags())); } - VPInstruction *createSplat(VPValue *Val) { - return tryInsertInstruction(new VPInstruction(VPInstruction::Splat, {Val})); - } - VPStepVectorRecipe *createStepVector(Type *Ty) { return tryInsertInstruction(new VPStepVectorRecipe(Ty)); } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 5931ca2df9efe..59bd90b5c2f5f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -313,15 +313,17 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) { LastLane = 0; } - auto *LastInst = cast(get(Def, LastLane)); - // Set the insert point after the last scalarized instruction or after the - // last PHI, if LastInst is a PHI. This ensures the insertelement sequence - // will directly follow the scalar definitions. auto OldIP = Builder.saveIP(); - auto NewIP = isa(LastInst) - ? LastInst->getParent()->getFirstNonPHIIt() - : std::next(BasicBlock::iterator(LastInst)); - Builder.SetInsertPoint(&*NewIP); + auto *LastVal = get(Def, LastLane); + if (auto *LastInst = dyn_cast(LastVal)) { + // Set the insert point after the last scalarized instruction or after the + // last PHI, if LastInst is a PHI. This ensures the insertelement sequence + // will directly follow the scalar definitions. + auto NewIP = isa(LastInst) + ? LastInst->getParent()->getFirstNonPHIIt() + : std::next(BasicBlock::iterator(LastInst)); + Builder.SetInsertPoint(&*NewIP); + } // However, if we are vectorizing, we need to construct the vector values. 
// If the value is known to be uniform after vectorization, we can just @@ -336,7 +338,7 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) { } else { // Initialize packing with insertelements to start from undef. assert(!VF.isScalable() && "VF is assumed to be non scalable."); - Value *Undef = PoisonValue::get(toVectorizedTy(LastInst->getType(), VF)); + Value *Undef = PoisonValue::get(toVectorizedTy(LastVal->getType(), VF)); set(Def, Undef); for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane) packScalarIntoVectorizedValue(Def, Lane); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index c6390a69edd1f..e1e86fe980cc7 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -880,8 +880,6 @@ class VPInstruction : public VPRecipeWithIRFlags, // Extracts the first active lane of a vector, where the first operand is // the predicate, and the second operand is the vector to extract. ExtractFirstActive, - // Splats a scalar value across all lanes. 
- Splat, }; private: diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index ec97774ce37f2..8414b36f3ae22 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -96,8 +96,6 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) { case VPInstruction::BranchOnCond: case VPInstruction::BranchOnCount: return Type::getVoidTy(Ctx); - case VPInstruction::Splat: - return inferScalarType(R->getOperand(0)); default: break; } diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 3fc424a111dd9..a18a112048f1e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -712,9 +712,6 @@ Value *VPInstruction::generate(VPTransformState &State) { Builder.getInt64Ty(), Mask, true, "first.active.lane"); return Builder.CreateExtractElement(Vec, Ctz, "early.exit.value"); } - case VPInstruction::Splat: - return State.Builder.CreateVectorSplat( - State.VF, State.get(getOperand(0), true), Name); default: llvm_unreachable("Unsupported opcode for instruction"); @@ -827,7 +824,6 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const { case VPInstruction::LogicalAnd: case VPInstruction::Not: case VPInstruction::PtrAdd: - case VPInstruction::Splat: return false; default: return true; @@ -855,7 +851,6 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { case VPInstruction::BranchOnCount: case VPInstruction::BranchOnCond: case VPInstruction::ResumePhi: - case VPInstruction::Splat: return true; }; llvm_unreachable("switch should return"); @@ -947,9 +942,6 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, case VPInstruction::ExtractFirstActive: O << "extract-first-active"; break; - case VPInstruction::Splat: - O << "splat"; - break; default: O << Instruction::getOpcodeName(getOpcode()); } @@ -2294,8 
+2286,6 @@ void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent, Value *VPScalarCastRecipe ::generate(VPTransformState &State) { State.setDebugLocFrom(getDebugLoc()); - assert(vputils::onlyFirstLaneUsed(this) && - "Codegen only implemented for first lane."); switch (Opcode) { case Instruction::SExt: case Instruction::ZExt: diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 8861f499d7e2f..cb9dc7da2a656 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2089,9 +2089,8 @@ expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR, // FIXME: The newly created binary instructions should contain nsw/nuw // flags, which can be found from the original scalar operations. - Init = Builder.createNaryOp(MulOp, {Init, Builder.createSplat(Step)}, FMFs); - Init = Builder.createNaryOp(AddOp, {Builder.createSplat(Start), Init}, FMFs, - {}, "induction"); + Init = Builder.createNaryOp(MulOp, {Init, Step}, FMFs); + Init = Builder.createNaryOp(AddOp, {Start, Init}, FMFs, {}, "induction"); // Create the widened phi of the vector IV. 
auto *WidePHI = @@ -2115,7 +2114,7 @@ expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR, else if (Ty != TypeInfo.inferScalarType(VF)) VF = Builder.createScalarCast(Instruction::CastOps::Trunc, VF, Ty, DL); - Inc = Builder.createSplat(Builder.createNaryOp(MulOp, {Step, VF}, FMFs)); + Inc = Builder.createNaryOp(MulOp, {Step, VF}, FMFs); Prev = WidePHI; } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll index 8a372d134eebb..aa27330e47b5c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll @@ -20,9 +20,9 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1 ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP7:%.*]] = mul [[TMP8]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP6]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = mul splat (i64 1), [[DOTSPLAT]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[VAL]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -39,7 +39,7 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1 ; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0( [[TMP14]], ptr [[TMP17]], i32 1, [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 8) -; CHECK-NEXT: [[VEC_IND_NEXT]] 
= add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP9]] ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] @@ -106,9 +106,9 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP7:%.*]] = mul [[TMP8]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP6]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = mul splat (i64 1), [[DOTSPLAT]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[VAL]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -125,7 +125,7 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range ; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0( [[TMP14]], ptr [[TMP17]], i32 1, [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 [[WIDE_TRIP_COUNT]]) -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP9]] ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] diff --git 
a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll index 96283637b34bf..40719fa47393f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll @@ -127,9 +127,9 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i ; CHECK-NEXT: [[TMP15:%.*]] = call @llvm.stepvector.nxv2i64() ; CHECK-NEXT: [[TMP17:%.*]] = mul [[TMP15]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP17]] -; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP20]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = mul splat (i64 1), [[DOTSPLAT]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[M]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -156,7 +156,7 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP14]]) ; CHECK-NEXT: [[TMP36:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP18]] ; CHECK-NEXT: [[TMP37:%.*]] = extractelement [[TMP36]], i32 0 ; CHECK-NEXT: br i1 [[TMP37]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -251,9 +251,9 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) { ; CHECK-NEXT: 
[[TMP15:%.*]] = call @llvm.stepvector.nxv2i64() ; CHECK-NEXT: [[TMP17:%.*]] = mul [[TMP15]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP17]] -; CHECK-NEXT: [[TMP18:%.*]] = mul i64 1, [[TMP9]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP18]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = mul splat (i64 1), [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i64 [[MUL_2_I]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -283,7 +283,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) { ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP14]]) ; CHECK-NEXT: [[TMP47:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP18]] ; CHECK-NEXT: [[TMP48:%.*]] = extractelement [[TMP47]], i32 0 ; CHECK-NEXT: br i1 [[TMP48]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll index 3302103873bd3..71e07f01651cd 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll @@ -23,9 +23,9 @@ define void @foo() { ; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP7:%.*]] = mul [[TMP6]], splat 
(i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] -; CHECK-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP5]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = mul splat (i64 1), [[BROADCAST_SPLAT]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_LATCH:%.*]] ] @@ -47,7 +47,7 @@ define void @foo() { ; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi [ [[TMP11]], [[INNER_LOOP1]] ] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0( [[VEC_PHI4]], [[TMP9]], i32 4, splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP8]] ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll index 4074210a95b59..bd4eaad9fb099 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll @@ -31,9 +31,9 @@ define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[IDX]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: 
[[INDUCTION:%.*]] = add [[DOTSPLAT]], [[TMP10]] -; CHECK-NEXT: [[TMP13:%.*]] = mul i32 1, [[TMP7]] -; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP13]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP7]], i64 0 ; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector [[DOTSPLATINSERT1]], poison, zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = mul splat (i32 1), [[DOTSPLAT2]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -48,7 +48,7 @@ define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0 ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr double, ptr [[TMP18]], i32 0 ; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP19]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP7]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT2]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP11]] ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll index 42600a132c2c3..9d08613a06d4a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll @@ -28,9 +28,9 @@ define i64 @same_exit_block_pre_inc_use1() #1 { ; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv16i64() ; CHECK-NEXT: [[TMP8:%.*]] = mul [[TMP7]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add splat (i64 3), [[TMP8]] -; CHECK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 ; 
CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = mul splat (i64 1), [[DOTSPLAT]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ] @@ -49,7 +49,7 @@ define i64 @same_exit_block_pre_inc_use1() #1 { ; CHECK-NEXT: [[TMP17:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1( [[TMP16]]) ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT3]], [[N_VEC]] ; CHECK-NEXT: [[TMP19:%.*]] = or i1 [[TMP17]], [[TMP18]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP9]] ; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.split: ; CHECK-NEXT: br i1 [[TMP17]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]] @@ -286,9 +286,9 @@ define i64 @loop_contains_safe_div() #1 { ; CHECK-NEXT: [[TMP16:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP17:%.*]] = mul [[TMP16]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add splat (i64 3), [[TMP17]] -; CHECK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = mul splat (i64 1), [[DOTSPLAT]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] @@ -305,7 +305,7 @@ define i64 @loop_contains_safe_div() #1 { ; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP15]]) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[INDEX1]] ; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] -; CHECK-NEXT: 
[[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP9]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.split: ; CHECK-NEXT: br i1 [[TMP6]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll index 08146a737eb1b..da1782be43af5 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll @@ -108,9 +108,9 @@ define void @test_array_load2_i16_store2(i32 %C, i32 %D) #1 { ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP3:%.*]] = shl [[TMP2]], splat (i64 1) -; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP0]], 3 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP4]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP1]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = shl [[DOTSPLAT]], splat (i64 1) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[C:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, i32 [[D:%.*]], i64 0 @@ -133,7 +133,7 @@ define void @test_array_load2_i16_store2(i32 %C, i32 %D) #1 { ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv8i32( [[TMP10]], [[TMP12]]) ; CHECK-NEXT: store [[INTERLEAVED_VEC]], ptr [[TMP14]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: 
[[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP4]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -197,9 +197,9 @@ define void @test_array_load2_store2_i16(i32 noundef %C, i32 noundef %D) #1 { ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP3:%.*]] = shl [[TMP2]], splat (i64 1) -; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP0]], 3 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP4]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP1]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = shl [[DOTSPLAT]], splat (i64 1) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[C:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[D:%.*]], i64 0 @@ -224,7 +224,7 @@ define void @test_array_load2_store2_i16(i32 noundef %C, i32 noundef %D) #1 { ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x i16], ptr @CD_i16, i64 0, [[TMP9]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0( [[TMP14]], [[TMP15]], i32 2, splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP4]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: @@ -379,8 +379,7 @@ define void @test_reversed_load2_store2(ptr noalias nocapture readonly %A, ptr n ; CHECK-NEXT: [[TMP2:%.*]] = call 
@llvm.stepvector.nxv4i32() ; CHECK-NEXT: [[INDUCTION:%.*]] = sub splat (i32 1023), [[TMP2]] ; CHECK-NEXT: [[TMP3:%.*]] = trunc nuw nsw i64 [[TMP1]] to i32 -; CHECK-NEXT: [[DOTNEG:%.*]] = sub nsw i32 0, [[TMP3]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[DOTNEG]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP3]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -412,7 +411,7 @@ define void @test_reversed_load2_store2(ptr noalias nocapture readonly %A, ptr n ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv8i32( [[REVERSE2]], [[REVERSE3]]) ; CHECK-NEXT: store [[INTERLEAVED_VEC]], ptr [[TMP19]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = sub [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: @@ -637,8 +636,7 @@ define void @load_gap_reverse(ptr noalias nocapture readonly %P1, ptr noalias no ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[INDUCTION:%.*]] = sub splat (i64 1023), [[TMP2]] -; CHECK-NEXT: [[DOTNEG:%.*]] = sub nsw i64 0, [[TMP1]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[DOTNEG]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP1]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[X:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer @@ -654,7 
+652,7 @@ define void @load_gap_reverse(ptr noalias nocapture readonly %P1, ptr noalias no ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i64.nxv4p0( [[TMP4]], [[TMP5]], i32 8, splat (i1 true)) ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i64.nxv4p0( [[TMP7]], [[TMP6]], i32 8, splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = sub [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: @@ -1253,9 +1251,9 @@ define void @PR27626_4(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1 ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP9:%.*]] = shl [[TMP8]], splat (i64 1) -; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP6]], 3 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP7]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = shl [[DOTSPLAT]], splat (i64 1) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[X:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[Y:%.*]], i64 0 @@ -1272,7 +1270,7 @@ define void @PR27626_4(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv8i32( [[BROADCAST_SPLAT2]], [[BROADCAST_SPLAT4]]) ; CHECK-NEXT: store [[INTERLEAVED_VEC]], ptr [[P]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add 
[[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP11]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; CHECK: middle.block: @@ -1346,9 +1344,9 @@ define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP10:%.*]] = shl [[TMP9]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add [[TMP10]], splat (i64 3) -; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i64 [[TMP7]], 3 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = shl [[DOTSPLAT]], splat (i64 1) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[X:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[Y:%.*]], i64 0 @@ -1368,7 +1366,7 @@ define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[BROADCAST_SPLAT2]], [[TMP17]], i32 4, splat (i1 true)) ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[BROADCAST_SPLAT4]], [[TMP15]], i32 4, splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP12]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] ; CHECK: middle.block: @@ -1454,9 +1452,9 @@ define void @PR34743(ptr %a, ptr %b, 
i64 %n) #1 { ; CHECK-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1 ; CHECK-NEXT: [[TMP14:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP15:%.*]] = shl [[TMP14]], splat (i64 1) -; CHECK-NEXT: [[TMP16:%.*]] = shl nuw nsw i64 [[TMP9]], 3 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP16]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP10]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = shl [[DOTSPLAT]], splat (i64 1) ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i32 [[TMP11]], 2 ; CHECK-NEXT: [[TMP13:%.*]] = add nsw i32 [[TMP12]], -1 @@ -1466,7 +1464,7 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_MASKED_GATHER4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP15]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP18:%.*]] = add nuw nsw [[VEC_IND]], splat (i64 1) +; CHECK-NEXT: [[TMP18:%.*]] = or disjoint [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP19:%.*]] = add nuw nsw [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i16, ptr [[A]], [[TMP18]] ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i16.nxv4p0( [[TMP20]], i32 4, splat (i1 true), poison), !alias.scope [[META34:![0-9]+]] @@ -1481,7 +1479,7 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 { ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: store [[TMP27]], ptr [[TMP28]], align 4, !alias.scope [[META37:![0-9]+]], !noalias [[META34]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; 
CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP16]] ; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP39:![0-9]+]] ; CHECK: middle.block: @@ -1715,8 +1713,7 @@ define void @interleave_deinterleave_reverse(ptr noalias nocapture readonly %A, ; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i32() ; CHECK-NEXT: [[INDUCTION:%.*]] = sub splat (i32 1023), [[TMP2]] ; CHECK-NEXT: [[TMP3:%.*]] = trunc nuw nsw i64 [[TMP1]] to i32 -; CHECK-NEXT: [[TMP4:%.*]] = sub nsw i32 0, [[TMP3]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP4]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP3]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -1762,7 +1759,7 @@ define void @interleave_deinterleave_reverse(ptr noalias nocapture readonly %A, ; CHECK-NEXT: [[INTERLEAVED_VEC11:%.*]] = call @llvm.vector.interleave2.nxv16i32( [[INTERLEAVED_VEC]], [[INTERLEAVED_VEC10]]) ; CHECK-NEXT: store [[INTERLEAVED_VEC11]], ptr [[TMP26]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = sub [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll index 75b2df93c9350..a4a57a186ebed 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll @@ -227,9 +227,9 @@ define void @copy_stride4(ptr noalias %dst, ptr 
noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[TMP13:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP15:%.*]] = mul [[TMP13]], splat (i64 4) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP15]] -; CHECK-NEXT: [[TMP18:%.*]] = mul i64 4, [[TMP7]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP18]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP7]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = mul splat (i64 4), [[DOTSPLAT]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] @@ -242,7 +242,7 @@ define void @copy_stride4(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP7]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP12]]) ; CHECK-NEXT: [[TMP21:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP16]] ; CHECK-NEXT: [[TMP22:%.*]] = extractelement [[TMP21]], i32 0 ; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll index 109de3e14da0b..bfe42fa9d02e1 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll @@ -28,9 +28,9 @@ define void @block_with_dead_inst_1(ptr %src, i64 %N) #0 { ; CHECK-NEXT: [[TMP11:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP13:%.*]] = mul [[TMP11]], splat (i64 3) ; CHECK-NEXT: 
[[INDUCTION:%.*]] = add zeroinitializer, [[TMP13]] -; CHECK-NEXT: [[TMP14:%.*]] = mul i64 3, [[TMP10]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP14]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP10]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = mul splat (i64 3), [[DOTSPLAT]] ; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[TMP17]], 8 ; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1 @@ -43,7 +43,7 @@ define void @block_with_dead_inst_1(ptr %src, i64 %N) #0 { ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i16, ptr [[SRC]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv8i16.nxv8p0( zeroinitializer, [[TMP20]], i32 2, splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP14]] ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -122,9 +122,9 @@ define void @block_with_dead_inst_2(ptr %src) #0 { ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP10:%.*]] = mul [[TMP8]], splat (i64 3) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP10]] -; CHECK-NEXT: [[TMP11:%.*]] = mul i64 3, [[TMP7]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP7]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = mul splat (i64 3), [[DOTSPLAT]] ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 4 ; CHECK-NEXT: 
[[TMP16:%.*]] = sub i32 [[TMP15]], 1 @@ -137,7 +137,7 @@ define void @block_with_dead_inst_2(ptr %src) #0 { ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i16, ptr [[SRC]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0( zeroinitializer, [[TMP17]], i32 2, splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP11]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -216,9 +216,9 @@ define void @multiple_blocks_with_dead_insts_3(ptr %src) #0 { ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP10:%.*]] = mul [[TMP8]], splat (i64 3) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP10]] -; CHECK-NEXT: [[TMP11:%.*]] = mul i64 3, [[TMP7]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP7]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = mul splat (i64 3), [[DOTSPLAT]] ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 4 ; CHECK-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], 1 @@ -231,7 +231,7 @@ define void @multiple_blocks_with_dead_insts_3(ptr %src) #0 { ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i16, ptr [[SRC]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0( zeroinitializer, [[TMP17]], i32 2, splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP11]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 
[[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -320,9 +320,9 @@ define void @multiple_blocks_with_dead_insts_4(ptr %src, i64 %N) #0 { ; CHECK-NEXT: [[TMP11:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP13:%.*]] = mul [[TMP11]], splat (i64 3) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP13]] -; CHECK-NEXT: [[TMP14:%.*]] = mul i64 3, [[TMP10]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP14]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP10]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = mul splat (i64 3), [[DOTSPLAT]] ; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[TMP17]], 8 ; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1 @@ -335,7 +335,7 @@ define void @multiple_blocks_with_dead_insts_4(ptr %src, i64 %N) #0 { ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i16, ptr [[SRC]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv8i16.nxv8p0( zeroinitializer, [[TMP20]], i32 2, splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP14]] ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -426,9 +426,9 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_5(ptr %src) #0 { ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP10:%.*]] = mul [[TMP8]], splat (i64 3) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP10]] -; CHECK-NEXT: [[TMP11:%.*]] = mul i64 3, [[TMP7]] -; CHECK-NEXT: 
[[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP7]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = mul splat (i64 3), [[DOTSPLAT]] ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 4 ; CHECK-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], 1 @@ -441,7 +441,7 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_5(ptr %src) #0 { ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i16, ptr [[SRC]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0( zeroinitializer, [[TMP17]], i32 2, splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP11]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -540,9 +540,9 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_6(ptr %src, i1 % ; CHECK-NEXT: [[TMP11:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP13:%.*]] = mul [[TMP11]], splat (i64 3) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP13]] -; CHECK-NEXT: [[TMP14:%.*]] = mul i64 3, [[TMP10]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP14]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP10]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = mul splat (i64 3), [[DOTSPLAT]] ; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[TMP17]], 8 ; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1 @@ -555,7 +555,7 @@ define void 
@multiple_blocks_with_dead_inst_multiple_successors_6(ptr %src, i1 % ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i16, ptr [[SRC]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv8i16.nxv8p0( zeroinitializer, [[TMP20]], i32 2, splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP14]] ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -857,9 +857,9 @@ define void @dead_load_in_block(ptr %dst, ptr %src, i8 %N, i64 %x) #0 { ; CHECK-NEXT: [[TMP18:%.*]] = call @llvm.stepvector.nxv2i64() ; CHECK-NEXT: [[TMP20:%.*]] = mul [[TMP18]], splat (i64 3) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP20]] -; CHECK-NEXT: [[TMP23:%.*]] = mul i64 3, [[TMP17]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP23]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP17]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = mul splat (i64 3), [[DOTSPLAT]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -867,7 +867,7 @@ define void @dead_load_in_block(ptr %dst, ptr %src, i8 %N, i64 %x) #0 { ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[DST]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i32.nxv2p0( zeroinitializer, [[TMP24]], i32 4, splat (i1 true)), !alias.scope [[META18:![0-9]+]], !noalias [[META21:![0-9]+]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP17]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], 
[[TMP21]] ; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll index 8475004091f3f..eefe570d68056 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll @@ -38,9 +38,9 @@ define void @dead_load(ptr %p, i16 %start) { ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[START_EXT]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add [[DOTSPLAT]], [[TMP17]] -; CHECK-NEXT: [[TMP20:%.*]] = mul i64 3, [[TMP14]] -; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement poison, i64 [[TMP20]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement poison, i64 [[TMP14]], i64 0 ; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector [[DOTSPLATINSERT1]], poison, zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = mul splat (i64 3), [[DOTSPLAT2]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -48,7 +48,7 @@ define void @dead_load(ptr %p, i16 %start) { ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[P]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv8i16.nxv8p0( zeroinitializer, [[TMP21]], i32 2, splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP14]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT2]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP19]] ; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: 
[[MIDDLE_BLOCK]]: @@ -117,9 +117,9 @@ define i8 @dead_live_out_due_to_scalar_epilogue_required(ptr %src, ptr %dst) { ; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.stepvector.nxv4i32() ; CHECK-NEXT: [[TMP11:%.*]] = mul [[TMP9]], splat (i32 4) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP11]] -; CHECK-NEXT: [[TMP14:%.*]] = mul i32 4, [[TMP8]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP14]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP8]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = mul splat (i32 4), [[DOTSPLAT]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -128,7 +128,7 @@ define i8 @dead_live_out_due_to_scalar_epilogue_required(ptr %src, ptr %dst) { ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[DST]], [[TMP15]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i8.nxv4p0( zeroinitializer, [[TMP16]], i32 1, splat (i1 true)), !alias.scope [[META4:![0-9]+]], !noalias [[META7:![0-9]+]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP8]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP12]] ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -335,9 +335,9 @@ define void @test_phi_in_latch_redundant(ptr %dst, i32 %a) { ; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i64() ; CHECK-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i64 9) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] -; CHECK-NEXT: [[TMP9:%.*]] = mul i64 9, [[TMP5]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 +; CHECK-NEXT: 
[[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = mul splat (i64 9), [[DOTSPLAT]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -345,7 +345,7 @@ define void @test_phi_in_latch_redundant(ptr %dst, i32 %a) { ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[DST]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i32.nxv2p0( [[TMP10]], [[TMP11]], i32 4, splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP13]] ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -417,9 +417,9 @@ define void @gather_interleave_group_with_dead_insert_pos(i64 %N, ptr noalias %s ; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP11:%.*]] = mul [[TMP9]], splat (i64 2) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP11]] -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 2, [[TMP8]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = mul splat (i64 2), [[DOTSPLAT]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -435,7 +435,7 @@ define void @gather_interleave_group_with_dead_insert_pos(i64 %N, ptr noalias %s ; CHECK-NEXT: 
[[TMP19:%.*]] = getelementptr i32, ptr [[DST]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP18]], [[TMP19]], i32 4, splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP12]] ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll index 396b54995fe2a..59463191b5ada 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll @@ -75,9 +75,9 @@ define void @skip_free_iv_truncate(i16 %x, ptr %A) #0 { ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[X_I64]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add [[DOTSPLAT]], [[TMP55]] -; CHECK-NEXT: [[TMP58:%.*]] = mul i64 3, [[TMP52]] -; CHECK-NEXT: [[DOTSPLATINSERT24:%.*]] = insertelement poison, i64 [[TMP58]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT24:%.*]] = insertelement poison, i64 [[TMP52]], i64 0 ; CHECK-NEXT: [[DOTSPLAT25:%.*]] = shufflevector [[DOTSPLATINSERT24]], poison, zeroinitializer +; CHECK-NEXT: [[TMP54:%.*]] = mul splat (i64 3), [[DOTSPLAT25]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -85,7 +85,7 @@ define void @skip_free_iv_truncate(i16 %x, ptr %A) #0 { ; CHECK-NEXT: [[TMP59:%.*]] = getelementptr i16, ptr [[A]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv8i16.nxv8p0( zeroinitializer, [[TMP59]], i32 2, splat (i1 true)), 
!alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP52]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT25]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP54]] ; CHECK-NEXT: [[TMP60:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP60]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll index c00868c3e0be2..cea04be41a5d1 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll @@ -25,9 +25,9 @@ define void @test(ptr noalias nocapture %a, ptr noalias nocapture %b, i32 %v) { ; VLENUNK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i64() ; VLENUNK-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i64 1) ; VLENUNK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] -; VLENUNK-NEXT: [[TMP11:%.*]] = mul i64 1, [[TMP5]] -; VLENUNK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 +; VLENUNK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 ; VLENUNK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; VLENUNK-NEXT: [[TMP9:%.*]] = mul splat (i64 1), [[DOTSPLAT]] ; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[V:%.*]], i64 0 ; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -46,7 +46,7 @@ define void @test(ptr noalias nocapture %a, ptr noalias nocapture %b, i32 %v) { ; VLENUNK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 0 ; VLENUNK-NEXT: store [[TMP17]], ptr [[TMP19]], align 4 ; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; VLENUNK-NEXT: 
[[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; VLENUNK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP9]] ; VLENUNK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VLENUNK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VLENUNK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll index 2276b592aac8a..8b01a4b4cd904 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll @@ -44,9 +44,9 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV32-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv2i64() ; RV32-NEXT: [[TMP9:%.*]] = mul [[TMP7]], splat (i64 16) ; RV32-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP9]] -; RV32-NEXT: [[TMP12:%.*]] = mul i64 16, [[TMP6]] -; RV32-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 +; RV32-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP6]], i64 0 ; RV32-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; RV32-NEXT: [[TMP10:%.*]] = mul splat (i64 16), [[DOTSPLAT]] ; RV32-NEXT: br label [[VECTOR_BODY:%.*]] ; RV32: vector.body: ; RV32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -62,7 +62,7 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV32-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[A]], [[VEC_IND]] ; RV32-NEXT: call void @llvm.masked.scatter.nxv2f64.nxv2p0( [[TMP18]], [[TMP19]], i32 8, [[TMP14]]), !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]] ; RV32-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; RV32-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; RV32-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP10]] ; RV32-NEXT: 
[[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; RV32-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; RV32: middle.block: @@ -123,9 +123,9 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV64-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv2i64() ; RV64-NEXT: [[TMP9:%.*]] = mul [[TMP7]], splat (i64 16) ; RV64-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP9]] -; RV64-NEXT: [[TMP12:%.*]] = mul i64 16, [[TMP6]] -; RV64-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 +; RV64-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP6]], i64 0 ; RV64-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; RV64-NEXT: [[TMP10:%.*]] = mul splat (i64 16), [[DOTSPLAT]] ; RV64-NEXT: br label [[VECTOR_BODY:%.*]] ; RV64: vector.body: ; RV64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -141,7 +141,7 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV64-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[A]], [[VEC_IND]] ; RV64-NEXT: call void @llvm.masked.scatter.nxv2f64.nxv2p0( [[TMP18]], [[TMP19]], i32 8, [[TMP14]]), !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]] ; RV64-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; RV64-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; RV64-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP10]] ; RV64-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; RV64-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; RV64: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll index ae6e833da01cf..49b36f9845735 100644 --- 
a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll @@ -23,9 +23,9 @@ define void @pr87378_vpinstruction_or_drop_poison_generating_flags(ptr %arg, i64 ; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] -; CHECK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = mul splat (i64 1), [[DOTSPLAT]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[A]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[B]], i64 0 @@ -53,7 +53,7 @@ define void @pr87378_vpinstruction_or_drop_poison_generating_flags(ptr %arg, i64 ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i16, ptr [[TMP24]], i32 0 ; CHECK-NEXT: call void @llvm.masked.store.nxv8i16.p0( zeroinitializer, ptr [[TMP25]], i32 2, [[TMP22]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP9]] ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll index 6a5a0eb3e54b0..a656ce1f06537 100644 --- 
a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll @@ -22,9 +22,9 @@ define void @single_constant_stride_int_scaled(ptr %p) { ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP10:%.*]] = mul [[TMP8]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP10]] -; CHECK-NEXT: [[TMP13:%.*]] = mul i64 1, [[TMP7]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP13]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP7]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = mul splat (i64 1), [[DOTSPLAT]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -35,7 +35,7 @@ define void @single_constant_stride_int_scaled(ptr %p) { ; CHECK-NEXT: [[TMP16:%.*]] = add [[WIDE_MASKED_GATHER]], splat (i32 1) ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP16]], [[TMP15]], i32 4, splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP11]] ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -92,9 +92,9 @@ define void @single_constant_stride_int_iv(ptr %p) { ; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i64 64) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] -; CHECK-NEXT: [[TMP11:%.*]] = mul i64 64, [[TMP5]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = 
insertelement poison, i64 [[TMP5]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = mul splat (i64 64), [[DOTSPLAT]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -104,7 +104,7 @@ define void @single_constant_stride_int_iv(ptr %p) { ; CHECK-NEXT: [[TMP13:%.*]] = add [[WIDE_MASKED_GATHER]], splat (i32 1) ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP13]], [[TMP12]], i32 4, splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP9]] ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -560,9 +560,9 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-NEXT: [[TMP12:%.*]] = call @llvm.stepvector.nxv4i64() ; STRIDED-NEXT: [[TMP14:%.*]] = mul [[TMP12]], splat (i64 1) ; STRIDED-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP14]] -; STRIDED-NEXT: [[TMP15:%.*]] = mul i64 1, [[TMP11]] -; STRIDED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP15]], i64 0 +; STRIDED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 ; STRIDED-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; STRIDED-NEXT: [[TMP15:%.*]] = mul splat (i64 1), [[DOTSPLAT]] ; STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[STRIDE]], i64 0 ; STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] @@ -576,7 +576,7 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) { 
; STRIDED-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[P2]], [[TMP18]] ; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP20]], [[TMP21]], i32 4, splat (i1 true)), !alias.scope [[META11:![0-9]+]], !noalias [[META8]] ; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] -; STRIDED-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; STRIDED-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP15]] ; STRIDED-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; STRIDED-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; STRIDED: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll index 219e1fe05ebf7..2e1afb0c9dd28 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll @@ -325,9 +325,9 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; SCALABLE-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i64() ; SCALABLE-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i64 1) ; SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] -; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]] -; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 +; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 ; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; SCALABLE-NEXT: [[TMP9:%.*]] = mul splat (i64 1), [[DOTSPLAT]] ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B:%.*]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] @@ -342,7 +342,7 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; SCALABLE-NEXT: 
[[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0 ; SCALABLE-NEXT: store [[PREDPHI]], ptr [[TMP13]], align 8 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP9]] ; SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; SCALABLE: middle.block: @@ -432,9 +432,9 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv2i64() ; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul [[TMP5]], splat (i64 1) ; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] -; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP4]] -; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 +; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP4]], i64 0 ; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul splat (i64 1), [[DOTSPLAT]] ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B:%.*]], i64 0 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] @@ -451,7 +451,7 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; TF-SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0 ; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr [[TMP13]], i32 8, [[ACTIVE_LANE_MASK]]) ; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] -; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP8]] ; 
TF-SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TF-SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; TF-SCALABLE: middle.block: @@ -995,9 +995,9 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv2i64() ; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul [[TMP5]], splat (i64 1) ; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] -; TF-SCALABLE-NEXT: [[V:%.*]] = mul i64 1, [[TMP4]] -; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[V]], i64 0 +; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[TMP4]], i64 0 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer +; TF-SCALABLE-NEXT: [[TMP13:%.*]] = mul splat (i64 1), [[BROADCAST_SPLAT2]] ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B:%.*]], i64 0 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8:%.*]], i64 0 @@ -1013,7 +1013,7 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; TF-SCALABLE-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 0 ; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[DOTSPLAT]], ptr [[TMP11]], i32 8, [[ACTIVE_LANE_MASK]]) ; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] -; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] +; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP13]] ; TF-SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TF-SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; TF-SCALABLE: middle.block: @@ 
-1125,9 +1125,9 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; SCALABLE-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i64() ; SCALABLE-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i64 1) ; SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] -; SCALABLE-NEXT: [[V:%.*]] = mul i64 1, [[TMP5]] -; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V]], i64 0 +; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; SCALABLE-NEXT: [[TMP15:%.*]] = mul splat (i64 1), [[BROADCAST_SPLAT]] ; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9:%.*]], i64 0 ; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, ptr [[B:%.*]], i64 0 @@ -1143,7 +1143,7 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0 ; SCALABLE-NEXT: store [[DOTSPLAT]], ptr [[TMP13]], align 8 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] +; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP15]] ; SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; SCALABLE: middle.block: @@ -1231,9 +1231,9 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv2i64() ; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul [[TMP5]], splat (i64 1) ; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] -; TF-SCALABLE-NEXT: [[V:%.*]] = mul i64 1, [[TMP4]] -; 
TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V]], i64 0 +; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP4]], i64 0 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; TF-SCALABLE-NEXT: [[TMP15:%.*]] = mul splat (i64 1), [[BROADCAST_SPLAT]] ; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8:%.*]], i64 0 ; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, ptr [[B:%.*]], i64 0 @@ -1251,7 +1251,7 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; TF-SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0 ; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[DOTSPLAT]], ptr [[TMP13]], i32 8, [[ACTIVE_LANE_MASK]]) ; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] -; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] +; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP15]] ; TF-SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TF-SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; TF-SCALABLE: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll index 8893956bb3384..1851fd1c6e11b 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll @@ -605,9 +605,9 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul [[TMP12]], splat (i32 1) ; NO-VP-OUTLOOP-NEXT: 
[[INDUCTION:%.*]] = add zeroinitializer, [[TMP14]] ; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP10]] to i32 -; NO-VP-OUTLOOP-NEXT: [[TMP17:%.*]] = mul i32 1, [[TMP16]] -; NO-VP-OUTLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP17]], i64 0 +; NO-VP-OUTLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP16]], i64 0 ; NO-VP-OUTLOOP-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; NO-VP-OUTLOOP-NEXT: [[TMP13:%.*]] = mul splat (i32 1), [[DOTSPLAT]] ; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement zeroinitializer, i32 [[START]], i32 0 ; NO-VP-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP-OUTLOOP: vector.body: @@ -622,7 +622,7 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: [[TMP22:%.*]] = select [[TMP27]], [[WIDE_MASKED_LOAD]], zeroinitializer ; NO-VP-OUTLOOP-NEXT: [[TMP23]] = add [[TMP22]], [[VEC_PHI]] ; NO-VP-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] -; NO-VP-OUTLOOP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; NO-VP-OUTLOOP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP13]] ; NO-VP-OUTLOOP-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-OUTLOOP-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; NO-VP-OUTLOOP: middle.block: @@ -667,9 +667,9 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i32 1) ; NO-VP-INLOOP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] ; NO-VP-INLOOP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP5]] to i32 -; NO-VP-INLOOP-NEXT: [[TMP11:%.*]] = mul i32 1, [[TMP10]] -; NO-VP-INLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP11]], i64 0 +; NO-VP-INLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP10]], i64 0 ; NO-VP-INLOOP-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; 
NO-VP-INLOOP-NEXT: [[TMP9:%.*]] = mul splat (i32 1), [[DOTSPLAT]] ; NO-VP-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP-INLOOP: vector.body: ; NO-VP-INLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -684,7 +684,7 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP16]]) ; NO-VP-INLOOP-NEXT: [[TMP18]] = add i32 [[TMP17]], [[VEC_PHI]] ; NO-VP-INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; NO-VP-INLOOP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; NO-VP-INLOOP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP9]] ; NO-VP-INLOOP-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-INLOOP-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; NO-VP-INLOOP: middle.block: @@ -797,9 +797,9 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul [[TMP12]], splat (i32 1) ; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP14]] ; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP10]] to i32 -; NO-VP-OUTLOOP-NEXT: [[TMP17:%.*]] = mul i32 1, [[TMP16]] -; NO-VP-OUTLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP17]], i64 0 +; NO-VP-OUTLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP16]], i64 0 ; NO-VP-OUTLOOP-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; NO-VP-OUTLOOP-NEXT: [[TMP13:%.*]] = mul splat (i32 1), [[DOTSPLAT]] ; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement zeroinitializer, i32 [[START]], i32 0 ; NO-VP-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP-OUTLOOP: vector.body: @@ -815,7 +815,7 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: [[TMP23:%.*]] = xor [[TMP28]], splat (i1 true) ; NO-VP-OUTLOOP-NEXT: [[PREDPHI]] = select [[TMP23]], [[VEC_PHI]], 
[[TMP22]] ; NO-VP-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] -; NO-VP-OUTLOOP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; NO-VP-OUTLOOP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP13]] ; NO-VP-OUTLOOP-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-OUTLOOP-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; NO-VP-OUTLOOP: middle.block: @@ -864,9 +864,9 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i32 1) ; NO-VP-INLOOP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] ; NO-VP-INLOOP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP5]] to i32 -; NO-VP-INLOOP-NEXT: [[TMP11:%.*]] = mul i32 1, [[TMP10]] -; NO-VP-INLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP11]], i64 0 +; NO-VP-INLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP10]], i64 0 ; NO-VP-INLOOP-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; NO-VP-INLOOP-NEXT: [[TMP9:%.*]] = mul splat (i32 1), [[DOTSPLAT]] ; NO-VP-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP-INLOOP: vector.body: ; NO-VP-INLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -881,7 +881,7 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP16]]) ; NO-VP-INLOOP-NEXT: [[TMP18]] = add i32 [[TMP17]], [[VEC_PHI]] ; NO-VP-INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; NO-VP-INLOOP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; NO-VP-INLOOP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP9]] ; NO-VP-INLOOP-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-INLOOP-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; NO-VP-INLOOP: middle.block: diff --git 
a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll index 5c1cb1da120c0..47136d0f73a0a 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll @@ -150,9 +150,7 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l, ; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <16 x i16> poison, i16 [[TMP0]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[DOTSPLATINSERT2]], <16 x i16> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = mul <16 x i16> splat (i16 16), [[TMP2]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[TMP0]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i16> [[DOTSPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = mul <16 x i16> , [[DOTSPLAT]] +; CHECK-NEXT: [[TMP11:%.*]] = mul <16 x i16> , [[TMP2]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add <16 x i16> zeroinitializer, [[TMP11]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[OFF]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer @@ -203,9 +201,7 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l, ; CHECK-NEXT: [[DOTSPLATINSERT15:%.*]] = insertelement <8 x i16> poison, i16 [[BC_RESUME_VAL]], i64 0 ; CHECK-NEXT: [[DOTSPLAT16:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT15]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION17:%.*]] = add <8 x i16> [[DOTSPLAT16]], [[TMP14]] -; CHECK-NEXT: [[TMP15:%.*]] = mul i16 [[TMP0]], 8 -; CHECK-NEXT: [[DOTSPLATINSERT18:%.*]] = insertelement <8 x i16> poison, i16 [[TMP15]], i64 0 -; CHECK-NEXT: [[DOTSPLAT19:%.*]] = shufflevector <8 x i16> 
[[DOTSPLATINSERT18]], <8 x i16> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLAT19:%.*]] = mul <8 x i16> [[DOTSPLAT14]], splat (i16 8) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT22:%.*]] = insertelement <8 x i16> poison, i16 [[OFF]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT23:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT22]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll index 45d1385fd6fa9..2f6276172f296 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll @@ -719,9 +719,7 @@ define void @wombat(i32 %arg, ptr %dst) #1 { ; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> poison, i32 [[MUL]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP1]] -; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[ARG]], 8 -; CHECK-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <8 x i32> poison, i32 [[TMP2]], i64 0 -; CHECK-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT3]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLAT4:%.*]] = mul <8 x i32> [[DOTSPLAT2]], splat (i32 8) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -793,9 +791,7 @@ define void @wombat2(i32 %arg, ptr %dst) #1 { ; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> poison, i32 [[MUL]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP1]] -; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[ARG]], 8 -; 
CHECK-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <8 x i32> poison, i32 [[TMP2]], i64 0 -; CHECK-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT3]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLAT4:%.*]] = mul <8 x i32> [[DOTSPLAT2]], splat (i32 8) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -870,9 +866,7 @@ define void @with_dead_use(i32 %arg, ptr %dst) #1 { ; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> poison, i32 [[MUL]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP1]] -; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[ARG]], 8 -; CHECK-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <8 x i32> poison, i32 [[TMP2]], i64 0 -; CHECK-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT3]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLAT4:%.*]] = mul <8 x i32> [[DOTSPLAT2]], splat (i32 8) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll index 1dd2692ba6822..c78cf3c586788 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll @@ -17,9 +17,7 @@ define i16 @wide_add_induction_step_live_in(ptr %dst, i64 %N, i16 %off) { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[O_1]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i16> splat (i16 
4), [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[O_1]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i16> [[DOTSPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i16> , [[DOTSPLAT]] +; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i16> , [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i16> zeroinitializer, [[TMP2]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -91,9 +89,7 @@ define i16 @wide_sub_induction_step_live_in(ptr %dst, i64 %N, i16 %off) { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i16> splat (i16 4), [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i16> [[DOTSPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i16> , [[DOTSPLAT]] +; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i16> , [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i16> zeroinitializer, [[TMP3]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i16> poison, i16 [[O_1]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT1]], <4 x i16> poison, <4 x i32> zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll index 312125608c87e..6cac99c0cbc50 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll @@ -131,7 +131,7 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i1> [[BROADCAST_SPLATINSERT]], <16 x i1> 
poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[TMP34:%.*]] = xor <16 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: br label [[VECTOR_BODY29:%.*]] -; CHECK: vector.body28: +; CHECK: vector.body30: ; CHECK-NEXT: [[INDEX34:%.*]] = phi i64 [ 0, [[VECTOR_PH25]] ], [ [[INDEX_NEXT39:%.*]], [[VECTOR_BODY29]] ] ; CHECK-NEXT: [[VEC_IND35:%.*]] = phi <16 x i64> [ , [[VECTOR_PH25]] ], [ [[VEC_IND_NEXT36:%.*]], [[VECTOR_BODY29]] ] ; CHECK-NEXT: [[VEC_IND37:%.*]] = phi <16 x i64> [ , [[VECTOR_PH25]] ], [ [[VEC_IND_NEXT38:%.*]], [[VECTOR_BODY29]] ] @@ -153,18 +153,18 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[VEC_IND_NEXT36]] = add <16 x i64> [[VEC_IND35]], splat (i64 32) ; CHECK-NEXT: [[VEC_IND_NEXT38]] = add <16 x i64> [[VEC_IND37]], splat (i64 32) ; CHECK-NEXT: [[TMP41:%.*]] = icmp eq i64 [[INDEX_NEXT39]], [[N_VEC32]] -; CHECK-NEXT: br i1 [[TMP41]], label [[MIDDLE_BLOCK35:%.*]], label [[VECTOR_BODY29]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK: middle.block35: +; CHECK-NEXT: br i1 [[TMP41]], label [[MIDDLE_BLOCK37:%.*]], label [[VECTOR_BODY29]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: middle.block37: ; CHECK-NEXT: [[CMP_N40:%.*]] = icmp eq i64 [[TMP28]], [[N_VEC32]] ; CHECK-NEXT: br i1 [[CMP_N40]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK43:%.*]] -; CHECK: vec.epilog.iter.check42: +; CHECK: vec.epilog.iter.check44: ; CHECK-NEXT: [[TMP42:%.*]] = mul i64 [[N_VEC32]], 2 ; CHECK-NEXT: [[IND_END55:%.*]] = add i64 8, [[TMP42]] ; CHECK-NEXT: [[IND_END58:%.*]] = mul i64 [[N_VEC32]], 2 ; CHECK-NEXT: [[N_VEC_REMAINING49:%.*]] = sub i64 [[TMP28]], [[N_VEC32]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK50:%.*]] = icmp ult i64 [[N_VEC_REMAINING49]], 8 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK50]], label [[VEC_EPILOG_SCALAR_PH40]], label [[VEC_EPILOG_PH42]] -; CHECK: vec.epilog.ph41: +; CHECK: vec.epilog.ph43: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL51:%.*]] = phi i64 [ [[N_VEC32]], [[VEC_EPILOG_ITER_CHECK43]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK24]] 
] ; CHECK-NEXT: [[BC_RESUME_VAL42:%.*]] = phi i64 [ [[IND_END41]], [[VEC_EPILOG_ITER_CHECK43]] ], [ 8, [[VECTOR_MAIN_LOOP_ITER_CHECK24]] ] ; CHECK-NEXT: [[BC_RESUME_VAL44:%.*]] = phi i64 [ [[IND_END43]], [[VEC_EPILOG_ITER_CHECK43]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK24]] ] @@ -183,7 +183,7 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[DOTSPLAT68:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT67]], <8 x i64> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION69:%.*]] = add <8 x i64> [[DOTSPLAT68]], ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY51:%.*]] -; CHECK: vec.epilog.vector.body57: +; CHECK: vec.epilog.vector.body59: ; CHECK-NEXT: [[INDEX61:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL51]], [[VEC_EPILOG_PH42]] ], [ [[INDEX_NEXT74:%.*]], [[VEC_EPILOG_VECTOR_BODY51]] ] ; CHECK-NEXT: [[VEC_IND65:%.*]] = phi <8 x i64> [ [[INDUCTION64]], [[VEC_EPILOG_PH42]] ], [ [[VEC_IND_NEXT66:%.*]], [[VEC_EPILOG_VECTOR_BODY51]] ] ; CHECK-NEXT: [[VEC_IND70:%.*]] = phi <8 x i64> [ [[INDUCTION69]], [[VEC_EPILOG_PH42]] ], [ [[VEC_IND_NEXT71:%.*]], [[VEC_EPILOG_VECTOR_BODY51]] ] @@ -206,10 +206,10 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[VEC_IND_NEXT71]] = add <8 x i64> [[VEC_IND70]], splat (i64 16) ; CHECK-NEXT: [[TMP55:%.*]] = icmp eq i64 [[INDEX_NEXT74]], [[N_VEC53]] ; CHECK-NEXT: br i1 [[TMP55]], label [[VEC_EPILOG_MIDDLE_BLOCK40:%.*]], label [[VEC_EPILOG_VECTOR_BODY51]], !llvm.loop [[LOOP5:![0-9]+]] -; CHECK: vec.epilog.middle.block64: +; CHECK: vec.epilog.middle.block66: ; CHECK-NEXT: [[CMP_N65:%.*]] = icmp eq i64 [[TMP28]], [[N_VEC53]] ; CHECK-NEXT: br i1 [[CMP_N65]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH40]] -; CHECK: vec.epilog.scalar.ph40: +; CHECK: vec.epilog.scalar.ph42: ; CHECK-NEXT: [[BC_RESUME_VAL56:%.*]] = phi i64 [ [[IND_END54]], [[VEC_EPILOG_MIDDLE_BLOCK40]] ], [ [[IND_END55]], [[VEC_EPILOG_ITER_CHECK43]] ], [ 8, [[ITER_CHECK22]] ] ; CHECK-NEXT: [[BC_RESUME_VAL67:%.*]] = phi i64 [ [[IND_END57]], 
[[VEC_EPILOG_MIDDLE_BLOCK40]] ], [ [[IND_END58]], [[VEC_EPILOG_ITER_CHECK43]] ], [ 0, [[ITER_CHECK22]] ] ; CHECK-NEXT: br label [[FOR_BODY_US:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll b/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll index c745b4f74786c..3f49fd9334615 100644 --- a/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll +++ b/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll @@ -50,9 +50,7 @@ define void @test(i32 %arg, i32 %L1.limit, i32 %L2.switch, i1 %c, ptr %dst) { ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> , [[DOTSPLAT]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> splat (i32 1), [[TMP4]] -; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[INDUCTION_IV_LCSSA1]], 4 -; CHECK-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i64 0 -; CHECK-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT3]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLAT4:%.*]] = mul <4 x i32> [[DOTSPLAT]], splat (i32 4) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[L1_EXIT_VAL]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll index 9aff603ef5fce..c505340a4dfda 100644 --- a/llvm/test/Transforms/LoopVectorize/float-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll @@ -38,9 +38,7 @@ define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N) ; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 ; VEC4_INTERL1-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 
x float> [[DOTSPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer ; VEC4_INTERL1-NEXT: [[INDUCTION:%.*]] = fsub fast <4 x float> [[DOTSPLAT3]], [[TMP6]] -; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], 4.000000e+00 -; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i64 0 -; VEC4_INTERL1-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT4]], <4 x float> poison, <4 x i32> zeroinitializer +; VEC4_INTERL1-NEXT: [[DOTSPLAT5:%.*]] = fmul fast <4 x float> [[DOTSPLAT]], splat (float 4.000000e+00) ; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC4_INTERL1: vector.body: ; VEC4_INTERL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -87,11 +85,9 @@ define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N) ; VEC4_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]] ; VEC4_INTERL2-NEXT: [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP1]] -; VEC4_INTERL2-NEXT: [[FPINC_INS:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0 -; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[FPINC_INS]], -; VEC4_INTERL2-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <4 x i32> zeroinitializer ; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0 ; VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer +; VEC4_INTERL2-NEXT: [[DOTSPLAT5:%.*]] = fmul fast <4 x float> [[DOTSPLAT]], splat (float 4.000000e+00) ; VEC4_INTERL2-NEXT: [[TMP7:%.*]] = fmul fast <4 x float> [[DOTSPLAT]], ; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 ; VEC4_INTERL2-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 
x float> poison, <4 x i32> zeroinitializer @@ -202,9 +198,7 @@ define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N) ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT2]], <2 x float> poison, <2 x i32> zeroinitializer ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fsub fast <2 x float> [[DOTSPLAT3]], [[TMP6]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], 2.000000e+00 -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0 -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT4]], <2 x float> poison, <2 x i32> zeroinitializer +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT5:%.*]] = fmul fast <2 x float> [[DOTSPLAT]], splat (float 2.000000e+00) ; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC2_INTERL1_PRED_STORE: vector.body: ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -285,9 +279,7 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32 ; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 ; VEC4_INTERL1-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer ; VEC4_INTERL1-NEXT: [[INDUCTION:%.*]] = fsub reassoc <4 x float> [[DOTSPLAT3]], [[TMP6]] -; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = fmul reassoc float [[FPINC]], 4.000000e+00 -; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i64 0 -; VEC4_INTERL1-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT4]], <4 x float> poison, <4 x i32> zeroinitializer +; VEC4_INTERL1-NEXT: [[DOTSPLAT5:%.*]] = fmul reassoc <4 x float> 
[[DOTSPLAT]], splat (float 4.000000e+00) ; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC4_INTERL1: vector.body: ; VEC4_INTERL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -334,11 +326,9 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32 ; VEC4_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = fmul reassoc float [[FPINC]], [[DOTCAST]] ; VEC4_INTERL2-NEXT: [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP1]] -; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0 -; VEC4_INTERL2-NEXT: [[MUL:%.*]] = fmul reassoc <4 x float> [[DOTSPLATINSERT2]], -; VEC4_INTERL2-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[MUL]], <4 x float> poison, <4 x i32> zeroinitializer ; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0 ; VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer +; VEC4_INTERL2-NEXT: [[DOTSPLAT5:%.*]] = fmul reassoc <4 x float> [[DOTSPLAT]], splat (float 4.000000e+00) ; VEC4_INTERL2-NEXT: [[TMP7:%.*]] = fmul reassoc <4 x float> [[DOTSPLAT]], ; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 ; VEC4_INTERL2-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT1]], <4 x float> poison, <4 x i32> zeroinitializer @@ -451,9 +441,7 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32 ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT2]], <2 x float> poison, <2 x i32> zeroinitializer ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fsub reassoc <2 x float> [[DOTSPLAT3]], [[TMP6]] -; 
VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul reassoc float [[FPINC]], 2.000000e+00 -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0 -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT4]], <2 x float> poison, <2 x i32> zeroinitializer +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT5:%.*]] = fmul reassoc <2 x float> [[DOTSPLAT]], splat (float 2.000000e+00) ; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC2_INTERL1_PRED_STORE: vector.body: ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -770,9 +758,7 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca ; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 ; VEC4_INTERL1-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT5]], <4 x float> poison, <4 x i32> zeroinitializer ; VEC4_INTERL1-NEXT: [[INDUCTION:%.*]] = fadd fast <4 x float> [[DOTSPLAT6]], [[TMP13]] -; VEC4_INTERL1-NEXT: [[TMP14:%.*]] = fmul fast float [[TMP0]], 4.000000e+00 -; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT7:%.*]] = insertelement <4 x float> poison, float [[TMP14]], i64 0 -; VEC4_INTERL1-NEXT: [[DOTSPLAT8:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT7]], <4 x float> poison, <4 x i32> zeroinitializer +; VEC4_INTERL1-NEXT: [[DOTSPLAT8:%.*]] = fmul fast <4 x float> [[DOTSPLAT]], splat (float 4.000000e+00) ; VEC4_INTERL1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 ; VEC4_INTERL1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer ; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]] @@ -841,11 +827,9 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca ; VEC4_INTERL2-NEXT: [[DOTCAST2:%.*]] = uitofp nneg i64 
[[N_VEC]] to float ; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP0]], [[DOTCAST2]] ; VEC4_INTERL2-NEXT: [[IND_END3:%.*]] = fadd fast float [[INIT:%.*]], [[TMP3]] -; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 -; VEC4_INTERL2-NEXT: [[TMP19:%.*]] = fmul fast <4 x float> [[DOTSPLATINSERT2]], -; VEC4_INTERL2-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[TMP19]], <4 x float> poison, <4 x i32> zeroinitializer ; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 ; VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer +; VEC4_INTERL2-NEXT: [[DOTSPLAT5:%.*]] = fmul fast <4 x float> [[DOTSPLAT]], splat (float 4.000000e+00) ; VEC4_INTERL2-NEXT: [[TMP20:%.*]] = fmul fast <4 x float> [[DOTSPLAT]], ; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT6:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 ; VEC4_INTERL2-NEXT: [[DOTSPLAT7:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT6]], <4 x float> poison, <4 x i32> zeroinitializer @@ -1012,9 +996,7 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT5]], <2 x float> poison, <2 x i32> zeroinitializer ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fadd fast <2 x float> [[DOTSPLAT6]], [[TMP13]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP14:%.*]] = fmul fast float [[TMP0]], 2.000000e+00 -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT7:%.*]] = insertelement <2 x float> poison, float [[TMP14]], i64 0 -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT8:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT7]], <2 x float> poison, <2 x i32> zeroinitializer +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT8:%.*]] = fmul 
fast <2 x float> [[DOTSPLAT]], splat (float 2.000000e+00) ; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0 ; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer ; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/induction-step.ll b/llvm/test/Transforms/LoopVectorize/induction-step.ll index 343c5ee538790..6b57ed7bd6d47 100644 --- a/llvm/test/Transforms/LoopVectorize/induction-step.ll +++ b/llvm/test/Transforms/LoopVectorize/induction-step.ll @@ -39,9 +39,7 @@ define void @induction_with_global(i32 %init, ptr noalias nocapture %A, i32 %N) ; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[INIT]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION4:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP0]], 8 -; CHECK-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <8 x i32> poison, i32 [[TMP7]], i64 0 -; CHECK-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT5]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLAT6:%.*]] = mul <8 x i32> [[DOTSPLAT3]], splat (i32 8) ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -59,11 +57,11 @@ define void @induction_with_global(i32 %init, ptr noalias nocapture %A, i32 %N) ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi i32 [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ [[INIT]], %[[ENTRY]] ] +; CHECK-NEXT: 
[[BC_RESUME_VAL3:%.*]] = phi i32 [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ [[INIT]], %[[ENTRY]] ] ; CHECK-NEXT: br label %[[FOR_BODY:.*]] ; CHECK: [[FOR_BODY]]: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[X_05:%.*]] = phi i32 [ [[BC_RESUME_VAL5]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[X_05:%.*]] = phi i32 [ [[BC_RESUME_VAL3]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-NEXT: store i32 [[X_05]], ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP0]], [[X_05]] @@ -130,9 +128,7 @@ define i32 @induction_with_loop_inv(i32 %init, ptr noalias nocapture %A, i32 %N, ; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> poison, i32 [[X_011]], i64 0 ; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i32> [[DOTSPLAT3]], [[TMP12]] -; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[J_012]], 8 -; CHECK-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <8 x i32> poison, i32 [[TMP5]], i64 0 -; CHECK-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT5]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLAT6:%.*]] = mul <8 x i32> [[DOTSPLAT]], splat (i32 8) ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -150,11 +146,11 @@ define i32 @induction_with_loop_inv(i32 %init, ptr noalias nocapture %A, i32 %N, ; CHECK-NEXT: br i1 [[CMP_N]], label %[[INNER_EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[OUTER_HEADER]] ] -; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi i32 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], 
[ [[X_011]], %[[OUTER_HEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[X_011]], %[[OUTER_HEADER]] ] ; CHECK-NEXT: br label %[[INNER:.*]] ; CHECK: [[INNER]]: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[INNER]] ] -; CHECK-NEXT: [[X_18:%.*]] = phi i32 [ [[BC_RESUME_VAL5]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[INNER]] ] +; CHECK-NEXT: [[X_18:%.*]] = phi i32 [ [[BC_RESUME_VAL3]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[INNER]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-NEXT: store i32 [[X_18]], ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ADD]] = add nsw i32 [[X_18]], [[J_012]] @@ -225,9 +221,7 @@ define void @non_primary_iv_loop_inv_trunc(ptr %a, i64 %n, i64 %step) { ; CHECK-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT5]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = mul <8 x i32> , [[DOTSPLAT6]] ; CHECK-NEXT: [[INDUCTION7:%.*]] = add <8 x i32> zeroinitializer, [[TMP4]] -; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP3]], 8 -; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <8 x i32> poison, i32 [[TMP5]], i64 0 -; CHECK-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT8]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLAT9:%.*]] = mul <8 x i32> [[DOTSPLAT6]], splat (i32 8) ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -245,11 +239,11 @@ define void @non_primary_iv_loop_inv_trunc(ptr %a, i64 %n, i64 %step) { ; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] 
+; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[FOR_BODY:.*]] ; CHECK: [[FOR_BODY]]: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] -; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[J_NEXT:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL3]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[J_NEXT:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I]] ; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[J]] to i32 ; CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP0]], align 4 @@ -350,22 +344,18 @@ define void @wide_add_induction_step_live_in(ptr %dst, i64 %N, i16 %off) { ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = mul <8 x i16> , [[DOTSPLAT]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i16> zeroinitializer, [[TMP1]] -; CHECK-NEXT: [[TMP2:%.*]] = mul i16 [[O_1]], 8 -; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i64 0 -; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT1]], <8 x i16> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[O_1]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = mul <8 x i16> [[DOTSPLAT]], splat (i16 8) ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i16> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = add <8 x i16> 
[[VEC_IND]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP4:%.*]] = add <8 x i16> [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0 ; CHECK-NEXT: store <8 x i16> [[TMP4]], ptr [[TMP6]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i16> [[VEC_IND]], [[DOTSPLAT2]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i16> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -373,11 +363,11 @@ define void @wide_add_induction_step_live_in(ptr %dst, i64 %N, i16 %off) { ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i16 [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i16 [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[IV_2:%.*]] = phi i16 [ [[BC_RESUME_VAL3]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_2:%.*]] = phi i16 [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[ADD]] = add i16 [[IV_2]], [[O_1]] ; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[IV]] ; CHECK-NEXT: store i16 [[ADD]], ptr [[GEP_DST]], align 2 @@ -422,9 +412,7 @@ define void @wide_sub_induction_step_live_in(ptr %dst, i64 %N, i16 %off) { ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> 
zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i16> , [[DOTSPLAT]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i16> zeroinitializer, [[TMP2]] -; CHECK-NEXT: [[TMP3:%.*]] = mul i16 [[TMP0]], 8 -; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP3]], i64 0 -; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT1]], <8 x i16> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLAT2:%.*]] = mul <8 x i16> [[DOTSPLAT]], splat (i16 8) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[O_1]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll index d2d123af4b8f7..e58f9ee58c53c 100644 --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -3138,7 +3138,7 @@ define i32 @testoverflowcheck() { ; CHECK-NEXT: [[TMP5]] = and <2 x i32> [[BROADCAST_SPLAT]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[TMP5]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP3]], [[N_VEC]] @@ -3153,7 +3153,7 @@ define i32 @testoverflowcheck() { ; CHECK-NEXT: [[AND_I]] = and i32 [[TMP0]], [[AND3_I]] ; CHECK-NEXT: [[INC_I]] = add i8 [[INC4_I]], 1 ; CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp eq i8 [[INC_I]], 0 -; CHECK-NEXT: br i1 [[TOBOOL_I]], label [[LOOPEXIT]], label [[COND_END_I]], !llvm.loop [[LOOP35:![0-9]+]] 
+; CHECK-NEXT: br i1 [[TOBOOL_I]], label [[LOOPEXIT]], label [[COND_END_I]], !llvm.loop [[LOOP33:![0-9]+]] ; CHECK: loopexit: ; CHECK-NEXT: [[AND_I_LCSSA:%.*]] = phi i32 [ [[AND_I]], [[COND_END_I]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[AND_I_LCSSA]] @@ -3180,7 +3180,7 @@ define i32 @testoverflowcheck() { ; IND-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; IND-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; IND-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] +; IND-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] ; IND: middle.block: ; IND-NEXT: [[TMP6:%.*]] = and <2 x i32> [[TMP4]], [[BROADCAST_SPLAT]] ; IND-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[TMP6]]) @@ -3195,7 +3195,7 @@ define i32 @testoverflowcheck() { ; IND-NEXT: [[AND_I:%.*]] = and i32 [[BC_MERGE_RDX]], [[TMP0]] ; IND-NEXT: [[INC_I]] = add i8 [[INC4_I]], 1 ; IND-NEXT: [[TOBOOL_I:%.*]] = icmp eq i8 [[INC_I]], 0 -; IND-NEXT: br i1 [[TOBOOL_I]], label [[LOOPEXIT]], label [[COND_END_I]], !llvm.loop [[LOOP35:![0-9]+]] +; IND-NEXT: br i1 [[TOBOOL_I]], label [[LOOPEXIT]], label [[COND_END_I]], !llvm.loop [[LOOP33:![0-9]+]] ; IND: loopexit: ; IND-NEXT: [[AND_I_LCSSA:%.*]] = phi i32 [ [[AND_I]], [[COND_END_I]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; IND-NEXT: ret i32 [[AND_I_LCSSA]] @@ -3222,7 +3222,7 @@ define i32 @testoverflowcheck() { ; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; UNROLL-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] +; UNROLL-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label 
[[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: [[TMP6:%.*]] = and <2 x i32> [[TMP4]], [[BROADCAST_SPLAT]] ; UNROLL-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[TMP6]]) @@ -3237,7 +3237,7 @@ define i32 @testoverflowcheck() { ; UNROLL-NEXT: [[AND_I:%.*]] = and i32 [[BC_MERGE_RDX]], [[TMP0]] ; UNROLL-NEXT: [[INC_I]] = add i8 [[INC4_I]], 1 ; UNROLL-NEXT: [[TOBOOL_I:%.*]] = icmp eq i8 [[INC_I]], 0 -; UNROLL-NEXT: br i1 [[TOBOOL_I]], label [[LOOPEXIT]], label [[COND_END_I]], !llvm.loop [[LOOP35:![0-9]+]] +; UNROLL-NEXT: br i1 [[TOBOOL_I]], label [[LOOPEXIT]], label [[COND_END_I]], !llvm.loop [[LOOP33:![0-9]+]] ; UNROLL: loopexit: ; UNROLL-NEXT: [[AND_I_LCSSA:%.*]] = phi i32 [ [[AND_I]], [[COND_END_I]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; UNROLL-NEXT: ret i32 [[AND_I_LCSSA]] @@ -3269,7 +3269,7 @@ define i32 @testoverflowcheck() { ; UNROLL-NO-IC-NEXT: [[TMP6]] = and <2 x i32> [[BROADCAST_SPLAT]], [[VEC_PHI1]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = and <2 x i32> [[TMP6]], [[TMP5]] ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[BIN_RDX]]) @@ -3285,7 +3285,7 @@ define i32 @testoverflowcheck() { ; UNROLL-NO-IC-NEXT: [[AND_I]] = and i32 [[TMP0]], [[AND3_I]] ; UNROLL-NO-IC-NEXT: [[INC_I]] = add i8 [[INC4_I]], 1 ; UNROLL-NO-IC-NEXT: [[TOBOOL_I:%.*]] = icmp eq i8 [[INC_I]], 0 -; UNROLL-NO-IC-NEXT: br i1 [[TOBOOL_I]], label [[LOOPEXIT]], label [[COND_END_I]], !llvm.loop [[LOOP35:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[TOBOOL_I]], label [[LOOPEXIT]], label [[COND_END_I]], !llvm.loop 
[[LOOP33:![0-9]+]] ; UNROLL-NO-IC: loopexit: ; UNROLL-NO-IC-NEXT: [[AND_I_LCSSA:%.*]] = phi i32 [ [[AND_I]], [[COND_END_I]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-IC-NEXT: ret i32 [[AND_I_LCSSA]] @@ -3312,7 +3312,7 @@ define i32 @testoverflowcheck() { ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; INTERLEAVE-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; INTERLEAVE-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] +; INTERLEAVE-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] ; INTERLEAVE: middle.block: ; INTERLEAVE-NEXT: [[TMP6:%.*]] = and <4 x i32> [[TMP4]], [[BROADCAST_SPLAT]] ; INTERLEAVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP6]]) @@ -3327,7 +3327,7 @@ define i32 @testoverflowcheck() { ; INTERLEAVE-NEXT: [[AND_I:%.*]] = and i32 [[BC_MERGE_RDX]], [[TMP0]] ; INTERLEAVE-NEXT: [[INC_I]] = add i8 [[INC4_I]], 1 ; INTERLEAVE-NEXT: [[TOBOOL_I:%.*]] = icmp eq i8 [[INC_I]], 0 -; INTERLEAVE-NEXT: br i1 [[TOBOOL_I]], label [[LOOPEXIT]], label [[COND_END_I]], !llvm.loop [[LOOP35:![0-9]+]] +; INTERLEAVE-NEXT: br i1 [[TOBOOL_I]], label [[LOOPEXIT]], label [[COND_END_I]], !llvm.loop [[LOOP33:![0-9]+]] ; INTERLEAVE: loopexit: ; INTERLEAVE-NEXT: [[AND_I_LCSSA:%.*]] = phi i32 [ [[AND_I]], [[COND_END_I]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; INTERLEAVE-NEXT: ret i32 [[AND_I_LCSSA]] @@ -3401,7 +3401,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP15]], label 
[[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] @@ -3420,7 +3420,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; CHECK-NEXT: [[IDX_INC_EXT]] = zext i8 [[IDX_INC]] to i32 ; CHECK-NEXT: [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1 ; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]] -; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP37:![0-9]+]] +; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP35:![0-9]+]] ; CHECK: exit.loopexit: ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: @@ -3486,7 +3486,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; IND-NEXT: [[IDX_INC_EXT]] = zext i8 [[IDX_INC]] to i32 ; IND-NEXT: [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1 ; IND-NEXT: [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]] -; IND-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP37:![0-9]+]] +; IND-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP35:![0-9]+]] ; IND: exit.loopexit: ; IND-NEXT: br label [[EXIT]] ; IND: exit: @@ -3535,7 +3535,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4) ; UNROLL-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] +; UNROLL-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] @@ -3555,7 +3555,7 @@ define void 
@wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; UNROLL-NEXT: [[IDX_INC_EXT]] = zext i8 [[IDX_INC]] to i32 ; UNROLL-NEXT: [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1 ; UNROLL-NEXT: [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]] -; UNROLL-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP37:![0-9]+]] +; UNROLL-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP35:![0-9]+]] ; UNROLL: exit.loopexit: ; UNROLL-NEXT: br label [[EXIT]] ; UNROLL: exit: @@ -3609,7 +3609,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] @@ -3628,7 +3628,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; UNROLL-NO-IC-NEXT: [[IDX_INC_EXT]] = zext i8 [[IDX_INC]] to i32 ; UNROLL-NO-IC-NEXT: [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1 ; UNROLL-NO-IC-NEXT: [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]] -; UNROLL-NO-IC-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP37:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP35:![0-9]+]] ; UNROLL-NO-IC: exit.loopexit: ; UNROLL-NO-IC-NEXT: br label [[EXIT]] ; UNROLL-NO-IC: exit: @@ -3677,7 +3677,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> 
[[VEC_IND]], splat (i32 8) ; INTERLEAVE-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; INTERLEAVE-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] +; INTERLEAVE-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; INTERLEAVE: middle.block: ; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] @@ -3697,7 +3697,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; INTERLEAVE-NEXT: [[IDX_INC_EXT]] = zext i8 [[IDX_INC]] to i32 ; INTERLEAVE-NEXT: [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1 ; INTERLEAVE-NEXT: [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]] -; INTERLEAVE-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP37:![0-9]+]] +; INTERLEAVE-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP35:![0-9]+]] ; INTERLEAVE: exit.loopexit: ; INTERLEAVE-NEXT: br label [[EXIT]] ; INTERLEAVE: exit: @@ -3781,7 +3781,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 8) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] @@ -3801,7 +3801,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; CHECK-NEXT: [[MUL]] = mul i32 [[IDX_INC_EXT]], 4 ; CHECK-NEXT: [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1 ; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]] -; 
CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP39:![0-9]+]] +; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP37:![0-9]+]] ; CHECK: exit.loopexit: ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: @@ -3849,7 +3849,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 8) ; IND-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; IND-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] +; IND-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; IND: middle.block: ; IND-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; IND-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] @@ -3870,7 +3870,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; IND-NEXT: [[MUL]] = shl nuw nsw i32 [[IDX_INC_EXT]], 2 ; IND-NEXT: [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1 ; IND-NEXT: [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]] -; IND-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP39:![0-9]+]] +; IND-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP37:![0-9]+]] ; IND: exit.loopexit: ; IND-NEXT: br label [[EXIT]] ; IND: exit: @@ -3921,7 +3921,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 16) ; UNROLL-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] +; UNROLL-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq 
i32 [[TMP0]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] @@ -3942,7 +3942,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; UNROLL-NEXT: [[MUL]] = shl nuw nsw i32 [[IDX_INC_EXT]], 2 ; UNROLL-NEXT: [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1 ; UNROLL-NEXT: [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]] -; UNROLL-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP39:![0-9]+]] +; UNROLL-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP37:![0-9]+]] ; UNROLL: exit.loopexit: ; UNROLL-NEXT: br label [[EXIT]] ; UNROLL: exit: @@ -3998,7 +3998,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 8) ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] @@ -4018,7 +4018,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; UNROLL-NO-IC-NEXT: [[MUL]] = mul i32 [[IDX_INC_EXT]], 4 ; UNROLL-NO-IC-NEXT: [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1 ; UNROLL-NO-IC-NEXT: [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]] -; UNROLL-NO-IC-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP39:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP37:![0-9]+]] ; UNROLL-NO-IC: exit.loopexit: ; UNROLL-NO-IC-NEXT: br label [[EXIT]] ; UNROLL-NO-IC: exit: @@ -4069,7 +4069,7 @@ define void @wrappingindvars2(i8 %t, i32 
%len, ptr %A) { ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 32) ; INTERLEAVE-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; INTERLEAVE-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] +; INTERLEAVE-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; INTERLEAVE: middle.block: ; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] @@ -4090,7 +4090,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; INTERLEAVE-NEXT: [[MUL]] = shl nuw nsw i32 [[IDX_INC_EXT]], 2 ; INTERLEAVE-NEXT: [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1 ; INTERLEAVE-NEXT: [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]] -; INTERLEAVE-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP39:![0-9]+]] +; INTERLEAVE-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP37:![0-9]+]] ; INTERLEAVE: exit.loopexit: ; INTERLEAVE-NEXT: br label [[EXIT]] ; INTERLEAVE: exit: @@ -4147,7 +4147,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[K]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -4160,7 +4160,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) { ; CHECK-NEXT: store i32 [[INDVARS_IV]], ptr 
[[ARRAYIDX]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP39:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -4180,7 +4180,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) { ; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; IND-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; IND-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]] +; IND-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] ; IND: middle.block: ; IND-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[K]], [[N_VEC]] ; IND-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -4194,7 +4194,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) { ; IND-NEXT: store i32 [[INDVARS_IV]], ptr [[ARRAYIDX]], align 4 ; IND-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1 ; IND-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]] -; IND-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]] +; IND-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP39:![0-9]+]] ; IND: exit: ; IND-NEXT: ret void ; @@ -4217,7 +4217,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) { ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4) ; UNROLL-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]] +; UNROLL-NEXT: br i1 [[TMP3]], label 
[[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[K]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -4231,7 +4231,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) { ; UNROLL-NEXT: store i32 [[INDVARS_IV]], ptr [[ARRAYIDX]], align 4 ; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1 ; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]] -; UNROLL-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]] +; UNROLL-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP39:![0-9]+]] ; UNROLL: exit: ; UNROLL-NEXT: ret void ; @@ -4256,7 +4256,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) { ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[K]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -4269,7 +4269,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) { ; UNROLL-NO-IC-NEXT: store i32 [[INDVARS_IV]], ptr [[ARRAYIDX]], align 4 ; UNROLL-NO-IC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1 ; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]] -; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop 
[[LOOP39:![0-9]+]] ; UNROLL-NO-IC: exit: ; UNROLL-NO-IC-NEXT: ret void ; @@ -4292,7 +4292,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) { ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8) ; INTERLEAVE-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; INTERLEAVE-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]] +; INTERLEAVE-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] ; INTERLEAVE: middle.block: ; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[K]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -4306,7 +4306,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) { ; INTERLEAVE-NEXT: store i32 [[INDVARS_IV]], ptr [[ARRAYIDX]], align 4 ; INTERLEAVE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1 ; INTERLEAVE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]] -; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]] +; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP39:![0-9]+]] ; INTERLEAVE: exit: ; INTERLEAVE-NEXT: ret void ; @@ -4352,7 +4352,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[K]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ 
-4366,7 +4366,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; CHECK-NEXT: store i32 [[TRUNC_IV]], ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[K]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -4390,7 +4390,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; IND-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; IND-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] +; IND-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]] ; IND: middle.block: ; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[K]], [[N_VEC]] ; IND-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -4406,7 +4406,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; IND-NEXT: store i32 [[TRUNC_IV]], ptr [[ARRAYIDX]], align 4 ; IND-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; IND-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[K]] -; IND-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]] +; IND-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]] ; IND: exit: ; IND-NEXT: ret void ; @@ -4433,7 +4433,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4) ; UNROLL-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NEXT: br i1 
[[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] +; UNROLL-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[K]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -4449,7 +4449,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; UNROLL-NEXT: store i32 [[TRUNC_IV]], ptr [[ARRAYIDX]], align 4 ; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[K]] -; UNROLL-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]] +; UNROLL-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]] ; UNROLL: exit: ; UNROLL-NEXT: ret void ; @@ -4482,7 +4482,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[K]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -4496,7 +4496,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; UNROLL-NO-IC-NEXT: store i32 [[TRUNC_IV]], ptr [[ARRAYIDX]], align 4 ; UNROLL-NO-IC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[K]] -; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label 
[[FOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]] ; UNROLL-NO-IC: exit: ; UNROLL-NO-IC-NEXT: ret void ; @@ -4523,7 +4523,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8) ; INTERLEAVE-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; INTERLEAVE-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] +; INTERLEAVE-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]] ; INTERLEAVE: middle.block: ; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[K]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -4539,7 +4539,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; INTERLEAVE-NEXT: store i32 [[TRUNC_IV]], ptr [[ARRAYIDX]], align 4 ; INTERLEAVE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; INTERLEAVE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[K]] -; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]] +; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]] ; INTERLEAVE: exit: ; INTERLEAVE-NEXT: ret void ; @@ -4586,7 +4586,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] ; CHECK: 
middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -4599,7 +4599,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; CHECK-NEXT: store i32 [[INDVARS_IV]], ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -4625,7 +4625,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; IND-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; IND-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] +; IND-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] ; IND: middle.block: ; IND-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; IND-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -4639,7 +4639,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; IND-NEXT: store i32 [[INDVARS_IV]], ptr [[ARRAYIDX]], align 4 ; IND-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1 ; IND-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]] -; IND-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]] +; IND-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]] ; IND: exit: ; IND-NEXT: ret void ; @@ -4668,7 +4668,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; UNROLL-NEXT: [[INDEX_NEXT]] 
= add nuw i32 [[INDEX]], 4 ; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4) ; UNROLL-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] +; UNROLL-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -4682,7 +4682,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; UNROLL-NEXT: store i32 [[INDVARS_IV]], ptr [[ARRAYIDX]], align 4 ; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1 ; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]] -; UNROLL-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]] +; UNROLL-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]] ; UNROLL: exit: ; UNROLL-NEXT: ret void ; @@ -4713,7 +4713,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -4726,7 +4726,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; UNROLL-NO-IC-NEXT: store i32 [[INDVARS_IV]], ptr [[ARRAYIDX]], align 
4 ; UNROLL-NO-IC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1 ; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]] -; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]] ; UNROLL-NO-IC: exit: ; UNROLL-NO-IC-NEXT: ret void ; @@ -4755,7 +4755,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8) ; INTERLEAVE-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; INTERLEAVE-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] +; INTERLEAVE-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] ; INTERLEAVE: middle.block: ; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -4769,7 +4769,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; INTERLEAVE-NEXT: store i32 [[INDVARS_IV]], ptr [[ARRAYIDX]], align 4 ; INTERLEAVE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1 ; INTERLEAVE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]] -; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]] +; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]] ; INTERLEAVE: exit: ; INTERLEAVE-NEXT: ret void ; @@ -4809,7 +4809,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; 
CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -4826,7 +4826,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) { ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; CHECK-NEXT: [[J_NEXT]] = add nuw nsw i64 [[J]], 2 ; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP47:![0-9]+]] +; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP45:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -4847,7 +4847,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) { ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4) ; IND-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; IND-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]] +; IND-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] ; IND: middle.block: ; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] ; IND-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -4864,7 +4864,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) { ; IND-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; IND-NEXT: [[J_NEXT]] = add nuw nsw i64 [[J]], 2 ; IND-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] -; IND-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP47:![0-9]+]] +; IND-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP45:![0-9]+]] ; IND: for.end: ; IND-NEXT: ret void ; @@ -4888,7 +4888,7 @@ define void 
@non_primary_iv_trunc(ptr %a, i64 %n) { ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 8) ; UNROLL-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]] +; UNROLL-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -4905,7 +4905,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) { ; UNROLL-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; UNROLL-NEXT: [[J_NEXT]] = add nuw nsw i64 [[J]], 2 ; UNROLL-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] -; UNROLL-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP47:![0-9]+]] +; UNROLL-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP45:![0-9]+]] ; UNROLL: for.end: ; UNROLL-NEXT: ret void ; @@ -4932,7 +4932,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) { ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 4) ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -4949,7 +4949,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) { ; UNROLL-NO-IC-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; UNROLL-NO-IC-NEXT: [[J_NEXT]] = add 
nuw nsw i64 [[J]], 2 ; UNROLL-NO-IC-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] -; UNROLL-NO-IC-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP47:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP45:![0-9]+]] ; UNROLL-NO-IC: for.end: ; UNROLL-NO-IC-NEXT: ret void ; @@ -4973,7 +4973,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) { ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 16) ; INTERLEAVE-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; INTERLEAVE-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]] +; INTERLEAVE-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] ; INTERLEAVE: middle.block: ; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -4990,7 +4990,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) { ; INTERLEAVE-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; INTERLEAVE-NEXT: [[J_NEXT]] = add nuw nsw i64 [[J]], 2 ; INTERLEAVE-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] -; INTERLEAVE-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP47:![0-9]+]] +; INTERLEAVE-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP45:![0-9]+]] ; INTERLEAVE: for.end: ; INTERLEAVE-NEXT: ret void ; @@ -5057,7 +5057,7 @@ define i32 @PR32419(i32 %a, i16 %b) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20 -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label 
[[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[TMP15]]) ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -5080,7 +5080,7 @@ define i32 @PR32419(i32 %a, i16 %b) { ; CHECK-NEXT: [[VAR6]] = or i32 [[VAR0]], [[VAR5]] ; CHECK-NEXT: [[I_NEXT]] = add nsw i32 [[I]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[I_NEXT]], 0 -; CHECK-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]] +; CHECK-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP47:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: [[VAR7:%.*]] = phi i32 [ [[VAR6]], [[FOR_INC]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[VAR7]] @@ -5121,7 +5121,7 @@ define i32 @PR32419(i32 %a, i16 %b) { ; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) ; IND-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20 -; IND-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] +; IND-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]] ; IND: middle.block: ; IND-NEXT: [[TMP16:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[TMP14]]) ; IND-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -5132,7 +5132,7 @@ define i32 @PR32419(i32 %a, i16 %b) { ; IND: for.cond: ; IND-NEXT: br label [[FOR_INC]] ; IND: for.inc: -; IND-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]] +; IND-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP47:![0-9]+]] ; IND: for.end: ; IND-NEXT: [[VAR7:%.*]] = phi i32 [ poison, [[FOR_INC]] ], [ [[TMP16]], [[MIDDLE_BLOCK]] ] ; IND-NEXT: ret i32 [[VAR7]] @@ -5196,7 +5196,7 @@ define i32 @PR32419(i32 %a, i16 %b) { ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 
4 ; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 4) ; UNROLL-NEXT: [[TMP28:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20 -; UNROLL-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] +; UNROLL-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: [[BIN_RDX:%.*]] = or <2 x i32> [[TMP27]], [[TMP26]] ; UNROLL-NEXT: [[TMP29:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[BIN_RDX]]) @@ -5208,7 +5208,7 @@ define i32 @PR32419(i32 %a, i16 %b) { ; UNROLL: for.cond: ; UNROLL-NEXT: br label [[FOR_INC]] ; UNROLL: for.inc: -; UNROLL-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]] +; UNROLL-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP47:![0-9]+]] ; UNROLL: for.end: ; UNROLL-NEXT: [[VAR7:%.*]] = phi i32 [ poison, [[FOR_INC]] ], [ [[TMP29]], [[MIDDLE_BLOCK]] ] ; UNROLL-NEXT: ret i32 [[VAR7]] @@ -5276,7 +5276,7 @@ define i32 @PR32419(i32 %a, i16 %b) { ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[STEP_ADD]], splat (i16 2) ; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20 -; UNROLL-NO-IC-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = or <2 x i32> [[TMP29]], [[TMP28]] ; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[BIN_RDX]]) @@ -5300,7 +5300,7 @@ define i32 @PR32419(i32 %a, i16 %b) { ; UNROLL-NO-IC-NEXT: [[VAR6]] = or i32 [[VAR0]], [[VAR5]] ; UNROLL-NO-IC-NEXT: [[I_NEXT]] = add nsw i32 [[I]], 1 ; UNROLL-NO-IC-NEXT: [[COND:%.*]] = icmp eq i32 [[I_NEXT]], 0 -; UNROLL-NO-IC-NEXT: 
br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP47:![0-9]+]] ; UNROLL-NO-IC: for.end: ; UNROLL-NO-IC-NEXT: [[VAR7:%.*]] = phi i32 [ [[VAR6]], [[FOR_INC]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-IC-NEXT: ret i32 [[VAR7]] @@ -5400,7 +5400,7 @@ define i32 @PR32419(i32 %a, i16 %b) { ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 8) ; INTERLEAVE-NEXT: [[TMP48:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 -; INTERLEAVE-NEXT: br i1 [[TMP48]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] +; INTERLEAVE-NEXT: br i1 [[TMP48]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]] ; INTERLEAVE: middle.block: ; INTERLEAVE-NEXT: [[BIN_RDX:%.*]] = or <4 x i32> [[TMP47]], [[TMP46]] ; INTERLEAVE-NEXT: [[TMP49:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[BIN_RDX]]) @@ -5423,7 +5423,7 @@ define i32 @PR32419(i32 %a, i16 %b) { ; INTERLEAVE-NEXT: [[VAR6]] = or i32 [[VAR0]], [[VAR4]] ; INTERLEAVE-NEXT: [[I_NEXT]] = add nsw i32 [[I]], 1 ; INTERLEAVE-NEXT: [[COND:%.*]] = icmp eq i32 [[I_NEXT]], 0 -; INTERLEAVE-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]] +; INTERLEAVE-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP47:![0-9]+]] ; INTERLEAVE: for.end: ; INTERLEAVE-NEXT: [[VAR7:%.*]] = phi i32 [ [[VAR6]], [[FOR_INC]] ], [ poison, [[MIDDLE_BLOCK]] ] ; INTERLEAVE-NEXT: ret i32 [[VAR7]] @@ -5489,7 +5489,7 @@ define i64 @trunc_with_first_order_recurrence() { ; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <2 x i32> [[VEC_IND2]], splat (i32 2) ; CHECK-NEXT: [[VEC_IND_NEXT5]] = add <2 x i32> [[VEC_IND4]], splat (i32 2) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112 -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], 
label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[TMP10]]) ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[VEC_IND2]], i32 1 @@ -5523,7 +5523,7 @@ define i64 @trunc_with_first_order_recurrence() { ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[C24]] = add nuw nsw i32 [[X]], 1 ; CHECK-NEXT: [[EXITCOND_I:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 114 -; CHECK-NEXT: br i1 [[EXITCOND_I]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP51:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_I]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP49:![0-9]+]] ; ; IND-LABEL: @trunc_with_first_order_recurrence( ; IND-NEXT: entry: @@ -5553,7 +5553,7 @@ define i64 @trunc_with_first_order_recurrence() { ; IND-NEXT: [[VEC_IND_NEXT3]] = add <2 x i32> [[VEC_IND2]], splat (i32 2) ; IND-NEXT: [[VEC_IND_NEXT5]] = add <2 x i32> [[VEC_IND4]], splat (i32 2) ; IND-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112 -; IND-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]] +; IND-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] ; IND: middle.block: ; IND-NEXT: [[TMP12:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[TMP10]]) ; IND-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[VEC_IND2]], i64 1 @@ -5585,7 +5585,7 @@ define i64 @trunc_with_first_order_recurrence() { ; IND-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; IND-NEXT: [[C24]] = add nuw nsw i32 [[X]], 1 ; IND-NEXT: [[EXITCOND_I:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 114 -; IND-NEXT: br i1 [[EXITCOND_I]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP51:![0-9]+]] +; IND-NEXT: br i1 [[EXITCOND_I]], label 
[[EXIT]], label [[LOOP]], !llvm.loop [[LOOP49:![0-9]+]] ; ; UNROLL-LABEL: @trunc_with_first_order_recurrence( ; UNROLL-NEXT: entry: @@ -5630,7 +5630,7 @@ define i64 @trunc_with_first_order_recurrence() { ; UNROLL-NEXT: [[VEC_IND_NEXT4]] = add <2 x i32> [[VEC_IND3]], splat (i32 4) ; UNROLL-NEXT: [[VEC_IND_NEXT6]] = add <2 x i32> [[VEC_IND5]], splat (i32 4) ; UNROLL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112 -; UNROLL-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]] +; UNROLL-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[TMP21]], [[TMP20]] ; UNROLL-NEXT: [[TMP23:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]]) @@ -5663,7 +5663,7 @@ define i64 @trunc_with_first_order_recurrence() { ; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NEXT: [[C24]] = add nuw nsw i32 [[X]], 1 ; UNROLL-NEXT: [[EXITCOND_I:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 114 -; UNROLL-NEXT: br i1 [[EXITCOND_I]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP51:![0-9]+]] +; UNROLL-NEXT: br i1 [[EXITCOND_I]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP49:![0-9]+]] ; ; UNROLL-NO-IC-LABEL: @trunc_with_first_order_recurrence( ; UNROLL-NO-IC-NEXT: entry: @@ -5708,7 +5708,7 @@ define i64 @trunc_with_first_order_recurrence() { ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT4]] = add <2 x i32> [[STEP_ADD7]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT6]] = add <2 x i32> [[STEP_ADD8]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112 -; UNROLL-NO-IC-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: 
[[BIN_RDX:%.*]] = add <2 x i64> [[TMP21]], [[TMP20]] ; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]]) @@ -5743,7 +5743,7 @@ define i64 @trunc_with_first_order_recurrence() { ; UNROLL-NO-IC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NO-IC-NEXT: [[C24]] = add nuw nsw i32 [[X]], 1 ; UNROLL-NO-IC-NEXT: [[EXITCOND_I:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 114 -; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND_I]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP51:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND_I]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP49:![0-9]+]] ; ; INTERLEAVE-LABEL: @trunc_with_first_order_recurrence( ; INTERLEAVE-NEXT: entry: @@ -5788,7 +5788,7 @@ define i64 @trunc_with_first_order_recurrence() { ; INTERLEAVE-NEXT: [[VEC_IND_NEXT4]] = add <4 x i32> [[VEC_IND3]], splat (i32 8) ; INTERLEAVE-NEXT: [[VEC_IND_NEXT6]] = add <4 x i32> [[VEC_IND5]], splat (i32 8) ; INTERLEAVE-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112 -; INTERLEAVE-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]] +; INTERLEAVE-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] ; INTERLEAVE: middle.block: ; INTERLEAVE-NEXT: [[BIN_RDX:%.*]] = add <4 x i64> [[TMP21]], [[TMP20]] ; INTERLEAVE-NEXT: [[TMP23:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[BIN_RDX]]) @@ -5821,7 +5821,7 @@ define i64 @trunc_with_first_order_recurrence() { ; INTERLEAVE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; INTERLEAVE-NEXT: [[C24]] = add nuw nsw i32 [[X]], 1 ; INTERLEAVE-NEXT: [[EXITCOND_I:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 114 -; INTERLEAVE-NEXT: br i1 [[EXITCOND_I]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP51:![0-9]+]] +; INTERLEAVE-NEXT: br i1 [[EXITCOND_I]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP49:![0-9]+]] ; entry: br label %loop @@ -5880,7 +5880,7 @@ 
define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr % ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[VEC_IND]], i32 1 ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -5902,7 +5902,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr % ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[IV_TRUNC]], [[MUL]] ; CHECK-NEXT: store i32 [[ADD]], ptr [[DST_GEP]], align 4 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TRUNC_IV_NEXT]], 100 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP53:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP51:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -5928,13 +5928,13 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr % ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; IND-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 -; IND-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]] +; IND-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]] ; IND: middle.block: ; IND-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; IND: scalar.ph: ; IND-NEXT: br label [[LOOP:%.*]] ; IND: loop: -; IND-NEXT: br i1 poison, label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP53:![0-9]+]] +; IND-NEXT: br i1 poison, label [[EXIT]], label 
[[LOOP]], !llvm.loop [[LOOP51:![0-9]+]] ; IND: exit: ; IND-NEXT: ret void ; @@ -5966,13 +5966,13 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr % ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4) ; UNROLL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 -; UNROLL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]] +; UNROLL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL: scalar.ph: ; UNROLL-NEXT: br label [[LOOP:%.*]] ; UNROLL: loop: -; UNROLL-NEXT: br i1 poison, label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP53:![0-9]+]] +; UNROLL-NEXT: br i1 poison, label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP51:![0-9]+]] ; UNROLL: exit: ; UNROLL-NEXT: ret void ; @@ -6005,7 +6005,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr % ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 -; UNROLL-NO-IC-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[STEP_ADD]], i32 1 ; UNROLL-NO-IC-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -6027,7 +6027,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr % ; UNROLL-NO-IC-NEXT: [[ADD:%.*]] = add i32 [[IV_TRUNC]], [[MUL]] ; UNROLL-NO-IC-NEXT: store i32 [[ADD]], ptr [[DST_GEP]], align 4 ; 
UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TRUNC_IV_NEXT]], 100 -; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP53:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP51:![0-9]+]] ; UNROLL-NO-IC: exit: ; UNROLL-NO-IC-NEXT: ret void ; @@ -6059,7 +6059,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr % ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8) ; INTERLEAVE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96 -; INTERLEAVE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]] +; INTERLEAVE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]] ; INTERLEAVE: middle.block: ; INTERLEAVE-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i64 3 ; INTERLEAVE-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -6081,7 +6081,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr % ; INTERLEAVE-NEXT: [[ADD:%.*]] = add i32 [[MUL]], [[IV_TRUNC]] ; INTERLEAVE-NEXT: store i32 [[ADD]], ptr [[DST_GEP]], align 4 ; INTERLEAVE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TRUNC_IV_NEXT]], 100 -; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP53:![0-9]+]] +; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP51:![0-9]+]] ; INTERLEAVE: exit: ; INTERLEAVE-NEXT: ret void ; @@ -6148,9 +6148,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = mul <2 x i32> , [[DOTSPLAT]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> zeroinitializer, [[TMP17]] -; 
CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[STEP]], 2 -; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i64 0 -; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT2]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLAT3:%.*]] = mul <2 x i32> [[DOTSPLAT]], splat (i32 2) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -6164,7 +6162,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], [[DOTSPLAT3]] ; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP54:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[VEC_IND]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -6185,7 +6183,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; CHECK-NEXT: [[IV_2_NEXT]] = add nsw i32 [[IV_2_CONV]], [[STEP]] ; CHECK-NEXT: [[IV_1_NEXT]] = add nuw nsw i64 [[IV_1]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_1_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP55:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP53:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -6221,9 +6219,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; IND-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0 ; IND-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], 
<2 x i32> poison, <2 x i32> zeroinitializer ; IND-NEXT: [[TMP15:%.*]] = mul nuw <2 x i32> [[DOTSPLAT]], -; IND-NEXT: [[TMP16:%.*]] = shl i32 [[STEP]], 1 -; IND-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x i32> poison, i32 [[TMP16]], i64 0 -; IND-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT2]], <2 x i32> poison, <2 x i32> zeroinitializer +; IND-NEXT: [[DOTSPLAT3:%.*]] = shl <2 x i32> [[DOTSPLAT]], splat (i32 1) ; IND-NEXT: br label [[VECTOR_BODY:%.*]] ; IND: vector.body: ; IND-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -6235,7 +6231,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], [[DOTSPLAT3]] ; IND-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; IND-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP54:![0-9]+]] +; IND-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]] ; IND: middle.block: ; IND-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[VEC_IND]], i64 1 ; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -6256,7 +6252,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; IND-NEXT: [[IV_2_NEXT]] = add nsw i32 [[IV_2_CONV]], [[STEP]] ; IND-NEXT: [[IV_1_NEXT]] = add nuw nsw i64 [[IV_1]], 1 ; IND-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_1_NEXT]], [[N]] -; IND-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP55:![0-9]+]] +; IND-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP53:![0-9]+]] ; IND: exit: ; IND-NEXT: ret void ; @@ -6289,11 +6285,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -4 ; UNROLL-NEXT: [[DOTCAST:%.*]] = trunc i64 
[[N_VEC]] to i32 ; UNROLL-NEXT: [[IND_END:%.*]] = mul i32 [[STEP]], [[DOTCAST]] -; UNROLL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0 -; UNROLL-NEXT: [[TMP15:%.*]] = shl <2 x i32> [[BROADCAST_SPLATINSERT]], -; UNROLL-NEXT: [[TMP16:%.*]] = shufflevector <2 x i32> [[TMP15]], <2 x i32> poison, <2 x i32> zeroinitializer ; UNROLL-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0 ; UNROLL-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer +; UNROLL-NEXT: [[TMP16:%.*]] = shl <2 x i32> [[DOTSPLAT]], splat (i32 1) ; UNROLL-NEXT: [[TMP17:%.*]] = mul nuw <2 x i32> [[DOTSPLAT]], ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL: vector.body: @@ -6310,7 +6304,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], [[TMP16]] ; UNROLL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP54:![0-9]+]] +; UNROLL-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[STEP_ADD]], i64 1 ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -6331,7 +6325,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; UNROLL-NEXT: [[IV_2_NEXT]] = add nsw i32 [[IV_2_CONV]], [[STEP]] ; UNROLL-NEXT: [[IV_1_NEXT]] = add nuw nsw i64 [[IV_1]], 1 ; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_1_NEXT]], [[N]] -; UNROLL-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP55:![0-9]+]] +; UNROLL-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP53:![0-9]+]] ; UNROLL: exit: ; 
UNROLL-NEXT: ret void ; @@ -6370,9 +6364,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; UNROLL-NO-IC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0 ; UNROLL-NO-IC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = mul <2 x i32> splat (i32 2), [[DOTSPLAT]] -; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0 -; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = mul <2 x i32> , [[BROADCAST_SPLAT]] +; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = mul <2 x i32> , [[DOTSPLAT]] ; UNROLL-NO-IC-NEXT: [[INDUCTION:%.*]] = add <2 x i32> zeroinitializer, [[TMP18]] ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: @@ -6391,7 +6383,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], [[TMP17]] ; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP54:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[STEP_ADD]], i32 1 ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -6412,7 +6404,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; UNROLL-NO-IC-NEXT: [[IV_2_NEXT]] = add nsw i32 [[IV_2_CONV]], [[STEP]] ; UNROLL-NO-IC-NEXT: [[IV_1_NEXT]] = add nuw nsw i64 [[IV_1]], 1 ; UNROLL-NO-IC-NEXT: 
[[EXITCOND:%.*]] = icmp eq i64 [[IV_1_NEXT]], [[N]] -; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP55:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP53:![0-9]+]] ; UNROLL-NO-IC: exit: ; UNROLL-NO-IC-NEXT: ret void ; @@ -6445,11 +6437,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -8 ; INTERLEAVE-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32 ; INTERLEAVE-NEXT: [[IND_END:%.*]] = mul i32 [[STEP]], [[DOTCAST]] -; INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[STEP]], i64 0 -; INTERLEAVE-NEXT: [[TMP15:%.*]] = shl <4 x i32> [[BROADCAST_SPLATINSERT]], -; INTERLEAVE-NEXT: [[TMP16:%.*]] = shufflevector <4 x i32> [[TMP15]], <4 x i32> poison, <4 x i32> zeroinitializer ; INTERLEAVE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[STEP]], i64 0 ; INTERLEAVE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; INTERLEAVE-NEXT: [[TMP16:%.*]] = shl <4 x i32> [[DOTSPLAT]], splat (i32 2) ; INTERLEAVE-NEXT: [[TMP17:%.*]] = mul <4 x i32> [[DOTSPLAT]], ; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] ; INTERLEAVE: vector.body: @@ -6466,7 +6456,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], [[TMP16]] ; INTERLEAVE-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; INTERLEAVE-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP54:![0-9]+]] +; INTERLEAVE-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]] ; INTERLEAVE: middle.block: ; INTERLEAVE-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> 
[[STEP_ADD]], i64 3 ; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -6487,7 +6477,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; INTERLEAVE-NEXT: [[IV_2_NEXT]] = add nsw i32 [[IV_2_CONV]], [[STEP]] ; INTERLEAVE-NEXT: [[IV_1_NEXT]] = add nuw nsw i64 [[IV_1]], 1 ; INTERLEAVE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_1_NEXT]], [[N]] -; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP55:![0-9]+]] +; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP53:![0-9]+]] ; INTERLEAVE: exit: ; INTERLEAVE-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll index 82a17297f6072..2d83ed25cb185 100644 --- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll +++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll @@ -758,9 +758,7 @@ define float @fp_postinc_use_fadd(float %init, ptr noalias nocapture %A, i64 %N, ; VEC-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 ; VEC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT2]], <2 x float> poison, <2 x i32> zeroinitializer ; VEC-NEXT: [[INDUCTION:%.*]] = fadd fast <2 x float> [[DOTSPLAT]], [[TMP2]] -; VEC-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], 2.000000e+00 -; VEC-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0 -; VEC-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT3]], <2 x float> poison, <2 x i32> zeroinitializer +; VEC-NEXT: [[DOTSPLAT4:%.*]] = fmul fast <2 x float> [[DOTSPLAT2]], splat (float 2.000000e+00) ; VEC-NEXT: br label %[[VECTOR_BODY:.*]] ; VEC: [[VECTOR_BODY]]: ; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -778,11 +776,11 @@ define float @fp_postinc_use_fadd(float %init, ptr noalias nocapture %A, i64 %N, ; VEC-NEXT: br i1 
[[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; VEC: [[SCALAR_PH]]: ; VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; VEC-NEXT: [[BC_RESUME_VAL5:%.*]] = phi float [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[INIT]], %[[ENTRY]] ] +; VEC-NEXT: [[BC_RESUME_VAL3:%.*]] = phi float [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[INIT]], %[[ENTRY]] ] ; VEC-NEXT: br label %[[LOOP:.*]] ; VEC: [[LOOP]]: ; VEC-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; VEC-NEXT: [[FP_IV:%.*]] = phi float [ [[BC_RESUME_VAL5]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ] +; VEC-NEXT: [[FP_IV:%.*]] = phi float [ [[BC_RESUME_VAL3]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ] ; VEC-NEXT: [[GEP_A:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] ; VEC-NEXT: store float [[FP_IV]], ptr [[GEP_A]], align 4 ; VEC-NEXT: [[ADD]] = fadd fast float [[FP_IV]], [[FPINC]] @@ -878,9 +876,7 @@ define float @fp_postinc_use_fadd_ops_swapped(float %init, ptr noalias nocapture ; VEC-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 ; VEC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT2]], <2 x float> poison, <2 x i32> zeroinitializer ; VEC-NEXT: [[INDUCTION:%.*]] = fadd fast <2 x float> [[DOTSPLAT]], [[TMP2]] -; VEC-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], 2.000000e+00 -; VEC-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0 -; VEC-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT3]], <2 x float> poison, <2 x i32> zeroinitializer +; VEC-NEXT: [[DOTSPLAT4:%.*]] = fmul fast <2 x float> [[DOTSPLAT2]], splat (float 2.000000e+00) ; VEC-NEXT: br label %[[VECTOR_BODY:.*]] ; VEC: [[VECTOR_BODY]]: ; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -898,11 +894,11 @@ define float @fp_postinc_use_fadd_ops_swapped(float %init, ptr noalias nocapture ; 
VEC-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; VEC: [[SCALAR_PH]]: ; VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; VEC-NEXT: [[BC_RESUME_VAL5:%.*]] = phi float [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[INIT]], %[[ENTRY]] ] +; VEC-NEXT: [[BC_RESUME_VAL3:%.*]] = phi float [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[INIT]], %[[ENTRY]] ] ; VEC-NEXT: br label %[[LOOP:.*]] ; VEC: [[LOOP]]: ; VEC-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; VEC-NEXT: [[FP_IV:%.*]] = phi float [ [[BC_RESUME_VAL5]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ] +; VEC-NEXT: [[FP_IV:%.*]] = phi float [ [[BC_RESUME_VAL3]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ] ; VEC-NEXT: [[GEP_A:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] ; VEC-NEXT: store float [[FP_IV]], ptr [[GEP_A]], align 4 ; VEC-NEXT: [[ADD]] = fadd fast float [[FPINC]], [[FP_IV]] @@ -998,9 +994,7 @@ define float @fp_postinc_use_fsub(float %init, ptr noalias nocapture %A, i64 %N, ; VEC-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 ; VEC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT2]], <2 x float> poison, <2 x i32> zeroinitializer ; VEC-NEXT: [[INDUCTION:%.*]] = fsub fast <2 x float> [[DOTSPLAT]], [[TMP2]] -; VEC-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], 2.000000e+00 -; VEC-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0 -; VEC-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT3]], <2 x float> poison, <2 x i32> zeroinitializer +; VEC-NEXT: [[DOTSPLAT4:%.*]] = fmul fast <2 x float> [[DOTSPLAT2]], splat (float 2.000000e+00) ; VEC-NEXT: br label %[[VECTOR_BODY:.*]] ; VEC: [[VECTOR_BODY]]: ; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -1018,11 +1012,11 @@ define float @fp_postinc_use_fsub(float %init, ptr noalias 
nocapture %A, i64 %N, ; VEC-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; VEC: [[SCALAR_PH]]: ; VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; VEC-NEXT: [[BC_RESUME_VAL5:%.*]] = phi float [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[INIT]], %[[ENTRY]] ] +; VEC-NEXT: [[BC_RESUME_VAL3:%.*]] = phi float [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[INIT]], %[[ENTRY]] ] ; VEC-NEXT: br label %[[LOOP:.*]] ; VEC: [[LOOP]]: ; VEC-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; VEC-NEXT: [[FP_IV:%.*]] = phi float [ [[BC_RESUME_VAL5]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ] +; VEC-NEXT: [[FP_IV:%.*]] = phi float [ [[BC_RESUME_VAL3]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ] ; VEC-NEXT: [[GEP_A:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] ; VEC-NEXT: store float [[FP_IV]], ptr [[GEP_A]], align 4 ; VEC-NEXT: [[ADD]] = fsub fast float [[FP_IV]], [[FPINC]] diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll index 6411a3031d1af..b695910a38b39 100644 --- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll @@ -478,9 +478,7 @@ define void @induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i8> , [[DOTSPLAT]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i8> zeroinitializer, [[TMP2]] -; CHECK-NEXT: [[TMP3:%.*]] = mul i8 [[INDUCTION_IV]], 4 -; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[TMP3]], i64 0 -; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLAT2:%.*]] = mul <4 x i8> 
[[DOTSPLAT]], splat (i8 4) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -509,9 +507,7 @@ define void @induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst ; CHECK-NEXT: [[DOTSPLATINSERT10:%.*]] = insertelement <4 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0 ; CHECK-NEXT: [[DOTSPLAT11:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT10]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION12:%.*]] = add <4 x i8> [[DOTSPLAT11]], [[TMP14]] -; CHECK-NEXT: [[TMP9:%.*]] = mul i8 [[INDUCTION_IV]], 4 -; CHECK-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement <4 x i8> poison, i8 [[TMP9]], i64 0 -; CHECK-NEXT: [[DOTSPLAT14:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT13]], <4 x i8> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLAT14:%.*]] = mul <4 x i8> [[DOTSPLAT9]], splat (i8 4) ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT18:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] @@ -568,9 +564,7 @@ define void @induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP2:%.*]] = mul <4 x i8> , [[DOTSPLAT]] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDUCTION:%.*]] = add <4 x i8> zeroinitializer, [[TMP2]] -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP3:%.*]] = mul i8 [[INDUCTION_IV]], 4 -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[TMP3]], i64 0 -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT2:%.*]] = mul <4 x i8> 
[[DOTSPLAT]], splat (i8 4) ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-PROFITABLE-BY-DEFAULT: vector.body: ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -599,9 +593,7 @@ define void @induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLATINSERT10:%.*]] = insertelement <2 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT11:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT10]], <2 x i8> poison, <2 x i32> zeroinitializer ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDUCTION12:%.*]] = add <2 x i8> [[DOTSPLAT11]], [[TMP14]] -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP9:%.*]] = mul i8 [[INDUCTION_IV]], 2 -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement <2 x i8> poison, i8 [[TMP9]], i64 0 -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT14:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT13]], <2 x i8> poison, <2 x i32> zeroinitializer +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT14:%.*]] = mul <2 x i8> [[DOTSPLAT9]], splat (i8 2) ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK-PROFITABLE-BY-DEFAULT: vec.epilog.vector.body: ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT18:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll b/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll index 2135fc6367918..91d17c45c8979 100644 --- a/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll +++ b/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll @@ -29,9 +29,9 @@ define void @foo() { ; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul [[TMP4]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP6]] -; CHECK-NEXT: 
[[TMP9:%.*]] = mul i64 1, [[TMP19]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP19]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = mul splat (i64 1), [[DOTSPLAT]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[OUTER_LOOP_LATCH4:%.*]] ] @@ -53,7 +53,7 @@ define void @foo() { ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi [ [[TMP12]], [[INNER_LOOP1]] ] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0( [[VEC_PHI5]], [[TMP10]], i32 4, splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP8]] ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll b/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll index 0cda697e0337a..fc12f76d3cfcb 100644 --- a/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll +++ b/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll @@ -73,9 +73,7 @@ define void @doit1(i32 %n, i32 %step) local_unnamed_addr { ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = mul <4 x i32> , [[DOTSPLAT]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> zeroinitializer, [[TMP17]] -; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[STEP]], 4 -; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP18]], i64 0 -; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x 
i32> [[DOTSPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLAT3:%.*]] = mul <4 x i32> [[DOTSPLAT]], splat (i32 4) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -196,9 +194,7 @@ define void @doit2(i32 %n, i32 %step) local_unnamed_addr { ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP16:%.*]] = mul <4 x i32> , [[DOTSPLAT]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> zeroinitializer, [[TMP16]] -; CHECK-NEXT: [[TMP17:%.*]] = mul i32 [[STEP]], 4 -; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP17]], i64 0 -; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLAT3:%.*]] = mul <4 x i32> [[DOTSPLAT]], splat (i32 4) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -391,9 +387,7 @@ define void @doit4(i32 %n, i8 signext %cstep) local_unnamed_addr { ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = mul <4 x i32> , [[DOTSPLAT]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> zeroinitializer, [[TMP14]] -; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[CONV]], 4 -; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP15]], i64 0 -; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLAT3:%.*]] = mul <4 x i32> [[DOTSPLAT]], splat (i32 4) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], 
[[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll index f136b0e2e0b31..a6be32aee779d 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll @@ -192,9 +192,9 @@ define void @add_unique_ind32(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i32() ; CHECK-NEXT: [[TMP7:%.*]] = shl [[TMP6]], splat (i32 1) ; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP5]] to i32 -; CHECK-NEXT: [[TMP9:%.*]] = shl i32 [[TMP8]], 1 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP9]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP8]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = shl [[DOTSPLAT]], splat (i32 1) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -202,7 +202,7 @@ define void @add_unique_ind32(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: store [[VEC_IND]], ptr [[TMP10]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP12]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: @@ -272,9 +272,9 @@ define void @add_unique_indf32(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = fmul [[TMP8]], splat (float 2.000000e+00) ; CHECK-NEXT: [[INDUCTION:%.*]] = fadd [[TMP9]], zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = uitofp i64 
[[TMP6]] to float -; CHECK-NEXT: [[TMP13:%.*]] = fmul float [[TMP12]], 2.000000e+00 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, float [[TMP13]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, float [[TMP12]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = fmul [[DOTSPLAT]], splat (float 2.000000e+00) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -282,7 +282,7 @@ define void @add_unique_indf32(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: store [[VEC_IND]], ptr [[TMP14]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = fadd [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = fadd [[VEC_IND]], [[TMP13]] ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: middle.block: From d183c91430311acaeff7c1a173fa3402093fccba Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 3 Feb 2025 18:17:37 +0800 Subject: [PATCH 13/28] Remove VPStepVectorRecipe, replace with VPInstruction --- .../Vectorize/LoopVectorizationPlanner.h | 6 ++- llvm/lib/Transforms/Vectorize/VPlan.h | 38 ++----------------- .../Transforms/Vectorize/VPlanAnalysis.cpp | 4 +- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 28 +++++--------- llvm/lib/Transforms/Vectorize/VPlanValue.h | 1 - 5 files changed, 19 insertions(+), 58 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index f7c068eaa4d9d..15ebd37d664da 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ 
b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -265,8 +265,10 @@ class VPBuilder { FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags())); } - VPStepVectorRecipe *createStepVector(Type *Ty) { - return tryInsertInstruction(new VPStepVectorRecipe(Ty)); + VPInstruction *createStepVector(Type *Ty) { + VPValue *TyVal = BB->getPlan()->getOrAddLiveIn(Constant::getNullValue(Ty)); + return tryInsertInstruction( + new VPInstruction(VPInstruction::StepVector, {TyVal})); } //===--------------------------------------------------------------------===// diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index e1e86fe980cc7..58ba194cca6c5 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -880,6 +880,9 @@ class VPInstruction : public VPRecipeWithIRFlags, // Extracts the first active lane of a vector, where the first operand is // the predicate, and the second operand is the vector to extract. ExtractFirstActive, + // Creates a step vector starting from 0 with a step of 1. The first operand + // is a dummy constant that should be used to specify the element type. + StepVector, }; private: @@ -1236,41 +1239,6 @@ class VPScalarCastRecipe : public VPSingleDefRecipe { } }; -/// A recipe for generating a step vector. -class VPStepVectorRecipe : public VPSingleDefRecipe { - /// Scalar return type of the intrinsic. - Type *ScalarTy; - -public: - VPStepVectorRecipe(Type *Ty) - : VPSingleDefRecipe(VPDef::VPStepVectorSC, {}), ScalarTy(Ty) {} - - ~VPStepVectorRecipe() override = default; - - VPStepVectorRecipe *clone() override { - return new VPStepVectorRecipe(ScalarTy); - } - - VP_CLASSOF_IMPL(VPDef::VPStepVectorSC) - - void execute(VPTransformState &State) override; - - /// Return the cost of this VPStepVectorRecipe. - InstructionCost computeCost(ElementCount VF, - VPCostContext &Ctx) const override { - // TODO: Compute accurate cost after retiring the legacy cost model. 
- return 0; - } - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - void print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const override; -#endif - - /// Return the scalar return type of the intrinsic. - Type *getScalarType() const { return ScalarTy; } -}; - /// A recipe for widening vector intrinsics. class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags { /// ID of the vector intrinsic to widen. diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index 8414b36f3ae22..4e4892005a7be 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -77,6 +77,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) { case VPInstruction::CalculateTripCountMinusVF: case VPInstruction::CanonicalIVIncrementForPart: case VPInstruction::AnyOf: + case VPInstruction::StepVector: return SetResultTyFromOp(); case VPInstruction::ExtractFirstActive: case VPInstruction::ExtractFromEnd: { @@ -243,8 +244,7 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) { // backedge value, here and in cases below. 
return inferScalarType(R->getStartValue()); }) - .Case( + .Case( [](const auto *R) { return R->getScalarType(); }) .CasegetType(); + return State.Builder.CreateStepVector(VectorType::get(EltTy, State.VF)); + } default: llvm_unreachable("Unsupported opcode for instruction"); @@ -824,6 +825,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const { case VPInstruction::LogicalAnd: case VPInstruction::Not: case VPInstruction::PtrAdd: + case VPInstruction::StepVector: return false; default: return true; @@ -851,6 +853,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { case VPInstruction::BranchOnCount: case VPInstruction::BranchOnCond: case VPInstruction::ResumePhi: + case VPInstruction::StepVector: return true; }; llvm_unreachable("switch should return"); @@ -942,6 +945,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, case VPInstruction::ExtractFirstActive: O << "extract-first-active"; break; + case VPInstruction::StepVector: + O << "step-vector"; + break; default: O << Instruction::getOpcodeName(getOpcode()); } @@ -2291,7 +2297,7 @@ Value *VPScalarCastRecipe ::generate(VPTransformState &State) { case Instruction::ZExt: case Instruction::Trunc: case Instruction::UIToFP: { - // Note: SExt not used yet. + // Note: SExt/ZExt not used yet. 
Value *Op = State.get(getOperand(0), VPLane(0)); return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy); } @@ -2315,20 +2321,6 @@ void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent, } #endif -void VPStepVectorRecipe::execute(VPTransformState &State) { - VectorType *Ty = VectorType::get(ScalarTy, State.VF); - State.set(this, State.Builder.CreateStepVector(Ty)); -} - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void VPStepVectorRecipe::print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const { - O << Indent; - printAsOperand(O, SlotTracker); - O << " = STEP-VECTOR"; -} -#endif - void VPBranchOnMaskRecipe::execute(VPTransformState &State) { assert(State.Lane && "Branch on Mask works only on single instance."); diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index 61422bfe6c0b0..a058b2a121d59 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -354,7 +354,6 @@ class VPDef { VPWidenSelectSC, VPBlendSC, VPHistogramSC, - VPStepVectorSC, // START: Phi-like recipes. Need to be kept together. 
VPWidenPHISC, VPPredInstPHISC, From 79b9700d6f5ce3833ad5bee74b1c0e76a8d5da43 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Tue, 18 Feb 2025 18:24:30 +0800 Subject: [PATCH 14/28] Add VPlan debug output test --- .../LoopVectorize/vplan-iv-transforms.ll | 56 ++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll index be1eb78cab607..7068a795cbb5d 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll @@ -29,7 +29,7 @@ define void @iv_no_binary_op_in_descriptor(i1 %c, ptr %dst) { ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: -; CHECK-NEXT: middle.block: + ; CHECK-NEXT: middle.block: ; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1000>, vp<[[VEC_TC]]> ; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph @@ -65,3 +65,57 @@ loop.latch: exit: ret void } + +; Check that VPWidenIntOrFPInductionRecipe is expanded into smaller recipes in +; the final VPlan. 
+define void @iv_expand(ptr %p, i64 %n) { +; CHECK-LABEL: LV: Checking a loop in 'iv_expand' +; CHECK: VPlan 'Initial VPlan for VF={8},UF>=1' { +; CHECK: vector loop: { +; CHECK-NEXT: vector.body: +; CHECK-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next> +; CHECK-NEXT: ir<%i> = WIDEN-INDUCTION ir<0>, ir<1>, vp<%0> +; CHECK-NEXT: vp<%4> = SCALAR-STEPS vp<%3>, ir<1> +; CHECK-NEXT: CLONE ir<%q> = getelementptr ir<%p>, vp<%4> +; CHECK-NEXT: vp<%5> = vector-pointer ir<%q> +; CHECK-NEXT: WIDEN ir<%x> = load vp<%5> +; CHECK-NEXT: WIDEN ir<%y> = add ir<%x>, ir<%i> +; CHECK-NEXT: vp<%6> = vector-pointer ir<%q> +; CHECK-NEXT: WIDEN store vp<%6>, ir<%y> +; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%3>, vp<%1> +; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%2> +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.block +; CHECK: VPlan 'Final VPlan for VF={8},UF={1}' +; CHECK: vector loop: { +; CHECK-NEXT: vector.body: +; CHECK-NEXT: SCALAR-PHI vp<%3> = phi ir<0>, vp<%index.next> +; CHECK-NEXT: WIDEN-PHI ir<%i> = phi vp<%induction>, vp<%vec.ind.next> +; CHECK-NEXT: vp<%4> = SCALAR-STEPS vp<%3>, ir<1> +; CHECK-NEXT: CLONE ir<%q> = getelementptr ir<%p>, vp<%4> +; CHECK-NEXT: vp<%5> = vector-pointer ir<%q> +; CHECK-NEXT: WIDEN ir<%x> = load vp<%5> +; CHECK-NEXT: WIDEN ir<%y> = add ir<%x>, ir<%i> +; CHECK-NEXT: vp<%6> = vector-pointer ir<%q> +; CHECK-NEXT: WIDEN store vp<%6>, ir<%y> +; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%3>, ir<8> +; CHECK-NEXT: EMIT vp<%vec.ind.next> = add ir<%i>, vp<%2> +; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, ir<%n.vec> +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.block +entry: + br label %loop +loop: + %i = phi i64 [0, %entry], [%i.next, %loop] + %q = getelementptr i64, ptr %p, i64 %i + %x = load i64, ptr %q + %y = add i64 %x, %i + store i64 %y, ptr %q + %i.next = add i64 %i, 1 + %done = icmp eq i64 %i.next, %n + br i1 %done, label %exit, label 
%loop +exit: + ret void +} From d3d85f6d39486bbc6e62d7a3d449e3a36b636099 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Tue, 18 Feb 2025 18:25:17 +0800 Subject: [PATCH 15/28] Undo stray whitespace change --- llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll index 7068a795cbb5d..a3558ba82f812 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll @@ -29,7 +29,7 @@ define void @iv_no_binary_op_in_descriptor(i1 %c, ptr %dst) { ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: - ; CHECK-NEXT: middle.block: +; CHECK-NEXT: middle.block: ; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1000>, vp<[[VEC_TC]]> ; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph From 8f9461019a153f562b6bcc35a205a13dd90c8b97 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Tue, 18 Feb 2025 20:19:36 +0800 Subject: [PATCH 16/28] Fix ARM test --- .../LoopVectorize/ARM/mve-gather-scatter-tailpred.ll | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll index fda9ef2cf6c2f..8d9781f605134 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll @@ -516,9 +516,7 @@ define void @test_stride_noninvar3_4i32(ptr readonly %data, ptr noalias nocaptur ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i32> , [[DOTSPLAT]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> splat (i32 3), [[TMP1]] -; CHECK-NEXT: [[TMP2:%.*]] = mul 
i32 [[X]], 4 -; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i64 0 -; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLAT3:%.*]] = mul <4 x i32> [[DOTSPLAT]], splat (i32 4) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] From dff84ec397549bee30382847dc70d3f0513dee9b Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 3 Mar 2025 14:46:15 +0800 Subject: [PATCH 17/28] Remove unnecessary assert --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index cb9dc7da2a656..ce04fed44a25d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2105,7 +2105,6 @@ expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR, if (WidenIVR->getNumOperands() == 5) { Inc = WidenIVR->getSplatVFValue(); Prev = WidenIVR->getLastUnrolledPartOperand(); - assert(Inc && Prev); } else { // Multiply the vectorization factor by the step using integer or // floating-point arithmetic as appropriate. From 68b6b66c1183f4576f6ca8ace645b2c0991461b7 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 3 Mar 2025 23:33:52 +0800 Subject: [PATCH 18/28] Update vplan test to include preheader, remove references to old recipes, remove stray . 
--- llvm/lib/Transforms/Vectorize/VPlan.h | 5 ++- .../LoopVectorize/vplan-iv-transforms.ll | 31 ++++++++++++------- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 8eeaa491831fb..522dab584cab1 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1761,8 +1761,7 @@ class VPWidenInductionRecipe : public VPHeaderPHIRecipe { /// A recipe for handling phi nodes of integer and floating-point inductions, /// producing their vector values. This won't execute any LLVM IR and will get -/// expanded later into VPWidenIntOrFpInitialRecipe, VPWidenIntOrFpPHIRecipe and -/// VPWidenIntOrFpBackedgeRecipe. +/// expanded later into several other recipes in convertToConcreteRecipes. class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe { TruncInst *Trunc; @@ -1949,7 +1948,7 @@ class VPWidenPHIRecipe : public VPSingleDefRecipe { public: /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start and - /// debug location \p DL.. + /// debug location \p DL. 
VPWidenPHIRecipe(Instruction *Phi, VPValue *Start = nullptr, DebugLoc DL = {}, const Twine &Name = "vec.phi") : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef(), Phi, DL), diff --git a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll index a3558ba82f812..8ec397129a134 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll @@ -88,19 +88,28 @@ define void @iv_expand(ptr %p, i64 %n) { ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block ; CHECK: VPlan 'Final VPlan for VF={8},UF={1}' -; CHECK: vector loop: { +; CHECK: ir-bb: +; CHECK-NEXT: IR %n.mod.vf = urem i64 %n, 8 +; CHECK-NEXT: IR %n.vec = sub i64 %n, %n.mod.vf +; CHECK-NEXT: EMIT vp<[[STEP_VECTOR:%.+]]> = step-vector ir<0> +; CHECK-NEXT: EMIT vp<[[MUL:%.+]]> = mul vp<[[STEP_VECTOR]]>, ir<1> +; CHECK-NEXT: EMIT vp<[[INDUCTION:%.+]]> = add ir<0>, vp<[[MUL]]> +; CHECK-NEXT: EMIT vp<[[INC:%.+]]> = mul ir<1>, ir<8> +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: +; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: SCALAR-PHI vp<%3> = phi ir<0>, vp<%index.next> -; CHECK-NEXT: WIDEN-PHI ir<%i> = phi vp<%induction>, vp<%vec.ind.next> -; CHECK-NEXT: vp<%4> = SCALAR-STEPS vp<%3>, ir<1> -; CHECK-NEXT: CLONE ir<%q> = getelementptr ir<%p>, vp<%4> -; CHECK-NEXT: vp<%5> = vector-pointer ir<%q> -; CHECK-NEXT: WIDEN ir<%x> = load vp<%5> +; CHECK-NEXT: SCALAR-PHI vp<[[SCALAR_PHI:%.+]]> = phi ir<0>, vp<%index.next> +; CHECK-NEXT: WIDEN-PHI ir<%i> = phi vp<[[INDUCTION]]>, vp<%vec.ind.next> +; CHECK-NEXT: vp<[[SCALAR_STEPS:%.+]]> = SCALAR-STEPS vp<[[SCALAR_PHI]]>, ir<1> +; CHECK-NEXT: CLONE ir<%q> = getelementptr ir<%p>, vp<[[SCALAR_STEPS]]> +; CHECK-NEXT: vp<[[VEC_PTR_1:%.+]]> = vector-pointer ir<%q> +; CHECK-NEXT: WIDEN ir<%x> = load vp<[[VEC_PTR_1]]> ; CHECK-NEXT: WIDEN ir<%y> = add ir<%x>, ir<%i> -; CHECK-NEXT: vp<%6> = vector-pointer ir<%q> -; 
CHECK-NEXT: WIDEN store vp<%6>, ir<%y> -; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%3>, ir<8> -; CHECK-NEXT: EMIT vp<%vec.ind.next> = add ir<%i>, vp<%2> +; CHECK-NEXT: vp<[[VEC_PTR_2:%.+]]> = vector-pointer ir<%q> +; CHECK-NEXT: WIDEN store vp<[[VEC_PTR_2]]>, ir<%y> +; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[SCALAR_PHI]]>, ir<8> +; CHECK-NEXT: EMIT vp<%vec.ind.next> = add ir<%i>, vp<[[INC]]> ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, ir<%n.vec> ; CHECK-NEXT: No successors ; CHECK-NEXT: } From 0f8b4f6361aa13acdac414b0bf0d488cdab9cade Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Fri, 9 May 2025 00:12:03 +0800 Subject: [PATCH 19/28] Update now that #129508 is landed * Remove temp stepvector operand from VPWidenIntOrFpInductionRecipe * Use VPInstruction::Broadcast --- .../Transforms/Vectorize/LoopVectorize.cpp | 1 - llvm/lib/Transforms/Vectorize/VPlan.cpp | 20 ++- llvm/lib/Transforms/Vectorize/VPlan.h | 12 +- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 9 +- .../Transforms/Vectorize/VPlanTransforms.cpp | 61 +++----- .../Transforms/Vectorize/VPlanTransforms.h | 5 - .../AArch64/clamped-trip-count.ll | 16 +-- .../AArch64/conditional-branches-cost.ll | 4 +- .../AArch64/divs-with-scalable-vfs.ll | 16 +-- .../AArch64/epilog-iv-select-cmp.ll | 12 +- .../LoopVectorize/AArch64/optsize_minsize.ll | 8 +- .../AArch64/outer_loop_prefer_scalable.ll | 8 +- .../AArch64/simple_early_exit.ll | 2 +- .../AArch64/sve-inductions-unusual-types.ll | 8 +- .../AArch64/sve-interleaved-accesses.ll | 22 +-- .../sve-interleaved-masked-accesses.ll | 132 +++++++++--------- .../LoopVectorize/AArch64/sve-tail-folding.ll | 6 +- .../LoopVectorize/AArch64/sve-widen-phi.ll | 2 +- ...eave-to-widen-memory-remove-loop-region.ll | 2 +- .../ARM/mve-gather-scatter-tailpred.ll | 4 +- .../LoopVectorize/ARM/optsize_minsize.ll | 2 +- .../RISCV/blocks-with-dead-instructions.ll | 42 +++--- .../LoopVectorize/RISCV/mask-index-type.ll | 8 +- .../RISCV/masked_gather_scatter.ll | 12 +- 
...ruction-or-drop-poison-generating-flags.ll | 8 +- .../Transforms/LoopVectorize/RISCV/pr88802.ll | 2 +- .../LoopVectorize/RISCV/strided-accesses.ll | 20 +-- .../LoopVectorize/RISCV/uniform-load-store.ll | 10 +- ...rize-force-tail-with-evl-cond-reduction.ll | 26 ++-- ...ectorize-force-tail-with-evl-interleave.ll | 2 +- .../LoopVectorize/X86/constant-fold.ll | 4 +- .../LoopVectorize/X86/induction-costs.ll | 24 ++-- .../LoopVectorize/X86/scatter_crash.ll | 16 +-- .../LoopVectorize/create-induction-resume.ll | 4 +- .../first-order-recurrence-chains.ll | 2 +- .../LoopVectorize/iv_outside_user.ll | 36 +++-- .../optimal-epilog-vectorization.ll | 32 +++-- .../LoopVectorize/outer_loop_scalable.ll | 6 +- .../LoopVectorize/pointer-induction.ll | 14 +- .../pr30654-phiscev-sext-trunc.ll | 12 +- .../LoopVectorize/scalable-inductions.ll | 12 +- .../LoopVectorize/scalable-iv-outside-user.ll | 2 +- .../single_early_exit_live_outs.ll | 2 +- .../LoopVectorize/vplan-iv-transforms.ll | 19 +-- 44 files changed, 327 insertions(+), 340 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 976458de903e8..46abc19644336 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7782,7 +7782,6 @@ DenseMap LoopVectorizationPlanner::executePlan( "Trying to execute plan with unsupported VF"); assert(BestVPlan.hasUF(BestUF) && "Trying to execute plan with unsupported UF"); - VPlanTransforms::materializeStepVectors(BestVPlan); // TODO: Move to VPlan transform stage once the transition to the VPlan-based // cost model is complete for better cost estimates. 
VPlanTransforms::runPass(VPlanTransforms::unrollByUF, BestVPlan, BestUF, diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index fa98801d07018..79addfd296b16 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -317,17 +317,15 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) { LastLane = 0; } + auto *LastInst = cast(get(Def, LastLane)); + // Set the insert point after the last scalarized instruction or after the + // last PHI, if LastInst is a PHI. This ensures the insertelement sequence + // will directly follow the scalar definitions. auto OldIP = Builder.saveIP(); - auto *LastVal = get(Def, LastLane); - if (auto *LastInst = dyn_cast(LastVal)) { - // Set the insert point after the last scalarized instruction or after the - // last PHI, if LastInst is a PHI. This ensures the insertelement sequence - // will directly follow the scalar definitions. - auto NewIP = isa(LastInst) - ? LastInst->getParent()->getFirstNonPHIIt() - : std::next(BasicBlock::iterator(LastInst)); - Builder.SetInsertPoint(&*NewIP); - } + auto NewIP = isa(LastInst) + ? LastInst->getParent()->getFirstNonPHIIt() + : std::next(BasicBlock::iterator(LastInst)); + Builder.SetInsertPoint(&*NewIP); // However, if we are vectorizing, we need to construct the vector values. // If the value is known to be uniform after vectorization, we can just @@ -342,7 +340,7 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) { } else { // Initialize packing with insertelements to start from undef. 
assert(!VF.isScalable() && "VF is assumed to be non scalable."); - Value *Undef = PoisonValue::get(toVectorizedTy(LastVal->getType(), VF)); + Value *Undef = PoisonValue::get(toVectorizedTy(LastInst->getType(), VF)); set(Def, Undef); for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane) packScalarIntoVectorizedValue(Def, Lane); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index ca2e4dd8285b9..98a2d30d9b1d8 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1880,7 +1880,7 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe { TruncInst *Trunc; // If this recipe is unrolled it will have 2 additional operands. - bool isUnrolled() const { return getNumOperands() == 6; } + bool isUnrolled() const { return getNumOperands() == 5; } public: VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, @@ -1930,16 +1930,6 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe { VPValue *getVFValue() { return getOperand(2); } const VPValue *getVFValue() const { return getOperand(2); } - // TODO: Remove once VPWidenIntOrFpInduction is fully expanded in - // convertToConcreteRecipes. - VPInstructionWithType *getStepVector() { - auto *StepVector = - cast(getOperand(3)->getDefiningRecipe()); - assert(StepVector->getOpcode() == VPInstruction::StepVector && - "step vector operand must be a VPInstruction::StepVector"); - return StepVector; - } - VPValue *getSplatVFValue() { // If the recipe has been unrolled return the VPValue for the induction // increment. 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index d074bafea3319..5e4967cde0e77 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -776,7 +776,6 @@ Value *VPInstruction::generate(VPTransformState &State) { return Builder.CreateCountTrailingZeroElems(Builder.getInt64Ty(), Mask, true, Name); } - default: llvm_unreachable("Unsupported opcode for instruction"); } @@ -967,6 +966,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { case VPInstruction::BranchOnCount: case VPInstruction::BranchOnCond: case VPInstruction::ResumePhi: + case VPInstruction::Broadcast: return true; case VPInstruction::PtrAdd: return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this); @@ -1087,15 +1087,14 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, void VPInstructionWithType::execute(VPTransformState &State) { State.setDebugLocFrom(getDebugLoc()); - switch (getOpcode()) { - case Instruction::ZExt: - case Instruction::Trunc: { + if (isScalarCast()) { Value *Op = State.get(getOperand(0), VPLane(0)); Value *Cast = State.Builder.CreateCast(Instruction::CastOps(getOpcode()), Op, ResultTy); State.set(this, Cast, VPLane(0)); - break; + return; } + switch (getOpcode()) { case VPInstruction::StepVector: { Value *StepVector = State.Builder.CreateStepVector(VectorType::get(ResultTy, State.VF)); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 67e75bf1dc9f5..d572dbbc3d951 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1223,16 +1223,6 @@ static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan, WideIV->setStartValue(NewStart); auto *NewStep = Plan.getOrAddLiveIn(ConstantInt::get(NewIVTy, 1)); WideIV->setStepValue(NewStep); - // TODO: Remove once VPWidenIntOrFpInductionRecipe is fully 
expanded. - VPInstructionWithType *OldStepVector = WideIV->getStepVector(); - assert(OldStepVector->getNumUsers() == 1 && - "step vector should only be used by single " - "VPWidenIntOrFpInductionRecipe"); - auto *NewStepVector = new VPInstructionWithType( - VPInstruction::StepVector, {}, NewIVTy, OldStepVector->getDebugLoc()); - NewStepVector->insertAfter(OldStepVector->getDefiningRecipe()); - OldStepVector->replaceAllUsesWith(NewStepVector); - OldStepVector->eraseFromParent(); auto *NewBTC = new VPWidenCastRecipe( Instruction::Trunc, Plan.getOrCreateBackedgeTakenCount(), NewIVTy); @@ -2459,15 +2449,20 @@ expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR, } // Construct the initial value of the vector IV in the vector loop preheader. - Type *IVIntTy = IntegerType::get(IV->getContext(), Ty->getScalarSizeInBits()); + Type *StepTy = TypeInfo.inferScalarType(Step); + Type *IVIntTy = + IntegerType::get(IV->getContext(), StepTy->getScalarSizeInBits()); VPValue *Init = Builder.createNaryOp(VPInstruction::StepVector, {}, IVIntTy); - if (Ty->isFloatingPointTy()) - Init = Builder.createWidenCast(Instruction::UIToFP, Init, Ty); + if (StepTy->isFloatingPointTy()) + Init = Builder.createWidenCast(Instruction::UIToFP, Init, StepTy); + + VPValue *SplatStart = Builder.createNaryOp(VPInstruction::Broadcast, Start); + VPValue *SplatStep = Builder.createNaryOp(VPInstruction::Broadcast, Step); // FIXME: The newly created binary instructions should contain nsw/nuw // flags, which can be found from the original scalar operations. - Init = Builder.createNaryOp(MulOp, {Init, Step}, FMFs); - Init = Builder.createNaryOp(AddOp, {Start, Init}, FMFs, {}, "induction"); + Init = Builder.createNaryOp(MulOp, {Init, SplatStep}, FMFs); + Init = Builder.createNaryOp(AddOp, {SplatStart, Init}, FMFs, {}, "induction"); // Create the widened phi of the vector IV. 
auto *WidePHI = @@ -2479,18 +2474,21 @@ expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR, VPValue *Inc; VPValue *Prev; // If unrolled, use the increment and prev value from the operands. - if (WidenIVR->getNumOperands() == 5) { - Inc = WidenIVR->getSplatVFValue(); + if (auto *SplatVF = WidenIVR->getSplatVFValue()) { + Inc = SplatVF; Prev = WidenIVR->getLastUnrolledPartOperand(); } else { // Multiply the vectorization factor by the step using integer or // floating-point arithmetic as appropriate. - if (Ty->isFloatingPointTy()) - VF = Builder.createScalarCast(Instruction::CastOps::UIToFP, VF, Ty, DL); - else if (Ty != TypeInfo.inferScalarType(VF)) - VF = Builder.createScalarCast(Instruction::CastOps::Trunc, VF, Ty, DL); + if (StepTy->isFloatingPointTy()) + VF = Builder.createScalarCast(Instruction::CastOps::UIToFP, VF, StepTy, + DL); + else + VF = + Builder.createScalarCast(Instruction::CastOps::Trunc, VF, StepTy, DL); Inc = Builder.createNaryOp(MulOp, {Step, VF}, FMFs); + Inc = Builder.createNaryOp(VPInstruction::Broadcast, Inc); Prev = WidePHI; } @@ -2691,27 +2689,6 @@ void VPlanTransforms::handleUncountableEarlyExit( LatchExitingBranch->eraseFromParent(); } -void VPlanTransforms::materializeStepVectors(VPlan &Plan) { - for (auto &Phi : Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) { - auto *IVR = dyn_cast(&Phi); - if (!IVR) - continue; - - Type *Ty = IVR->getPHINode()->getType(); - if (TruncInst *Trunc = IVR->getTruncInst()) - Ty = Trunc->getType(); - if (Ty->isFloatingPointTy()) - Ty = IntegerType::get(Ty->getContext(), Ty->getScalarSizeInBits()); - - VPBuilder Builder(Plan.getVectorPreheader()); - VPInstruction *StepVector = Builder.createNaryOp( - VPInstruction::StepVector, {}, Ty, {}, IVR->getDebugLoc()); - assert(IVR->getNumOperands() == 3 && - "can only add step vector before unrolling"); - IVR->addOperand(StepVector); - } -} - void VPlanTransforms::materializeBroadcasts(VPlan &Plan) { if (Plan.hasScalarVFOnly()) return; 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index 7a05816f2e2da..9e8b518a0c7eb 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -199,11 +199,6 @@ struct VPlanTransforms { optimizeInductionExitUsers(VPlan &Plan, DenseMap &EndValues); - /// Materialize VPInstruction::StepVectors for VPWidenIntOrFpInductionRecipes. - /// TODO: Remove once all of VPWidenIntOrFpInductionRecipe is expanded in - /// convertToConcreteRecipes. - static void materializeStepVectors(VPlan &Plan); - /// Add explicit broadcasts for live-ins and VPValues defined in \p Plan's entry block if they are used as vectors. static void materializeBroadcasts(VPlan &Plan); diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll index 9c69c5f1de223..24c703ae42f0a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll @@ -16,14 +16,14 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 8) -; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[VAL]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP7:%.*]] = mul [[TMP8]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP6]], i64 0 +; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]] +; CHECK-NEXT: 
[[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = mul splat (i64 1), [[DOTSPLAT]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -37,7 +37,7 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1 ; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0( [[TMP14]], ptr [[TMP17]], i32 1, [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 8) -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP9]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] @@ -100,14 +100,14 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]]) -; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[VAL]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP7:%.*]] = mul [[TMP8]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP6]], i64 0 +; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]] +; CHECK-NEXT: 
[[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = mul splat (i64 1), [[DOTSPLAT]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -121,7 +121,7 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range ; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0( [[TMP14]], ptr [[TMP17]], i32 1, [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 [[WIDE_TRIP_COUNT]]) -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP9]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll index f36161703dba5..976f95ff4f0ba 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll @@ -862,8 +862,8 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) { ; DEFAULT-NEXT: store i8 [[TMP33]], ptr [[TMP32]], align 1 ; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE14]] ; DEFAULT: [[PRED_STORE_CONTINUE14]]: -; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <8 x i8> [[VEC_IND]], splat (i8 8) ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <8 x i8> [[VEC_IND]], splat (i8 8) ; DEFAULT-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop 
[[LOOP24:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[EXIT:.*]] @@ -964,8 +964,8 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) { ; PRED-NEXT: store i8 [[TMP33]], ptr [[TMP32]], align 1 ; PRED-NEXT: br label %[[PRED_STORE_CONTINUE14]] ; PRED: [[PRED_STORE_CONTINUE14]]: -; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i8> [[VEC_IND]], splat (i8 8) ; PRED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i8> [[VEC_IND]], splat (i8 8) ; PRED-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; PRED: [[MIDDLE_BLOCK]]: ; PRED-NEXT: br label %[[EXIT:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll index a048ef5147992..d59607711b5bf 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll @@ -123,14 +123,14 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i ; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[N]], [[TMP11]] ; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]]) -; CHECK-NEXT: [[TMP15:%.*]] = call @llvm.stepvector.nxv2i64() ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[M]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = call @llvm.stepvector.nxv2i64() ; CHECK-NEXT: [[TMP17:%.*]] = mul [[TMP15]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP17]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement 
poison, i64 [[TMP20]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = mul splat (i64 1), [[DOTSPLAT]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -154,7 +154,7 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP14]]) ; CHECK-NEXT: [[TMP36:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP18]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP37:%.*]] = extractelement [[TMP36]], i32 0 ; CHECK-NEXT: br i1 [[TMP37]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -246,14 +246,14 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) { ; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[TMP0]], [[TMP11]] ; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[TMP0]]) -; CHECK-NEXT: [[TMP15:%.*]] = call @llvm.stepvector.nxv2i64() ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[MUL_2_I]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = call @llvm.stepvector.nxv2i64() ; CHECK-NEXT: [[TMP17:%.*]] = mul [[TMP15]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP17]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = 
insertelement poison, i64 [[TMP20]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = mul splat (i64 1), [[BROADCAST_SPLAT4]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -280,7 +280,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) { ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP14]]) ; CHECK-NEXT: [[TMP47:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP18]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT4]] ; CHECK-NEXT: [[TMP48:%.*]] = extractelement [[TMP47]], i32 0 ; CHECK-NEXT: br i1 [[TMP48]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll index c0806ea16a5fc..70a1f7f844710 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll @@ -87,12 +87,12 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) { ; CHECK-NEXT: [[CMP_N16:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC5]] ; CHECK-NEXT: br i1 [[CMP_N16]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL17:%.*]] = phi i8 [ [[TMP16]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX18:%.*]] = phi i8 [ [[RDX_SELECT15]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ] 
+; CHECK-NEXT: [[BC_RESUME_VAL15:%.*]] = phi i8 [ [[TMP16]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX16:%.*]] = phi i8 [ [[RDX_SELECT15]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i8 [ [[BC_RESUME_VAL17]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[RDX:%.*]] = phi i8 [ [[BC_MERGE_RDX18]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV1:%.*]] = phi i8 [ [[BC_RESUME_VAL15]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i8 [ [[BC_MERGE_RDX16]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[A]], i8 [[IV1]] ; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[GEP1]], align 8 ; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 3 @@ -185,12 +185,12 @@ define i32 @select_icmp_var_start_iv_trunc(i32 %N, i32 %start) #0 { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <4 x i32> poison, i32 [[START]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT10:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT9]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT10]], zeroinitializer -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP10]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = trunc i64 [[BC_RESUME_VAL]] to i32 ; CHECK-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement <4 x i32> poison, i32 [[TMP12]], i64 0 ; CHECK-NEXT: [[DOTSPLAT14:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT13]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x 
i32> [[DOTSPLAT14]], +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP10]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX11:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT17:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll index 04b859337e663..c1e4aa58a05ec 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll @@ -397,9 +397,9 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n) ; DEFAULT-NEXT: store i8 [[TMP71]], ptr [[TMP70]], align 1 ; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE36]] ; DEFAULT: [[PRED_STORE_CONTINUE36]]: +; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <16 x i8> [[VEC_IND]], splat (i8 16) ; DEFAULT-NEXT: [[VEC_IND_NEXT2]] = add <16 x i8> [[VEC_IND1]], splat (i8 16) -; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; DEFAULT-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] @@ -517,13 +517,13 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; DEFAULT-NEXT: [[TMP8:%.*]] = icmp ugt i64 15, [[TMP6]] ; DEFAULT-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 ; DEFAULT-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 15) -; DEFAULT-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv16i8() ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i8 [[A]], i64 0 ; DEFAULT-NEXT: 
[[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i8 [[B]], i64 0 ; DEFAULT-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i8 [[C]], i64 0 ; DEFAULT-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer +; DEFAULT-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv16i8() ; DEFAULT-NEXT: [[TMP11:%.*]] = mul [[TMP10]], splat (i8 1) ; DEFAULT-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP11]] ; DEFAULT-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP4]] to i8 @@ -593,13 +593,13 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; OPTSIZE-NEXT: [[TMP8:%.*]] = icmp ugt i64 15, [[TMP6]] ; OPTSIZE-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 ; OPTSIZE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 15) -; OPTSIZE-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv16i8() ; OPTSIZE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i8 [[A]], i64 0 ; OPTSIZE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; OPTSIZE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i8 [[B]], i64 0 ; OPTSIZE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; OPTSIZE-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i8 [[C]], i64 0 ; OPTSIZE-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer +; OPTSIZE-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv16i8() ; OPTSIZE-NEXT: [[TMP11:%.*]] = mul [[TMP10]], splat (i8 1) ; OPTSIZE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP11]] ; OPTSIZE-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP4]] to i8 @@ -669,13 +669,13 @@ define 
void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; MINSIZE-NEXT: [[TMP8:%.*]] = icmp ugt i64 15, [[TMP6]] ; MINSIZE-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 ; MINSIZE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 15) -; MINSIZE-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv16i8() ; MINSIZE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i8 [[A]], i64 0 ; MINSIZE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; MINSIZE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i8 [[B]], i64 0 ; MINSIZE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; MINSIZE-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i8 [[C]], i64 0 ; MINSIZE-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer +; MINSIZE-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv16i8() ; MINSIZE-NEXT: [[TMP11:%.*]] = mul [[TMP10]], splat (i8 1) ; MINSIZE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP11]] ; MINSIZE-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP4]] to i8 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll index 71e07f01651cd..3302103873bd3 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll @@ -23,9 +23,9 @@ define void @foo() { ; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP7:%.*]] = mul [[TMP6]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; 
CHECK-NEXT: [[TMP8:%.*]] = mul splat (i64 1), [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP5]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_LATCH:%.*]] ] @@ -47,7 +47,7 @@ define void @foo() { ; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi [ [[TMP11]], [[INNER_LOOP1]] ] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0( [[VEC_PHI4]], [[TMP9]], i32 4, splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP8]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll index 74b0c2c0e033a..d02d03b4b437d 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll @@ -120,8 +120,8 @@ define i64 @same_exit_block_pre_inc_use4() { ; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP4]]) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 64 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.split: ; 
CHECK-NEXT: br i1 [[TMP5]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll index 49584bd47353d..f44744071ae58 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll @@ -20,11 +20,11 @@ define void @induction_i7(ptr %dst) #0 { ; CHECK-NEXT: [[TMP40:%.*]] = mul i64 [[TMP4]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP40]], 2 ; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i7 -; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i8() -; CHECK-NEXT: [[TMP7:%.*]] = trunc [[TMP6]] to ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP40]], i64 0 ; CHECK-NEXT: [[DOTSPLAT_:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[DOTSPLAT:%.*]] = trunc [[DOTSPLAT_]] to +; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i8() +; CHECK-NEXT: [[TMP7:%.*]] = trunc [[TMP6]] to ; CHECK-NEXT: [[TMP9:%.*]] = mul [[TMP7]], splat (i7 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP9]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -85,11 +85,11 @@ define void @induction_i3_zext(ptr %dst) #0 { ; CHECK-NEXT: [[TMP40:%.*]] = mul i64 [[TMP4]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP40]], 2 ; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i3 -; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i8() -; CHECK-NEXT: [[TMP7:%.*]] = trunc [[TMP6]] to ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP40]], i64 0 ; CHECK-NEXT: [[DOTSPLAT_:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[DOTSPLAT:%.*]] = trunc [[DOTSPLAT_]] to +; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i8() +; CHECK-NEXT: [[TMP7:%.*]] = trunc [[TMP6]] to ; CHECK-NEXT: [[TMP9:%.*]] = mul 
[[TMP7]], splat (i3 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP9]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll index fe68501ad94f4..c66d1de4eb4b3 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll @@ -101,11 +101,11 @@ define void @test_array_load2_i16_store2(i32 %C, i32 %D) #1 { ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[C:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[D:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP3:%.*]] = shl [[TMP2]], splat (i64 1) ; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP0]], 3 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 @@ -185,11 +185,11 @@ define void @test_array_load2_store2_i16(i32 noundef %C, i32 noundef %D) #1 { ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[C:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[D:%.*]], i64 0 ; 
CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP3:%.*]] = shl [[TMP2]], splat (i64 1) ; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP0]], 3 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 @@ -579,9 +579,9 @@ define void @load_gap_reverse(ptr noalias nocapture readonly %P1, ptr noalias no ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[X:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT1:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[INDUCTION:%.*]] = sub splat (i64 1023), [[TMP2]] ; CHECK-NEXT: [[DOTNEG:%.*]] = sub nsw i64 0, [[TMP1]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[DOTNEG]], i64 0 @@ -809,9 +809,9 @@ define void @PR27626_0(ptr %p, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[TMP6]] ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2 -; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[Z:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -958,9 +958,9 @@ define void @PR27626_2(ptr %p, i64 %n, i32 %z) #1 { ; CHECK-NEXT: 
[[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[TMP6]] ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2 -; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[Z:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1113,13 +1113,13 @@ define void @PR27626_4(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 2 ; CHECK-NEXT: [[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1 -; CHECK-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[X:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[Y:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i32 [[Z:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP9:%.*]] = shl [[TMP10]], splat (i64 1) ; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP6]], 3 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 @@ -1191,13 +1191,13 @@ define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[TMP8:%.*]] = shl nuw 
nsw i64 [[TMP7]], 2 ; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[N_VEC]], 1 ; CHECK-NEXT: [[IND_END:%.*]] = or disjoint i64 [[TMP11]], 3 -; CHECK-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[X:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[Y:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i32 [[Z:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP21:%.*]] = shl [[TMP10]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add [[TMP21]], splat (i64 3) ; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i64 [[TMP7]], 3 @@ -1284,14 +1284,14 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 { ; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw i64 [[TMP9]], 2 ; CHECK-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1 ; CHECK-NEXT: [[TMP14:%.*]] = call @llvm.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i32 [[TMP11]], 2 -; CHECK-NEXT: [[TMP13:%.*]] = add nsw i32 [[TMP12]], -1 -; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i16 [[DOTPRE]], i32 [[TMP13]] ; CHECK-NEXT: [[TMP15:%.*]] = shl [[TMP14]], splat (i64 1) ; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i64 [[TMP9]], 3 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP17]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP33:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP16:%.*]] = shl nuw nsw i32 [[TMP33]], 2 +; CHECK-NEXT: [[TMP34:%.*]] = add nsw i32 
[[TMP16]], -1 +; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i16 [[DOTPRE]], i32 [[TMP34]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll index 469faf67a71b3..01bb9b7b2ab8a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll @@ -37,11 +37,11 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[N_VEC:%.*]] = sub nuw nsw i32 1024, [[N_MOD_VF]] ; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() ; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 4 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv16i32() ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 -; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv16i32() +; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 +; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; SCALAR_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALAR_TAIL_FOLDING: vector.body: ; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -60,10 +60,10 @@ 
define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP14]] ; SCALAR_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = sub zeroinitializer, [[TMP13]] ; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv32i8( [[TMP13]], [[TMP16]]) -; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK1:%.*]] = call @llvm.vector.interleave2.nxv32i1( [[TMP7]], [[TMP7]]) -; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.store.nxv32i8.p0( [[INTERLEAVED_VEC]], ptr [[TMP15]], i32 1, [[INTERLEAVED_MASK1]]) +; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK3:%.*]] = call @llvm.vector.interleave2.nxv32i1( [[TMP7]], [[TMP7]]) +; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.store.nxv32i8.p0( [[INTERLEAVED_VEC]], ptr [[TMP15]], i32 1, [[INTERLEAVED_MASK3]]) ; SCALAR_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]] -; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] ; SCALAR_TAIL_FOLDING-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; SCALAR_TAIL_FOLDING: middle.block: @@ -83,11 +83,11 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 4 ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.usub.sat.i32(i32 1024, i32 [[TMP3]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 1024) -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv16i32() ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector 
[[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP1]], i64 0 -; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv16i32() +; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP1]], i64 0 +; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; PREDICATED_TAIL_FOLDING: vector.body: ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -108,11 +108,11 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP14]] ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = sub zeroinitializer, [[TMP13]] ; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv32i8( [[TMP13]], [[TMP16]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK1:%.*]] = call @llvm.vector.interleave2.nxv32i1( [[TMP7]], [[TMP7]]) -; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.store.nxv32i8.p0( [[INTERLEAVED_VEC]], ptr [[TMP15]], i32 1, [[INTERLEAVED_MASK1]]) +; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK3:%.*]] = call @llvm.vector.interleave2.nxv32i1( [[TMP7]], [[TMP7]]) +; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.store.nxv32i8.p0( [[INTERLEAVED_VEC]], ptr [[TMP15]], i32 1, [[INTERLEAVED_MASK3]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP1]] ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP4]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] 
+; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP17:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP17]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP0:![0-9]+]] ; PREDICATED_TAIL_FOLDING: middle.block: @@ -182,11 +182,11 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[N_VEC:%.*]] = sub nuw nsw i32 1024, [[N_MOD_VF]] ; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() ; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 4 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv16i32() ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 -; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv16i32() +; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 +; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; SCALAR_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALAR_TAIL_FOLDING: vector.body: ; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -201,7 +201,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP12]] ; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( splat (i8 2), [[TMP13]], i32 1, [[TMP10]]) ; 
SCALAR_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]] -; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] ; SCALAR_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; SCALAR_TAIL_FOLDING: middle.block: @@ -221,11 +221,11 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 4 ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.usub.sat.i32(i32 1024, i32 [[TMP3]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 1024) -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv16i32() ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP1]], i64 0 -; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv16i32() +; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP1]], i64 0 +; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; PREDICATED_TAIL_FOLDING: vector.body: ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -243,7 +243,7 @@ define dso_local void @masked_strided2(ptr 
noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( splat (i8 2), [[TMP13]], i32 1, [[TMP10]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP1]] ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP4]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP14]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP4:![0-9]+]] ; PREDICATED_TAIL_FOLDING: middle.block: @@ -309,13 +309,13 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[N_VEC:%.*]] = sub nuw nsw i32 1024, [[N_MOD_VF]] ; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() ; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 4 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv16i32() ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[CONV3]], i64 0 ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer -; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 -; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv16i32() +; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = 
insertelement poison, i32 [[TMP5]], i64 0 +; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer ; SCALAR_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALAR_TAIL_FOLDING: vector.body: ; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -331,7 +331,7 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP13]] ; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( splat (i8 2), [[TMP14]], i32 1, [[TMP11]]) ; SCALAR_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]] -; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT4]] ; SCALAR_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; SCALAR_TAIL_FOLDING: middle.block: @@ -352,13 +352,13 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 4 ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.usub.sat.i32(i32 1024, i32 [[TMP3]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 1024) -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv16i32() ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[CONV3]], i64 0 ; 
PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer -; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP1]], i64 0 -; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv16i32() +; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i32 [[TMP1]], i64 0 +; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; PREDICATED_TAIL_FOLDING: vector.body: ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -378,7 +378,7 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( splat (i8 2), [[TMP15]], i32 1, [[TMP12]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP1]] ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP4]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT4]] ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP16]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP6:![0-9]+]] ; PREDICATED_TAIL_FOLDING: middle.block: @@ -456,11 +456,11 @@ define dso_local void @masked_strided_factor4(ptr noalias nocapture readonly %p, ; SCALAR_TAIL_FOLDING-NEXT: [[N_VEC:%.*]] = sub nuw nsw i32 1024, [[N_MOD_VF]] ; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() ; 
SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 4 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv16i32() ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 -; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv16i32() +; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 +; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; SCALAR_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALAR_TAIL_FOLDING: vector.body: ; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -470,18 +470,18 @@ define dso_local void @masked_strided_factor4(ptr noalias nocapture readonly %p, ; SCALAR_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64 ; SCALAR_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP9]] ; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK:%.*]] = call @llvm.vector.interleave2.nxv32i1( [[TMP7]], [[TMP7]]) -; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK1:%.*]] = call @llvm.vector.interleave2.nxv32i1( [[TMP7]], [[TMP7]]) -; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK2:%.*]] = call @llvm.vector.interleave2.nxv64i1( [[INTERLEAVED_MASK]], [[INTERLEAVED_MASK1]]) -; SCALAR_TAIL_FOLDING-NEXT: [[WIDE_MASKED_VEC:%.*]] = call @llvm.masked.load.nxv64i8.p0(ptr [[TMP10]], i32 1, [[INTERLEAVED_MASK2]], poison) +; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK3:%.*]] = call @llvm.vector.interleave2.nxv32i1( [[TMP7]], [[TMP7]]) +; SCALAR_TAIL_FOLDING-NEXT: 
[[INTERLEAVED_MASK4:%.*]] = call @llvm.vector.interleave2.nxv64i1( [[INTERLEAVED_MASK]], [[INTERLEAVED_MASK3]]) +; SCALAR_TAIL_FOLDING-NEXT: [[WIDE_MASKED_VEC:%.*]] = call @llvm.masked.load.nxv64i8.p0(ptr [[TMP10]], i32 1, [[INTERLEAVED_MASK4]], poison) ; SCALAR_TAIL_FOLDING-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.vector.deinterleave2.nxv64i8( [[WIDE_MASKED_VEC]]) ; SCALAR_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 ; SCALAR_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 -; SCALAR_TAIL_FOLDING-NEXT: [[STRIDED_VEC3:%.*]] = call { , } @llvm.vector.deinterleave2.nxv32i8( [[TMP11]]) -; SCALAR_TAIL_FOLDING-NEXT: [[STRIDED_VEC4:%.*]] = call { , } @llvm.vector.deinterleave2.nxv32i8( [[TMP12]]) -; SCALAR_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = extractvalue { , } [[STRIDED_VEC3]], 0 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = extractvalue { , } [[STRIDED_VEC4]], 0 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = extractvalue { , } [[STRIDED_VEC3]], 1 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = extractvalue { , } [[STRIDED_VEC4]], 1 +; SCALAR_TAIL_FOLDING-NEXT: [[STRIDED_VEC5:%.*]] = call { , } @llvm.vector.deinterleave2.nxv32i8( [[TMP11]]) +; SCALAR_TAIL_FOLDING-NEXT: [[STRIDED_VEC6:%.*]] = call { , } @llvm.vector.deinterleave2.nxv32i8( [[TMP12]]) +; SCALAR_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = extractvalue { , } [[STRIDED_VEC5]], 0 +; SCALAR_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = extractvalue { , } [[STRIDED_VEC6]], 0 +; SCALAR_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = extractvalue { , } [[STRIDED_VEC5]], 1 +; SCALAR_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = extractvalue { , } [[STRIDED_VEC6]], 1 ; SCALAR_TAIL_FOLDING-NEXT: [[TMP17:%.*]] = call @llvm.smax.nxv16i8( [[TMP13]], [[TMP14]]) ; SCALAR_TAIL_FOLDING-NEXT: [[TMP18:%.*]] = sub zeroinitializer, [[TMP17]] ; SCALAR_TAIL_FOLDING-NEXT: [[TMP19:%.*]] = call @llvm.smax.nxv16i8( [[TMP15]], [[TMP16]]) @@ -489,14 +489,14 @@ define dso_local void @masked_strided_factor4(ptr noalias nocapture 
readonly %p, ; SCALAR_TAIL_FOLDING-NEXT: [[TMP21:%.*]] = sext i32 [[TMP8]] to i64 ; SCALAR_TAIL_FOLDING-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP21]] ; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv32i8( [[TMP17]], [[TMP19]]) -; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_VEC5:%.*]] = call @llvm.vector.interleave2.nxv32i8( [[TMP18]], [[TMP20]]) -; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_VEC6:%.*]] = call @llvm.vector.interleave2.nxv64i8( [[INTERLEAVED_VEC]], [[INTERLEAVED_VEC5]]) -; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK7:%.*]] = call @llvm.vector.interleave2.nxv32i1( [[TMP7]], [[TMP7]]) -; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK8:%.*]] = call @llvm.vector.interleave2.nxv32i1( [[TMP7]], [[TMP7]]) -; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK9:%.*]] = call @llvm.vector.interleave2.nxv64i1( [[INTERLEAVED_MASK7]], [[INTERLEAVED_MASK8]]) -; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.store.nxv64i8.p0( [[INTERLEAVED_VEC6]], ptr [[TMP22]], i32 1, [[INTERLEAVED_MASK9]]) +; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_VEC7:%.*]] = call @llvm.vector.interleave2.nxv32i8( [[TMP18]], [[TMP20]]) +; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_VEC8:%.*]] = call @llvm.vector.interleave2.nxv64i8( [[INTERLEAVED_VEC]], [[INTERLEAVED_VEC7]]) +; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK9:%.*]] = call @llvm.vector.interleave2.nxv32i1( [[TMP7]], [[TMP7]]) +; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK10:%.*]] = call @llvm.vector.interleave2.nxv32i1( [[TMP7]], [[TMP7]]) +; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK11:%.*]] = call @llvm.vector.interleave2.nxv64i1( [[INTERLEAVED_MASK9]], [[INTERLEAVED_MASK10]]) +; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.store.nxv64i8.p0( [[INTERLEAVED_VEC8]], ptr [[TMP22]], i32 1, [[INTERLEAVED_MASK11]]) ; SCALAR_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]] -; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; 
SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] ; SCALAR_TAIL_FOLDING-NEXT: [[TMP23:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; SCALAR_TAIL_FOLDING: middle.block: @@ -516,11 +516,11 @@ define dso_local void @masked_strided_factor4(ptr noalias nocapture readonly %p, ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 4 ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.usub.sat.i32(i32 1024, i32 [[TMP3]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 1024) -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv16i32() ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP1]], i64 0 -; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv16i32() +; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP1]], i64 0 +; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; PREDICATED_TAIL_FOLDING: vector.body: ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -532,18 +532,18 @@ define dso_local void @masked_strided_factor4(ptr noalias nocapture readonly %p, ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64 ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = 
getelementptr i8, ptr [[P]], i64 [[TMP9]] ; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK:%.*]] = call @llvm.vector.interleave2.nxv32i1( [[TMP7]], [[TMP7]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK1:%.*]] = call @llvm.vector.interleave2.nxv32i1( [[TMP7]], [[TMP7]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK2:%.*]] = call @llvm.vector.interleave2.nxv64i1( [[INTERLEAVED_MASK]], [[INTERLEAVED_MASK1]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[WIDE_MASKED_VEC:%.*]] = call @llvm.masked.load.nxv64i8.p0(ptr [[TMP10]], i32 1, [[INTERLEAVED_MASK2]], poison) +; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK3:%.*]] = call @llvm.vector.interleave2.nxv32i1( [[TMP7]], [[TMP7]]) +; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK4:%.*]] = call @llvm.vector.interleave2.nxv64i1( [[INTERLEAVED_MASK]], [[INTERLEAVED_MASK3]]) +; PREDICATED_TAIL_FOLDING-NEXT: [[WIDE_MASKED_VEC:%.*]] = call @llvm.masked.load.nxv64i8.p0(ptr [[TMP10]], i32 1, [[INTERLEAVED_MASK4]], poison) ; PREDICATED_TAIL_FOLDING-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.vector.deinterleave2.nxv64i8( [[WIDE_MASKED_VEC]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 -; PREDICATED_TAIL_FOLDING-NEXT: [[STRIDED_VEC3:%.*]] = call { , } @llvm.vector.deinterleave2.nxv32i8( [[TMP11]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[STRIDED_VEC4:%.*]] = call { , } @llvm.vector.deinterleave2.nxv32i8( [[TMP12]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = extractvalue { , } [[STRIDED_VEC3]], 0 -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = extractvalue { , } [[STRIDED_VEC4]], 0 -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = extractvalue { , } [[STRIDED_VEC3]], 1 -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = extractvalue { , } [[STRIDED_VEC4]], 1 +; PREDICATED_TAIL_FOLDING-NEXT: [[STRIDED_VEC5:%.*]] = call { , } @llvm.vector.deinterleave2.nxv32i8( [[TMP11]]) +; 
PREDICATED_TAIL_FOLDING-NEXT: [[STRIDED_VEC6:%.*]] = call { , } @llvm.vector.deinterleave2.nxv32i8( [[TMP12]]) +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = extractvalue { , } [[STRIDED_VEC5]], 0 +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = extractvalue { , } [[STRIDED_VEC6]], 0 +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = extractvalue { , } [[STRIDED_VEC5]], 1 +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = extractvalue { , } [[STRIDED_VEC6]], 1 ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP17:%.*]] = call @llvm.smax.nxv16i8( [[TMP13]], [[TMP14]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP18:%.*]] = sub zeroinitializer, [[TMP17]] ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP19:%.*]] = call @llvm.smax.nxv16i8( [[TMP15]], [[TMP16]]) @@ -551,15 +551,15 @@ define dso_local void @masked_strided_factor4(ptr noalias nocapture readonly %p, ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP21:%.*]] = sext i32 [[TMP8]] to i64 ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP21]] ; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv32i8( [[TMP17]], [[TMP19]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_VEC5:%.*]] = call @llvm.vector.interleave2.nxv32i8( [[TMP18]], [[TMP20]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_VEC6:%.*]] = call @llvm.vector.interleave2.nxv64i8( [[INTERLEAVED_VEC]], [[INTERLEAVED_VEC5]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK7:%.*]] = call @llvm.vector.interleave2.nxv32i1( [[TMP7]], [[TMP7]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK8:%.*]] = call @llvm.vector.interleave2.nxv32i1( [[TMP7]], [[TMP7]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK9:%.*]] = call @llvm.vector.interleave2.nxv64i1( [[INTERLEAVED_MASK7]], [[INTERLEAVED_MASK8]]) -; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.store.nxv64i8.p0( [[INTERLEAVED_VEC6]], ptr [[TMP22]], i32 1, [[INTERLEAVED_MASK9]]) +; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_VEC7:%.*]] = call 
@llvm.vector.interleave2.nxv32i8( [[TMP18]], [[TMP20]]) +; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_VEC8:%.*]] = call @llvm.vector.interleave2.nxv64i8( [[INTERLEAVED_VEC]], [[INTERLEAVED_VEC7]]) +; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK9:%.*]] = call @llvm.vector.interleave2.nxv32i1( [[TMP7]], [[TMP7]]) +; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK10:%.*]] = call @llvm.vector.interleave2.nxv32i1( [[TMP7]], [[TMP7]]) +; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK11:%.*]] = call @llvm.vector.interleave2.nxv64i1( [[INTERLEAVED_MASK9]], [[INTERLEAVED_MASK10]]) +; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.store.nxv64i8.p0( [[INTERLEAVED_VEC8]], ptr [[TMP22]], i32 1, [[INTERLEAVED_MASK11]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP1]] ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP4]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP23:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP23]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP8:![0-9]+]] ; PREDICATED_TAIL_FOLDING: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll index 4dd26a8ce6981..46dcf23ee9cff 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll @@ -188,9 +188,9 @@ define void @copy_stride4(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[TMP13:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP15:%.*]] = mul [[TMP13]], splat (i64 4) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP15]] -; CHECK-NEXT: 
[[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP7]], i64 0 +; CHECK-NEXT: [[TMP18:%.*]] = mul i64 4, [[TMP7]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP18]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP16:%.*]] = mul splat (i64 4), [[DOTSPLAT]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] @@ -203,7 +203,7 @@ define void @copy_stride4(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP7]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP12]]) ; CHECK-NEXT: [[TMP21:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP16]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP22:%.*]] = extractelement [[TMP21]], i32 0 ; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll index 6947884efb699..047b2b5d35d7b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll @@ -237,8 +237,8 @@ define i32 @pointer_iv_mixed(ptr noalias %a, ptr noalias %b, i64 %n) #0 { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: 
[[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP5]], 3 ; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.stepvector.nxv2i64() ; CHECK-NEXT: [[TMP10:%.*]] = shl [[TMP9]], splat (i64 2) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll index 25e1e2d4cab1e..efe40a8f61d6a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll @@ -109,8 +109,8 @@ define void @load_store_interleave_group_tc_2(ptr noalias %data) { ; VF4-NEXT: store i64 [[TMP32]], ptr [[TMP31]], align 8 ; VF4-NEXT: br label %[[PRED_STORE_CONTINUE6]] ; VF4: [[PRED_STORE_CONTINUE6]]: -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4) ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4) ; VF4-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VF4: [[MIDDLE_BLOCK]]: ; VF4-NEXT: br label %[[EXIT:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll index 6234aed1f99d8..9522c7e7b61a8 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll @@ -509,7 +509,9 @@ define void @test_stride_noninvar3_4i32(ptr readonly %data, ptr noalias nocaptur ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i32> , [[DOTSPLAT]] ; CHECK-NEXT: 
[[INDUCTION:%.*]] = add <4 x i32> splat (i32 3), [[TMP1]] -; CHECK-NEXT: [[DOTSPLAT3:%.*]] = mul <4 x i32> [[DOTSPLAT]], splat (i32 4) +; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[X]], 4 +; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i64 0 +; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll b/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll index 1ce272bcfb783..77f120110a4c3 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll @@ -393,9 +393,9 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n) ; DEFAULT-NEXT: store i8 [[TMP71]], ptr [[TMP70]], align 1 ; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE36]] ; DEFAULT: [[PRED_STORE_CONTINUE36]]: +; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <16 x i8> [[VEC_IND]], splat (i8 16) ; DEFAULT-NEXT: [[VEC_IND_NEXT2]] = add <16 x i8> [[VEC_IND1]], splat (i8 16) -; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; DEFAULT-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll index dc1ad5dcaa073..a1a5deb93f6c7 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll @@ -28,9 +28,9 @@ define void @block_with_dead_inst_1(ptr 
%src, i64 %N) #0 { ; CHECK-NEXT: [[TMP11:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP13:%.*]] = mul [[TMP11]], splat (i64 3) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP13]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP10]], i64 0 +; CHECK-NEXT: [[TMP16:%.*]] = mul i64 3, [[TMP10]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP16]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP14:%.*]] = mul splat (i64 3), [[DOTSPLAT]] ; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[TMP17]], 8 ; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1 @@ -43,7 +43,7 @@ define void @block_with_dead_inst_1(ptr %src, i64 %N) #0 { ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i16, ptr [[SRC]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv8i16.nxv8p0( zeroinitializer, [[TMP20]], i32 2, splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP14]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -122,9 +122,9 @@ define void @block_with_dead_inst_2(ptr %src) #0 { ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP10:%.*]] = mul [[TMP8]], splat (i64 3) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP10]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP7]], i64 0 +; CHECK-NEXT: [[TMP13:%.*]] = mul i64 3, [[TMP7]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP13]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = mul splat 
(i64 3), [[DOTSPLAT]] ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 4 ; CHECK-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], 1 @@ -137,7 +137,7 @@ define void @block_with_dead_inst_2(ptr %src) #0 { ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i16, ptr [[SRC]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0( zeroinitializer, [[TMP17]], i32 2, splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP11]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -216,9 +216,9 @@ define void @multiple_blocks_with_dead_insts_3(ptr %src) #0 { ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP10:%.*]] = mul [[TMP8]], splat (i64 3) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP10]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP7]], i64 0 +; CHECK-NEXT: [[TMP13:%.*]] = mul i64 3, [[TMP7]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP13]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = mul splat (i64 3), [[DOTSPLAT]] ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 4 ; CHECK-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], 1 @@ -231,7 +231,7 @@ define void @multiple_blocks_with_dead_insts_3(ptr %src) #0 { ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i16, ptr [[SRC]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0( zeroinitializer, [[TMP17]], i32 2, splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = 
add [[VEC_IND]], [[TMP11]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -320,9 +320,9 @@ define void @multiple_blocks_with_dead_insts_4(ptr %src, i64 %N) #0 { ; CHECK-NEXT: [[TMP11:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP13:%.*]] = mul [[TMP11]], splat (i64 3) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP13]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP10]], i64 0 +; CHECK-NEXT: [[TMP16:%.*]] = mul i64 3, [[TMP10]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP16]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP14:%.*]] = mul splat (i64 3), [[DOTSPLAT]] ; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[TMP17]], 8 ; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1 @@ -335,7 +335,7 @@ define void @multiple_blocks_with_dead_insts_4(ptr %src, i64 %N) #0 { ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i16, ptr [[SRC]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv8i16.nxv8p0( zeroinitializer, [[TMP20]], i32 2, splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP14]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -426,9 +426,9 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_5(ptr %src) #0 { ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP10:%.*]] = mul [[TMP8]], splat (i64 3) 
; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP10]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP7]], i64 0 +; CHECK-NEXT: [[TMP13:%.*]] = mul i64 3, [[TMP7]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP13]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = mul splat (i64 3), [[DOTSPLAT]] ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 4 ; CHECK-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], 1 @@ -441,7 +441,7 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_5(ptr %src) #0 { ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i16, ptr [[SRC]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0( zeroinitializer, [[TMP17]], i32 2, splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP11]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -540,9 +540,9 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_6(ptr %src, i1 % ; CHECK-NEXT: [[TMP11:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP13:%.*]] = mul [[TMP11]], splat (i64 3) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP13]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP10]], i64 0 +; CHECK-NEXT: [[TMP16:%.*]] = mul i64 3, [[TMP10]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP16]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP14:%.*]] = mul splat (i64 3), [[DOTSPLAT]] ; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32() ; 
CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[TMP17]], 8 ; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1 @@ -555,7 +555,7 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_6(ptr %src, i1 % ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i16, ptr [[SRC]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv8i16.nxv8p0( zeroinitializer, [[TMP20]], i32 2, splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP14]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -854,9 +854,9 @@ define void @dead_load_in_block(ptr %dst, ptr %src, i8 %N, i64 %x) #0 { ; CHECK-NEXT: [[TMP18:%.*]] = call @llvm.stepvector.nxv2i64() ; CHECK-NEXT: [[TMP20:%.*]] = mul [[TMP18]], splat (i64 3) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP20]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP17]], i64 0 +; CHECK-NEXT: [[TMP23:%.*]] = mul i64 3, [[TMP17]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP23]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP21:%.*]] = mul splat (i64 3), [[DOTSPLAT]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -864,7 +864,7 @@ define void @dead_load_in_block(ptr %dst, ptr %src, i8 %N, i64 %x) #0 { ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[DST]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i32.nxv2p0( zeroinitializer, [[TMP24]], i32 4, splat (i1 true)), !alias.scope [[META18:![0-9]+]], !noalias [[META21:![0-9]+]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 
[[INDEX]], [[TMP17]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP21]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll index 4223f4d7f3c4d..3e4d337c0706c 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll @@ -22,14 +22,14 @@ define void @test(ptr noalias nocapture %a, ptr noalias nocapture %b, i32 %v) { ; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; VLENUNK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; VLENUNK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 -; VLENUNK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i64() ; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[V:%.*]], i64 0 ; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; VLENUNK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i64() ; VLENUNK-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i64 1) ; VLENUNK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] -; VLENUNK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 +; VLENUNK-NEXT: [[TMP11:%.*]] = mul i64 1, [[TMP5]] +; VLENUNK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 ; VLENUNK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; VLENUNK-NEXT: [[TMP9:%.*]] = mul splat (i64 1), [[DOTSPLAT]] ; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]] ; VLENUNK: vector.body: ; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -44,7 +44,7 @@ define void @test(ptr noalias nocapture %a, ptr 
noalias nocapture %b, i32 %v) { ; VLENUNK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 0 ; VLENUNK-NEXT: store [[TMP17]], ptr [[TMP19]], align 4 ; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; VLENUNK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP9]] +; VLENUNK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; VLENUNK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VLENUNK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VLENUNK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll index 8b01a4b4cd904..2276b592aac8a 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll @@ -44,9 +44,9 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV32-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv2i64() ; RV32-NEXT: [[TMP9:%.*]] = mul [[TMP7]], splat (i64 16) ; RV32-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP9]] -; RV32-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP6]], i64 0 +; RV32-NEXT: [[TMP12:%.*]] = mul i64 16, [[TMP6]] +; RV32-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 ; RV32-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; RV32-NEXT: [[TMP10:%.*]] = mul splat (i64 16), [[DOTSPLAT]] ; RV32-NEXT: br label [[VECTOR_BODY:%.*]] ; RV32: vector.body: ; RV32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -62,7 +62,7 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV32-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[A]], [[VEC_IND]] ; RV32-NEXT: call void @llvm.masked.scatter.nxv2f64.nxv2p0( [[TMP18]], [[TMP19]], i32 8, [[TMP14]]), 
!alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]] ; RV32-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; RV32-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP10]] +; RV32-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; RV32-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; RV32-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; RV32: middle.block: @@ -123,9 +123,9 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV64-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv2i64() ; RV64-NEXT: [[TMP9:%.*]] = mul [[TMP7]], splat (i64 16) ; RV64-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP9]] -; RV64-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP6]], i64 0 +; RV64-NEXT: [[TMP12:%.*]] = mul i64 16, [[TMP6]] +; RV64-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 ; RV64-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; RV64-NEXT: [[TMP10:%.*]] = mul splat (i64 16), [[DOTSPLAT]] ; RV64-NEXT: br label [[VECTOR_BODY:%.*]] ; RV64: vector.body: ; RV64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -141,7 +141,7 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV64-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[A]], [[VEC_IND]] ; RV64-NEXT: call void @llvm.masked.scatter.nxv2f64.nxv2p0( [[TMP18]], [[TMP19]], i32 8, [[TMP14]]), !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]] ; RV64-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; RV64-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP10]] +; RV64-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; RV64-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; RV64-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; RV64: middle.block: diff --git 
a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll index db175d90d3ff8..9d6372e8ccca2 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll @@ -20,18 +20,18 @@ define void @pr87378_vpinstruction_or_drop_poison_generating_flags(ptr %arg, i64 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1001, [[N_MOD_VF]] ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8 -; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[A]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[B]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i64 [[C]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP7:%.*]] = mul [[TMP6]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP5]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = mul splat (i64 1), [[DOTSPLAT]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = 
phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -52,7 +52,7 @@ define void @pr87378_vpinstruction_or_drop_poison_generating_flags(ptr %arg, i64 ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i16, ptr [[TMP24]], i32 0 ; CHECK-NEXT: call void @llvm.masked.store.nxv8i16.p0( zeroinitializer, ptr [[TMP25]], i32 2, [[TMP22]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP8]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll index 9cf7bc9fe07d6..c9c98590bcbf8 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll @@ -137,8 +137,8 @@ define void @test(ptr %p, i64 %a, i8 %b) { ; CHECK-NEXT: store i8 [[TMP40]], ptr [[P]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]] ; CHECK: pred.store.continue32: -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i32> [[VEC_IND]], splat (i32 16) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i32> [[VEC_IND]], splat (i32 16) ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[FOR_COND]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT1:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll index 0896d7cdc958a..79590f5060ad4 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll @@ -22,9 +22,9 @@ define void @single_constant_stride_int_scaled(ptr %p) { ; CHECK-NEXT: [[TMP8:%.*]] = 
call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP10:%.*]] = mul [[TMP8]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP10]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP7]], i64 0 +; CHECK-NEXT: [[TMP13:%.*]] = mul i64 1, [[TMP7]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP13]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = mul splat (i64 1), [[DOTSPLAT]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -35,7 +35,7 @@ define void @single_constant_stride_int_scaled(ptr %p) { ; CHECK-NEXT: [[TMP16:%.*]] = add [[WIDE_MASKED_GATHER]], splat (i32 1) ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP16]], [[TMP15]], i32 4, splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP11]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -92,9 +92,9 @@ define void @single_constant_stride_int_iv(ptr %p) { ; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i64 64) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = mul i64 64, [[TMP5]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = mul splat (i64 64), [[DOTSPLAT]] ; CHECK-NEXT: br label 
[[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -104,7 +104,7 @@ define void @single_constant_stride_int_iv(ptr %p) { ; CHECK-NEXT: [[TMP13:%.*]] = add [[WIDE_MASKED_GATHER]], splat (i32 1) ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP13]], [[TMP12]], i32 4, splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP9]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -553,14 +553,14 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; STRIDED-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() ; STRIDED-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 -; STRIDED-NEXT: [[TMP12:%.*]] = call @llvm.stepvector.nxv4i64() ; STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[STRIDE]], i64 0 ; STRIDED-NEXT: [[BROADCAST_SPLAT1:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer +; STRIDED-NEXT: [[TMP12:%.*]] = call @llvm.stepvector.nxv4i64() ; STRIDED-NEXT: [[TMP14:%.*]] = mul [[TMP12]], splat (i64 1) ; STRIDED-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP14]] -; STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 +; STRIDED-NEXT: [[TMP17:%.*]] = mul i64 1, [[TMP11]] +; STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP17]], i64 0 ; STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; STRIDED-NEXT: [[TMP41:%.*]] = mul splat (i64 1), [[BROADCAST_SPLAT]] ; STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; STRIDED: vector.body: ; 
STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -572,7 +572,7 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[P2]], [[TMP18]] ; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP20]], [[TMP21]], i32 4, splat (i1 true)), !alias.scope [[META11:![0-9]+]], !noalias [[META8]] ; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] -; STRIDED-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP41]] +; STRIDED-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; STRIDED-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; STRIDED-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; STRIDED: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll index 70c04ded5cf57..827612cfe36d5 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll @@ -325,9 +325,9 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; SCALABLE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 -; SCALABLE-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i64() ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; SCALABLE-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i64() ; SCALABLE-NEXT: [[TMP7:%.*]] = mul [[TMP6]], splat (i64 1) ; SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] ; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]] @@ -432,9 +432,9 @@ define void @conditional_uniform_load(ptr 
noalias nocapture %a, ptr noalias noca ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; TF-SCALABLE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 -; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv4i64() ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B]], i64 0 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv4i64() ; TF-SCALABLE-NEXT: [[TMP6:%.*]] = mul [[TMP5]], splat (i64 1) ; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP6]] ; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP4]] @@ -996,11 +996,11 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; TF-SCALABLE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 -; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv2i64() ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B]], i64 0 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, i64 [[V]], i64 0 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector [[BROADCAST_SPLATINSERT2]], poison, zeroinitializer +; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv2i64() ; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul [[TMP5]], splat (i64 1) ; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] ; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP4]] @@ -1127,11 +1127,11 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 
@llvm.vscale.i64() ; SCALABLE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 -; SCALABLE-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i64() ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, i64 [[V]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT1:%.*]] = shufflevector [[BROADCAST_SPLATINSERT2]], poison, zeroinitializer ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, ptr [[B]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer +; SCALABLE-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i64() ; SCALABLE-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i64 1) ; SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] ; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]] @@ -1233,11 +1233,11 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; TF-SCALABLE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 -; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv2i64() ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, i64 [[V]], i64 0 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT1:%.*]] = shufflevector [[BROADCAST_SPLATINSERT2]], poison, zeroinitializer ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, ptr [[B]], i64 0 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer +; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv2i64() ; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul [[TMP5]], splat (i64 1) ; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] ; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP4]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll 
index 8c62a0664f8e3..08cb60e36ed16 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll @@ -582,13 +582,12 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-OUTLOOP-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 ; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = call @llvm.stepvector.nxv4i32() -; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement zeroinitializer, i32 [[START]], i32 0 ; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul [[TMP12]], splat (i32 1) ; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP14]] ; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP10]] to i32 -; NO-VP-OUTLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP16]], i64 0 +; NO-VP-OUTLOOP-NEXT: [[TMP17:%.*]] = mul i32 1, [[TMP16]] +; NO-VP-OUTLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP17]], i64 0 ; NO-VP-OUTLOOP-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; NO-VP-OUTLOOP-NEXT: [[TMP13:%.*]] = mul splat (i32 1), [[DOTSPLAT]] ; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement zeroinitializer, i32 [[START]], i32 0 ; NO-VP-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP-OUTLOOP: vector.body: @@ -602,7 +601,7 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: [[TMP22:%.*]] = select [[TMP27]], [[WIDE_MASKED_LOAD]], zeroinitializer ; NO-VP-OUTLOOP-NEXT: [[TMP23]] = add [[TMP22]], [[VEC_PHI]] ; NO-VP-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] -; NO-VP-OUTLOOP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP13]] +; NO-VP-OUTLOOP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; NO-VP-OUTLOOP-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-OUTLOOP-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], 
!llvm.loop [[LOOP6:![0-9]+]] ; NO-VP-OUTLOOP: middle.block: @@ -647,9 +646,9 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i32 1) ; NO-VP-INLOOP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] ; NO-VP-INLOOP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP5]] to i32 -; NO-VP-INLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP10]], i64 0 +; NO-VP-INLOOP-NEXT: [[TMP11:%.*]] = mul i32 1, [[TMP10]] +; NO-VP-INLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP11]], i64 0 ; NO-VP-INLOOP-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; NO-VP-INLOOP-NEXT: [[TMP9:%.*]] = mul splat (i32 1), [[DOTSPLAT]] ; NO-VP-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP-INLOOP: vector.body: ; NO-VP-INLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -663,7 +662,7 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP16]]) ; NO-VP-INLOOP-NEXT: [[TMP18]] = add i32 [[TMP17]], [[VEC_PHI]] ; NO-VP-INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; NO-VP-INLOOP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP9]] +; NO-VP-INLOOP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; NO-VP-INLOOP-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-INLOOP-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; NO-VP-INLOOP: middle.block: @@ -773,13 +772,12 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-OUTLOOP-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 ; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = call @llvm.stepvector.nxv4i32() -; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement zeroinitializer, i32 [[START]], i32 0 ; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul 
[[TMP12]], splat (i32 1) ; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP14]] ; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP10]] to i32 -; NO-VP-OUTLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP16]], i64 0 +; NO-VP-OUTLOOP-NEXT: [[TMP17:%.*]] = mul i32 1, [[TMP16]] +; NO-VP-OUTLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP17]], i64 0 ; NO-VP-OUTLOOP-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; NO-VP-OUTLOOP-NEXT: [[TMP13:%.*]] = mul splat (i32 1), [[DOTSPLAT]] ; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement zeroinitializer, i32 [[START]], i32 0 ; NO-VP-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP-OUTLOOP: vector.body: @@ -793,7 +791,7 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: [[TMP22:%.*]] = add [[VEC_PHI]], [[WIDE_MASKED_LOAD]] ; NO-VP-OUTLOOP-NEXT: [[PREDPHI]] = select [[TMP28]], [[TMP22]], [[VEC_PHI]] ; NO-VP-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] -; NO-VP-OUTLOOP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP13]] +; NO-VP-OUTLOOP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; NO-VP-OUTLOOP-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-OUTLOOP-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; NO-VP-OUTLOOP: middle.block: @@ -842,9 +840,9 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i32 1) ; NO-VP-INLOOP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] ; NO-VP-INLOOP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP5]] to i32 -; NO-VP-INLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP10]], i64 0 +; NO-VP-INLOOP-NEXT: [[TMP11:%.*]] = mul i32 1, [[TMP10]] +; NO-VP-INLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP11]], i64 0 ; NO-VP-INLOOP-NEXT: [[DOTSPLAT:%.*]] = shufflevector 
[[DOTSPLATINSERT]], poison, zeroinitializer -; NO-VP-INLOOP-NEXT: [[TMP9:%.*]] = mul splat (i32 1), [[DOTSPLAT]] ; NO-VP-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP-INLOOP: vector.body: ; NO-VP-INLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -858,7 +856,7 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP16]]) ; NO-VP-INLOOP-NEXT: [[TMP18]] = add i32 [[TMP17]], [[VEC_PHI]] ; NO-VP-INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; NO-VP-INLOOP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP9]] +; NO-VP-INLOOP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; NO-VP-INLOOP-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-INLOOP-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; NO-VP-INLOOP: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll index 427123cfca6d4..c616fc1b11b93 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll @@ -29,9 +29,9 @@ define void @interleave(ptr noalias %a, ptr noalias %b, i64 %N) { ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 ; IF-EVL-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 -; IF-EVL-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv4i64() ; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 ; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; IF-EVL-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv4i64() ; IF-EVL-NEXT: [[TMP12:%.*]] = mul [[TMP10]], 
splat (i64 1) ; IF-EVL-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP12]] ; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 diff --git a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll index 7d9ed7d6215c5..05a495d51c458 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll @@ -110,8 +110,8 @@ define void @redundant_or_1(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK-NEXT: store i32 0, ptr [[TMP17]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]] ; CHECK: pred.store.continue8: -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4) ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] @@ -214,8 +214,8 @@ define void @redundant_or_2(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK-NEXT: store i32 0, ptr [[TMP16]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]] ; CHECK: pred.store.continue8: -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4) ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll index 554918bcc6cc0..4b94f5503c1c0 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll @@ -700,13 +700,15 @@ define void @wombat(i32 %arg, ptr %dst) #1 { ; CHECK: vector.ph: 
; CHECK-NEXT: [[TMP0:%.*]] = mul i32 56, [[ARG]] ; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[MUL]], [[TMP0]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[MUL]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[ARG]], i64 0 ; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = mul <8 x i32> , [[DOTSPLAT2]] -; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> poison, i32 [[MUL]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP1]] -; CHECK-NEXT: [[DOTSPLAT4:%.*]] = mul <8 x i32> [[DOTSPLAT2]], splat (i32 8) +; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[ARG]], 8 +; CHECK-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <8 x i32> poison, i32 [[TMP2]], i64 0 +; CHECK-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT3]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -771,13 +773,15 @@ define void @wombat2(i32 %arg, ptr %dst) #1 { ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = mul i32 56, [[ARG]] ; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[MUL]], [[TMP0]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[MUL]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[ARG]], i64 0 ; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT1]], <8 x i32> poison, <8 x i32> 
zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = mul <8 x i32> , [[DOTSPLAT2]] -; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> poison, i32 [[MUL]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP1]] -; CHECK-NEXT: [[DOTSPLAT4:%.*]] = mul <8 x i32> [[DOTSPLAT2]], splat (i32 8) +; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[ARG]], 8 +; CHECK-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <8 x i32> poison, i32 [[TMP2]], i64 0 +; CHECK-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT3]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -845,13 +849,15 @@ define void @with_dead_use(i32 %arg, ptr %dst) #1 { ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = mul i32 56, [[ARG]] ; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[MUL]], [[TMP0]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[MUL]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[ARG]], i64 0 ; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = mul <8 x i32> , [[DOTSPLAT2]] -; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> poison, i32 [[MUL]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP1]] -; CHECK-NEXT: [[DOTSPLAT4:%.*]] = mul <8 x i32> [[DOTSPLAT2]], splat (i32 8) +; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[ARG]], 8 +; CHECK-NEXT: 
[[DOTSPLATINSERT3:%.*]] = insertelement <8 x i32> poison, i32 [[TMP2]], i64 0 +; CHECK-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT3]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll index 4038ace617c17..99650592d2dea 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll @@ -131,7 +131,7 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[IND_END43:%.*]] = mul i64 [[N_VEC32]], 2 ; CHECK-NEXT: [[TMP34:%.*]] = xor <16 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: br label [[VECTOR_BODY29:%.*]] -; CHECK: vector.body28: +; CHECK: vector.body30: ; CHECK-NEXT: [[INDEX34:%.*]] = phi i64 [ 0, [[VECTOR_PH25]] ], [ [[INDEX_NEXT39:%.*]], [[VECTOR_BODY29]] ] ; CHECK-NEXT: [[VEC_IND35:%.*]] = phi <16 x i64> [ , [[VECTOR_PH25]] ], [ [[VEC_IND_NEXT36:%.*]], [[VECTOR_BODY29]] ] ; CHECK-NEXT: [[VEC_IND37:%.*]] = phi <16 x i64> [ , [[VECTOR_PH25]] ], [ [[VEC_IND_NEXT38:%.*]], [[VECTOR_BODY29]] ] @@ -153,18 +153,18 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[VEC_IND_NEXT36]] = add <16 x i64> [[VEC_IND35]], splat (i64 32) ; CHECK-NEXT: [[VEC_IND_NEXT38]] = add <16 x i64> [[VEC_IND37]], splat (i64 32) ; CHECK-NEXT: [[TMP41:%.*]] = icmp eq i64 [[INDEX_NEXT39]], [[N_VEC32]] -; CHECK-NEXT: br i1 [[TMP41]], label [[MIDDLE_BLOCK35:%.*]], label [[VECTOR_BODY29]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK: middle.block35: +; CHECK-NEXT: br i1 [[TMP41]], label [[MIDDLE_BLOCK37:%.*]], label [[VECTOR_BODY29]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: middle.block37: ; CHECK-NEXT: [[CMP_N40:%.*]] = icmp eq i64 [[TMP28]], [[N_VEC32]] ; CHECK-NEXT: br i1 [[CMP_N40]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label 
[[VEC_EPILOG_ITER_CHECK43:%.*]] -; CHECK: vec.epilog.iter.check42: +; CHECK: vec.epilog.iter.check44: ; CHECK-NEXT: [[TMP42:%.*]] = mul i64 [[N_VEC32]], 2 ; CHECK-NEXT: [[IND_END55:%.*]] = add i64 8, [[TMP42]] ; CHECK-NEXT: [[IND_END58:%.*]] = mul i64 [[N_VEC32]], 2 ; CHECK-NEXT: [[N_VEC_REMAINING49:%.*]] = sub i64 [[TMP28]], [[N_VEC32]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK50:%.*]] = icmp ult i64 [[N_VEC_REMAINING49]], 8 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK50]], label [[VEC_EPILOG_SCALAR_PH40]], label [[VEC_EPILOG_PH42]] -; CHECK: vec.epilog.ph41: +; CHECK: vec.epilog.ph43: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL51:%.*]] = phi i64 [ [[N_VEC32]], [[VEC_EPILOG_ITER_CHECK43]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK24]] ] ; CHECK-NEXT: [[BC_RESUME_VAL42:%.*]] = phi i64 [ [[IND_END41]], [[VEC_EPILOG_ITER_CHECK43]] ], [ 8, [[VECTOR_MAIN_LOOP_ITER_CHECK24]] ] ; CHECK-NEXT: [[BC_RESUME_VAL44:%.*]] = phi i64 [ [[IND_END43]], [[VEC_EPILOG_ITER_CHECK43]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK24]] ] @@ -183,7 +183,7 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[DOTSPLAT68:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT67]], <8 x i64> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION69:%.*]] = add <8 x i64> [[DOTSPLAT68]], ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY49:%.*]] -; CHECK: vec.epilog.vector.body49: +; CHECK: vec.epilog.vector.body57: ; CHECK-NEXT: [[INDEX61:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL51]], [[VEC_EPILOG_PH42]] ], [ [[INDEX_NEXT74:%.*]], [[VEC_EPILOG_VECTOR_BODY49]] ] ; CHECK-NEXT: [[VEC_IND65:%.*]] = phi <8 x i64> [ [[INDUCTION64]], [[VEC_EPILOG_PH42]] ], [ [[VEC_IND_NEXT66:%.*]], [[VEC_EPILOG_VECTOR_BODY49]] ] ; CHECK-NEXT: [[VEC_IND70:%.*]] = phi <8 x i64> [ [[INDUCTION69]], [[VEC_EPILOG_PH42]] ], [ [[VEC_IND_NEXT71:%.*]], [[VEC_EPILOG_VECTOR_BODY49]] ] @@ -206,10 +206,10 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[VEC_IND_NEXT71]] = add <8 x i64> [[VEC_IND70]], splat (i64 16) ; CHECK-NEXT: [[TMP55:%.*]] = icmp eq i64 
[[INDEX_NEXT74]], [[N_VEC53]] ; CHECK-NEXT: br i1 [[TMP55]], label [[VEC_EPILOG_MIDDLE_BLOCK40:%.*]], label [[VEC_EPILOG_VECTOR_BODY49]], !llvm.loop [[LOOP5:![0-9]+]] -; CHECK: vec.epilog.middle.block62: +; CHECK: vec.epilog.middle.block64: ; CHECK-NEXT: [[CMP_N65:%.*]] = icmp eq i64 [[TMP28]], [[N_VEC53]] ; CHECK-NEXT: br i1 [[CMP_N65]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH40]] -; CHECK: vec.epilog.scalar.ph40: +; CHECK: vec.epilog.scalar.ph42: ; CHECK-NEXT: [[BC_RESUME_VAL56:%.*]] = phi i64 [ [[IND_END54]], [[VEC_EPILOG_MIDDLE_BLOCK40]] ], [ [[IND_END55]], [[VEC_EPILOG_ITER_CHECK43]] ], [ 8, [[ITER_CHECK22]] ] ; CHECK-NEXT: [[BC_RESUME_VAL67:%.*]] = phi i64 [ [[IND_END57]], [[VEC_EPILOG_MIDDLE_BLOCK40]] ], [ [[IND_END58]], [[VEC_EPILOG_ITER_CHECK43]] ], [ 0, [[ITER_CHECK22]] ] ; CHECK-NEXT: br label [[FOR_BODY_US:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll b/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll index 9e3aee9453153..71273a30f846d 100644 --- a/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll +++ b/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll @@ -52,7 +52,9 @@ define void @test(i32 %arg, i32 %L1.limit, i32 %L2.switch, i1 %c, ptr %dst) { ; CHECK-NEXT: [[DOTSPLAT1:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> , [[DOTSPLAT1]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> splat (i32 1), [[TMP4]] -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = mul <4 x i32> [[DOTSPLAT1]], splat (i32 4) +; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[INDUCTION_IV_LCSSA1]], 4 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: 
[[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll index 89268ac25c345..d5faaa122d038 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll @@ -587,8 +587,8 @@ define ptr @test_first_order_recurrences_and_pointer_induction2(ptr %ptr) { ; CHECK-NEXT: br label %vector.body ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] -; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[PTR]], %vector.ph ], [ [[PTR_IND:%.*]], %vector.body ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x ptr> [ , %vector.ph ], [ [[TMP0:%.*]], %vector.body ] +; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[PTR]], %vector.ph ], [ [[PTR_IND:%.*]], %vector.body ] ; CHECK-NEXT: [[TMP0]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[PTR]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll index 1d39647eee3b0..f801443b85d3f 100644 --- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll +++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll @@ -748,13 +748,15 @@ define float @fp_postinc_use_fadd(float %init, ptr noalias nocapture %A, i64 %N, ; VEC-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float ; VEC-NEXT: [[TMP0:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]] ; VEC-NEXT: [[TMP1:%.*]] = fadd fast float [[INIT]], [[TMP0]] +; VEC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 +; VEC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> 
zeroinitializer ; VEC-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <2 x float> poison, float [[FPINC]], i64 0 ; VEC-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT1]], <2 x float> poison, <2 x i32> zeroinitializer ; VEC-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> , [[DOTSPLAT2]] -; VEC-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 -; VEC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT2]], <2 x float> poison, <2 x i32> zeroinitializer ; VEC-NEXT: [[INDUCTION:%.*]] = fadd fast <2 x float> [[DOTSPLAT]], [[TMP2]] -; VEC-NEXT: [[DOTSPLAT4:%.*]] = fmul fast <2 x float> [[DOTSPLAT2]], splat (float 2.000000e+00) +; VEC-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], 2.000000e+00 +; VEC-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0 +; VEC-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT3]], <2 x float> poison, <2 x i32> zeroinitializer ; VEC-NEXT: br label %[[VECTOR_BODY:.*]] ; VEC: [[VECTOR_BODY]]: ; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -771,11 +773,11 @@ define float @fp_postinc_use_fadd(float %init, ptr noalias nocapture %A, i64 %N, ; VEC-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; VEC: [[SCALAR_PH]]: ; VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; VEC-NEXT: [[BC_RESUME_VAL3:%.*]] = phi float [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[INIT]], %[[ENTRY]] ] +; VEC-NEXT: [[BC_RESUME_VAL5:%.*]] = phi float [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[INIT]], %[[ENTRY]] ] ; VEC-NEXT: br label %[[LOOP:.*]] ; VEC: [[LOOP]]: ; VEC-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; VEC-NEXT: [[FP_IV:%.*]] = phi float [ [[BC_RESUME_VAL3]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ] +; VEC-NEXT: [[FP_IV:%.*]] = phi float [ [[BC_RESUME_VAL5]], %[[SCALAR_PH]] ], [ 
[[ADD:%.*]], %[[LOOP]] ] ; VEC-NEXT: [[GEP_A:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] ; VEC-NEXT: store float [[FP_IV]], ptr [[GEP_A]], align 4 ; VEC-NEXT: [[ADD]] = fadd fast float [[FP_IV]], [[FPINC]] @@ -862,13 +864,15 @@ define float @fp_postinc_use_fadd_ops_swapped(float %init, ptr noalias nocapture ; VEC-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float ; VEC-NEXT: [[TMP0:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]] ; VEC-NEXT: [[TMP1:%.*]] = fadd fast float [[INIT]], [[TMP0]] +; VEC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 +; VEC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer ; VEC-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <2 x float> poison, float [[FPINC]], i64 0 ; VEC-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT1]], <2 x float> poison, <2 x i32> zeroinitializer ; VEC-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> , [[DOTSPLAT2]] -; VEC-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 -; VEC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT2]], <2 x float> poison, <2 x i32> zeroinitializer ; VEC-NEXT: [[INDUCTION:%.*]] = fadd fast <2 x float> [[DOTSPLAT]], [[TMP2]] -; VEC-NEXT: [[DOTSPLAT4:%.*]] = fmul fast <2 x float> [[DOTSPLAT2]], splat (float 2.000000e+00) +; VEC-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], 2.000000e+00 +; VEC-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0 +; VEC-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT3]], <2 x float> poison, <2 x i32> zeroinitializer ; VEC-NEXT: br label %[[VECTOR_BODY:.*]] ; VEC: [[VECTOR_BODY]]: ; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -885,11 +889,11 @@ define float @fp_postinc_use_fadd_ops_swapped(float %init, ptr noalias nocapture ; VEC-NEXT: br i1 [[CMP_N]], 
label %[[EXIT:.*]], label %[[SCALAR_PH]] ; VEC: [[SCALAR_PH]]: ; VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; VEC-NEXT: [[BC_RESUME_VAL3:%.*]] = phi float [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[INIT]], %[[ENTRY]] ] +; VEC-NEXT: [[BC_RESUME_VAL5:%.*]] = phi float [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[INIT]], %[[ENTRY]] ] ; VEC-NEXT: br label %[[LOOP:.*]] ; VEC: [[LOOP]]: ; VEC-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; VEC-NEXT: [[FP_IV:%.*]] = phi float [ [[BC_RESUME_VAL3]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ] +; VEC-NEXT: [[FP_IV:%.*]] = phi float [ [[BC_RESUME_VAL5]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ] ; VEC-NEXT: [[GEP_A:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] ; VEC-NEXT: store float [[FP_IV]], ptr [[GEP_A]], align 4 ; VEC-NEXT: [[ADD]] = fadd fast float [[FPINC]], [[FP_IV]] @@ -976,13 +980,15 @@ define float @fp_postinc_use_fsub(float %init, ptr noalias nocapture %A, i64 %N, ; VEC-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float ; VEC-NEXT: [[TMP0:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]] ; VEC-NEXT: [[TMP1:%.*]] = fsub fast float [[INIT]], [[TMP0]] +; VEC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 +; VEC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer ; VEC-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <2 x float> poison, float [[FPINC]], i64 0 ; VEC-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT1]], <2 x float> poison, <2 x i32> zeroinitializer ; VEC-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> , [[DOTSPLAT2]] -; VEC-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 -; VEC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT2]], <2 x float> poison, <2 x i32> zeroinitializer ; VEC-NEXT: [[INDUCTION:%.*]] = fsub fast <2 x 
float> [[DOTSPLAT]], [[TMP2]] -; VEC-NEXT: [[DOTSPLAT4:%.*]] = fmul fast <2 x float> [[DOTSPLAT2]], splat (float 2.000000e+00) +; VEC-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], 2.000000e+00 +; VEC-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0 +; VEC-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT3]], <2 x float> poison, <2 x i32> zeroinitializer ; VEC-NEXT: br label %[[VECTOR_BODY:.*]] ; VEC: [[VECTOR_BODY]]: ; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -999,11 +1005,11 @@ define float @fp_postinc_use_fsub(float %init, ptr noalias nocapture %A, i64 %N, ; VEC-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; VEC: [[SCALAR_PH]]: ; VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; VEC-NEXT: [[BC_RESUME_VAL3:%.*]] = phi float [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[INIT]], %[[ENTRY]] ] +; VEC-NEXT: [[BC_RESUME_VAL5:%.*]] = phi float [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[INIT]], %[[ENTRY]] ] ; VEC-NEXT: br label %[[LOOP:.*]] ; VEC: [[LOOP]]: ; VEC-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; VEC-NEXT: [[FP_IV:%.*]] = phi float [ [[BC_RESUME_VAL3]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ] +; VEC-NEXT: [[FP_IV:%.*]] = phi float [ [[BC_RESUME_VAL5]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ] ; VEC-NEXT: [[GEP_A:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] ; VEC-NEXT: store float [[FP_IV]], ptr [[GEP_A]], align 4 ; VEC-NEXT: [[ADD]] = fsub fast float [[FP_IV]], [[FPINC]] diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll index dcf428a4d099a..1480bc930a5d2 100644 --- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll @@ -468,7 
+468,9 @@ define void @induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i8> , [[DOTSPLAT]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i8> zeroinitializer, [[TMP2]] -; CHECK-NEXT: [[DOTSPLAT2:%.*]] = mul <4 x i8> [[DOTSPLAT]], splat (i8 4) +; CHECK-NEXT: [[TMP3:%.*]] = mul i8 [[INDUCTION_IV]], 4 +; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[TMP3]], i64 0 +; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -490,13 +492,15 @@ define void @induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 84, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[IND_END4:%.*]] = mul i8 84, [[INDUCTION_IV]] -; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <4 x i8> poison, i8 [[INDUCTION_IV]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <4 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0 ; CHECK-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT8]], <4 x i8> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP14:%.*]] = mul <4 x i8> , [[DOTSPLAT9]] -; CHECK-NEXT: [[DOTSPLATINSERT10:%.*]] = insertelement <4 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT10:%.*]] = insertelement <4 x i8> poison, i8 [[INDUCTION_IV]], i64 0 ; CHECK-NEXT: [[DOTSPLAT11:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT10]], <4 x i8> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: 
[[INDUCTION12:%.*]] = add <4 x i8> [[DOTSPLAT11]], [[TMP14]] -; CHECK-NEXT: [[DOTSPLAT14:%.*]] = mul <4 x i8> [[DOTSPLAT9]], splat (i8 4) +; CHECK-NEXT: [[TMP8:%.*]] = mul <4 x i8> , [[DOTSPLAT11]] +; CHECK-NEXT: [[INDUCTION12:%.*]] = add <4 x i8> [[DOTSPLAT9]], [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = mul i8 [[INDUCTION_IV]], 4 +; CHECK-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement <4 x i8> poison, i8 [[TMP9]], i64 0 +; CHECK-NEXT: [[DOTSPLAT14:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT13]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT18:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] @@ -552,7 +556,9 @@ define void @induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP2:%.*]] = mul <4 x i8> , [[DOTSPLAT]] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDUCTION:%.*]] = add <4 x i8> zeroinitializer, [[TMP2]] -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT2:%.*]] = mul <4 x i8> [[DOTSPLAT]], splat (i8 4) +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP3:%.*]] = mul i8 [[INDUCTION_IV]], 4 +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[TMP3]], i64 0 +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-PROFITABLE-BY-DEFAULT: vector.body: ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -574,13 +580,15 @@ define void @induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: 
[[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 84, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[IND_END4:%.*]] = mul i8 84, [[INDUCTION_IV]] -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <2 x i8> poison, i8 [[INDUCTION_IV]], i64 0 +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <2 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT8]], <2 x i8> poison, <2 x i32> zeroinitializer -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP14:%.*]] = mul <2 x i8> , [[DOTSPLAT9]] -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLATINSERT10:%.*]] = insertelement <2 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0 +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLATINSERT10:%.*]] = insertelement <2 x i8> poison, i8 [[INDUCTION_IV]], i64 0 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT11:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT10]], <2 x i8> poison, <2 x i32> zeroinitializer -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDUCTION12:%.*]] = add <2 x i8> [[DOTSPLAT11]], [[TMP14]] -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT14:%.*]] = mul <2 x i8> [[DOTSPLAT9]], splat (i8 2) +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP8:%.*]] = mul <2 x i8> , [[DOTSPLAT11]] +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDUCTION12:%.*]] = add <2 x i8> [[DOTSPLAT9]], [[TMP8]] +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP9:%.*]] = mul i8 [[INDUCTION_IV]], 2 +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement <2 x i8> poison, i8 [[TMP9]], i64 0 +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT14:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT13]], <2 x i8> poison, <2 x i32> zeroinitializer ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; 
CHECK-PROFITABLE-BY-DEFAULT: vec.epilog.vector.body: ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT18:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll b/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll index 91d17c45c8979..2135fc6367918 100644 --- a/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll +++ b/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll @@ -29,9 +29,9 @@ define void @foo() { ; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul [[TMP4]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP6]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP19]], i64 0 +; CHECK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP19]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = mul splat (i64 1), [[DOTSPLAT]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[OUTER_LOOP_LATCH4:%.*]] ] @@ -53,7 +53,7 @@ define void @foo() { ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi [ [[TMP12]], [[INNER_LOOP1]] ] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0( [[VEC_PHI5]], [[TMP10]], i32 4, splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP8]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll 
b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll index 291dcb9e02b28..a5b360b2c79c5 100644 --- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll @@ -304,16 +304,15 @@ define void @outside_lattice(ptr noalias %p, ptr noalias %q, i32 %n) { ; DEFAULT-NEXT: [[IND_END2:%.*]] = trunc i64 [[N_VEC]] to i32 ; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]] ; DEFAULT: vector.body: -; DEFAULT-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ null, [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; DEFAULT-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ null, [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; DEFAULT-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> ; DEFAULT-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32 -; DEFAULT-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], 0 -; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[P:%.*]], i32 [[TMP4]] +; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[P:%.*]], i32 [[OFFSET_IDX]] ; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i32 0 ; DEFAULT-NEXT: store <4 x ptr> [[VECTOR_GEP]], ptr [[TMP6]], align 8 -; DEFAULT-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[Q:%.*]], i32 [[TMP4]] +; DEFAULT-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[Q:%.*]], i32 [[OFFSET_IDX]] ; DEFAULT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 ; DEFAULT-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP8]], align 4 ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -361,16 +360,15 @@ define void @outside_lattice(ptr noalias %p, ptr noalias %q, i32 %n) { ; STRIDED-NEXT: [[IND_END2:%.*]] = trunc i64 [[N_VEC]] to i32 ; STRIDED-NEXT: br label 
[[VECTOR_BODY:%.*]] ; STRIDED: vector.body: -; STRIDED-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ null, [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; STRIDED-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; STRIDED-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ null, [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; STRIDED-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> ; STRIDED-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32 -; STRIDED-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], 0 -; STRIDED-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[P:%.*]], i32 [[TMP4]] +; STRIDED-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[P:%.*]], i32 [[OFFSET_IDX]] ; STRIDED-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i32 0 ; STRIDED-NEXT: store <4 x ptr> [[VECTOR_GEP]], ptr [[TMP6]], align 8 -; STRIDED-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[Q:%.*]], i32 [[TMP4]] +; STRIDED-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[Q:%.*]], i32 [[OFFSET_IDX]] ; STRIDED-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 ; STRIDED-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP8]], align 4 ; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll b/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll index 313ce2f53b78d..b2da8c73377e4 100644 --- a/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll +++ b/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll @@ -73,7 +73,9 @@ define void @doit1(i32 %n, i32 %step) local_unnamed_addr { ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = mul <4 x i32> , [[DOTSPLAT]] ; CHECK-NEXT: 
[[INDUCTION:%.*]] = add <4 x i32> zeroinitializer, [[TMP17]] -; CHECK-NEXT: [[DOTSPLAT3:%.*]] = mul <4 x i32> [[DOTSPLAT]], splat (i32 4) +; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[STEP]], 4 +; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP18]], i64 0 +; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -193,7 +195,9 @@ define void @doit2(i32 %n, i32 %step) local_unnamed_addr { ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP16:%.*]] = mul <4 x i32> , [[DOTSPLAT]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> zeroinitializer, [[TMP16]] -; CHECK-NEXT: [[DOTSPLAT3:%.*]] = mul <4 x i32> [[DOTSPLAT]], splat (i32 4) +; CHECK-NEXT: [[TMP17:%.*]] = mul i32 [[STEP]], 4 +; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP17]], i64 0 +; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -385,7 +389,9 @@ define void @doit4(i32 %n, i8 signext %cstep) local_unnamed_addr { ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = mul <4 x i32> , [[DOTSPLAT]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> zeroinitializer, [[TMP14]] -; CHECK-NEXT: [[DOTSPLAT3:%.*]] = mul <4 x i32> [[DOTSPLAT]], splat (i32 4) +; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[CONV]], 4 +; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP15]], i64 0 +; CHECK-NEXT: 
[[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll index a6be32aee779d..f136b0e2e0b31 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll @@ -192,9 +192,9 @@ define void @add_unique_ind32(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i32() ; CHECK-NEXT: [[TMP7:%.*]] = shl [[TMP6]], splat (i32 1) ; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP5]] to i32 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP8]], i64 0 +; CHECK-NEXT: [[TMP9:%.*]] = shl i32 [[TMP8]], 1 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP9]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = shl [[DOTSPLAT]], splat (i32 1) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -202,7 +202,7 @@ define void @add_unique_ind32(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: store [[VEC_IND]], ptr [[TMP10]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[TMP12]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: @@ -272,9 
+272,9 @@ define void @add_unique_indf32(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = fmul [[TMP8]], splat (float 2.000000e+00) ; CHECK-NEXT: [[INDUCTION:%.*]] = fadd [[TMP9]], zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = uitofp i64 [[TMP6]] to float -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, float [[TMP12]], i64 0 +; CHECK-NEXT: [[TMP13:%.*]] = fmul float [[TMP12]], 2.000000e+00 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, float [[TMP13]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = fmul [[DOTSPLAT]], splat (float 2.000000e+00) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -282,7 +282,7 @@ define void @add_unique_indf32(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: store [[VEC_IND]], ptr [[TMP14]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = fadd [[VEC_IND]], [[TMP13]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = fadd [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll b/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll index 2a48e0a5e5310..788535d6a0c5d 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll @@ -19,9 +19,9 @@ define i32 @iv_live_out_wide(ptr %dst) { ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], 2 ; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], 2 
-; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv2i32() ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[STEP_2]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv2i32() ; CHECK-NEXT: [[TMP8:%.*]] = mul [[TMP7]], splat (i32 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll index 5ff43dcf42bcf..ec1e8fa1e1b33 100644 --- a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll +++ b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll @@ -918,8 +918,8 @@ define i64 @same_exit_block_pre_inc_use4() { ; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]]) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 64 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK: middle.split: ; CHECK-NEXT: br i1 [[TMP5]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll index 8ec397129a134..c67c448ae5470 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll @@ -91,25 +91,28 @@ define void @iv_expand(ptr %p, i64 %n) { ; CHECK: ir-bb: ; CHECK-NEXT: IR %n.mod.vf = urem i64 %n, 8 ; CHECK-NEXT: IR 
%n.vec = sub i64 %n, %n.mod.vf -; CHECK-NEXT: EMIT vp<[[STEP_VECTOR:%.+]]> = step-vector ir<0> -; CHECK-NEXT: EMIT vp<[[MUL:%.+]]> = mul vp<[[STEP_VECTOR]]>, ir<1> -; CHECK-NEXT: EMIT vp<[[INDUCTION:%.+]]> = add ir<0>, vp<[[MUL]]> -; CHECK-NEXT: EMIT vp<[[INC:%.+]]> = mul ir<1>, ir<8> +; CHECK-NEXT: EMIT vp<[[STEP_VECTOR:%.+]]> = step-vector +; CHECK-NEXT: EMIT vp<[[BROADCAST_0:%.+]]> = broadcast ir<0> +; CHECK-NEXT: EMIT vp<[[BROADCAST_1:%.+]]> = broadcast ir<1> +; CHECK-NEXT: EMIT vp<[[MUL:%.+]]> = mul vp<[[STEP_VECTOR]]>, vp<[[BROADCAST_1]]> +; CHECK-NEXT: EMIT vp<[[INDUCTION:%.+]]> = add vp<[[BROADCAST_0]]>, vp<[[MUL]]> +; CHECK-NEXT: EMIT vp<[[TRUNC:%.+]]> = trunc ir<8> to i64 +; CHECK-NEXT: EMIT vp<[[INC:%.+]]> = mul ir<1>, vp<[[TRUNC]]> +; CHECK-NEXT: EMIT vp<[[BROADCAST_INC:%.+]]> = broadcast vp<[[INC]]> ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: SCALAR-PHI vp<[[SCALAR_PHI:%.+]]> = phi ir<0>, vp<%index.next> +; CHECK-NEXT: EMIT vp<[[SCALAR_PHI:%.+]]> = phi ir<0>, vp<%index.next> ; CHECK-NEXT: WIDEN-PHI ir<%i> = phi vp<[[INDUCTION]]>, vp<%vec.ind.next> -; CHECK-NEXT: vp<[[SCALAR_STEPS:%.+]]> = SCALAR-STEPS vp<[[SCALAR_PHI]]>, ir<1> -; CHECK-NEXT: CLONE ir<%q> = getelementptr ir<%p>, vp<[[SCALAR_STEPS]]> +; CHECK-NEXT: CLONE ir<%q> = getelementptr ir<%p>, vp<[[SCALAR_PHI]]> ; CHECK-NEXT: vp<[[VEC_PTR_1:%.+]]> = vector-pointer ir<%q> ; CHECK-NEXT: WIDEN ir<%x> = load vp<[[VEC_PTR_1]]> ; CHECK-NEXT: WIDEN ir<%y> = add ir<%x>, ir<%i> ; CHECK-NEXT: vp<[[VEC_PTR_2:%.+]]> = vector-pointer ir<%q> ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR_2]]>, ir<%y> ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[SCALAR_PHI]]>, ir<8> -; CHECK-NEXT: EMIT vp<%vec.ind.next> = add ir<%i>, vp<[[INC]]> +; CHECK-NEXT: EMIT vp<%vec.ind.next> = add ir<%i>, vp<[[BROADCAST_INC]]> ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, ir<%n.vec> ; CHECK-NEXT: No successors ; CHECK-NEXT: } From 
7fc48590c0a7dad85ee7deac12e090eeddf9f14a Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Fri, 9 May 2025 00:18:11 +0800 Subject: [PATCH 20/28] clang-format --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index d572dbbc3d951..2d9b5a5f20598 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2525,7 +2525,6 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan, for (VPRecipeBase *R : PointerIVs) R->moveBefore(*VPBB, VPBB->getFirstNonPhi()); - for (VPRecipeBase &R : make_early_inc_range(*VPBB)) { if (isa(&R)) { auto *PhiR = cast(&R); From 61bd641f48262c09c6f4f11f094abb7ddecd1c27 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 14 May 2025 15:22:49 +0100 Subject: [PATCH 21/28] Fix typo, use TypeInfo in assert --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 2d9b5a5f20598..a931f799aae09 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2380,8 +2380,8 @@ void VPlanTransforms::createInterleaveGroups( } } -/// Expand a VPWidenIntOrFpInduction into executable recipes. for the initial -/// value, phi and backedge value. In the followng example: +/// Expand a VPWidenIntOrFpInduction into executable recipes, for the initial +/// value, phi and backedge value. In the following example: /// /// vector.ph: /// Successor(s): vector loop @@ -2399,7 +2399,7 @@ void VPlanTransforms::createInterleaveGroups( /// vector.ph: /// ... /// vp<%induction> = ... -/// vp<%inc> = ... +/// vp<%induction.increment> = ... 
/// /// Successor(s): vector loop /// @@ -2407,7 +2407,7 @@ void VPlanTransforms::createInterleaveGroups( /// vector.body: /// ir<%i> = WIDEN-PHI vp<%induction>, vp<%vec.ind.next> /// ... -/// vp<%vec.ind.next> = add ir<%i>, vp<%inc> +/// vp<%vec.ind.next> = add ir<%i>, vp<%induction.increment> /// EMIT branch-on-count ... /// No successors /// } @@ -2442,7 +2442,7 @@ expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR, // If the phi is truncated, truncate the start and step values. VPBuilder Builder(Plan->getVectorPreheader()); if (isa(IV)) { - assert(Start->getUnderlyingValue()->getType()->isIntegerTy() && + assert(TypeInfo.inferScalarType(Start)->isIntegerTy() && "Truncation requires an integer type"); Step = Builder.createScalarCast(Instruction::Trunc, Step, Ty, DL); Start = Builder.createScalarCast(Instruction::Trunc, Start, Ty, DL); From ec5fe59bf46e7a81362165b68cb64d0701f2a2ff Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 14 May 2025 15:54:39 +0100 Subject: [PATCH 22/28] Don't move VPWidenPointerInductionRecipe, instead fixup location of phi in VPWidenPHI::execute --- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 7 ++++++- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 14 -------------- .../LoopVectorize/AArch64/sve-widen-phi.ll | 2 +- .../LoopVectorize/first-order-recurrence-chains.ll | 2 +- .../Transforms/LoopVectorize/pointer-induction.ll | 7 ++++--- 5 files changed, 12 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 5e4967cde0e77..63565ceb7a13e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -3659,7 +3659,12 @@ void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent, void VPWidenPHIRecipe::execute(VPTransformState &State) { Value *Op0 = State.get(getOperand(0)); Type *VecTy = Op0->getType(); - Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, 
Name); + Instruction *VecPhi = State.Builder.CreatePHI(VecTy, 2, Name); + // Manually move it with the other PHIs in case PHI recipes above this one + // also inserted non-phi instructions. + // TODO: Remove once VPWidenPointerInductionRecipe is also expanded in + // convertToConcreteRecipes. + VecPhi->moveBefore(State.Builder.GetInsertBlock()->getFirstNonPHIIt()); State.set(this, VecPhi); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index a931f799aae09..6b8b495d979dc 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2511,20 +2511,6 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan, SmallVector ToRemove; for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly( vp_depth_first_deep(Plan.getEntry()))) { - - // Move VPWidenPointerInductionRecipes to the back of the phis - // since it may insert non-phi instructions in place, which will - // interfere with other header phis if they come after. 
- // - // TODO: Expand out VPWidenPointerInductionRecipe into multiple - // recipes here and remove this - SmallVector PointerIVs; - for (VPRecipeBase &R : VPBB->phis()) - if (isa(R)) - PointerIVs.push_back(&R); - for (VPRecipeBase *R : PointerIVs) - R->moveBefore(*VPBB, VPBB->getFirstNonPhi()); - for (VPRecipeBase &R : make_early_inc_range(*VPBB)) { if (isa(&R)) { auto *PhiR = cast(&R); diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll index 047b2b5d35d7b..6947884efb699 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll @@ -237,8 +237,8 @@ define i32 @pointer_iv_mixed(ptr noalias %a, ptr noalias %b, i64 %n) #0 { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP5]], 3 ; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.stepvector.nxv2i64() ; CHECK-NEXT: [[TMP10:%.*]] = shl [[TMP9]], splat (i64 2) diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll index d5faaa122d038..89268ac25c345 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll @@ -587,8 +587,8 @@ define ptr @test_first_order_recurrences_and_pointer_induction2(ptr %ptr) { ; CHECK-NEXT: br label %vector.body ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, 
%vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] -; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x ptr> [ , %vector.ph ], [ [[TMP0:%.*]], %vector.body ] ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[PTR]], %vector.ph ], [ [[PTR_IND:%.*]], %vector.body ] +; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x ptr> [ , %vector.ph ], [ [[TMP0:%.*]], %vector.body ] ; CHECK-NEXT: [[TMP0]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[PTR]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll index a5b360b2c79c5..c8899f6f582a9 100644 --- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll @@ -283,7 +283,8 @@ for.end: ; preds = %for.cond ret void } -; Test that WidenPointerInductionRecipes are ordered after the other header phis +; Test that when WidenPointerInductionRecipes are ordered before other +; WidenIntOrFpInductionRecipes, their PHIs are emitted in the right place. 
define void @outside_lattice(ptr noalias %p, ptr noalias %q, i32 %n) { ; DEFAULT-LABEL: @outside_lattice( ; DEFAULT-NEXT: entry: @@ -305,8 +306,8 @@ define void @outside_lattice(ptr noalias %p, ptr noalias %q, i32 %n) { ; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]] ; DEFAULT: vector.body: ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; DEFAULT-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ null, [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] +; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; DEFAULT-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> ; DEFAULT-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32 ; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[P:%.*]], i32 [[OFFSET_IDX]] @@ -361,8 +362,8 @@ define void @outside_lattice(ptr noalias %p, ptr noalias %q, i32 %n) { ; STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; STRIDED: vector.body: ; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; STRIDED-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; STRIDED-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ null, [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] +; STRIDED-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; STRIDED-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> ; STRIDED-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32 ; STRIDED-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[P:%.*]], i32 [[OFFSET_IDX]] From b315afb700c5d01a4b2cdc37cc4954e1bd7485ef Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 21 May 2025 15:19:17 +0100 Subject: [PATCH 23/28] Fix comments --- 
llvm/lib/Transforms/Vectorize/VPlan.h | 4 ++-- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 40a44919a88ad..065cb91f190ae 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1959,8 +1959,8 @@ class VPWidenInductionRecipe : public VPHeaderPHIRecipe { }; /// A recipe for handling phi nodes of integer and floating-point inductions, -/// producing their vector values. This won't execute any LLVM IR and will get -/// expanded later into several other recipes in convertToConcreteRecipes. +/// producing their vector values. This is an abstract recipe and must be +/// converted to concrete recipes before executing. class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe { TruncInst *Trunc; diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 62ef4d88d24f5..187823af70920 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2514,14 +2514,14 @@ void VPlanTransforms::createInterleaveGroups( /// /// vector.ph: /// ... -/// vp<%induction> = ... +/// vp<%induction.start> = ... /// vp<%induction.increment> = ... /// /// Successor(s): vector loop /// /// vector loop: { /// vector.body: -/// ir<%i> = WIDEN-PHI vp<%induction>, vp<%vec.ind.next> +/// ir<%i> = WIDEN-PHI vp<%induction.start>, vp<%vec.ind.next> /// ... /// vp<%vec.ind.next> = add ir<%i>, vp<%induction.increment> /// EMIT branch-on-count ... 
From 993ba23953bf7720562fe3814e38449a2dce3818 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 21 May 2025 16:07:12 +0100 Subject: [PATCH 24/28] Remove VPWidenPHIRecipe change --- llvm/lib/Transforms/Vectorize/VPlan.h | 2 +- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 065cb91f190ae..8660a988babae 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2109,7 +2109,7 @@ class VPWidenPHIRecipe : public VPSingleDefRecipe, public VPPhiAccessors { public: /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start and /// debug location \p DL. - VPWidenPHIRecipe(Instruction *Phi, VPValue *Start = nullptr, DebugLoc DL = {}, + VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr, DebugLoc DL = {}, const Twine &Name = "") : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef(), Phi, DL), Name(Name.str()) { diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 187823af70920..6d25bb7b1f47c 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2581,8 +2581,8 @@ expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR, Init = Builder.createNaryOp(AddOp, {SplatStart, Init}, FMFs, {}, "induction"); // Create the widened phi of the vector IV. 
- auto *WidePHI = - new VPWidenPHIRecipe(IV, nullptr, WidenIVR->getDebugLoc(), "vec.ind"); + auto *WidePHI = new VPWidenPHIRecipe(WidenIVR->getPHINode(), nullptr, + WidenIVR->getDebugLoc(), "vec.ind"); WidePHI->addOperand(Init); WidePHI->insertBefore(WidenIVR); From 7cf85b9091571517d5b97ef84275a3bc377617a8 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Tue, 17 Jun 2025 13:36:01 +0100 Subject: [PATCH 25/28] Infer and check types instead of peeking through underlying value for trunc --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index ac3b4f2dcb6f2..0617667305e21 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2545,13 +2545,11 @@ expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR, VPValue *Start = WidenIVR->getStartValue(); VPValue *Step = WidenIVR->getStepValue(); VPValue *VF = WidenIVR->getVFValue(); - TruncInst *Trunc = WidenIVR->getTruncInst(); DebugLoc DL = WidenIVR->getDebugLoc(); // The value from the original loop to which we are mapping the new induction // variable. - Instruction *IV = Trunc ? cast(Trunc) : WidenIVR->getPHINode(); - Type *Ty = IV->getType(); + Type *Ty = TypeInfo.inferScalarType(WidenIVR); const InductionDescriptor &ID = WidenIVR->getInductionDescriptor(); Instruction::BinaryOps AddOp; @@ -2568,17 +2566,17 @@ expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR, // If the phi is truncated, truncate the start and step values. 
VPBuilder Builder(Plan->getVectorPreheader()); - if (isa(IV)) { - assert(TypeInfo.inferScalarType(Start)->isIntegerTy() && - "Truncation requires an integer type"); + Type *StepTy = TypeInfo.inferScalarType(Step); + if (Ty->getScalarSizeInBits() < StepTy->getScalarSizeInBits()) { + assert(StepTy->isIntegerTy() && "Truncation requires an integer type"); Step = Builder.createScalarCast(Instruction::Trunc, Step, Ty, DL); Start = Builder.createScalarCast(Instruction::Trunc, Start, Ty, DL); + StepTy = Ty; } // Construct the initial value of the vector IV in the vector loop preheader. - Type *StepTy = TypeInfo.inferScalarType(Step); Type *IVIntTy = - IntegerType::get(IV->getContext(), StepTy->getScalarSizeInBits()); + IntegerType::get(StepTy->getContext(), StepTy->getScalarSizeInBits()); VPValue *Init = Builder.createNaryOp(VPInstruction::StepVector, {}, IVIntTy); if (StepTy->isFloatingPointTy()) Init = Builder.createWidenCast(Instruction::UIToFP, Init, StepTy); From 4f3acf08623e593b23a166f75adc5130c56a6644 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Tue, 17 Jun 2025 17:42:55 +0100 Subject: [PATCH 26/28] Move comment to flags --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 0617667305e21..2eb03fa26408b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2554,6 +2554,8 @@ expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR, const InductionDescriptor &ID = WidenIVR->getInductionDescriptor(); Instruction::BinaryOps AddOp; Instruction::BinaryOps MulOp; + // FIXME: The newly created binary instructions should contain nsw/nuw + // flags, which can be found from the original scalar operations. 
VPIRFlags Flags; if (ID.getKind() == InductionDescriptor::IK_IntInduction) { AddOp = Instruction::Add; @@ -2584,8 +2586,6 @@ expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR, VPValue *SplatStart = Builder.createNaryOp(VPInstruction::Broadcast, Start); VPValue *SplatStep = Builder.createNaryOp(VPInstruction::Broadcast, Step); - // FIXME: The newly created binary instructions should contain nsw/nuw - // flags, which can be found from the original scalar operations. Init = Builder.createNaryOp(MulOp, {Init, SplatStep}, Flags); Init = Builder.createNaryOp(AddOp, {SplatStart, Init}, Flags, {}, "induction"); From e8d64a420ffaaaa1314948a23e954837c4670c00 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Tue, 17 Jun 2025 17:43:26 +0100 Subject: [PATCH 27/28] Use ToRemove --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 2eb03fa26408b..11f0f2a930329 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2626,7 +2626,6 @@ expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR, WidePHI->addOperand(Next); WidenIVR->replaceAllUsesWith(WidePHI); - WidenIVR->eraseFromParent(); } void VPlanTransforms::dissolveLoopRegions(VPlan &Plan) { @@ -2738,6 +2737,7 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan, if (auto *WidenIVR = dyn_cast(&R)) { expandVPWidenIntOrFpInduction(WidenIVR, TypeInfo); + ToRemove.push_back(WidenIVR); continue; } From f34b5691d520d0c045c39ec0050ceabf6d938907 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Tue, 17 Jun 2025 17:47:57 +0100 Subject: [PATCH 28/28] Remove null ptr in test, update variable names --- .../LoopVectorize/pointer-induction.ll | 18 +++++++-------- .../LoopVectorize/vplan-iv-transforms.ll | 22 ++++++++++--------- 2 files changed, 21 insertions(+), 19 
deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll index c8899f6f582a9..a4f2b077cb066 100644 --- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll @@ -301,16 +301,16 @@ define void @outside_lattice(ptr noalias %p, ptr noalias %q, i32 %n) { ; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 4 ; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]] ; DEFAULT-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], 4 -; DEFAULT-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr null, i64 [[TMP3]] +; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP3]] ; DEFAULT-NEXT: [[IND_END2:%.*]] = trunc i64 [[N_VEC]] to i32 ; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]] ; DEFAULT: vector.body: ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; DEFAULT-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ null, [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] +; DEFAULT-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; DEFAULT-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> ; DEFAULT-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32 -; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[P:%.*]], i32 [[OFFSET_IDX]] +; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[P]], i32 [[OFFSET_IDX]] ; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i32 0 ; DEFAULT-NEXT: store <4 x ptr> [[VECTOR_GEP]], ptr [[TMP6]], align 8 ; DEFAULT-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[Q:%.*]], i32 [[OFFSET_IDX]] @@ -325,7 +325,7 @@ define void @outside_lattice(ptr noalias %p, ptr noalias %q, i32 %n) { ; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 
[[UMAX1]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; DEFAULT: scalar.ph: -; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ null, [[ENTRY:%.*]] ], [ null, [[VECTOR_SCEVCHECK]] ] +; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[P]], [[ENTRY:%.*]] ], [ [[P]], [[VECTOR_SCEVCHECK]] ] ; DEFAULT-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] ; DEFAULT-NEXT: br label [[FOR_BODY:%.*]] ; DEFAULT: for.body: @@ -357,16 +357,16 @@ define void @outside_lattice(ptr noalias %p, ptr noalias %q, i32 %n) { ; STRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 4 ; STRIDED-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]] ; STRIDED-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], 4 -; STRIDED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr null, i64 [[TMP3]] +; STRIDED-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP3]] ; STRIDED-NEXT: [[IND_END2:%.*]] = trunc i64 [[N_VEC]] to i32 ; STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; STRIDED: vector.body: ; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; STRIDED-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ null, [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] +; STRIDED-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; STRIDED-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; STRIDED-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> ; STRIDED-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32 -; STRIDED-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[P:%.*]], i32 [[OFFSET_IDX]] +; STRIDED-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[P]], i32 [[OFFSET_IDX]] ; STRIDED-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i32 0 ; 
STRIDED-NEXT: store <4 x ptr> [[VECTOR_GEP]], ptr [[TMP6]], align 8 ; STRIDED-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[Q:%.*]], i32 [[OFFSET_IDX]] @@ -381,7 +381,7 @@ define void @outside_lattice(ptr noalias %p, ptr noalias %q, i32 %n) { ; STRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]] ; STRIDED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; STRIDED: scalar.ph: -; STRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ null, [[ENTRY:%.*]] ], [ null, [[VECTOR_SCEVCHECK]] ] +; STRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[P]], [[ENTRY:%.*]] ], [ [[P]], [[VECTOR_SCEVCHECK]] ] ; STRIDED-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] ; STRIDED-NEXT: br label [[FOR_BODY:%.*]] ; STRIDED: for.body: @@ -402,7 +402,7 @@ entry: br label %for.body for.body: ; preds = %for.body, %entry - %iv.ptr = phi ptr [ null, %entry ], [ %iv.ptr.next, %for.body ] + %iv.ptr = phi ptr [ %p, %entry ], [ %iv.ptr.next, %for.body ] %iv.int = phi i32 [ 0, %entry ], [ %iv.int.next, %for.body ] %p.gep = getelementptr inbounds ptr, ptr %p, i32 %iv.int diff --git a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll index 7957145644ed3..7654bc9a141e0 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll @@ -74,12 +74,12 @@ define void @iv_expand(ptr %p, i64 %n) { ; CHECK: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next> -; CHECK-NEXT: ir<%i> = WIDEN-INDUCTION ir<0>, ir<1>, vp<%0> +; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<%0> ; CHECK-NEXT: vp<%4> = SCALAR-STEPS vp<%3>, ir<1> ; CHECK-NEXT: CLONE ir<%q> = getelementptr ir<%p>, vp<%4> ; CHECK-NEXT: vp<%5> = vector-pointer ir<%q> ; CHECK-NEXT: WIDEN 
ir<%x> = load vp<%5> -; CHECK-NEXT: WIDEN ir<%y> = add ir<%x>, ir<%i> +; CHECK-NEXT: WIDEN ir<%y> = add ir<%x>, ir<%iv> ; CHECK-NEXT: vp<%6> = vector-pointer ir<%q> ; CHECK-NEXT: WIDEN store vp<%6>, ir<%y> ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%3>, vp<%1> @@ -103,28 +103,30 @@ define void @iv_expand(ptr %p, i64 %n) { ; CHECK-EMPTY: ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT-SCALAR vp<[[SCALAR_PHI:%.+]]> = phi [ ir<0>, ir-bb ], [ vp<%index.next>, vector.body ] -; CHECK-NEXT: WIDEN-PHI ir<%i> = phi [ vp<[[INDUCTION]]>, ir-bb ], [ vp<%vec.ind.next>, vector.body ] +; CHECK-NEXT: WIDEN-PHI ir<%iv> = phi [ vp<[[INDUCTION]]>, ir-bb ], [ vp<%vec.ind.next>, vector.body ] ; CHECK-NEXT: CLONE ir<%q> = getelementptr ir<%p>, vp<[[SCALAR_PHI]]> ; CHECK-NEXT: vp<[[VEC_PTR_1:%.+]]> = vector-pointer ir<%q> ; CHECK-NEXT: WIDEN ir<%x> = load vp<[[VEC_PTR_1]]> -; CHECK-NEXT: WIDEN ir<%y> = add ir<%x>, ir<%i> +; CHECK-NEXT: WIDEN ir<%y> = add ir<%x>, ir<%iv> ; CHECK-NEXT: vp<[[VEC_PTR_2:%.+]]> = vector-pointer ir<%q> ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR_2]]>, ir<%y> ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[SCALAR_PHI]]>, ir<8> -; CHECK-NEXT: EMIT vp<%vec.ind.next> = add ir<%i>, vp<[[BROADCAST_INC]]> +; CHECK-NEXT: EMIT vp<%vec.ind.next> = add ir<%iv>, vp<[[BROADCAST_INC]]> ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, ir<%n.vec> ; CHECK-NEXT: Successor(s): middle.block, vector.body entry: br label %loop + loop: - %i = phi i64 [0, %entry], [%i.next, %loop] - %q = getelementptr i64, ptr %p, i64 %i + %iv = phi i64 [0, %entry], [%iv.next, %loop] + %q = getelementptr i64, ptr %p, i64 %iv %x = load i64, ptr %q - %y = add i64 %x, %i + %y = add i64 %x, %iv store i64 %y, ptr %q - %i.next = add i64 %i, 1 - %done = icmp eq i64 %i.next, %n + %iv.next = add i64 %iv, 1 + %done = icmp eq i64 %iv.next, %n br i1 %done, label %exit, label %loop + exit: ret void }