diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index f2d4fe7f6ac4a..4671ccbb841c9 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7235,8 +7235,14 @@ static void fixReductionScalarResumeWhenVectorizingEpilog( cast(EpiRedResult->getOperand(0)); const RecurrenceDescriptor &RdxDesc = EpiRedHeaderPhi->getRecurrenceDescriptor(); - Value *MainResumeValue = - EpiRedHeaderPhi->getStartValue()->getUnderlyingValue(); + Value *MainResumeValue; + if (auto *VPI = dyn_cast(EpiRedHeaderPhi->getStartValue())) { + assert((VPI->getOpcode() == VPInstruction::Broadcast || + VPI->getOpcode() == VPInstruction::ReductionStartVector) && + "unexpected start recipe"); + MainResumeValue = VPI->getOperand(0)->getUnderlyingValue(); + } else + MainResumeValue = EpiRedHeaderPhi->getStartValue()->getUnderlyingValue(); if (RecurrenceDescriptor::isAnyOfRecurrenceKind( RdxDesc.getRecurrenceKind())) { Value *StartV = EpiRedResult->getOperand(1)->getLiveInIRValue(); @@ -9173,7 +9179,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( continue; const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); - Type *PhiTy = PhiR->getOperand(0)->getLiveInIRValue()->getType(); + Type *PhiTy = PhiR->getUnderlyingValue()->getType(); // If tail is folded by masking, introduce selects between the phi // and the users outside the vector region of each reduction, at the // beginning of the dedicated latch block. @@ -9311,6 +9317,27 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( // start value. PhiR->setOperand(0, Plan->getOrAddLiveIn(RdxDesc.getSentinelValue())); } + RecurKind RK = RdxDesc.getRecurrenceKind(); + if ((!RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) && + !RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK) && + !RecurrenceDescriptor::isMinMaxRecurrenceKind(RK))) { + VPBuilder PHBuilder(Plan->getVectorPreheader()); + VPValue *Iden = Plan->getOrAddLiveIn( + getRecurrenceIdentity(RK, PhiTy, RdxDesc.getFastMathFlags())); + // If the PHI is used by a partial reduction, set the scale factor. + unsigned ScaleFactor = + RecipeBuilder.getScalingForReduction(RdxDesc.getLoopExitInstr()) + .value_or(1); + Type *I32Ty = IntegerType::getInt32Ty(PhiTy->getContext()); + auto *ScaleFactorVPV = + Plan->getOrAddLiveIn(ConstantInt::get(I32Ty, ScaleFactor)); + VPValue *StartV = PHBuilder.createNaryOp( + VPInstruction::ReductionStartVector, + {PhiR->getStartValue(), Iden, ScaleFactorVPV}, + PhiTy->isFloatingPointTy() ? RdxDesc.getFastMathFlags() + : FastMathFlags()); + PhiR->setOperand(0, StartV); + } } for (VPRecipeBase *R : ToDelete) R->eraseFromParent(); @@ -9816,6 +9843,15 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L, Value *Cmp = Builder.CreateICmpEQ(ResumeV, ToFrozen[StartV]); ResumeV = Builder.CreateSelect(Cmp, RdxDesc.getSentinelValue(), ResumeV); + } else { + VPValue *StartVal = Plan.getOrAddLiveIn(ResumeV); + auto *PhiR = dyn_cast(&R); + if (auto *VPI = dyn_cast(PhiR->getStartValue())) { + assert(VPI->getOpcode() == VPInstruction::ReductionStartVector && + "unexpected start value"); + VPI->setOperand(0, StartVal); + continue; + } } } else { // Retrieve the induction resume values for wide inductions from diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 273df55188c16..bbcbfee4e471b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -934,6 +934,10 @@ class VPInstruction : public VPRecipeWithIRFlags, /// Scale the first operand (vector step) by the second operand /// (scalar-step). Casts both operands to the result type if needed. WideIVStep, + /// Start vector for reductions with 3 operands: the original start value, + /// the identity value for the reduction and an integer indicating the + /// scaling factor. + ReductionStartVector, // Creates a step vector starting from 0 to VF with a step of 1. StepVector, @@ -2231,7 +2235,7 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe, bool onlyFirstLaneUsed(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); - return Op == getStartValue(); + return isOrdered() || isInLoop(); } }; diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index 81fc93bbf51fd..76da5b0314a8e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -74,6 +74,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) { switch (Opcode) { case Instruction::ExtractElement: case Instruction::Freeze: + case VPInstruction::ReductionStartVector: return inferScalarType(R->getOperand(0)); case Instruction::Select: { Type *ResTy = inferScalarType(R->getOperand(1)); @@ -395,6 +396,10 @@ static unsigned getVFScaleFactor(VPRecipeBase *R) { return RR->getVFScaleFactor(); if (auto *RR = dyn_cast(R)) return RR->getVFScaleFactor(); + assert( + (!isa(R) || cast(R)->getOpcode() != + VPInstruction::ReductionStartVector) && + "getting scaling factor of reduction-start-vector not implemented yet"); return 1; } diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 78ad9b797fb6c..62b99d98a2b5e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -604,6 +604,20 @@ Value *VPInstruction::generate(VPTransformState &State) { return Builder.CreateVectorSplat( State.VF, State.get(getOperand(0), /*IsScalar*/ true), "broadcast"); } + case VPInstruction::ReductionStartVector: { + if (State.VF.isScalar()) + return State.get(getOperand(0), true); + IRBuilderBase::FastMathFlagGuard FMFG(Builder); + Builder.setFastMathFlags(getFastMathFlags()); + // If this start vector is scaled then it should produce a vector with fewer + // elements than the VF. + ElementCount VF = State.VF.divideCoefficientBy( + cast(getOperand(2)->getLiveInIRValue())->getZExtValue()); + auto *Iden = Builder.CreateVectorSplat(VF, State.get(getOperand(1), true)); + Constant *Zero = Builder.getInt32(0); + return Builder.CreateInsertElement(Iden, State.get(getOperand(0), true), + Zero); + } case VPInstruction::ComputeAnyOfResult: { // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary // and will be removed by breaking up the recipe further. @@ -899,6 +913,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const { case VPInstruction::PtrAdd: case VPInstruction::WideIVStep: case VPInstruction::StepVector: + case VPInstruction::ReductionStartVector: return false; default: return true; @@ -929,6 +944,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { case VPInstruction::CanonicalIVIncrementForPart: case VPInstruction::BranchOnCount: case VPInstruction::BranchOnCond: + case VPInstruction::ReductionStartVector: return true; case VPInstruction::PtrAdd: return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this); @@ -1034,6 +1050,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, case VPInstruction::FirstActiveLane: O << "first-active-lane"; break; + case VPInstruction::ReductionStartVector: + O << "reduction-start-vector"; + break; default: O << Instruction::getOpcodeName(getOpcode()); } @@ -1617,6 +1636,7 @@ bool VPIRFlags::flagsValidForOpcode(unsigned Opcode) const { Opcode == Instruction::FDiv || Opcode == Instruction::FRem || Opcode == Instruction::FCmp || Opcode == Instruction::Select || Opcode == VPInstruction::WideIVStep || + Opcode == VPInstruction::ReductionStartVector || Opcode == VPInstruction::ComputeReductionResult; case OperationType::NonNegOp: return Opcode == Instruction::ZExt; @@ -3847,17 +3867,19 @@ void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent, #endif void VPReductionPHIRecipe::execute(VPTransformState &State) { - // If this phi is fed by a scaled reduction then it should output a - // vector with fewer elements than the VF. - ElementCount VF = State.VF.divideCoefficientBy(VFScaleFactor); + // Reductions do not have to start at zero. They can start with + // any loop invariant values. + VPValue *StartVPV = getStartValue(); // In order to support recurrences we need to be able to vectorize Phi nodes. // Phi nodes have cycles, so we need to vectorize them in two stages. This is // stage #1: We create a new vector PHI node with no incoming edges. We'll use // this value when we vectorize all of the instructions that use the PHI. - auto *ScalarTy = State.TypeAnalysis.inferScalarType(this); + BasicBlock *VectorPH = + State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0)); bool ScalarPHI = State.VF.isScalar() || IsInLoop; - Type *VecTy = ScalarPHI ? ScalarTy : VectorType::get(ScalarTy, VF); + Value *StartV = State.get(StartVPV, ScalarPHI); + Type *VecTy = StartV->getType(); BasicBlock *HeaderBB = State.CFG.PrevBB; assert(State.CurrentParentLoop->getHeader() == HeaderBB && @@ -3866,49 +3888,7 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) { Phi->insertBefore(HeaderBB->getFirstInsertionPt()); State.set(this, Phi, IsInLoop); - BasicBlock *VectorPH = - State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0)); - // Create start and identity vector values for the reduction in the preheader. - // TODO: Introduce recipes in VPlan preheader to create initial values. - IRBuilderBase::InsertPointGuard IPBuilder(State.Builder); - State.Builder.SetInsertPoint(VectorPH->getTerminator()); - - // Reductions do not have to start at zero. They can start with - // any loop invariant values. - VPValue *StartVPV = getStartValue(); - RecurKind RK = RdxDesc.getRecurrenceKind(); - if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) || - RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) || - RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) { - // [I|F]FindLastIV will use a sentinel value to initialize the reduction - // phi or the resume value from the main vector loop when vectorizing the - // epilogue loop. In the exit block, ComputeReductionResult will generate - // checks to verify if the reduction result is the sentinel value. If the - // result is the sentinel value, it will be corrected back to the start - // value. - // TODO: The sentinel value is not always necessary. When the start value is - // a constant, and smaller than the start value of the induction variable, - // the start value can be directly used to initialize the reduction phi. - Phi->addIncoming(State.get(StartVPV, ScalarPHI), VectorPH); - return; - } - - Value *Iden = getRecurrenceIdentity(RK, VecTy->getScalarType(), - RdxDesc.getFastMathFlags()); - unsigned CurrentPart = getUnrollPart(*this); - Value *StartV = StartVPV->getLiveInIRValue(); - if (!ScalarPHI) { - if (CurrentPart == 0) { - Iden = State.Builder.CreateVectorSplat(VF, Iden); - Constant *Zero = State.Builder.getInt32(0); - StartV = State.Builder.CreateInsertElement(Iden, StartV, Zero); - } else { - Iden = State.Builder.CreateVectorSplat(VF, Iden); - } - } - - Value *StartVal = (CurrentPart == 0) ? StartV : Iden; - Phi->addIncoming(StartVal, VectorPH); + Phi->addIncoming(StartV, VectorPH); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index ea617f042566b..dc3c7bfe5cd1a 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1153,6 +1153,16 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { return; } } + // Simplify redundant ReductionStartVector recipes after unrolling. + VPValue *StartV; + if (match(Def, m_VPInstruction( + m_VPValue(StartV), m_VPValue(), m_VPValue()))) { + Def->replaceUsesWithIf(StartV, [](const VPUser &U, unsigned Idx) { + auto *PhiR = dyn_cast(&U); + return PhiR && PhiR->isInLoop(); + }); + return; + } } void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy) { diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp index 335301a927ceb..e4c068ef175bc 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp @@ -25,6 +25,7 @@ #include "llvm/IR/Intrinsics.h" using namespace llvm; +using namespace llvm::VPlanPatternMatch; namespace { @@ -223,6 +224,22 @@ void UnrollState::unrollHeaderPHIByUF(VPHeaderPHIRecipe *R, Copy->addOperand(R); Copy->addOperand(getConstantVPV(Part)); } else if (RdxPhi) { + // If the start value is a ReductionStartVector, use the identity value + // (second operand) for unrolled parts. If the scaling factor is > 1, + // create a new ReductionStartVector with the scale factor and both + // operands set to the identity value. + if (auto *VPI = dyn_cast(RdxPhi->getStartValue())) { + assert(VPI->getOpcode() == VPInstruction::ReductionStartVector && + "unexpected start VPInstruction"); + if (match(VPI->getOperand(2), m_SpecificInt(1))) { + Copy->setOperand(0, VPI->getOperand(1)); + } else if (Part == 1) { + auto *C = VPI->clone(); + C->setOperand(0, C->getOperand(1)); + C->insertAfter(VPI); + addUniformForAllParts(C); + } + } Copy->addOperand(getConstantVPV(Part)); } else { assert(isa(R) && @@ -233,7 +250,6 @@ void UnrollState::unrollHeaderPHIByUF(VPHeaderPHIRecipe *R, /// Handle non-header-phi recipes. void UnrollState::unrollRecipeByUF(VPRecipeBase &R) { - using namespace llvm::VPlanPatternMatch; if (match(&R, m_BranchOnCond(m_VPValue())) || match(&R, m_BranchOnCount(m_VPValue(), m_VPValue()))) return; @@ -301,7 +317,6 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) { } } -using namespace llvm::VPlanPatternMatch; void UnrollState::unrollBlock(VPBlockBase *VPB) { auto *VPR = dyn_cast(VPB); if (VPR) { diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll index d4494089f7083..5508a65744c6b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll @@ -60,16 +60,16 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) { ; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i32 [[TMP2]], 8 ; CHECK-NEXT: [[N_VEC5:%.*]] = sub i32 [[TMP2]], [[N_MOD_VF4]] ; CHECK-NEXT: [[TMP16:%.*]] = trunc i32 [[N_VEC5]] to i8 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i8> poison, i8 [[TMP15]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i8> [[DOTSPLATINSERT]], <8 x i8> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i8> [[DOTSPLAT]], -; CHECK-NEXT: [[DOTSPLATINSERT10:%.*]] = insertelement <8 x i8> poison, i8 [[TMP15]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT10:%.*]] = insertelement <8 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0 ; CHECK-NEXT: [[DOTSPLAT11:%.*]] = shufflevector <8 x i8> [[DOTSPLATINSERT10]], <8 x i8> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i8> [[DOTSPLAT11]], ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX6:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT13:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND7:%.*]] = phi <8 x i8> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT8:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI9:%.*]] = phi <8 x i8> [ [[DOTSPLAT11]], %[[VEC_EPILOG_PH]] ], [ [[TMP20:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI9:%.*]] = phi <8 x i8> [ [[DOTSPLAT]], %[[VEC_EPILOG_PH]] ], [ [[TMP20:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[IV:%.*]] = trunc i32 [[INDEX6]] to i8 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[A]], i8 [[IV]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[GEP]], i32 0 @@ -87,12 +87,12 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) { ; CHECK-NEXT: [[CMP_N16:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC5]] ; CHECK-NEXT: br i1 [[CMP_N16]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL17:%.*]] = phi i8 [ [[TMP16]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX18:%.*]] = phi i8 [ [[RDX_SELECT15]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL15:%.*]] = phi i8 [ [[TMP16]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX16:%.*]] = phi i8 [ [[RDX_SELECT15]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i8 [ [[BC_RESUME_VAL17]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[RDX:%.*]] = phi i8 [ [[BC_MERGE_RDX18]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV1:%.*]] = phi i8 [ [[BC_RESUME_VAL15]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i8 [ [[BC_MERGE_RDX16]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[A]], i8 [[IV1]] ; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[GEP1]], align 8 ; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 3 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll index 0e5e785a94636..c3fc91c4574f1 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll @@ -161,8 +161,8 @@ define void @dotp_small_epilogue_vf(i64 %idx.neg, i8 %a) #1 { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i8> poison, i8 [[A]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT6]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[IDX_NEG]], [[N_VEC5]] -; CHECK-NEXT: [[TMP8:%.*]] = sext <4 x i8> [[BROADCAST_SPLAT7]] to <4 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[BC_MERGE_RDX]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = sext <4 x i8> [[BROADCAST_SPLAT7]] to <4 x i32> ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX9:%.*]] = phi i64 [ [[IV]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT14:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll index a9d5b5dda8bb6..8095f258ea183 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll @@ -17,12 +17,13 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) { ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: +; CHECK-NEXT: EMIT vp<[[RDX_START:%.+]]> = reduction-start-vector ir<0>, ir<0>, ir<4> ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> -; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[ACC:%.+]]> = phi ir<0>, ir<[[REDUCE:%.+]]> (VF scaled by 1/4) +; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[ACC:%.+]]> = phi vp<[[RDX_START]]>, ir<[[REDUCE:%.+]]> (VF scaled by 1/4) ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: CLONE ir<%gep.a> = getelementptr ir<%a>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[PTR_A:%.+]]> = vector-pointer ir<%gep.a> @@ -83,11 +84,12 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) { ; CHECK-NEXT: Successor(s): ir-bb, ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: +; CHECK-NEXT: EMIT vp<[[RDX_START:%.+]]> = reduction-start-vector ir<0>, ir<0>, ir<4> ; CHECK-NEXT: Successor(s): vector.body ; CHECK-EMPTY: ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT-SCALAR vp<[[EP_IV:%.+]]> = phi [ ir<0>, ir-bb ], [ vp<%index.next>, vector.body ] -; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%accum> = phi ir<0>, ir<%add> (VF scaled by 1/4) +; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%accum> = phi vp<[[RDX_START]]>, ir<%add> (VF scaled by 1/4) ; CHECK-NEXT: CLONE ir<%gep.a> = getelementptr ir<%a>, vp<[[EP_IV]]> ; CHECK-NEXT: vp<[[PTR_A:%.+]]> = vector-pointer ir<%gep.a> ; CHECK-NEXT: WIDEN ir<%load.a> = load vp<[[PTR_A]]> diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll index f1947dec2ea23..b4987127a513d 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll @@ -153,10 +153,10 @@ define i1 @select_exit_cond(ptr %start, ptr %end, i64 %N) { ; CHECK-NEXT: [[N_MOD_VF24:%.*]] = urem i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_VEC25:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF24]] ; CHECK-NEXT: [[TMP56:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC25]] +; CHECK-NEXT: [[TMP57:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[VEC_EPILOG_RESUME_VAL]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i64> [[DOTSPLAT]], -; CHECK-NEXT: [[TMP57:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0 ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX38:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT32:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll index 01a7ea4ffcd05..3f17c95f7ca95 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll @@ -277,9 +277,9 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1 ; IF-EVL-OUTLOOP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-OUTLOOP-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = insertelement zeroinitializer, i32 [[START]], i32 0 ; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 ; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer -; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = insertelement zeroinitializer, i32 [[START]], i32 0 ; IF-EVL-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]] ; IF-EVL-OUTLOOP: vector.body: ; IF-EVL-OUTLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -581,8 +581,8 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-OUTLOOP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-OUTLOOP-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 -; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = call @llvm.stepvector.nxv4i32() ; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement zeroinitializer, i32 [[START]], i32 0 +; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = call @llvm.stepvector.nxv4i32() ; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul [[TMP12]], splat (i32 1) ; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP14]] ; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP10]] to i32 @@ -771,8 +771,8 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-OUTLOOP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-OUTLOOP-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 -; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = call @llvm.stepvector.nxv4i32() ; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement zeroinitializer, i32 [[START]], i32 0 +; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = call @llvm.stepvector.nxv4i32() ; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul [[TMP12]], splat (i32 1) ; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP14]] ; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP10]] to i32 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll index 2e50c02afadd0..af36f184ea820 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll @@ -137,9 +137,9 @@ define i32 @mul(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 8 ; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1 +; IF-EVL-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> splat (i32 1), i32 [[START:%.*]], i32 0 ; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 ; IF-EVL-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT1]], <8 x i64> poison, <8 x i32> zeroinitializer -; IF-EVL-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> splat (i32 1), i32 [[START:%.*]], i32 0 ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -1220,9 +1220,9 @@ define float @fmul(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 8 ; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1 +; IF-EVL-NEXT: [[TMP9:%.*]] = insertelement <8 x float> splat (float 1.000000e+00), float [[START:%.*]], i32 0 ; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 ; IF-EVL-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT1]], <8 x i64> poison, <8 x i32> zeroinitializer -; IF-EVL-NEXT: [[TMP9:%.*]] = insertelement <8 x float> splat (float 1.000000e+00), float [[START:%.*]], i32 0 ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll index 8df21f30a7550..79f490aa16a97 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll @@ -37,13 +37,14 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP-NEXT: Successor(s): scalar.ph, vector.ph ; IF-EVL-OUTLOOP-EMPTY: ; IF-EVL-OUTLOOP-NEXT: vector.ph: +; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[RDX_START:%.]]> = reduction-start-vector ir<%start>, ir<0>, ir<1> ; IF-EVL-OUTLOOP-NEXT: Successor(s): vector loop ; IF-EVL-OUTLOOP-EMPTY: ; IF-EVL-OUTLOOP-NEXT: vector loop: { ; IF-EVL-OUTLOOP-NEXT: vector.body: ; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION ; IF-EVL-OUTLOOP-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]> -; IF-EVL-OUTLOOP-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX_PHI:%.+]]> = phi ir<%start>, vp<[[RDX_SELECT:%.+]]> +; IF-EVL-OUTLOOP-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX_PHI:%.+]]> = phi vp<[[RDX_START]]>, vp<[[RDX_SELECT:%.+]]> ; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%n>, vp<[[EVL_PHI]]> ; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> ; IF-EVL-OUTLOOP-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[VF]]> @@ -77,13 +78,14 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: Live-in ir<%n> = original trip-count ; IF-EVL-INLOOP-EMPTY: ; IF-EVL-INLOOP: vector.ph: +; IF-EVL-INLOOP-NEXT: EMIT vp<[[RDX_START:%.]]> = reduction-start-vector ir<%start>, ir<0>, ir<1> ; IF-EVL-INLOOP-NEXT: Successor(s): vector loop ; IF-EVL-INLOOP-EMPTY: ; IF-EVL-INLOOP-NEXT: vector loop: { ; IF-EVL-INLOOP-NEXT: vector.body: ; IF-EVL-INLOOP-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION ; IF-EVL-INLOOP-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]> -; IF-EVL-INLOOP-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX_PHI:%.+]]> = phi ir<%start>, ir<[[RDX_NEXT:%.+]]> +; IF-EVL-INLOOP-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX_PHI:%.+]]> = phi vp<[[RDX_START]]>, ir<[[RDX_NEXT:%.+]]> ; IF-EVL-INLOOP-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%n>, vp<[[EVL_PHI]]> ; IF-EVL-INLOOP-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> ; IF-EVL-INLOOP-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[VF]]> @@ -116,12 +118,13 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: Live-in ir<%n> = original trip-count ; NO-VP-OUTLOOP-EMPTY: ; NO-VP-OUTLOOP: vector.ph: +; NO-VP-OUTLOOP-NEXT: EMIT vp<[[RDX_START:%.]]> = reduction-start-vector ir<%start>, ir<0>, ir<1> ; NO-VP-OUTLOOP-NEXT: Successor(s): vector loop ; NO-VP-OUTLOOP-EMPTY: ; NO-VP-OUTLOOP-NEXT: vector loop: { ; NO-VP-OUTLOOP-NEXT: vector.body: ; NO-VP-OUTLOOP-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION -; NO-VP-OUTLOOP-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX_PHI:%.+]]> = phi ir<%start>, ir<[[RDX_NEXT:%.+]]> +; NO-VP-OUTLOOP-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX_PHI:%.+]]> = phi vp<[[RDX_START]]>, ir<[[RDX_NEXT:%.+]]> ; NO-VP-OUTLOOP-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1>, vp<[[VF]]> ; NO-VP-OUTLOOP-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]> ; NO-VP-OUTLOOP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]> @@ -164,12 +167,13 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-NEXT: Live-in ir<%n> = original trip-count ; NO-VP-INLOOP-EMPTY: ; NO-VP-INLOOP: vector.ph: +; NO-VP-INLOOP-NEXT: EMIT vp<[[RDX_START:%.]]> = reduction-start-vector ir<%start>, ir<0>, ir<1> ; NO-VP-INLOOP-NEXT: Successor(s): vector loop ; NO-VP-INLOOP-EMPTY: ; NO-VP-INLOOP-NEXT: vector loop: { ; NO-VP-INLOOP-NEXT: vector.body: ; NO-VP-INLOOP-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION -; NO-VP-INLOOP-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX_PHI:%.+]]> = phi ir<%start>, ir<[[RDX_NEXT:%.+]]> +; NO-VP-INLOOP-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX_PHI:%.+]]> = phi vp<[[RDX_START]]>, ir<[[RDX_NEXT:%.+]]> ; NO-VP-INLOOP-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1>, vp<[[VF]]> ; NO-VP-INLOOP-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]> ; NO-VP-INLOOP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]> diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll index 7c42c3d9cd52e..2c6fe4f5c808e 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll @@ -1271,12 +1271,12 @@ define i32 @g(i64 %n) { ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP20]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF7:%.*]] = urem i32 [[TMP1]], 4 ; CHECK-NEXT: [[N_VEC8:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF7]] +; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[BC_MERGE_RDX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <4 x i64> poison, i64 [[N]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT14:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT9]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[BC_RESUME_VAL]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], -; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[BC_MERGE_RDX]], i32 0 ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX9:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT15:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/reduction-small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/reduction-small-size.ll index 04271ff3c9976..2cda2533e80e0 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/reduction-small-size.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/reduction-small-size.ll @@ -33,7 +33,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; CHECK: Cost of 1 for VF 2: exit condition instruction %exitcond = icmp eq i32 %lftr.wideiv, %n ; CHECK: Cost of 0 for VF 2: exit condition instruction %lftr.wideiv = trunc i64 %indvars.iv.next to i32 ; CHECK: Cost of 0 for VF 2: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> -; CHECK: Cost of 1 for VF 2: WIDEN-REDUCTION-PHI ir<%sum.013> = phi ir<0>, vp<[[EXT:%.+]]> +; CHECK: Cost of 1 for VF 2: WIDEN-REDUCTION-PHI ir<%sum.013> = phi vp<{{.+}}>, vp<[[EXT:%.+]]> ; CHECK: Cost of 0 for VF 2: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK: Cost of 0 for VF 2: CLONE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<[[STEPS]]> ; CHECK: Cost of 0 for VF 2: vp<[[VECP1:%.+]]> = vector-pointer ir<%arrayidx> diff --git a/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll index 800b6f3f28b7d..6bf8883fbf127 100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll @@ -44,16 +44,16 @@ define i64 @select_icmp_const(ptr %a, i64 %n) { ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = select i1 [[TMP14]], i64 -9223372036854775808, i64 [[BC_MERGE_RDX1]] ; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], 4 ; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[N]], [[N_MOD_VF2]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[BC_MERGE_RDX]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[DOTSPLAT]], -; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <4 x i64> poison, i64 [[BC_MERGE_RDX]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <4 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0 ; CHECK-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT8]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[DOTSPLAT9]], ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: ; CHECK-NEXT: [[TMP7:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND5:%.*]] = phi <4 x i64> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT6:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i64> [ [[DOTSPLAT9]], %[[VEC_EPILOG_PH]] ], [ [[TMP11:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i64> [ [[DOTSPLAT]], %[[VEC_EPILOG_PH]] ], [ [[TMP11:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i64>, ptr [[TMP9]], align 8 @@ -70,12 +70,12 @@ define i64 @select_icmp_const(ptr %a, i64 %n) { ; CHECK-NEXT: [[CMP_N12:%.*]] = icmp eq i64 [[N]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N12]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL15:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX16:%.*]] = phi i64 [ [[RDX_SELECT11]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 3, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL13:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX14:%.*]] = phi i64 [ [[RDX_SELECT11]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 3, %[[ITER_CHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL15]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX16]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL13]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX14]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 8 ; CHECK-NEXT: [[C:%.*]] = icmp eq i64 [[L]], 3 @@ -148,16 +148,16 @@ define i64 @select_fcmp_const_fast(ptr %a, i64 %n) { ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = select i1 [[TMP14]], i64 -9223372036854775808, i64 [[BC_MERGE_RDX1]] ; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], 4 ; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[N]], [[N_MOD_VF2]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[BC_MERGE_RDX]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[DOTSPLAT]], -; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <4 x i64> poison, i64 [[BC_MERGE_RDX]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <4 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0 ; CHECK-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT8]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[DOTSPLAT9]], ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: ; CHECK-NEXT: [[TMP7:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND5:%.*]] = phi <4 x i64> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT6:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i64> [ [[DOTSPLAT9]], %[[VEC_EPILOG_PH]] ], [ [[TMP11:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i64> [ [[DOTSPLAT]], %[[VEC_EPILOG_PH]] ], [ [[TMP11:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP9]], align 4 @@ -174,12 +174,12 @@ define i64 @select_fcmp_const_fast(ptr %a, i64 %n) { ; CHECK-NEXT: [[CMP_N12:%.*]] = icmp eq i64 [[N]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N12]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL15:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX16:%.*]] = phi i64 [ [[RDX_SELECT11]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 2, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL13:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX14:%.*]] = phi i64 [ [[RDX_SELECT11]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 2, %[[ITER_CHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL15]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX16]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL13]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX14]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP]], align 4 ; CHECK-NEXT: [[C:%.*]] = fcmp fast ueq float [[L]], 3.000000e+00 @@ -261,16 +261,16 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) { ; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i32 [[TMP2]], 4 ; CHECK-NEXT: [[N_VEC3:%.*]] = sub i32 [[TMP2]], [[N_MOD_VF2]] ; CHECK-NEXT: [[TMP13:%.*]] = trunc i32 [[N_VEC3]] to i8 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[TMP12]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i8> [[DOTSPLAT]], -; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <4 x i8> poison, i8 [[TMP12]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <4 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0 ; CHECK-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT8]], <4 x i8> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i8> [[DOTSPLAT9]], ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX4:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT11:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND5:%.*]] = phi <4 x i8> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT6:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i8> [ [[DOTSPLAT9]], %[[VEC_EPILOG_PH]] ], [ [[TMP17:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i8> [ [[DOTSPLAT]], %[[VEC_EPILOG_PH]] ], [ [[TMP17:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX4]] to i8 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[A]], i8 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i32 0 @@ -288,12 +288,12 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) { ; CHECK-NEXT: [[CMP_N14:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N14]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL15:%.*]] = phi i8 [ [[TMP13]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX16:%.*]] = phi i8 [ [[RDX_SELECT13]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL13:%.*]] = phi i8 [ [[TMP13]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX14:%.*]] = phi i8 [ [[RDX_SELECT13]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL15]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[RDX:%.*]] = phi i8 [ [[BC_MERGE_RDX16]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL13]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i8 [ [[BC_MERGE_RDX14]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[A]], i8 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[GEP]], align 8 ; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 3 diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll index 0b5074b3a1309..9d2719a6153ce 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -189,6 +189,7 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: +; CHECK-NEXT: EMIT vp<[[RDX_START:%.+]]> = reduction-start-vector ir<1234>, ir<-1>, ir<1> ; CHECK-NEXT: WIDEN-CAST ir<%recur.next> = sext ir<%y> to i32 ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: @@ -196,7 +197,7 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next> -; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%and.red> = phi ir<1234>, ir<%and.red.next> +; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%and.red> = phi vp<[[RDX_START]]>, ir<%and.red.next> ; CHECK-NEXT: EMIT vp<[[WIDEN_CAN:%.+]]> = WIDEN-CANONICAL-INDUCTION vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule vp<[[WIDEN_CAN]]>, vp<[[BTC]]> ; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%recur>, ir<%recur.next> diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll index 5837a49bf0efc..5c1f628bef6d5 100644 --- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll @@ -313,9 +313,9 @@ define void @bug18724(i1 %cond, ptr %ptr, i1 %cond.2, i64 %v.1, i32 %v.2) { ; VEC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP4]], 2 ; VEC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP4]], [[N_MOD_VF]] ; VEC-NEXT: [[IND_END:%.*]] = add i64 [[V_1]], [[N_VEC]] +; VEC-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> zeroinitializer, i32 [[V_2:%.*]], i32 0 ; VEC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[COND_2:%.*]], i64 0 ; VEC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer -; VEC-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> zeroinitializer, i32 [[V_2:%.*]], i32 0 ; VEC-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC: vector.body: ; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll index 1517ec84e9e01..5a5b06de69552 100644 --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -3117,9 +3117,9 @@ define i32 @testoverflowcheck() { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP3]], [[N_MOD_VF]] ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8 ; CHECK-NEXT: [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[DOTCAST]] +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> splat (i32 -1), i32 [[C_PROMOTED_I]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> splat (i32 -1), i32 [[C_PROMOTED_I]], i32 0 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -3161,9 +3161,9 @@ define i32 @testoverflowcheck() { ; IND-NEXT: [[N_VEC:%.*]] = and i32 [[TMP3]], 510 ; IND-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8 ; IND-NEXT: [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[DOTCAST]] +; IND-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> , i32 [[C_PROMOTED_I]], i64 0 ; IND-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i64 0 ; IND-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; IND-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> , i32 [[C_PROMOTED_I]], i64 0 ; IND-NEXT: br label [[VECTOR_BODY:%.*]] ; IND: vector.body: ; IND-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -3203,9 +3203,9 @@ define i32 @testoverflowcheck() { ; UNROLL-NEXT: [[N_VEC:%.*]] = and i32 [[TMP3]], 508 ; UNROLL-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8 ; UNROLL-NEXT: [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[DOTCAST]] +; UNROLL-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> , i32 [[C_PROMOTED_I]], i64 0 ; UNROLL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i64 0 ; UNROLL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; UNROLL-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> , i32 [[C_PROMOTED_I]], i64 0 ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -3246,9 +3246,9 @@ define i32 @testoverflowcheck() { ; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP3]], [[N_MOD_VF]] ; UNROLL-NO-IC-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8 ; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[DOTCAST]] +; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> splat (i32 -1), i32 [[C_PROMOTED_I]], i32 0 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i64 0 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> splat (i32 -1), i32 [[C_PROMOTED_I]], i32 0 ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -3293,9 +3293,9 @@ define i32 @testoverflowcheck() { ; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i32 [[TMP3]], 504 ; INTERLEAVE-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8 ; INTERLEAVE-NEXT: [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[DOTCAST]] +; INTERLEAVE-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> , i32 [[C_PROMOTED_I]], i64 0 ; INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 ; INTERLEAVE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; INTERLEAVE-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> , i32 [[C_PROMOTED_I]], i64 0 ; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] ; INTERLEAVE: vector.body: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll index d57e7fa526c94..95fbc4260587a 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll @@ -16,12 +16,13 @@ define float @print_reduction(i64 %n, ptr noalias %y) { ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: +; CHECK-NEXT: EMIT vp<[[RDX_START:%.+]]> = reduction-start-vector fast ir<0.000000e+00>, ir<0.000000e+00>, ir<1> ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> -; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi ir<0.000000e+00>, ir<%red.next> +; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi vp<[[RDX_START]]>, ir<%red.next> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%y>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx> @@ -84,12 +85,13 @@ define void @print_reduction_with_invariant_store(i64 %n, ptr noalias %y, ptr no ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: +; CHECK-NEXT: EMIT vp<[[RDX_START:%.+]]> = reduction-start-vector fast ir<0.000000e+00>, ir<0.000000e+00>, ir<1> ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> -; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi ir<0.000000e+00>, ir<%red.next> +; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi vp<[[RDX_START]]>, ir<%red.next> ; CHECK-NEXT: vp<[[IV:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%y>, vp<[[IV]]> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx> @@ -153,12 +155,13 @@ define float @print_fmuladd_strict(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: +; CHECK-NEXT: EMIT vp<[[RDX_START:%.+]]> = reduction-start-vector nnan ninf nsz ir<0.000000e+00>, ir<0.000000e+00>, ir<1> ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> -; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%sum.07> = phi ir<0.000000e+00>, ir<%muladd> +; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%sum.07> = phi vp<[[RDX_START]]>, ir<%muladd> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx> @@ -278,12 +281,13 @@ define i64 @print_extended_reduction(ptr nocapture readonly %x, ptr nocapture re ; CHECK-NEXT: Live-in ir<%n> = original trip-count ; CHECK-EMPTY: ; CHECK: vector.ph: +; CHECK-NEXT: EMIT vp<[[RDX_START:%.+]]> = reduction-start-vector ir<0>, ir<0>, ir<1> ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT:%.+]]> -; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi ir<0>, ir<[[RDX_NEXT:%.+]]> +; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi vp<[[RDX_START]]>, ir<[[RDX_NEXT:%.+]]> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%x>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[ADDR:%.+]]> = vector-pointer ir<%arrayidx> @@ -322,12 +326,13 @@ define i64 @print_mulacc(ptr nocapture readonly %x, ptr nocapture readonly %y, i ; CHECK-NEXT: Live-in ir<%n> = original trip-count ; CHECK-EMPTY: ; CHECK: vector.ph: +; CHECK-NEXT: EMIT vp<[[RDX_START:%.+]]> = reduction-start-vector ir<0>, ir<0>, ir<1> ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT:%.+]]> -; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi ir<0>, ir<[[RDX_NEXT:%.+]]> +; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi vp<[[RDX_START]]>, ir<[[RDX_NEXT:%.+]]> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<[[ARRAYIDX0:%.+]]> = getelementptr inbounds ir<%x>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[ADDR0:%.+]]> = vector-pointer ir<[[ARRAYIDX0]]> @@ -371,12 +376,13 @@ define i64 @print_mulacc_extended(ptr nocapture readonly %x, ptr nocapture reado ; CHECK-NEXT: Live-in ir<%n> = original trip-count ; CHECK-EMPTY: ; CHECK: vector.ph: +; CHECK-NEXT: EMIT vp<[[RDX_START:%.+]]> = reduction-start-vector ir<0>, ir<0>, ir<1> ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT:%.+]]> -; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi ir<0>, ir<[[RDX_NEXT:%.+]]> +; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi vp<[[RDX_START]]>, ir<[[RDX_NEXT:%.+]]> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<[[ARRAYIDX0:%.+]]> = getelementptr inbounds ir<%x>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[ADDR0:%.+]]> = vector-pointer ir<[[ARRAYIDX0]]>