diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index e595347d62bf5..0ffcf81f3032b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -1005,12 +1005,14 @@ void VPlan::execute(VPTransformState *State) { continue; } - auto *PhiR = cast(&R); - bool NeedsScalar = isa(PhiR) || + auto *PhiR = cast(&R); + // VPInstructions currently model scalar Phis only. + bool NeedsScalar = isa(PhiR) || (isa(PhiR) && cast(PhiR)->isInLoop()); Value *Phi = State->get(PhiR, NeedsScalar); - Value *Val = State->get(PhiR->getBackedgeValue(), NeedsScalar); + // VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does not. + Value *Val = State->get(PhiR->getOperand(1), NeedsScalar); cast(Phi)->addIncoming(Val, VectorLatchBB); } } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 43dc30c40bb53..f78eb84b0c445 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -441,9 +441,7 @@ class VPRecipeBase : public ilist_node_with_parent, bool mayHaveSideEffects() const; /// Returns true for PHI-like recipes. - bool isPhi() const { - return getVPDefID() >= VPFirstPHISC && getVPDefID() <= VPLastPHISC; - } + bool isPhi() const; /// Returns true if the recipe may read from memory. bool mayReadFromMemory() const; @@ -1879,45 +1877,6 @@ class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe, #endif }; -/// Recipe to generate a scalar PHI. Used to generate code for recipes that -/// produce scalar header phis, including VPCanonicalIVPHIRecipe and -/// VPEVLBasedIVPHIRecipe. 
-class VPScalarPHIRecipe : public VPHeaderPHIRecipe { - std::string Name; - -public: - VPScalarPHIRecipe(VPValue *Start, VPValue *BackedgeValue, DebugLoc DL, - StringRef Name) - : VPHeaderPHIRecipe(VPDef::VPScalarPHISC, nullptr, Start, DL), - Name(Name.str()) { - addOperand(BackedgeValue); - } - - ~VPScalarPHIRecipe() override = default; - - VPScalarPHIRecipe *clone() override { - llvm_unreachable("cloning not implemented yet"); - } - - VP_CLASSOF_IMPL(VPDef::VPScalarPHISC) - - /// Generate the phi/select nodes. - void execute(VPTransformState &State) override; - - /// Returns true if the recipe only uses the first lane of operand \p Op. - bool onlyFirstLaneUsed(const VPValue *Op) const override { - assert(is_contained(operands(), Op) && - "Op must be an operand of the recipe"); - return true; - } - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - /// Print the recipe. - void print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const override; -#endif -}; - /// A recipe for widened phis. Incoming values are operands of the recipe and /// their operand index corresponds to the incoming predecessor block. If the /// recipe is placed in an entry block to a (non-replicate) region, it must have diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index 6f6875f0e5e0e..9b0720760df40 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -71,6 +71,10 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) { } case VPInstruction::ExplicitVectorLength: return Type::getIntNTy(Ctx, 32); + case Instruction::PHI: + // Infer the type of first operand only, as other operands of header phi's + // may lead to infinite recursion. 
+ return inferScalarType(R->getOperand(0)); case VPInstruction::FirstOrderRecurrenceSplice: case VPInstruction::Not: case VPInstruction::ResumePhi: @@ -236,14 +240,14 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) { TypeSwitch(V->getDefiningRecipe()) .Case([this](const auto *R) { - // Handle header phi recipes, except VPWidenIntOrFpInduction - // which needs special handling due it being possibly truncated. - // TODO: consider inferring/caching type of siblings, e.g., - // backedge value, here and in cases below. - return inferScalarType(R->getStartValue()); - }) + VPWidenPointerInductionRecipe, VPEVLBasedIVPHIRecipe>( + [this](const auto *R) { + // Handle header phi recipes, except VPWidenIntOrFpInduction + // which needs special handling due it being possibly truncated. + // TODO: consider inferring/caching type of siblings, e.g., + // backedge value, here and in cases below. + return inferScalarType(R->getStartValue()); + }) .Case( [](const auto *R) { return R->getScalarType(); }) .Case= VPFirstPHISC && getVPDefID() <= VPLastPHISC) || + (isa(this) && + cast(this)->getOpcode() == Instruction::PHI); +} + InstructionCost VPPartialReductionRecipe::computeCost(ElementCount VF, VPCostContext &Ctx) const { @@ -418,6 +424,7 @@ bool VPInstruction::canGenerateScalarForFirstLane() const { return true; switch (Opcode) { case Instruction::ICmp: + case Instruction::PHI: case Instruction::Select: case VPInstruction::BranchOnCond: case VPInstruction::BranchOnCount: @@ -467,6 +474,17 @@ Value *VPInstruction::generate(VPTransformState &State) { Value *B = State.get(getOperand(1), OnlyFirstLaneUsed); return Builder.CreateCmp(getPredicate(), A, B, Name); } + case Instruction::PHI: { + assert(getParent() == + getParent()->getPlan()->getVectorLoopRegion()->getEntry() && + "VPInstructions with PHI opcodes must be used for header phis only " + "at the moment"); + BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); + Value *Start = State.get(getOperand(0), VPLane(0)); 
+ PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, Name); + Phi->addIncoming(Start, VectorPH); + return Phi; + } case Instruction::Select: { bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this); Value *Cond = State.get(getOperand(0), OnlyFirstLaneUsed); @@ -771,7 +789,8 @@ bool VPInstruction::isVectorToScalar() const { } bool VPInstruction::isSingleScalar() const { - return getOpcode() == VPInstruction::ResumePhi; + return getOpcode() == VPInstruction::ResumePhi || + getOpcode() == Instruction::PHI; } #if !defined(NDEBUG) @@ -849,6 +868,8 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { switch (getOpcode()) { default: return false; + case Instruction::PHI: + return true; case Instruction::ICmp: case Instruction::Select: case Instruction::Or: @@ -3292,11 +3313,12 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) { BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); PHINode *NewPointerPhi = nullptr; if (CurrentPart == 0) { - auto *IVR = cast(&getParent() - ->getPlan() - ->getVectorLoopRegion() - ->getEntryBasicBlock() - ->front()); + auto *IVR = getParent() + ->getPlan() + ->getVectorLoopRegion() + ->getEntryBasicBlock() + ->front() + .getVPSingleValue(); PHINode *CanonicalIV = cast(State.get(IVR, /*IsScalar*/ true)); NewPointerPhi = PHINode::Create(ScStValueType, 2, "pointer.phi", CanonicalIV->getIterator()); @@ -3665,22 +3687,3 @@ void VPEVLBasedIVPHIRecipe::print(raw_ostream &O, const Twine &Indent, printOperands(O, SlotTracker); } #endif - -void VPScalarPHIRecipe::execute(VPTransformState &State) { - BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); - Value *Start = State.get(getStartValue(), VPLane(0)); - PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, Name); - Phi->addIncoming(Start, VectorPH); - Phi->setDebugLoc(getDebugLoc()); - State.set(this, Phi, /*IsScalar=*/true); -} - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void VPScalarPHIRecipe::print(raw_ostream &O, const 
Twine &Indent, - VPSlotTracker &SlotTracker) const { - O << Indent << "SCALAR-PHI "; - printAsOperand(O, SlotTracker); - O << " = phi "; - printOperands(O, SlotTracker); -} -#endif diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index ecf52673480e2..b2ff54368adde 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1747,7 +1747,7 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) { // Create a scalar phi to track the previous EVL if fixed-order recurrence is // contained. - VPScalarPHIRecipe *PrevEVL = nullptr; + VPInstruction *PrevEVL = nullptr; bool ContainsFORs = any_of(Header->phis(), IsaPred); if (ContainsFORs) { @@ -1762,7 +1762,8 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) { VFSize > 32 ? Instruction::Trunc : Instruction::ZExt, MaxEVL, Type::getInt32Ty(Ctx), DebugLoc()); } - PrevEVL = new VPScalarPHIRecipe(MaxEVL, &EVL, DebugLoc(), "prev.evl"); + PrevEVL = new VPInstruction(Instruction::PHI, {MaxEVL, &EVL}, DebugLoc(), + "prev.evl"); PrevEVL->insertBefore(*Header, Header->getFirstNonPhi()); } @@ -2089,9 +2090,9 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) { auto *PhiR = cast(&R); StringRef Name = isa(PhiR) ?
"index" : "evl.based.iv"; - auto *ScalarR = - new VPScalarPHIRecipe(PhiR->getStartValue(), PhiR->getBackedgeValue(), - PhiR->getDebugLoc(), Name); + auto *ScalarR = new VPInstruction( + Instruction::PHI, {PhiR->getStartValue(), PhiR->getBackedgeValue()}, + PhiR->getDebugLoc(), Name); ScalarR->insertBefore(PhiR); PhiR->replaceAllUsesWith(ScalarR); PhiR->eraseFromParent(); diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index 1b3b69ea6a13d..13fb8a61d9afa 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -77,7 +77,10 @@ bool VPlanVerifier::verifyPhiRecipes(const VPBasicBlock *VPBB) { if (isa(RecipeI)) NumActiveLaneMaskPhiRecipes++; - if (IsHeaderVPBB && !isa(*RecipeI)) { + // A VPInstruction with a PHI opcode is a valid header phi; cast only after isa. + if (IsHeaderVPBB && !isa(*RecipeI) && + !(isa(*RecipeI) && + cast(RecipeI)->getOpcode() == Instruction::PHI)) { errs() << "Found non-header PHI recipe in header VPBB"; #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) errs() << ": "; @@ -143,12 +146,13 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const { }) .Case( [&](const VPRecipeBase *S) { return VerifyEVLUse(*S, 2); }) - .Case( + .Case( [&](const VPRecipeBase *R) { return VerifyEVLUse(*R, 1); }) .Case( [&](const VPScalarCastRecipe *S) { return VerifyEVLUse(*S, 0); }) .Case([&](const VPInstruction *I) { + if (I->getOpcode() == Instruction::PHI) + return VerifyEVLUse(*I, 1); if (I->getOpcode() != Instruction::Add) { errs() << "EVL is used as an operand in non-VPInstruction::Add\n"; return false; @@ -208,7 +212,9 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) { if (!UI || isa(UI) || (isa(UI) && - isa(cast(UI)->getInstruction()))) + isa(cast(UI)->getInstruction())) || + (isa(UI) && + cast(UI)->getOpcode() == Instruction::PHI)) continue; // If the user is in the same block, check it comes after R in the diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll
b/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll index a880bea2c52d1..e49192adb11c4 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll @@ -86,7 +86,7 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) { ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: SCALAR-PHI vp<[[EP_IV:%.+]]> = phi ir<0>, vp<%index.next> +; CHECK-NEXT: EMIT vp<[[EP_IV:%.+]]> = phi ir<0>, vp<%index.next> ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%accum> = phi ir<0>, ir<%add> (VF scaled by 1/4) ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[EP_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%gep.a> = getelementptr ir<%a>, vp<[[STEPS]]> diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll index 4e862bf2f7480..ffc4cfa61f134 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll @@ -193,7 +193,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: SCALAR-PHI vp<[[CAN_IV:%.+]]> = phi ir<0>, vp<[[CAN_IV_NEXT:%.+]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = phi ir<0>, vp<[[CAN_IV_NEXT:%.+]]> ; CHECK-NEXT: vp<[[DEV_IV:%.+]]> = DERIVED-IV ir<%n> + vp<[[CAN_IV]]> * ir<-1> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[DEV_IV]]>, ir<-1> ; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<[[STEPS]]>, ir<-1> @@ -442,7 +442,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: SCALAR-PHI vp<[[CAN_IV:%.+]]> = phi ir<0>, vp<[[CAN_IV_NEXT:%.+]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = phi ir<0>, vp<[[CAN_IV_NEXT:%.+]]> ; CHECK-NEXT: vp<[[DEV_IV:%.+]]> = DERIVED-IV ir<%n> + 
vp<[[CAN_IV]]> * ir<-1> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[DEV_IV]]>, ir<-1> ; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<[[STEPS]]>, ir<-1> diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll index 0bcfe13832ae7..3fed9e4956107 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll @@ -27,7 +27,7 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION ; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]> ; IF-EVL-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<[[FOR_PHI:%.+]]> = phi ir<33>, ir<[[LD:%.+]]> -; IF-EVL-NEXT: SCALAR-PHI vp<[[PREV_EVL:%.+]]> = phi vp<[[VF32]]>, vp<[[EVL:%.+]]> +; IF-EVL-NEXT: EMIT vp<[[PREV_EVL:%.+]]> = phi vp<[[VF32]]>, vp<[[EVL:%.+]]> ; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%TC>, vp<[[EVL_PHI]]> ; IF-EVL-NEXT: EMIT vp<[[EVL]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> ; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1> diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll index 67be80393d829..dfc2fffdad2bb 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll @@ -32,8 +32,8 @@ ; IF-EVL: vector loop: { ; IF-EVL-NEXT: vector.body: - ; IF-EVL-NEXT: SCALAR-PHI vp<[[IV:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT_EXIT:%.+]]> - ; IF-EVL-NEXT: SCALAR-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEX:%.+]]> + ; IF-EVL-NEXT: EMIT vp<[[IV:%.+]]> = phi ir<0>, vp<[[IV_NEXT_EXIT:%.+]]> + ; IF-EVL-NEXT: EMIT 
vp<[[EVL_PHI:%.+]]> = phi ir<0>, vp<[[IV_NEX:%.+]]> ; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]> ; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> ; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1> diff --git a/llvm/test/Transforms/LoopVectorize/discriminator.ll b/llvm/test/Transforms/LoopVectorize/discriminator.ll index 5e56dcf0c434f..b66a70b9768c4 100644 --- a/llvm/test/Transforms/LoopVectorize/discriminator.ll +++ b/llvm/test/Transforms/LoopVectorize/discriminator.ll @@ -49,8 +49,8 @@ define void @_Z3foov() local_unnamed_addr #0 !dbg !6 { ;LOOPUNROLL_5: discriminator: 21 ; When unrolling after loop vectorize, both vec_body and remainder loop ; are unrolled. -;LOOPVEC_UNROLL: discriminator: 9 ;LOOPVEC_UNROLL: discriminator: 385 +;LOOPVEC_UNROLL: discriminator: 9 ;DBG_VALUE: ![[DBG]] = {{.*}}, scope: ![[TOP]] ; Pseudo probe should not have duplication factor assigned. ;PSEUDO_PROBE: ![[TOP:[0-9]*]] = distinct !DISubprogram(name: "foo" diff --git a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll index 2cc8aea82ca52..56ba4ccbe99e0 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll @@ -19,7 +19,7 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) { ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: SCALAR-PHI vp<[[CAN_IV:%.+]]> = phi ir<0>, vp<[[CAN_IV_NEXT:%.+]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = phi ir<0>, vp<[[CAN_IV_NEXT:%.+]]> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: EMIT vp<[[PTR:%.+]]> = ptradd ir<%start>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[WIDE_PTR:%.+]]> = vector-pointer vp<[[PTR]]>