From 431d6d914f4a9d190e6e3d0ee5d5e77fcfb7690a Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Thu, 5 Dec 2024 00:09:46 +0800 Subject: [PATCH 1/5] [LV] Allow VPWidenPHI in non-native path and copy DebugLoc We can reuse VPWidenPHI in #118638, but it requires us to allow it in the non-native path. We also need to propagate the DebugLoc and use a different name in the generated PHI, so this splits these parts off in case we want it. We lose some debug info in dbg-outer-loop-vect.ll, but I think this is because the underlying phi node didn't have a DebugLoc to begin with. I think the current version is just carrying over the DebugLoc from the previous state. --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 9 ++++----- llvm/lib/Transforms/Vectorize/VPlan.h | 10 ++++++++-- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 6 ++---- .../Transforms/LoopVectorize/dbg-outer-loop-vect.ll | 4 ++-- 4 files changed, 16 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index f2f8a85b7cc23..5f924e25fcf20 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -516,7 +516,7 @@ class InnerLoopVectorizer { VPTransformState &State); /// Fix the non-induction PHIs in \p Plan. - void fixNonInductionPHIs(VPTransformState &State); + void fixWidenedPHIs(VPTransformState &State); /// Returns the original loop trip count. Value *getTripCount() const { return TripCount; } @@ -2977,9 +2977,8 @@ LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI, } void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) { - // Fix widened non-induction PHIs by setting up the PHI operands. - if (EnableVPlanNativePath) - fixNonInductionPHIs(State); + // Fix widened PHIs by setting up the PHI operands. + fixWidenedPHIs(State); // Forget the original basic block. 
PSE.getSE()->forgetLoop(OrigLoop); @@ -3116,7 +3115,7 @@ void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) { } while (Changed); } -void InnerLoopVectorizer::fixNonInductionPHIs(VPTransformState &State) { +void InnerLoopVectorizer::fixWidenedPHIs(VPTransformState &State) { auto Iter = vp_depth_first_deep(Plan.getEntry()); for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly(Iter)) { for (VPRecipeBase &P : VPBB->phis()) { diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 88f3f672d3aa3..b42dbe9208672 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2287,10 +2287,16 @@ class VPWidenPHIRecipe : public VPSingleDefRecipe { /// List of incoming blocks. Only used in the VPlan native path. SmallVector IncomingBlocks; + /// Name to use for the generated IR instruction for the widened phi. + std::string Name; + public: /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start. 
- VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr) - : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef(), Phi) { + VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr, + const Twine &Name = "vec.phi") + : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef(), Phi, + Phi->getDebugLoc()), + Name(Name.str()) { if (Start) addOperand(Start); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 77c08839dbfa9..bfe945d73bf14 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -3440,12 +3440,10 @@ void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent, #endif void VPWidenPHIRecipe::execute(VPTransformState &State) { - assert(EnableVPlanNativePath && - "Non-native vplans are not expected to have VPWidenPHIRecipes."); - + State.setDebugLocFrom(getDebugLoc()); Value *Op0 = State.get(getOperand(0)); Type *VecTy = Op0->getType(); - Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi"); + Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, Name); State.set(this, VecPhi); } diff --git a/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll b/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll index 66aceab9fb27c..44afa34100c29 100644 --- a/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll +++ b/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll @@ -15,8 +15,8 @@ define void @foo(ptr %h) !dbg !4 { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_COND_CLEANUP32:%.*]] ] ; CHECK-NEXT: br label [[FOR_COND5_PREHEADER1:%.*]], !dbg [[DBG21]] ; CHECK: for.cond5.preheader1: -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP4:%.*]], [[FOR_COND5_PREHEADER1]] ], !dbg [[DBG21]] -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[H]], <4 x i64> [[VEC_PHI]], !dbg [[DBG21]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, 
[[VECTOR_BODY]] ], [ [[TMP4:%.*]], [[FOR_COND5_PREHEADER1]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[H]], <4 x i64> [[VEC_PHI]] ; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> zeroinitializer, <4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true)), !dbg [[DBG22:![0-9]+]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, <4 x ptr> [[TMP0]], i64 1, !dbg [[DBG22]] ; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> splat (i32 1), <4 x ptr> [[TMP1]], i32 4, <4 x i1> splat (i1 true)), !dbg [[DBG22]] From cda41c6532be6aa56f66e093c44cb521cb64343e Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Thu, 5 Dec 2024 00:20:20 +0800 Subject: [PATCH 2/5] Update comment --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 5f924e25fcf20..0eaaa711a5f95 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -515,7 +515,7 @@ class InnerLoopVectorizer { VPReplicateRecipe *RepRecipe, const VPLane &Lane, VPTransformState &State); - /// Fix the non-induction PHIs in \p Plan. + /// Fix the widened PHIs in \p Plan. void fixWidenedPHIs(VPTransformState &State); /// Returns the original loop trip count. 
From f31878cde04c29d2f76bc82879d49e0f37566e35 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Thu, 5 Dec 2024 04:04:21 +0800 Subject: [PATCH 3/5] Rename fixNonInductionPHIs back --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 0eaaa711a5f95..ef8abdaa8aa76 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -515,8 +515,8 @@ class InnerLoopVectorizer { VPReplicateRecipe *RepRecipe, const VPLane &Lane, VPTransformState &State); - /// Fix the widened PHIs in \p Plan. - void fixWidenedPHIs(VPTransformState &State); + /// Fix the non-induction PHIs in \p Plan. + void fixNonInductionPHIs(VPTransformState &State); /// Returns the original loop trip count. Value *getTripCount() const { return TripCount; } @@ -2977,8 +2977,8 @@ LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI, } void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) { - // Fix widened PHIs by setting up the PHI operands. - fixWidenedPHIs(State); + // Fix widened non-induction PHIs by setting up the PHI operands. + fixNonInductionPHIs(State); // Forget the original basic block. 
PSE.getSE()->forgetLoop(OrigLoop); @@ -3115,7 +3115,7 @@ void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) { } while (Changed); } -void InnerLoopVectorizer::fixWidenedPHIs(VPTransformState &State) { +void InnerLoopVectorizer::fixNonInductionPHIs(VPTransformState &State) { auto Iter = vp_depth_first_deep(Plan.getEntry()); for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly(Iter)) { for (VPRecipeBase &P : VPBB->phis()) { From 69d40761f44ddd666d6d0da97e2d791210425a53 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Fri, 13 Dec 2024 01:22:27 +0800 Subject: [PATCH 4/5] Update comments to clarify which parts can only be used in the VPlan-native path; also add back the assertion, but check that IncomingBlocks is empty in the non-native path --- llvm/lib/Transforms/Vectorize/VPlan.h | 6 ++++-- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 4 ++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index b42dbe9208672..d15418ab7415e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2318,13 +2318,15 @@ class VPWidenPHIRecipe : public VPSingleDefRecipe { VPSlotTracker &SlotTracker) const override; #endif - /// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi. + /// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi. Only used in the + /// VPlan native path. void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock) { addOperand(IncomingV); IncomingBlocks.push_back(IncomingBlock); } - /// Returns the \p I th incoming VPBasicBlock. + /// Returns the \p I th incoming VPBasicBlock. Only used in the VPlan native + /// path. VPBasicBlock *getIncomingBlock(unsigned I) { return IncomingBlocks[I]; } /// Returns the \p I th incoming VPValue. 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index bfe945d73bf14..6a35289c4dfa2 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -3440,6 +3440,10 @@ void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent, #endif void VPWidenPHIRecipe::execute(VPTransformState &State) { + assert((EnableVPlanNativePath || IncomingBlocks.empty()) && + "Non-native vplans are not expected to have VPWidenPHIRecipes with " + "incoming blocks."); + State.setDebugLocFrom(getDebugLoc()); Value *Op0 = State.get(getOperand(0)); Type *VecTy = Op0->getType(); From 46ae40263fa35accbaf52f2f738e0262dd048d60 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Fri, 3 Jan 2025 12:54:04 +0800 Subject: [PATCH 5/5] Remove DebugLoc changes --- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 1 - llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 6a35289c4dfa2..b470e228e023a 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -3444,7 +3444,6 @@ void VPWidenPHIRecipe::execute(VPTransformState &State) { "Non-native vplans are not expected to have VPWidenPHIRecipes with " "incoming blocks."); - State.setDebugLocFrom(getDebugLoc()); Value *Op0 = State.get(getOperand(0)); Type *VecTy = Op0->getType(); Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, Name); diff --git a/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll b/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll index 44afa34100c29..66aceab9fb27c 100644 --- a/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll +++ b/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll @@ -15,8 +15,8 @@ define void @foo(ptr %h) !dbg !4 { ; CHECK-NEXT: [[INDEX:%.*]] = 
phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_COND_CLEANUP32:%.*]] ] ; CHECK-NEXT: br label [[FOR_COND5_PREHEADER1:%.*]], !dbg [[DBG21]] ; CHECK: for.cond5.preheader1: -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP4:%.*]], [[FOR_COND5_PREHEADER1]] ] -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[H]], <4 x i64> [[VEC_PHI]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP4:%.*]], [[FOR_COND5_PREHEADER1]] ], !dbg [[DBG21]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[H]], <4 x i64> [[VEC_PHI]], !dbg [[DBG21]] ; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> zeroinitializer, <4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true)), !dbg [[DBG22:![0-9]+]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, <4 x ptr> [[TMP0]], i64 1, !dbg [[DBG22]] ; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> splat (i32 1), <4 x ptr> [[TMP1]], i32 4, <4 x i1> splat (i1 true)), !dbg [[DBG22]]