From df05fafd8e3dc889060f820ae697e8f1646afa65 Mon Sep 17 00:00:00 2001 From: Elvis Wang Date: Sun, 15 Dec 2024 19:45:13 -0800 Subject: [PATCH 1/4] Precommit test case. --- .../LoopVectorize/RISCV/preserve-dbg-loc.ll | 36 +++++++++++++++++++ .../LoopVectorize/preserve-dbg-loc.ll | 33 +++++++++++++++++ 2 files changed, 69 insertions(+) create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/preserve-dbg-loc.ll create mode 100644 llvm/test/Transforms/LoopVectorize/preserve-dbg-loc.ll diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/preserve-dbg-loc.ll b/llvm/test/Transforms/LoopVectorize/RISCV/preserve-dbg-loc.ll new file mode 100644 index 0000000000000..5993dc26a0e8f --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/preserve-dbg-loc.ll @@ -0,0 +1,36 @@ +; RUN: opt -passes=debugify,loop-vectorize \ +; RUN: -force-tail-folding-style=data-with-evl \ +; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -S < %s 2>&1 | FileCheck --check-prefix=DEBUGLOC %s + +; Testing the debug locations of the generated vector intstruction are same as +; their scalar instruction. 
+ +; DEBUGLOC-LABEL: define void @vp_select( +define void @vp_select(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { +; DEBUGLOC: vector.body: +; DEBUGLOC: %[[VPSel:[0-9]+]] = call @llvm.vp.select.nxv4i32( %15, %vp.op.load1, %vp.op, i32 %9) +; DEBUGLOC: for.body: +; DEBUGLOC: %cond.p = select i1 %cmp4, i32 %22, i32 %23, !dbg !39 + entry: + br label %for.body + + for.body: + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx3 = getelementptr inbounds i32, ptr %c, i64 %indvars.iv + %1 = load i32, ptr %arrayidx3, align 4 + %cmp4 = icmp sgt i32 %0, %1 + %2 = sub i32 0, %1 + %cond.p = select i1 %cmp4, i32 %1, i32 %2 + %cond = add i32 %cond.p, %0 + %arrayidx15 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv + store i32 %cond, ptr %arrayidx15, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %N + br i1 %exitcond.not, label %exit, label %for.body + + exit: + ret void + } diff --git a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc.ll b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc.ll new file mode 100644 index 0000000000000..02cac45f1c5cc --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc.ll @@ -0,0 +1,33 @@ +; RUN: opt < %s -passes=debugify,loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -prefer-inloop-reductions -S | FileCheck %s -check-prefix DEBUGLOC +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; Testing the debug locations of the generated vector intstruction are same as +; their scalar instruction. 
+ +; DEBUGLOC-LABEL: define i32 @reduction_sum( +define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { +; DEBUGLOC: vector.body: +; DEBUGLOC: %[[VecLoad:.*]] = load <4 x i32>, ptr %2, align 4, !dbg ![[LoadLoc0:[0-9]+]] +; DEBUGLOC: %[[VecRed:.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %wide.load), !dbg ![[LoadLoc0]] +; DEBUGLOC: .lr.ph: +; DEBUGLOC: %l3 = load i32, ptr %l2, align 4, !dbg ![[LoadLoc0]] +; DEBUGLOC: %l7 = add i32 %sum.02, %l3, !dbg ![[RedLoc0:[0-9]+]] +entry: + br label %.lr.ph + +.lr.ph: ; preds = %entry, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ] + %sum.02 = phi i32 [ %l7, %.lr.ph ], [ 0, %entry ] + %l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv + %l3 = load i32, ptr %l2, align 4 + %l6 = trunc i64 %indvars.iv to i32 + %l7 = add i32 %sum.02, %l3 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 256 + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph + %sum.0.lcssa = phi i32 [ %l7, %.lr.ph ] + ret i32 %sum.0.lcssa +} From d0335ee5f69b94c5586875a92c7ed209950f0617 Mon Sep 17 00:00:00 2001 From: Elvis Wang Date: Sun, 15 Dec 2024 21:45:32 -0800 Subject: [PATCH 2/4] [VPlan] Set debug loc for VPReduction/VPWidenIntrinsicRecipe. This patch adds the missing debug locations for VPReduction/VPWidenIntrinsicRecipe. 
--- .../Transforms/Vectorize/LoopVectorize.cpp | 6 +-- llvm/lib/Transforms/Vectorize/VPlan.h | 20 ++++---- .../Transforms/Vectorize/VPlanHCFGBuilder.cpp | 2 +- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 2 + .../LoopVectorize/RISCV/preserve-dbg-loc.ll | 47 ++++++++++--------- .../LoopVectorize/dbg-outer-loop-vect.ll | 4 +- .../preserve-dbg-loc-and-loop-metadata.ll | 26 ++++++++++ .../preserve-dbg-loc-reduction-inloop.ll | 34 ++++++++++++++ .../LoopVectorize/preserve-dbg-loc.ll | 33 ------------- 9 files changed, 104 insertions(+), 70 deletions(-) create mode 100644 llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-reduction-inloop.ll delete mode 100644 llvm/test/Transforms/LoopVectorize/preserve-dbg-loc.ll diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 1f6996cd9c1f4..c584483bc5213 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9611,9 +9611,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( if (CM.blockNeedsPredicationForAnyReason(BB)) CondOp = RecipeBuilder.getBlockInMask(BB); - VPReductionRecipe *RedRecipe = - new VPReductionRecipe(RdxDesc, CurrentLinkI, PreviousLink, VecOp, - CondOp, CM.useOrderedReductions(RdxDesc)); + auto *RedRecipe = new VPReductionRecipe( + RdxDesc, CurrentLinkI, PreviousLink, VecOp, CondOp, + CM.useOrderedReductions(RdxDesc), CurrentLinkI->getDebugLoc()); // Append the recipe to the end of the VPBasicBlock because we need to // ensure that it comes after all of it's inputs, including CondOp. 
// Note that this transformation may leave over dead recipes (including diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 6486c6745a680..c13b1b0e13e29 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1653,7 +1653,7 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags { VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef CallArguments, Type *Ty, DebugLoc DL = {}) - : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments), + : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, DL), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty) { LLVMContext &Ctx = Ty->getContext(); AttributeList Attrs = Intrinsic::getAttributes(Ctx, VectorIntrinsicID); @@ -2295,8 +2295,8 @@ class VPWidenPHIRecipe : public VPSingleDefRecipe { public: /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start. - VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr) - : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef(), Phi) { + VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr, DebugLoc DL = {}) + : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef(), Phi, DL) { if (Start) addOperand(Start); } @@ -2597,8 +2597,9 @@ class VPReductionRecipe : public VPSingleDefRecipe { protected: VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R, Instruction *I, ArrayRef Operands, - VPValue *CondOp, bool IsOrdered) - : VPSingleDefRecipe(SC, Operands, I), RdxDesc(R), IsOrdered(IsOrdered) { + VPValue *CondOp, bool IsOrdered, DebugLoc DL) + : VPSingleDefRecipe(SC, Operands, I, DL), RdxDesc(R), + IsOrdered(IsOrdered) { if (CondOp) { IsConditional = true; addOperand(CondOp); @@ -2608,16 +2609,17 @@ class VPReductionRecipe : public VPSingleDefRecipe { public: VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, - bool IsOrdered) + bool IsOrdered, DebugLoc DL = {}) : 
VPReductionRecipe(VPDef::VPReductionSC, R, I, ArrayRef({ChainOp, VecOp}), CondOp, - IsOrdered) {} + IsOrdered, DL) {} ~VPReductionRecipe() override = default; VPReductionRecipe *clone() override { return new VPReductionRecipe(RdxDesc, getUnderlyingInstr(), getChainOp(), - getVecOp(), getCondOp(), IsOrdered); + getVecOp(), getCondOp(), IsOrdered, + getDebugLoc()); } static inline bool classof(const VPRecipeBase *R) { @@ -2672,7 +2674,7 @@ class VPReductionEVLRecipe : public VPReductionRecipe { VPDef::VPReductionEVLSC, R.getRecurrenceDescriptor(), cast_or_null(R.getUnderlyingValue()), ArrayRef({R.getChainOp(), R.getVecOp(), &EVL}), CondOp, - R.isOrdered()) {} + R.isOrdered(), R.getDebugLoc()) {} ~VPReductionEVLRecipe() override = default; diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp index 6e633739fcc3d..140cea3c700d8 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp @@ -308,7 +308,7 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB, // Phi node's operands may have not been visited at this point. We create // an empty VPInstruction that we will fix once the whole plain CFG has // been built. - NewVPV = new VPWidenPHIRecipe(Phi); + NewVPV = new VPWidenPHIRecipe(Phi, nullptr, Phi->getDebugLoc()); VPBB->appendRecipe(cast(NewVPV)); PhisToFix.push_back(Phi); } else { diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index f82711141419c..0e7c4b6e4d9b9 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -2116,6 +2116,7 @@ void VPReductionRecipe::execute(VPTransformState &State) { // Propagate the fast-math flags carried by the underlying instruction. 
IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder); State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags()); + State.setDebugLocFrom(getDebugLoc()); Value *NewVecOp = State.get(getVecOp()); if (VPValue *Cond = getCondOp()) { Value *NewCond = State.get(Cond, State.VF.isScalar()); @@ -3443,6 +3444,7 @@ void VPWidenPHIRecipe::execute(VPTransformState &State) { assert(EnableVPlanNativePath && "Non-native vplans are not expected to have VPWidenPHIRecipes."); + State.setDebugLocFrom(getDebugLoc()); Value *Op0 = State.get(getOperand(0)); Type *VecTy = Op0->getType(); Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi"); diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/preserve-dbg-loc.ll b/llvm/test/Transforms/LoopVectorize/RISCV/preserve-dbg-loc.ll index 5993dc26a0e8f..93bd44f5c6220 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/preserve-dbg-loc.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/preserve-dbg-loc.ll @@ -3,34 +3,37 @@ ; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ ; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -S < %s 2>&1 | FileCheck --check-prefix=DEBUGLOC %s -; Testing the debug locations of the generated vector intstruction are same as -; their scalar instruction. +; Test that the debug location of the generated vector intrinsic is the same as +; that of its scalar counterpart. 
+define void @vp_select(ptr %a, ptr %b, ptr %c, i64 %N) { ; DEBUGLOC-LABEL: define void @vp_select( -define void @vp_select(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; DEBUGLOC: vector.body: -; DEBUGLOC: %[[VPSel:[0-9]+]] = call @llvm.vp.select.nxv4i32( %15, %vp.op.load1, %vp.op, i32 %9) -; DEBUGLOC: for.body: -; DEBUGLOC: %cond.p = select i1 %cmp4, i32 %22, i32 %23, !dbg !39 +; DEBUGLOC: = call @llvm.vp.select.nxv4i32( %{{.+}}, %{{.+}}, %{{.+}}, i32 %{{.+}}), !dbg ![[SELLOC:[0-9]+]] +; DEBUGLOC: loop: +; DEBUGLOC: = select i1 %{{.+}}, i32 %{{.+}}, i32 %{{.+}}, !dbg ![[SELLOC]] +; entry: - br label %for.body + br label %loop - for.body: - %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv - %0 = load i32, ptr %arrayidx, align 4 - %arrayidx3 = getelementptr inbounds i32, ptr %c, i64 %indvars.iv - %1 = load i32, ptr %arrayidx3, align 4 - %cmp4 = icmp sgt i32 %0, %1 - %2 = sub i32 0, %1 - %cond.p = select i1 %cmp4, i32 %1, i32 %2 - %cond = add i32 %cond.p, %0 - %arrayidx15 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv - store i32 %cond, ptr %arrayidx15, align 4 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond.not = icmp eq i64 %indvars.iv.next, %N - br i1 %exitcond.not, label %exit, label %for.body +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv + %load.b = load i32, ptr %gep.b, align 4 + %gep.c = getelementptr inbounds i32, ptr %c, i64 %iv + %load.c = load i32, ptr %gep.c, align 4 + %cmp = icmp sgt i32 %load.b, %load.c + %neg.c = sub i32 0, %load.c + %sel = select i1 %cmp, i32 %load.c, i32 %neg.c + %add = add i32 %sel, %load.b + %gep.a = getelementptr inbounds i32, ptr %a, i64 %iv + store i32 %add, ptr %gep.a, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %N + br i1 %exitcond, label %exit, label %loop exit: ret void } + + ; DEBUGLOC: [[SELLOC]] = 
!DILocation(line: 9 diff --git a/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll b/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll index 66aceab9fb27c..44afa34100c29 100644 --- a/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll +++ b/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll @@ -15,8 +15,8 @@ define void @foo(ptr %h) !dbg !4 { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_COND_CLEANUP32:%.*]] ] ; CHECK-NEXT: br label [[FOR_COND5_PREHEADER1:%.*]], !dbg [[DBG21]] ; CHECK: for.cond5.preheader1: -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP4:%.*]], [[FOR_COND5_PREHEADER1]] ], !dbg [[DBG21]] -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[H]], <4 x i64> [[VEC_PHI]], !dbg [[DBG21]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP4:%.*]], [[FOR_COND5_PREHEADER1]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[H]], <4 x i64> [[VEC_PHI]] ; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> zeroinitializer, <4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true)), !dbg [[DBG22:![0-9]+]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, <4 x ptr> [[TMP0]], i64 1, !dbg [[DBG22]] ; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> splat (i32 1), <4 x ptr> [[TMP1]], i32 4, <4 x i1> splat (i1 true)), !dbg [[DBG22]] diff --git a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll index 5052ba8117751..bb8e19e3175f1 100644 --- a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll +++ b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll @@ -109,6 +109,31 @@ exit: ret void } +define void @widen_intrinsic_dbg(i64 %n, ptr %y, ptr %x) { +; DEBUGLOC-LABEL: define void @widen_intrinsic_dbg( +; DEBUGLOC: vector.body: +; DEBUGLOC: = call <4 x 
float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}), !dbg ![[INTRINSIC_LOC:[0-9]+]] +; DEBUGLOC: loop: +; DEBUGLOC: = call float @llvm.sqrt.f32(float %{{.+}}), !dbg ![[INTRINSIC_LOC]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %gep.y = getelementptr inbounds float, ptr %y, i64 %iv + %load = load float, ptr %gep.y, align 4 + %call = call float @llvm.sqrt.f32(float %load) + %gep.x = getelementptr inbounds float, ptr %x, i64 %iv + store float %call, ptr %gep.x, align 4 + %iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %exit, label %loop + +exit: + ret void +} + !0 = !{!0, !1} !1 = !{!"llvm.loop.vectorize.width", i32 4} ; CHECK-NOT: !{metadata !"llvm.loop.vectorize.width", i32 4} @@ -116,3 +141,4 @@ exit: ; DEBUGLOC: ![[RESUMELOC]] = !DILocation(line: 2 ; DEBUGLOC: ![[PTRIVLOC]] = !DILocation(line: 12 +; DEBUGLOC: ![[INTRINSIC_LOC]] = !DILocation(line: 44 diff --git a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-reduction-inloop.ll new file mode 100644 index 0000000000000..b0ee1b352d236 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-reduction-inloop.ll @@ -0,0 +1,34 @@ +; RUN: opt < %s -passes=debugify,loop-vectorize -force-vector-width=4 -prefer-inloop-reductions -S | FileCheck %s -check-prefix DEBUGLOC + +; Test that the debug locations of the generated vector instructions are the +; same as those of their scalar counterparts. 
+ +define i32 @reduction_sum(ptr %A, ptr %B) { +; DEBUGLOC-LABEL: define i32 @reduction_sum( +; DEBUGLOC: vector.body: +; DEBUGLOC: = load <4 x i32>, ptr %{{.+}}, align 4, !dbg ![[LOADLOC:[0-9]+]] +; DEBUGLOC: = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %{{.+}}), !dbg ![[REDLOC:[0-9]+]] +; DEBUGLOC: loop: +; DEBUGLOC: = load i32, ptr %{{.+}}, align 4, !dbg ![[LOADLOC]] +; DEBUGLOC: = add i32 %{{.+}}, %{{.+}}, !dbg ![[REDLOC]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %red = phi i32 [ 0, %entry ], [ %red.next, %loop ] + %gep = getelementptr inbounds i32, ptr %A, i64 %iv + %load = load i32, ptr %gep, align 4 + %red.next = add i32 %red, %load + %iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 256 + br i1 %exitcond, label %exit, label %loop + +exit: + %red.lcssa = phi i32 [ %red.next, %loop ] + ret i32 %red.lcssa +} + +; DEBUGLOC: ![[LOADLOC]] = !DILocation(line: 5 +; DEBUGLOC: ![[REDLOC]] = !DILocation(line: 6 diff --git a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc.ll b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc.ll deleted file mode 100644 index 02cac45f1c5cc..0000000000000 --- a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc.ll +++ /dev/null @@ -1,33 +0,0 @@ -; RUN: opt < %s -passes=debugify,loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -prefer-inloop-reductions -S | FileCheck %s -check-prefix DEBUGLOC -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - -; Testing the debug locations of the generated vector intstruction are same as -; their scalar instruction. 
- -; DEBUGLOC-LABEL: define i32 @reduction_sum( -define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { -; DEBUGLOC: vector.body: -; DEBUGLOC: %[[VecLoad:.*]] = load <4 x i32>, ptr %2, align 4, !dbg ![[LoadLoc0:[0-9]+]] -; DEBUGLOC: %[[VecRed:.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %wide.load), !dbg ![[LoadLoc0]] -; DEBUGLOC: .lr.ph: -; DEBUGLOC: %l3 = load i32, ptr %l2, align 4, !dbg ![[LoadLoc0]] -; DEBUGLOC: %l7 = add i32 %sum.02, %l3, !dbg ![[RedLoc0:[0-9]+]] -entry: - br label %.lr.ph - -.lr.ph: ; preds = %entry, %.lr.ph - %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ] - %sum.02 = phi i32 [ %l7, %.lr.ph ], [ 0, %entry ] - %l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv - %l3 = load i32, ptr %l2, align 4 - %l6 = trunc i64 %indvars.iv to i32 - %l7 = add i32 %sum.02, %l3 - %indvars.iv.next = add i64 %indvars.iv, 1 - %lftr.wideiv = trunc i64 %indvars.iv.next to i32 - %exitcond = icmp eq i32 %lftr.wideiv, 256 - br i1 %exitcond, label %._crit_edge, label %.lr.ph - -._crit_edge: ; preds = %.lr.ph - %sum.0.lcssa = phi i32 [ %l7, %.lr.ph ] - ret i32 %sum.0.lcssa -} From a5eb553cb8552ae8a0296e88f108f261598fccd3 Mon Sep 17 00:00:00 2001 From: Elvis Wang Date: Tue, 17 Dec 2024 15:50:44 -0800 Subject: [PATCH 3/4] !fixup Split changes of VPWidenPHIRecipe. --- llvm/lib/Transforms/Vectorize/VPlan.h | 4 ++-- llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp | 2 +- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 1 - llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll | 4 ++-- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index c13b1b0e13e29..0256a5f4baa16 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2295,8 +2295,8 @@ class VPWidenPHIRecipe : public VPSingleDefRecipe { public: /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start. 
- VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr, DebugLoc DL = {}) - : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef(), Phi, DL) { + VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr) + : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef(), Phi) { if (Start) addOperand(Start); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp index 140cea3c700d8..6e633739fcc3d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp @@ -308,7 +308,7 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB, // Phi node's operands may have not been visited at this point. We create // an empty VPInstruction that we will fix once the whole plain CFG has // been built. - NewVPV = new VPWidenPHIRecipe(Phi, nullptr, Phi->getDebugLoc()); + NewVPV = new VPWidenPHIRecipe(Phi); VPBB->appendRecipe(cast(NewVPV)); PhisToFix.push_back(Phi); } else { diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 0e7c4b6e4d9b9..62fc08f4aac58 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -3444,7 +3444,6 @@ void VPWidenPHIRecipe::execute(VPTransformState &State) { assert(EnableVPlanNativePath && "Non-native vplans are not expected to have VPWidenPHIRecipes."); - State.setDebugLocFrom(getDebugLoc()); Value *Op0 = State.get(getOperand(0)); Type *VecTy = Op0->getType(); Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi"); diff --git a/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll b/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll index 44afa34100c29..66aceab9fb27c 100644 --- a/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll +++ b/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll @@ -15,8 +15,8 @@ define void @foo(ptr %h) !dbg !4 { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, 
[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_COND_CLEANUP32:%.*]] ] ; CHECK-NEXT: br label [[FOR_COND5_PREHEADER1:%.*]], !dbg [[DBG21]] ; CHECK: for.cond5.preheader1: -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP4:%.*]], [[FOR_COND5_PREHEADER1]] ] -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[H]], <4 x i64> [[VEC_PHI]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP4:%.*]], [[FOR_COND5_PREHEADER1]] ], !dbg [[DBG21]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[H]], <4 x i64> [[VEC_PHI]], !dbg [[DBG21]] ; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> zeroinitializer, <4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true)), !dbg [[DBG22:![0-9]+]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, <4 x ptr> [[TMP0]], i64 1, !dbg [[DBG22]] ; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> splat (i32 1), <4 x ptr> [[TMP1]], i32 4, <4 x i1> splat (i1 true)), !dbg [[DBG22]] From 16549322783e56978b9979c9f63e99c370647021 Mon Sep 17 00:00:00 2001 From: Elvis Wang Date: Wed, 18 Dec 2024 15:15:47 -0800 Subject: [PATCH 4/4] !fixup Make sure we match the correct `Add`. 
--- .../LoopVectorize/preserve-dbg-loc-reduction-inloop.ll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-reduction-inloop.ll index b0ee1b352d236..57f0dc205dba1 100644 --- a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-reduction-inloop.ll +++ b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-reduction-inloop.ll @@ -9,8 +9,8 @@ define i32 @reduction_sum(ptr %A, ptr %B) { ; DEBUGLOC: = load <4 x i32>, ptr %{{.+}}, align 4, !dbg ![[LOADLOC:[0-9]+]] ; DEBUGLOC: = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %{{.+}}), !dbg ![[REDLOC:[0-9]+]] ; DEBUGLOC: loop: -; DEBUGLOC: = load i32, ptr %{{.+}}, align 4, !dbg ![[LOADLOC]] -; DEBUGLOC: = add i32 %{{.+}}, %{{.+}}, !dbg ![[REDLOC]] +; DEBUGLOC: %[[LOAD:.+]] = load i32, ptr %{{.+}}, align 4, !dbg ![[LOADLOC]] +; DEBUGLOC: = add i32 %{{.+}}, %[[LOAD]], !dbg ![[REDLOC]] ; entry: br label %loop