diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 1f6996cd9c1f4..c584483bc5213 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9611,9 +9611,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
       if (CM.blockNeedsPredicationForAnyReason(BB))
         CondOp = RecipeBuilder.getBlockInMask(BB);
 
-      VPReductionRecipe *RedRecipe =
-          new VPReductionRecipe(RdxDesc, CurrentLinkI, PreviousLink, VecOp,
-                                CondOp, CM.useOrderedReductions(RdxDesc));
+      auto *RedRecipe = new VPReductionRecipe(
+          RdxDesc, CurrentLinkI, PreviousLink, VecOp, CondOp,
+          CM.useOrderedReductions(RdxDesc), CurrentLinkI->getDebugLoc());
       // Append the recipe to the end of the VPBasicBlock because we need to
       // ensure that it comes after all of it's inputs, including CondOp.
       // Note that this transformation may leave over dead recipes (including
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 6486c6745a680..0256a5f4baa16 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1653,7 +1653,7 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags {
   VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID,
                          ArrayRef<VPValue *> CallArguments, Type *Ty,
                          DebugLoc DL = {})
-      : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments),
+      : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, DL),
         VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty) {
     LLVMContext &Ctx = Ty->getContext();
     AttributeList Attrs = Intrinsic::getAttributes(Ctx, VectorIntrinsicID);
@@ -2597,8 +2597,9 @@ class VPReductionRecipe : public VPSingleDefRecipe {
 protected:
   VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R,
                     Instruction *I, ArrayRef<VPValue *> Operands,
-                    VPValue *CondOp, bool IsOrdered)
-      : VPSingleDefRecipe(SC, Operands, I), RdxDesc(R), IsOrdered(IsOrdered) {
+                    VPValue *CondOp, bool IsOrdered, DebugLoc DL)
+      : VPSingleDefRecipe(SC, Operands, I, DL), RdxDesc(R),
+        IsOrdered(IsOrdered) {
     if (CondOp) {
       IsConditional = true;
       addOperand(CondOp);
@@ -2608,16 +2609,17 @@
 public:
   VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I,
                     VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
-                    bool IsOrdered)
+                    bool IsOrdered, DebugLoc DL = {})
       : VPReductionRecipe(VPDef::VPReductionSC, R, I,
                           ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
-                          IsOrdered) {}
+                          IsOrdered, DL) {}
 
   ~VPReductionRecipe() override = default;
 
   VPReductionRecipe *clone() override {
     return new VPReductionRecipe(RdxDesc, getUnderlyingInstr(), getChainOp(),
-                                 getVecOp(), getCondOp(), IsOrdered);
+                                 getVecOp(), getCondOp(), IsOrdered,
+                                 getDebugLoc());
   }
 
   static inline bool classof(const VPRecipeBase *R) {
@@ -2672,7 +2674,7 @@ class VPReductionEVLRecipe : public VPReductionRecipe {
             VPDef::VPReductionEVLSC, R.getRecurrenceDescriptor(),
             cast_or_null<Instruction>(R.getUnderlyingValue()),
             ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
-            R.isOrdered()) {}
+            R.isOrdered(), R.getDebugLoc()) {}
 
   ~VPReductionEVLRecipe() override = default;
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index f82711141419c..62fc08f4aac58 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2116,6 +2116,7 @@ void VPReductionRecipe::execute(VPTransformState &State) {
   // Propagate the fast-math flags carried by the underlying instruction.
   IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
   State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
+  State.setDebugLocFrom(getDebugLoc());
   Value *NewVecOp = State.get(getVecOp());
   if (VPValue *Cond = getCondOp()) {
     Value *NewCond = State.get(Cond, State.VF.isScalar());
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/preserve-dbg-loc.ll b/llvm/test/Transforms/LoopVectorize/RISCV/preserve-dbg-loc.ll
new file mode 100644
index 0000000000000..93bd44f5c6220
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/preserve-dbg-loc.ll
@@ -0,0 +1,39 @@
+; RUN: opt -passes=debugify,loop-vectorize \
+; RUN: -force-tail-folding-style=data-with-evl \
+; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
+; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -S < %s 2>&1 | FileCheck --check-prefix=DEBUGLOC %s
+
+; Testing that the debug location of the generated vector intrinsic is the same
+; as that of its scalar counterpart.
+
+define void @vp_select(ptr %a, ptr %b, ptr %c, i64 %N) {
+; DEBUGLOC-LABEL: define void @vp_select(
+; DEBUGLOC: vector.body:
+; DEBUGLOC: = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> %{{.+}}, <vscale x 4 x i32> %{{.+}}, <vscale x 4 x i32> %{{.+}}, i32 %{{.+}}), !dbg ![[SELLOC:[0-9]+]]
+; DEBUGLOC: loop:
+; DEBUGLOC: = select i1 %{{.+}}, i32 %{{.+}}, i32 %{{.+}}, !dbg ![[SELLOC]]
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv
+  %load.b = load i32, ptr %gep.b, align 4
+  %gep.c = getelementptr inbounds i32, ptr %c, i64 %iv
+  %load.c = load i32, ptr %gep.c, align 4
+  %cmp = icmp sgt i32 %load.b, %load.c
+  %neg.c = sub i32 0, %load.c
+  %sel = select i1 %cmp, i32 %load.c, i32 %neg.c
+  %add = add i32 %sel, %load.b
+  %gep.a = getelementptr inbounds i32, ptr %a, i64 %iv
+  store i32 %add, ptr %gep.a, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, %N
+  br i1 %exitcond, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; DEBUGLOC: [[SELLOC]] = !DILocation(line: 9
diff --git a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll
index 5052ba8117751..bb8e19e3175f1 100644
--- a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll
+++ b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll
@@ -109,6 +109,31 @@ exit:
   ret void
 }
 
+define void @widen_intrinsic_dbg(i64 %n, ptr %y, ptr %x) {
+; DEBUGLOC-LABEL: define void @widen_intrinsic_dbg(
+; DEBUGLOC: vector.body:
+; DEBUGLOC: = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}), !dbg ![[INTRINSIC_LOC:[0-9]+]]
+; DEBUGLOC: loop:
+; DEBUGLOC: = call float @llvm.sqrt.f32(float %{{.+}}), !dbg ![[INTRINSIC_LOC]]
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %gep.y = getelementptr inbounds float, ptr %y, i64 %iv
+  %load = load float, ptr %gep.y, align 4
+  %call = call float @llvm.sqrt.f32(float %load)
+  %gep.x = getelementptr inbounds float, ptr %x, i64 %iv
+  store float %call, ptr %gep.x, align 4
+  %iv.next = add i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, %n
+  br i1 %exitcond, label %exit, label %loop
+
+exit:
+  ret void
+}
+
 !0 = !{!0, !1}
 !1 = !{!"llvm.loop.vectorize.width", i32 4}
 ; CHECK-NOT: !{metadata !"llvm.loop.vectorize.width", i32 4}
@@ -116,3 +141,4 @@ exit:
 
 ; DEBUGLOC: ![[RESUMELOC]] = !DILocation(line: 2
 ; DEBUGLOC: ![[PTRIVLOC]] = !DILocation(line: 12
+; DEBUGLOC: ![[INTRINSIC_LOC]] = !DILocation(line: 44
diff --git a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-reduction-inloop.ll
new file mode 100644
index 0000000000000..57f0dc205dba1
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-reduction-inloop.ll
@@ -0,0 +1,34 @@
+; RUN: opt < %s -passes=debugify,loop-vectorize -force-vector-width=4 -prefer-inloop-reductions -S | FileCheck %s -check-prefix DEBUGLOC
+
+; Testing that the debug locations of the generated vector instructions are the
+; same as those of their scalar counterparts.
+
+define i32 @reduction_sum(ptr %A, ptr %B) {
+; DEBUGLOC-LABEL: define i32 @reduction_sum(
+; DEBUGLOC: vector.body:
+; DEBUGLOC: = load <4 x i32>, ptr %{{.+}}, align 4, !dbg ![[LOADLOC:[0-9]+]]
+; DEBUGLOC: = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %{{.+}}), !dbg ![[REDLOC:[0-9]+]]
+; DEBUGLOC: loop:
+; DEBUGLOC: %[[LOAD:.+]] = load i32, ptr %{{.+}}, align 4, !dbg ![[LOADLOC]]
+; DEBUGLOC: = add i32 %{{.+}}, %[[LOAD]], !dbg ![[REDLOC]]
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %red = phi i32 [ 0, %entry ], [ %red.next, %loop ]
+  %gep = getelementptr inbounds i32, ptr %A, i64 %iv
+  %load = load i32, ptr %gep, align 4
+  %red.next = add i32 %red, %load
+  %iv.next = add i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 256
+  br i1 %exitcond, label %exit, label %loop
+
+exit:
+  %red.lcssa = phi i32 [ %red.next, %loop ]
+  ret i32 %red.lcssa
+}
+
+; DEBUGLOC: ![[LOADLOC]] = !DILocation(line: 5
+; DEBUGLOC: ![[REDLOC]] = !DILocation(line: 6
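
Note (illustrative, not part of the patch): the mechanism both recipes rely on is that `VPTransformState::setDebugLocFrom` points the IR builder at the recipe's `DebugLoc` before the recipe emits any instructions, so the widened call and the in-loop reduction inherit the scalar instruction's `!dbg`, which is what the FileCheck patterns above match in both the vector and scalar loops. A minimal standalone sketch of that pattern using a plain `IRBuilder` (the helper name `emitFAddAt` is invented for the example):

```cpp
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Value.h"

using namespace llvm;

// Sketch only: emit an fadd that carries the debug location of the scalar
// instruction it replaces. Any instruction created while the builder's
// current debug location is set inherits that location as !dbg metadata.
static Value *emitFAddAt(IRBuilderBase &Builder, Value *LHS, Value *RHS,
                         DebugLoc DL) {
  Builder.SetCurrentDebugLocation(DL); // roughly what State.setDebugLocFrom does
  return Builder.CreateFAdd(LHS, RHS, "red.add");
}
```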