diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index a618d72b406b3..dd8bef69ad170 100644
--- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -1122,6 +1122,7 @@ class WidenIV {
   bool widenLoopCompare(NarrowIVDefUse DU);
   bool widenWithVariantUse(NarrowIVDefUse DU);
 
+  bool isKnownNonNegative(const SCEV *S);
   void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
 
 private:
@@ -1885,12 +1886,59 @@ Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU, SCEVExpander &Rewri
   return WideUse;
 }
 
+/// A special version of isKnownNonNegative which additionally tries
+/// to prove that an addrec with a negative step would be non-negative
+/// because the start is non-negative, and the increment would have
+/// "nuw" if using a sub-instruction.
+/// Returns true only if \p S is proven non-negative; false means "unknown".
+// TODO: All of this should be sunk into SCEV once we figure out how to
+// reasonably do so without exploding compile time.
+bool WidenIV::isKnownNonNegative(const SCEV *S) {
+  const SCEV *Zero = SE->getZero(S->getType());
+  if (SE->isKnownPredicate(ICmpInst::ICMP_SGE, S, Zero))
+    return true;
+  auto *AR = dyn_cast<SCEVAddRecExpr>(S);
+  if (!AR || !AR->isAffine())
+    return false;
+
+  const SCEV *Start = AR->getStart();
+  const SCEV *Step = AR->getStepRecurrence(*SE);
+  // For a negative step, we can prove the result non-negative if the addrec
+  // only traverses values in the range zext([0,UINT_MAX]).
+  // TODO: Consider extending this for unknown steps?  Would need to
+  // handle the positive step bound.
+  if (!SE->isKnownNegative(Step))
+    return false;
+  if (!SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGE, Start, Zero))
+    return false;
+
+  // Check for the unsigned form of these comparisons.  The signed form
+  // should have been handled recursively in the query above.
+  // Step is negative, so UINT_MAX - smin(Step) wraps to |smin(Step)| - 1:
+  // "X u> N" means X is at least as large as the biggest possible decrement.
+  uint32_t BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
+  const SCEV *N = SE->getConstant(APInt::getMaxValue(BitWidth) -
+                                  SE->getSignedRangeMin(Step));
+  // A guard on the pre-increment value extends non-negativity inductively: a
+  // non-negative value that is u> N stays non-negative after adding Step.
+  if (SE->isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N))
+    return true;
+
+  // A guard on the post-increment value is not sufficient on its own: if the
+  // increment wraps below zero, the result is a huge unsigned number which
+  // is trivially u> N (e.g. {0,+,-1} has post-inc {-1,+,-1}, always u> 0).
+  // Also require Start s> N, so the first decrement cannot go negative; later
+  // ones start from a non-negative post-inc value that is u> N.
+  if (!SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGT, Start, N))
+    return false;
+  const SCEV *PostInc = AR->getPostIncExpr(*SE);
+  return SE->isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, PostInc, N);
+}
+
 /// Add eligible users of NarrowDef to NarrowIVUsers.
 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
-  const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
-  bool NonNegativeDef =
-      SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
-                           SE->getZero(NarrowSCEV->getType()));
+  assert(L->contains(NarrowDef));
+  const bool NonNegativeDef = isKnownNonNegative(SE->getSCEV(NarrowDef));
   for (User *U : NarrowDef->users()) {
     Instruction *NarrowUser = cast<Instruction>(U);
 
diff --git a/llvm/test/Transforms/IndVarSimplify/widen-nonnegative-countdown.ll b/llvm/test/Transforms/IndVarSimplify/widen-nonnegative-countdown.ll
index b0392cc6fe2c1..9591944fef386 100644
--- a/llvm/test/Transforms/IndVarSimplify/widen-nonnegative-countdown.ll
+++ b/llvm/test/Transforms/IndVarSimplify/widen-nonnegative-countdown.ll
@@ -82,12 +82,10 @@ define void @zext_postinc(ptr %A, i32 %start) {
 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
 ; CHECK: for.body:
 ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[J_016_US:%.*]] = phi i32 [ [[INC_US:%.*]], [[FOR_BODY]] ], [ [[START]], [[FOR_BODY_PREHEADER]] ]
 ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT: tail call void @use_ptr(ptr [[ARRAYIDX_US]])
-; CHECK-NEXT: [[INC_US]] = add nsw i32 [[J_016_US]], -1
-; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i32 [[INC_US]], 6
 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
+; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i64 [[INDVARS_IV_NEXT]], 6
 ; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]]
 ; CHECK: exit.loopexit:
 ; CHECK-NEXT: br label [[EXIT]]
@@ -124,8 +122,8 @@ define void @zext_preinc(ptr %A, i32 %start) {
 ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT: tail call void @use_ptr(ptr [[ARRAYIDX_US]])
-; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i64 [[INDVARS_IV]], 6
 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
+; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i64 [[INDVARS_IV]], 6
 ; CHECK-NEXT: br i1
[[CMP2_US]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]] ; CHECK: exit.loopexit: ; CHECK-NEXT: br label [[EXIT]] @@ -223,8 +221,7 @@ define void @sext_postinc(ptr %A, i32 %start) { ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: tail call void @use_ptr(ptr [[ARRAYIDX_US]]) ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i32 [[TMP1]], 6 +; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i64 [[INDVARS_IV_NEXT]], 6 ; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]] ; CHECK: exit.loopexit: ; CHECK-NEXT: br label [[EXIT]] @@ -262,8 +259,7 @@ define void @sext_preinc(ptr %A, i32 %start) { ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: tail call void @use_ptr(ptr [[ARRAYIDX_US]]) ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32 -; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i32 [[TMP1]], 6 +; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i64 [[INDVARS_IV]], 6 ; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]] ; CHECK: exit.loopexit: ; CHECK-NEXT: br label [[EXIT]] @@ -365,15 +361,13 @@ define void @zext_postinc_offset_constant_one(ptr %A, i32 %start) { ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[J_016_US:%.*]] = phi i32 [ [[INC_US:%.*]], [[FOR_BODY]] ], [ [[START]], [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32 ; CHECK-NEXT: [[ADD_US:%.*]] = add i32 [[TMP1]], 1 ; CHECK-NEXT: [[IDXPROM_US:%.*]] = zext i32 [[ADD_US]] to i64 ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 
[[IDXPROM_US]] ; CHECK-NEXT: tail call void @use_ptr(ptr [[ARRAYIDX_US]]) -; CHECK-NEXT: [[INC_US]] = add nsw i32 [[J_016_US]], -1 -; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i32 [[INC_US]], 6 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 +; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i64 [[INDVARS_IV_NEXT]], 6 ; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]] ; CHECK: exit.loopexit: ; CHECK-NEXT: br label [[EXIT]] @@ -414,8 +408,8 @@ define void @zext_preinc_offset_constant_one(ptr %A, i32 %start) { ; CHECK-NEXT: [[IDXPROM_US:%.*]] = zext i32 [[ADD_US]] to i64 ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM_US]] ; CHECK-NEXT: tail call void @use_ptr(ptr [[ARRAYIDX_US]]) -; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i64 [[INDVARS_IV]], 6 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 +; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i64 [[INDVARS_IV]], 6 ; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]] ; CHECK: exit.loopexit: ; CHECK-NEXT: br label [[EXIT]] @@ -521,8 +515,7 @@ define void @sext_postinc_offset_constant_one(ptr %A, i32 %start) { ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM_US]] ; CHECK-NEXT: tail call void @use_ptr(ptr [[ARRAYIDX_US]]) ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i32 [[TMP2]], 6 +; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i64 [[INDVARS_IV_NEXT]], 6 ; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]] ; CHECK: exit.loopexit: ; CHECK-NEXT: br label [[EXIT]] @@ -564,8 +557,7 @@ define void @sext_preinc_offset_constant_one(ptr %A, i32 %start) { ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM_US]] ; CHECK-NEXT: tail call void @use_ptr(ptr [[ARRAYIDX_US]]) ; 
CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVARS_IV]] to i32 -; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i32 [[TMP2]], 6 +; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i64 [[INDVARS_IV]], 6 ; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]] ; CHECK: exit.loopexit: ; CHECK-NEXT: br label [[EXIT]] @@ -662,15 +654,18 @@ define void @zext_postinc_offset_constant_minus_one(ptr %A, i32 %start) { ; CHECK-NEXT: [[NONPOS:%.*]] = icmp slt i32 [[START:%.*]], 2 ; CHECK-NEXT: br i1 [[NONPOS]], label [[EXIT:%.*]], label [[FOR_BODY_PREHEADER:%.*]] ; CHECK: for.body.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[START]] to i64 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[J_016_US:%.*]] = phi i32 [ [[INC_US:%.*]], [[FOR_BODY]] ], [ [[START]], [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: [[ADD_US:%.*]] = add i32 [[J_016_US]], -1 ; CHECK-NEXT: [[IDXPROM_US:%.*]] = zext i32 [[ADD_US]] to i64 ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM_US]] ; CHECK-NEXT: tail call void @use_ptr(ptr [[ARRAYIDX_US]]) +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 ; CHECK-NEXT: [[INC_US]] = add nsw i32 [[J_016_US]], -1 -; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i32 [[INC_US]], 6 +; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i64 [[INDVARS_IV_NEXT]], 6 ; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]] ; CHECK: exit.loopexit: ; CHECK-NEXT: br label [[EXIT]] @@ -711,9 +706,9 @@ define void @zext_preinc_offset_constant_minus_one(ptr %A, i32 %start) { ; CHECK-NEXT: [[IDXPROM_US:%.*]] = zext i32 [[ADD_US]] to i64 ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM_US]] ; CHECK-NEXT: tail call void @use_ptr(ptr [[ARRAYIDX_US]]) +; 
CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 ; CHECK-NEXT: [[INC_US]] = add nsw i32 [[J_016_US]], -1 ; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i64 [[INDVARS_IV]], 6 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 ; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]] ; CHECK: exit.loopexit: ; CHECK-NEXT: br label [[EXIT]] @@ -811,14 +806,14 @@ define void @sext_postinc_offset_constant_minus_one(ptr %A, i32 %start) { ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32 -; CHECK-NEXT: [[ADD_US:%.*]] = add i32 [[TMP1]], -1 +; CHECK-NEXT: [[J_016_US:%.*]] = phi i32 [ [[INC_US:%.*]], [[FOR_BODY]] ], [ [[START]], [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[ADD_US:%.*]] = add i32 [[J_016_US]], -1 ; CHECK-NEXT: [[IDXPROM_US:%.*]] = sext i32 [[ADD_US]] to i64 ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM_US]] ; CHECK-NEXT: tail call void @use_ptr(ptr [[ARRAYIDX_US]]) ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i32 [[TMP2]], 6 +; CHECK-NEXT: [[INC_US]] = add nsw i32 [[J_016_US]], -1 +; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i64 [[INDVARS_IV_NEXT]], 6 ; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]] ; CHECK: exit.loopexit: ; CHECK-NEXT: br label [[EXIT]] @@ -854,14 +849,14 @@ define void @sext_preinc_offset_constant_minus_one(ptr %A, i32 %start) { ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32 -; CHECK-NEXT: [[ADD_US:%.*]] = 
add i32 [[TMP1]], -1 +; CHECK-NEXT: [[J_016_US:%.*]] = phi i32 [ [[INC_US:%.*]], [[FOR_BODY]] ], [ [[START]], [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[ADD_US:%.*]] = add i32 [[J_016_US]], -1 ; CHECK-NEXT: [[IDXPROM_US:%.*]] = sext i32 [[ADD_US]] to i64 ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM_US]] ; CHECK-NEXT: tail call void @use_ptr(ptr [[ARRAYIDX_US]]) ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVARS_IV]] to i32 -; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i32 [[TMP2]], 6 +; CHECK-NEXT: [[INC_US]] = add nsw i32 [[J_016_US]], -1 +; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i64 [[INDVARS_IV]], 6 ; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]] ; CHECK: exit.loopexit: ; CHECK-NEXT: br label [[EXIT]]