diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index f4163b0743a9a..34ac50f286c5f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1700,9 +1700,13 @@ class VPVectorEndPointerRecipe : public VPRecipeWithIRFlags,
 
   VP_CLASSOF_IMPL(VPDef::VPVectorEndPointerSC)
 
+  VPValue *getPtr() const { return getOperand(0); }
+
   VPValue *getVFValue() { return getOperand(1); }
   const VPValue *getVFValue() const { return getOperand(1); }
 
+  Type *getIndexedTy() const { return IndexedTy; }
+
   void execute(VPTransformState &State) override;
 
   bool onlyFirstLaneUsed(const VPValue *Op) const override {
@@ -1727,7 +1731,7 @@ class VPVectorEndPointerRecipe : public VPRecipeWithIRFlags,
   }
 
   VPVectorEndPointerRecipe *clone() override {
-    return new VPVectorEndPointerRecipe(getOperand(0), getVFValue(), IndexedTy,
+    return new VPVectorEndPointerRecipe(getPtr(), getVFValue(), IndexedTy,
                                         getGEPNoWrapFlags(), getDebugLoc());
   }
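Context for reviewers (not part of the patch): `VPVectorEndPointerRecipe` materializes the pointer used by a reverse (negative-stride) vector access, and operand 1 is the element count it steps back by: VF before this patch, EVL after. A minimal sketch of the address arithmetic the recipe emits, assuming unit stride and i32 elements; the helper name is hypothetical:

```c++
#include <cstdint>

// Mirrors the two-GEP chain visible in the tests below:
//   P = Ptr + 0 * Count;  EndPtr = P + (1 - Count)
// i.e. the lowest-addressed element of a Count-wide reversed group.
int32_t *vectorEndPointer(int32_t *Ptr, int64_t Count) {
  int32_t *P = Ptr + 0 * Count; // the 0 * Count term is the stride part
  return P + (1 - Count);
}
```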
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index ac6be09ef271d..c46d004df9be7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2171,6 +2171,29 @@ static VPRecipeBase *createEVLRecipe(VPValue *HeaderMask,
       .Default([&](VPRecipeBase *R) { return nullptr; });
 }
 
+// Replace VF operands with EVL in recipes that use VF.
+static void replaceVFWithEVL(VPValue &VF, VPValue &EVL) {
+  for (VPUser *U : to_vector(VF.users())) {
+    if (auto *VEP = dyn_cast<VPVectorEndPointerRecipe>(U)) {
+      VPValue *Ptr = VEP->getPtr();
+      GEPNoWrapFlags NewFlags = VEP->getGEPNoWrapFlags();
+      // Replacing VF with EVL ensures that VPVectorEndPointer will not compute
+      // out-of-bounds, as long as the original pointer had the inbounds flag.
+      if (!NewFlags.isInBounds()) {
+        if (auto *GEP = dyn_cast<GetElementPtrInst>(
+                Ptr->getUnderlyingValue()->stripPointerCasts()))
+          if (GEP->isInBounds())
+            NewFlags = GEPNoWrapFlags::inBounds();
+      }
+      auto *NewVEP = new VPVectorEndPointerRecipe(
+          Ptr, &EVL, VEP->getIndexedTy(), NewFlags, VEP->getDebugLoc());
+      NewVEP->insertBefore(VEP);
+      VEP->replaceAllUsesWith(NewVEP);
+      VEP->eraseFromParent();
+    }
+  }
+}
+
 /// Replace recipes with their EVL variants.
 static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
   Type *CanonicalIVType = Plan.getCanonicalIV()->getScalarType();
@@ -2198,12 +2221,8 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
     PrevEVL = Builder.createScalarPhi({MaxEVL, &EVL}, DebugLoc(), "prev.evl");
   }
 
-  for (VPUser *U : to_vector(Plan.getVF().users())) {
-    if (auto *R = dyn_cast<VPVectorEndPointerRecipe>(U))
-      R->setOperand(1, &EVL);
-  }
-
   SmallVector<VPRecipeBase *> ToErase;
+  replaceVFWithEVL(Plan.getVF(), EVL);
 
   for (VPValue *HeaderMask : collectAllHeaderMasks(Plan)) {
     for (VPUser *U : collectUsersRecursively(HeaderMask)) {
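The reason a fresh recipe is built here instead of calling `setOperand(1, &EVL)` as before: the no-wrap flags are baked into the recipe, and only a rebuild lets the transform re-derive `inbounds` from the underlying IR pointer. A worked bounds check with hypothetical values (a 3-element tail, VF = 4, EVL = 3), sketching why the EVL-based end pointer is safe:

```c++
#include <cassert>
#include <cstdint>

int main() {
  int32_t A[3] = {0, 1, 2}; // tail iteration: exactly 3 elements remain
  int32_t *Last = &A[2];    // pointer to the current (highest) element
  int64_t EVL = 3;

  // EVL-based end pointer: &A[2 + 1 - 3] == &A[0], inside the object,
  // so the GEP may keep (or regain) the inbounds flag.
  assert(Last + (1 - EVL) == &A[0]);

  // With VF = 4 the end pointer would be &A[2 + 1 - 4] == &A[-1], one
  // element before the object -- which is why inbounds had to be dropped
  // while VF was the operand. (Not computed here: forming &A[-1] is UB.)
  return 0;
}
```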
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll
index 96db5bf4e9acc..dc2683e20ade9 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll
@@ -34,16 +34,16 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt
 ; IF-EVL-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP5]] to i64
 ; IF-EVL-NEXT:    [[TMP9:%.*]] = mul i64 0, [[TMP18]]
 ; IF-EVL-NEXT:    [[TMP10:%.*]] = sub i64 1, [[TMP18]]
-; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr i32, ptr [[TMP8]], i64 [[TMP9]]
-; IF-EVL-NEXT:    [[TMP12:%.*]] = getelementptr i32, ptr [[TMP16]], i64 [[TMP10]]
+; IF-EVL-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 [[TMP9]]
+; IF-EVL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 [[TMP10]]
 ; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
 ; IF-EVL-NEXT:    [[VP_REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
 ; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[TMP7]]
 ; IF-EVL-NEXT:    [[TMP19:%.*]] = zext i32 [[TMP5]] to i64
 ; IF-EVL-NEXT:    [[TMP14:%.*]] = mul i64 0, [[TMP19]]
 ; IF-EVL-NEXT:    [[TMP15:%.*]] = sub i64 1, [[TMP19]]
-; IF-EVL-NEXT:    [[TMP22:%.*]] = getelementptr i32, ptr [[TMP13]], i64 [[TMP14]]
-; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr i32, ptr [[TMP22]], i64 [[TMP15]]
+; IF-EVL-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 [[TMP14]]
+; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i64 [[TMP15]]
 ; IF-EVL-NEXT:    [[VP_REVERSE3:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_REVERSE]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
 ; IF-EVL-NEXT:    call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE3]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
 ; IF-EVL-NEXT:    [[TMP20:%.*]] = zext i32 [[TMP5]] to i64
@@ -137,8 +137,8 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal
 ; IF-EVL-NEXT:    [[TMP26:%.*]] = zext i32 [[TMP5]] to i64
 ; IF-EVL-NEXT:    [[TMP17:%.*]] = mul i64 0, [[TMP26]]
 ; IF-EVL-NEXT:    [[TMP18:%.*]] = sub i64 1, [[TMP26]]
-; IF-EVL-NEXT:    [[TMP19:%.*]] = getelementptr i32, ptr [[TMP16]], i64 [[TMP17]]
-; IF-EVL-NEXT:    [[TMP20:%.*]] = getelementptr i32, ptr [[TMP19]], i64 [[TMP18]]
+; IF-EVL-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i64 [[TMP17]]
+; IF-EVL-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 [[TMP18]]
 ; IF-EVL-NEXT:    [[VP_REVERSE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
 ; IF-EVL-NEXT:    [[VP_OP_LOAD4:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP20]], <vscale x 4 x i1> [[VP_REVERSE_MASK]], i32 [[TMP5]])
 ; IF-EVL-NEXT:    [[VP_REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD4]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
@@ -146,8 +146,8 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal
 ; IF-EVL-NEXT:    [[TMP27:%.*]] = zext i32 [[TMP5]] to i64
 ; IF-EVL-NEXT:    [[TMP22:%.*]] = mul i64 0, [[TMP27]]
 ; IF-EVL-NEXT:    [[TMP23:%.*]] = sub i64 1, [[TMP27]]
-; IF-EVL-NEXT:    [[TMP24:%.*]] = getelementptr i32, ptr [[TMP21]], i64 [[TMP22]]
-; IF-EVL-NEXT:    [[TMP25:%.*]] = getelementptr i32, ptr [[TMP24]], i64 [[TMP23]]
+; IF-EVL-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 [[TMP22]]
+; IF-EVL-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i64 [[TMP23]]
 ; IF-EVL-NEXT:    [[VP_REVERSE5:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_REVERSE]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
 ; IF-EVL-NEXT:    [[VP_REVERSE_MASK6:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
 ; IF-EVL-NEXT:    call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE5]], ptr align 4 [[TMP25]], <vscale x 4 x i1> [[VP_REVERSE_MASK6]], i32 [[TMP5]])
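Modulo renumbered FileCheck variables, the only change these updated lines assert is `inbounds` on the two negative-offset GEPs per access; the surrounding `vp.load`/`vp.store`/`vp.reverse` sequence is unchanged. For orientation, a hypothetical C paraphrase of the scalar loop the tests vectorize (names and signature are illustrative, not the actual .ll source): a downward-counting copy, which is what forces the negative-stride addressing above.

```c++
#include <cstdint>

// Downward-counting copy: the vectorizer turns this into reverse vector
// accesses addressed via the end pointer computed above.
void reverse_copy(int64_t startval, const int32_t *ptr, int32_t *ptr2) {
  for (int64_t i = startval - 1; i >= 0; --i)
    ptr2[i] = ptr[i];
}
```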