diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 29f3940ed6fa7..e7eb5006c694d 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7433,6 +7433,13 @@ static bool planContainsAdditionalSimplifications(VPlan &Plan,
       // comparing against the legacy cost isn't desirable.
       if (isa<VPPartialReductionRecipe>(&R))
         return true;
+
+      // The VPlan-based cost model may calculate the cost of strided load/store
+      // which can't be modeled in the legacy cost model.
+      if (isa<VPWidenLoadEVLRecipe, VPWidenStoreEVLRecipe>(&R))
+        if (cast<VPWidenMemoryRecipe>(&R)->isReverse())
+          return true;
+
       if (Instruction *UI = GetInstructionForCost(&R))
         SeenInstrs.insert(UI);
     }
@@ -8281,7 +8288,7 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
   auto *GEP = dyn_cast<GetElementPtrInst>(
       Ptr->getUnderlyingValue()->stripPointerCasts());
   VPSingleDefRecipe *VectorPtr;
-  if (Reverse) {
+  if (Reverse && !CM.foldTailWithEVL()) {
     // When folding the tail, we may compute an address that we don't in the
     // original scalar loop and it may not be inbounds. Drop Inbounds in that
     // case.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index aa5f92b235555..4583854e8b875 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2603,17 +2603,6 @@ void VPWidenLoadRecipe::print(raw_ostream &O, const Twine &Indent,
 }
 #endif
 
-/// Use all-true mask for reverse rather than actual mask, as it avoids a
-/// dependence w/o affecting the result.
-static Instruction *createReverseEVL(IRBuilderBase &Builder, Value *Operand,
-                                     Value *EVL, const Twine &Name) {
-  VectorType *ValTy = cast<VectorType>(Operand->getType());
-  Value *AllTrueMask =
-      Builder.CreateVectorSplat(ValTy->getElementCount(), Builder.getTrue());
-  return Builder.CreateIntrinsic(ValTy, Intrinsic::experimental_vp_reverse,
-                                 {Operand, AllTrueMask, EVL}, nullptr, Name);
-}
-
 void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
   auto *LI = cast<LoadInst>(&Ingredient);
 
@@ -2628,18 +2617,26 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
   Value *EVL = State.get(getEVL(), VPLane(0));
   Value *Addr = State.get(getAddr(), !CreateGather);
   Value *Mask = nullptr;
-  if (VPValue *VPMask = getMask()) {
+  if (VPValue *VPMask = getMask())
     Mask = State.get(VPMask);
-    if (isReverse())
-      Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
-  } else {
+  else
     Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
-  }
 
   if (CreateGather) {
     NewLI =
         Builder.CreateIntrinsic(DataTy, Intrinsic::vp_gather, {Addr, Mask, EVL},
                                 nullptr, "wide.masked.gather");
+  } else if (isReverse()) {
+    auto *EltTy = DataTy->getElementType();
+    auto *PtrTy = Addr->getType();
+    Value *Operands[] = {
+        Addr,
+        ConstantInt::getSigned(Builder.getInt32Ty(),
+                               -LI->getDataLayout().getTypeAllocSize(EltTy)),
+        Mask, EVL};
+    NewLI = Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_load,
+                                    {DataTy, PtrTy, Builder.getInt32Ty()},
+                                    Operands, nullptr, "vp.reverse.load");
   } else {
     VectorBuilder VBuilder(Builder);
     VBuilder.setEVL(EVL).setMask(Mask);
@@ -2650,8 +2647,6 @@
         0, Attribute::getWithAlignment(NewLI->getContext(), Alignment));
   State.addMetadata(NewLI, LI);
   Instruction *Res = NewLI;
-  if (isReverse())
-    Res = createReverseEVL(Builder, Res, EVL, "vp.reverse");
   State.set(this, Res);
 }
 
@@ -2670,14 +2665,13 @@ InstructionCost VPWidenLoadEVLRecipe::computeCost(ElementCount VF,
       getLoadStoreAlignment(const_cast<Instruction *>(&Ingredient));
   unsigned AS =
       getLoadStoreAddressSpace(const_cast<Instruction *>(&Ingredient));
-  InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost(
-      Ingredient.getOpcode(), Ty, Alignment, AS, Ctx.CostKind);
   if (!Reverse)
-    return Cost;
+    return Ctx.TTI.getMaskedMemoryOpCost(Ingredient.getOpcode(), Ty, Alignment,
+                                         AS, Ctx.CostKind);
 
-  return Cost + Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
-                                       cast<VectorType>(Ty), {}, Ctx.CostKind,
-                                       0);
+  return Ctx.TTI.getStridedMemoryOpCost(Ingredient.getOpcode(), Ty,
+                                        getAddr()->getUnderlyingValue(), false,
+                                        Alignment, Ctx.CostKind);
 }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -2749,21 +2743,29 @@ void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
   CallInst *NewSI = nullptr;
   Value *StoredVal = State.get(StoredValue);
   Value *EVL = State.get(getEVL(), VPLane(0));
-  if (isReverse())
-    StoredVal = createReverseEVL(Builder, StoredVal, EVL, "vp.reverse");
   Value *Mask = nullptr;
-  if (VPValue *VPMask = getMask()) {
+  if (VPValue *VPMask = getMask())
     Mask = State.get(VPMask);
-    if (isReverse())
-      Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
-  } else {
+  else
    Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
-  }
+
   Value *Addr = State.get(getAddr(), !CreateScatter);
   if (CreateScatter) {
     NewSI = Builder.CreateIntrinsic(Type::getVoidTy(EVL->getContext()),
                                     Intrinsic::vp_scatter,
                                     {StoredVal, Addr, Mask, EVL});
+  } else if (isReverse()) {
+    Type *StoredValTy = StoredVal->getType();
+    auto *EltTy = cast<VectorType>(StoredValTy)->getElementType();
+    auto *PtrTy = Addr->getType();
+    Value *Operands[] = {
+        StoredVal, Addr,
+        ConstantInt::getSigned(Builder.getInt32Ty(),
+                               -SI->getDataLayout().getTypeAllocSize(EltTy)),
+        Mask, EVL};
+    NewSI = Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_store,
+                                    {StoredValTy, PtrTy, Builder.getInt32Ty()},
+                                    Operands);
   } else {
     VectorBuilder VBuilder(Builder);
     VBuilder.setEVL(EVL).setMask(Mask);
@@ -2791,14 +2793,13 @@ InstructionCost VPWidenStoreEVLRecipe::computeCost(ElementCount VF,
       getLoadStoreAlignment(const_cast<Instruction *>(&Ingredient));
   unsigned AS =
       getLoadStoreAddressSpace(const_cast<Instruction *>(&Ingredient));
-  InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost(
-      Ingredient.getOpcode(), Ty, Alignment, AS, Ctx.CostKind);
   if (!Reverse)
-    return Cost;
+    return Ctx.TTI.getMaskedMemoryOpCost(Ingredient.getOpcode(), Ty, Alignment,
+                                         AS, Ctx.CostKind);
 
-  return Cost + Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
-                                       cast<VectorType>(Ty), {}, Ctx.CostKind,
-                                       0);
+  return Ctx.TTI.getStridedMemoryOpCost(Ingredient.getOpcode(), Ty,
+                                        getAddr()->getUnderlyingValue(), false,
+                                        Alignment, Ctx.CostKind);
 }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll
index 5b579b0749c67..1e56dc6feccf6 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll
@@ -34,21 +34,11 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt
 ; IF-EVL-NEXT:    [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; IF-EVL-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], -1
 ; IF-EVL-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[TMP7]]
-; IF-EVL-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP5]] to i64
-; IF-EVL-NEXT:    [[TMP9:%.*]] = mul i64 0, [[TMP18]]
-; IF-EVL-NEXT:    [[TMP10:%.*]] = sub i64 1, [[TMP18]]
-; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr i32, ptr [[TMP8]], i64 [[TMP9]]
-; IF-EVL-NEXT:    [[TMP12:%.*]] = getelementptr i32, ptr [[TMP16]], i64 [[TMP10]]
-; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
-; IF-EVL-NEXT:    [[VP_REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
+; IF-EVL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0
+; IF-EVL-NEXT:    [[VP_REVERSE_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.strided.load.nxv4i32.p0.i32(ptr align 4 [[TMP12]], i32 -4, <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
 ; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[TMP7]]
-; IF-EVL-NEXT:    [[TMP19:%.*]] = zext i32 [[TMP5]] to i64
-; IF-EVL-NEXT:    [[TMP14:%.*]] = mul i64 0, [[TMP19]]
-; IF-EVL-NEXT:    [[TMP15:%.*]] = sub i64 1, [[TMP19]]
-; IF-EVL-NEXT:    [[TMP22:%.*]] = getelementptr i32, ptr [[TMP13]], i64 [[TMP14]]
-; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr i32, ptr [[TMP22]], i64 [[TMP15]]
-; IF-EVL-NEXT:    [[VP_REVERSE3:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_REVERSE]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
-; IF-EVL-NEXT:    call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE3]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
+; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0
+; IF-EVL-NEXT:    call void @llvm.experimental.vp.strided.store.nxv4i32.p0.i32(<vscale x 4 x i32> [[VP_REVERSE_LOAD]], ptr align 4 [[TMP17]], i32 -4, <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
 ; IF-EVL-NEXT:    [[TMP20:%.*]] = zext i32 [[TMP5]] to i64
 ; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP20]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
@@ -148,23 +138,11 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal
 ; IF-EVL-NEXT:    [[TMP14:%.*]] = icmp slt <vscale x 4 x i32> [[VP_OP_LOAD]], splat (i32 100)
 ; IF-EVL-NEXT:    [[TMP15:%.*]] = select <vscale x 4 x i1> [[TMP10]], <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> zeroinitializer
 ; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr i32, ptr [[PTR1:%.*]], i64 [[TMP11]]
-; IF-EVL-NEXT:    [[TMP26:%.*]] = zext i32 [[TMP5]] to i64
-; IF-EVL-NEXT:    [[TMP17:%.*]] = mul i64 0, [[TMP26]]
-; IF-EVL-NEXT:    [[TMP18:%.*]] = sub i64 1, [[TMP26]]
-; IF-EVL-NEXT:    [[TMP19:%.*]] = getelementptr i32, ptr [[TMP16]], i64 [[TMP17]]
-; IF-EVL-NEXT:    [[TMP20:%.*]] = getelementptr i32, ptr [[TMP19]], i64 [[TMP18]]
-; IF-EVL-NEXT:    [[VP_REVERSE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
-; IF-EVL-NEXT:    [[VP_OP_LOAD4:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP20]], <vscale x 4 x i1> [[VP_REVERSE_MASK]], i32 [[TMP5]])
-; IF-EVL-NEXT:    [[VP_REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD4]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
+; IF-EVL-NEXT:    [[TMP20:%.*]] = getelementptr i32, ptr [[TMP16]], i32 0
+; IF-EVL-NEXT:    [[VP_REVERSE_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.strided.load.nxv4i32.p0.i32(ptr align 4 [[TMP20]], i32 -4, <vscale x 4 x i1> [[TMP15]], i32 [[TMP5]])
 ; IF-EVL-NEXT:    [[TMP21:%.*]] = getelementptr i32, ptr [[PTR2:%.*]], i64 [[TMP11]]
-; IF-EVL-NEXT:    [[TMP27:%.*]] = zext i32 [[TMP5]] to i64
-; IF-EVL-NEXT:    [[TMP22:%.*]] = mul i64 0, [[TMP27]]
-; IF-EVL-NEXT:    [[TMP23:%.*]] = sub i64 1, [[TMP27]]
-; IF-EVL-NEXT:    [[TMP24:%.*]] = getelementptr i32, ptr [[TMP21]], i64 [[TMP22]]
-; IF-EVL-NEXT:    [[TMP25:%.*]] = getelementptr i32, ptr [[TMP24]], i64 [[TMP23]]
-; IF-EVL-NEXT:    [[VP_REVERSE5:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_REVERSE]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
-; IF-EVL-NEXT:    [[VP_REVERSE_MASK6:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
-; IF-EVL-NEXT:    call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE5]], ptr align 4 [[TMP25]], <vscale x 4 x i1> [[VP_REVERSE_MASK6]], i32 [[TMP5]])
+; IF-EVL-NEXT:    [[TMP25:%.*]] = getelementptr i32, ptr [[TMP21]], i32 0
+; IF-EVL-NEXT:    call void @llvm.experimental.vp.strided.store.nxv4i32.p0.i32(<vscale x 4 x i32> [[VP_REVERSE_LOAD]], ptr align 4 [[TMP25]], i32 -4, <vscale x 4 x i1> [[TMP15]], i32 [[TMP5]])
 ; IF-EVL-NEXT:    [[TMP28:%.*]] = zext i32 [[TMP5]] to i64
 ; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP28]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
@@ -258,48 +236,33 @@ define void @multiple_reverse_vector_pointer(ptr noalias %a, ptr noalias %b, ptr
 ; IF-EVL-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; IF-EVL:       vector.ph:
 ; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 16
+; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 8
 ; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[TMP1]], 1
 ; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]]
 ; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
 ; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; IF-EVL-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP3]], 16
+; IF-EVL-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP3]], 8
 ; IF-EVL-NEXT:    [[TMP5:%.*]] = sub i64 1024, [[N_VEC]]
 ; IF-EVL-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; IF-EVL:       vector.body:
 ; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; IF-EVL-NEXT:    [[AVL:%.*]] = sub i64 1025, [[EVL_BASED_IV]]
-; IF-EVL-NEXT:    [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true)
+; IF-EVL-NEXT:    [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true)
 ; IF-EVL-NEXT:    [[OFFSET_IDX:%.*]] = sub i64 1024, [[EVL_BASED_IV]]
 ; IF-EVL-NEXT:    [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; IF-EVL-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP7]]
-; IF-EVL-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP6]] to i64
-; IF-EVL-NEXT:    [[TMP10:%.*]] = mul i64 0, [[TMP9]]
-; IF-EVL-NEXT:    [[TMP11:%.*]] = sub i64 1, [[TMP9]]
-; IF-EVL-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[TMP8]], i64 [[TMP10]]
-; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[TMP12]], i64 [[TMP11]]
-; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP13]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP6]])
-; IF-EVL-NEXT:    [[VP_REVERSE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vp.reverse.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP6]])
-; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[B:%.*]], <vscale x 16 x i8> [[VP_REVERSE]]
-; IF-EVL-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 16 x i8> @llvm.vp.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP6]])
+; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[TMP8]], i32 0
+; IF-EVL-NEXT:    [[VP_REVERSE_LOAD:%.*]] = call <vscale x 8 x i8> @llvm.experimental.vp.strided.load.nxv8i8.p0.i32(ptr align 1 [[TMP13]], i32 -1, <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
+; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[B:%.*]], <vscale x 8 x i8> [[VP_REVERSE_LOAD]]
+; IF-EVL-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> align 1 [[TMP14]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
 ; IF-EVL-NEXT:    [[TMP15:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 [[TMP7]]
-; IF-EVL-NEXT:    [[TMP16:%.*]] = zext i32 [[TMP6]] to i64
-; IF-EVL-NEXT:    [[TMP17:%.*]] = mul i64 0, [[TMP16]]
-; IF-EVL-NEXT:    [[TMP18:%.*]] = sub i64 1, [[TMP16]]
-; IF-EVL-NEXT:    [[TMP19:%.*]] = getelementptr i8, ptr [[TMP15]], i64 [[TMP17]]
-; IF-EVL-NEXT:    [[TMP20:%.*]] = getelementptr i8, ptr [[TMP19]], i64 [[TMP18]]
-; IF-EVL-NEXT:    [[VP_REVERSE1:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vp.reverse.nxv16i8(<vscale x 16 x i8> [[WIDE_MASKED_GATHER]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP6]])
-; IF-EVL-NEXT:    call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_REVERSE1]], ptr align 1 [[TMP20]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP6]])
+; IF-EVL-NEXT:    [[TMP20:%.*]] = getelementptr i8, ptr [[TMP15]], i32 0
+; IF-EVL-NEXT:    call void @llvm.experimental.vp.strided.store.nxv8i8.p0.i32(<vscale x 8 x i8> [[WIDE_MASKED_GATHER]], ptr align 1 [[TMP20]], i32 -1, <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
 ; IF-EVL-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[D:%.*]], i64 [[TMP7]]
-; IF-EVL-NEXT:    [[TMP22:%.*]] = zext i32 [[TMP6]] to i64
-; IF-EVL-NEXT:    [[TMP23:%.*]] = mul i64 0, [[TMP22]]
-; IF-EVL-NEXT:    [[TMP24:%.*]] = sub i64 1, [[TMP22]]
-; IF-EVL-NEXT:    [[TMP25:%.*]] = getelementptr i8, ptr [[TMP21]], i64 [[TMP23]]
-; IF-EVL-NEXT:    [[TMP26:%.*]] = getelementptr i8, ptr [[TMP25]], i64 [[TMP24]]
-; IF-EVL-NEXT:    [[VP_REVERSE2:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vp.reverse.nxv16i8(<vscale x 16 x i8> [[WIDE_MASKED_GATHER]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP6]])
-; IF-EVL-NEXT:    call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_REVERSE2]], ptr align 1 [[TMP26]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP6]])
+; IF-EVL-NEXT:    [[TMP26:%.*]] = getelementptr i8, ptr [[TMP21]], i32 0
+; IF-EVL-NEXT:    call void @llvm.experimental.vp.strided.store.nxv8i8.p0.i32(<vscale x 8 x i8> [[WIDE_MASKED_GATHER]], ptr align 1 [[TMP26]], i32 -1, <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
 ; IF-EVL-NEXT:    [[TMP27:%.*]] = zext i32 [[TMP6]] to i64
 ; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP27]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll
index a2f85b9ed4ffe..ab1fe4f38d3b1 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll
@@ -38,13 +38,8 @@ define void @lshift_significand(i32 %n, ptr nocapture writeonly %dst) {
 ; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; CHECK-NEXT:    [[TMP13:%.*]] = sub nuw nsw i64 1, [[TMP12]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP13]]
-; CHECK-NEXT:    [[TMP15:%.*]] = zext i32 [[TMP11]] to i64
-; CHECK-NEXT:    [[TMP17:%.*]] = mul i64 0, [[TMP15]]
-; CHECK-NEXT:    [[TMP18:%.*]] = sub i64 1, [[TMP15]]
-; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr i64, ptr [[TMP14]], i64 [[TMP17]]
-; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr i64, ptr [[TMP19]], i64 [[TMP18]]
-; CHECK-NEXT:    [[VP_REVERSE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vp.reverse.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> splat (i1 true), i32 [[TMP11]])
-; CHECK-NEXT:    call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_REVERSE]], ptr align 8 [[TMP20]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP11]])
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr i64, ptr [[TMP14]], i32 0
+; CHECK-NEXT:    call void @llvm.experimental.vp.strided.store.nxv2i64.p0.i32(<vscale x 2 x i64> zeroinitializer, ptr align 8 [[TMP20]], i32 -8, <vscale x 2 x i1> splat (i1 true), i32 [[TMP11]])
 ; CHECK-NEXT:    [[TMP21:%.*]] = zext i32 [[TMP11]] to i64
 ; CHECK-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]
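
Illustration (not part of the patch): after this change a reverse EVL access is emitted as a single negative-stride VP memory intrinsic instead of a unit-stride `vp.load`/`vp.store` bracketed by `llvm.experimental.vp.reverse` calls. Below is a minimal standalone sketch of that pattern, assuming i32 elements (so the stride is -4); the function and value names are made up for the example, and `%src`/`%dst` are assumed to already point at the highest-addressed element of the group, as the vectorizer's address computation above arranges.

```llvm
; Reverse-copy %evl i32 elements: lane 0 accesses the base address and each
; later lane steps 4 bytes downwards, so the loaded vector already holds the
; elements in source-loop order without a separate vp.reverse.
define void @reverse_copy_sketch(ptr %src, ptr %dst, i32 %evl) {
entry:
  %v = call <vscale x 4 x i32> @llvm.experimental.vp.strided.load.nxv4i32.p0.i32(ptr align 4 %src, i32 -4, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  call void @llvm.experimental.vp.strided.store.nxv4i32.p0.i32(<vscale x 4 x i32> %v, ptr align 4 %dst, i32 -4, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret void
}

declare <vscale x 4 x i32> @llvm.experimental.vp.strided.load.nxv4i32.p0.i32(ptr, i32, <vscale x 4 x i1>, i32)
declare void @llvm.experimental.vp.strided.store.nxv4i32.p0.i32(<vscale x 4 x i32>, ptr, i32, <vscale x 4 x i1>, i32)
```

On RISC-V these strided forms typically select to vlse/vsse with a negative stride, which is why the recipes' computeCost now queries TTI::getStridedMemoryOpCost rather than a masked memory op plus an SK_Reverse shuffle.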