diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 7d6dbd51a404d..4a733ff2395c5 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -2807,8 +2807,8 @@ LoopAccessInfo::recordAnalysis(StringRef RemarkName, const Instruction *I) {
 
 bool LoopAccessInfo::isInvariant(Value *V) const {
   auto *SE = PSE->getSE();
-  // TODO: Is this really what we want? Even without FP SCEV, we may want some
-  // trivially loop-invariant FP values to be considered invariant.
+  if (TheLoop->isLoopInvariant(V))
+    return true;
   if (!SE->isSCEVable(V->getType()))
     return false;
   const SCEV *S = SE->getSCEV(V);
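(For reference, with the hunk above applied the full query reads as below. This is a sketch assembled from the hunk and its context lines; the closing SCEV fallback is assumed from the pre-existing implementation rather than shown in the diff.)

bool LoopAccessInfo::isInvariant(Value *V) const {
  auto *SE = PSE->getSE();
  // New fast path: syntactically loop-invariant values are accepted directly.
  // This now also covers trivially invariant FP values, whose types are not
  // SCEV-able and previously fell through to "not invariant".
  if (TheLoop->isLoopInvariant(V))
    return true;
  if (!SE->isSCEVable(V->getType()))
    return false;
  // Assumed fallback: SCEV can additionally prove invariance of expressions
  // that only simplify under the predicates collected by PSE.
  const SCEV *S = SE->getSCEV(V);
  return SE->isLoopInvariant(S, TheLoop);
}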
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 8c41f896ad622..12148e1cdd8f4 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1567,7 +1567,7 @@ class LoopVectorizationCostModel {
 
   /// Returns true if \p Op should be considered invariant and if it is
   /// trivially hoistable.
-  bool shouldConsiderInvariant(Value *Op);
+  bool shouldConsiderInvariant(Value *Op) const;
 
   /// Return the value of vscale used for tuning the cost model.
   std::optional<unsigned> getVScaleForTuning() const { return VScaleForTuning; }
@@ -1763,8 +1763,7 @@ class LoopVectorizationCostModel {
   /// extracted.
   bool needsExtract(Value *V, ElementCount VF) const {
     Instruction *I = dyn_cast<Instruction>(V);
-    if (VF.isScalar() || !I || !TheLoop->contains(I) ||
-        TheLoop->isLoopInvariant(I) ||
+    if (VF.isScalar() || !I || shouldConsiderInvariant(I) ||
         getWideningDecision(I, VF) == CM_Scalarize)
       return false;
 
@@ -3118,7 +3117,7 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
   // A helper that returns true if the given value is a getelementptr
   // instruction contained in the loop.
   auto IsLoopVaryingGEP = [&](Value *V) {
-    return isa<GetElementPtrInst>(V) && !TheLoop->isLoopInvariant(V);
+    return isa<GetElementPtrInst>(V) && !shouldConsiderInvariant(V);
   };
 
   // A helper that evaluates a memory access's use of a pointer. If the use will
@@ -3346,14 +3345,14 @@ bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const {
     // is correct. The easiest form of the later is to require that all values
     // stored are the same.
     return !(Legal->isInvariant(getLoadStorePointerOperand(I)) &&
-             TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand()));
+             Legal->isInvariant(cast<StoreInst>(I)->getValueOperand()));
   }
   case Instruction::UDiv:
   case Instruction::SDiv:
   case Instruction::SRem:
   case Instruction::URem:
     // If the divisor is loop-invariant no predication is needed.
-    return !TheLoop->isLoopInvariant(I->getOperand(1));
+    return !Legal->isInvariant(I->getOperand(1));
   }
@@ -3410,7 +3409,7 @@ LoopVectorizationCostModel::getDivRemSpeculationCost(Instruction *I,
   Value *Op2 = I->getOperand(1);
   auto Op2Info = TTI.getOperandInfo(Op2);
   if (Op2Info.Kind == TargetTransformInfo::OK_AnyValue &&
-      Legal->isInvariant(Op2))
+      shouldConsiderInvariant(Op2))
     Op2Info.Kind = TargetTransformInfo::OK_UniformValue;
 
   SmallVector<const Value *, 4> Operands(I->operand_values());
@@ -3600,7 +3599,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
       // assuming aliasing and ordering which have already been checked.
       return true;
     // Storing the same value on every iteration.
-    return TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand());
+    return Legal->isInvariant(cast<StoreInst>(I)->getValueOperand());
   };
 
   auto IsUniformDecision = [&](Instruction *I, ElementCount VF) {
@@ -5630,12 +5629,10 @@ static const SCEV *getAddressAccessSCEV(
 
   // We are looking for a gep with all loop invariant indices except for one
   // which should be an induction variable.
-  auto *SE = PSE.getSE();
   unsigned NumOperands = Gep->getNumOperands();
   for (unsigned Idx = 1; Idx < NumOperands; ++Idx) {
     Value *Opd = Gep->getOperand(Idx);
-    if (!SE->isLoopInvariant(SE->getSCEV(Opd), TheLoop) &&
-        !Legal->isInductionVariable(Opd))
+    if (!Legal->isInvariant(Opd) && !Legal->isInductionVariable(Opd))
       return nullptr;
   }
@@ -5747,9 +5744,8 @@ LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
            TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VectorTy, {},
                               CostKind);
   }
-  StoreInst *SI = cast<StoreInst>(I);
 
-  bool IsLoopInvariantStoreValue = Legal->isInvariant(SI->getValueOperand());
+  bool IsLoopInvariantStoreValue = shouldConsiderInvariant(I);
   return TTI.getAddressComputationCost(ValTy) +
          TTI.getMemoryOpCost(Instruction::Store, ValTy, Alignment, AS,
                              CostKind) +
@@ -5900,7 +5896,7 @@ LoopVectorizationCostModel::getReductionPatternCost(Instruction *I,
         match(Op0, m_ZExtOrSExt(m_Value())) &&
         Op0->getOpcode() == Op1->getOpcode() &&
         Op0->getOperand(0)->getType() == Op1->getOperand(0)->getType() &&
-        !TheLoop->isLoopInvariant(Op0) && !TheLoop->isLoopInvariant(Op1) &&
+        !shouldConsiderInvariant(Op0) && !shouldConsiderInvariant(Op1) &&
        (Op0->getOpcode() == RedOp->getOpcode() || Op0 == Op1)) {
 
       // Matched reduce.add(ext(mul(ext(A), ext(B)))
@@ -5927,7 +5923,7 @@ LoopVectorizationCostModel::getReductionPatternCost(Instruction *I,
           RedCost < ExtCost * 2 + MulCost + Ext2Cost + BaseCost)
         return I == RetI ? RedCost : 0;
     } else if (RedOp && match(RedOp, m_ZExtOrSExt(m_Value())) &&
-               !TheLoop->isLoopInvariant(RedOp)) {
+               !shouldConsiderInvariant(RedOp)) {
       // Matched reduce(ext(A))
       bool IsUnsigned = isa<ZExtInst>(RedOp);
       auto *ExtType = VectorType::get(RedOp->getOperand(0)->getType(), VectorTy);
@@ -5943,8 +5939,8 @@ LoopVectorizationCostModel::getReductionPatternCost(Instruction *I,
     } else if (RedOp && RdxDesc.getOpcode() == Instruction::Add &&
                match(RedOp, m_Mul(m_Instruction(Op0), m_Instruction(Op1)))) {
       if (match(Op0, m_ZExtOrSExt(m_Value())) &&
-          Op0->getOpcode() == Op1->getOpcode() &&
-          !TheLoop->isLoopInvariant(Op0) && !TheLoop->isLoopInvariant(Op1)) {
+          Op0->getOpcode() == Op1->getOpcode() && !shouldConsiderInvariant(Op0) &&
+          !shouldConsiderInvariant(Op1)) {
        bool IsUnsigned = isa<ZExtInst>(Op0);
        Type *Op0Ty = Op0->getOperand(0)->getType();
        Type *Op1Ty = Op1->getOperand(0)->getType();
@@ -6097,8 +6093,7 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
         // A uniform store isn't neccessarily uniform-by-part
         // and we can't assume scalarization.
-        auto &SI = cast<StoreInst>(I);
-        return TheLoop->isLoopInvariant(SI.getValueOperand());
+        return shouldConsiderInvariant(&I);
       };
 
       const InstructionCost GatherScatterCost =
@@ -6331,8 +6326,7 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
         case VFParamKind::OMP_Uniform: {
           Value *ScalarParam = CI->getArgOperand(Param.ParamPos);
           // Make sure the scalar parameter in the loop is invariant.
-          if (!PSE.getSE()->isLoopInvariant(PSE.getSCEV(ScalarParam),
-                                            TheLoop))
+          if (!Legal->isInvariant(ScalarParam))
             ParamsOk = false;
           break;
         }
@@ -6405,7 +6399,7 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
   }
 }
 
-bool LoopVectorizationCostModel::shouldConsiderInvariant(Value *Op) {
+bool LoopVectorizationCostModel::shouldConsiderInvariant(Value *Op) const {
   if (!Legal->isInvariant(Op))
     return false;
   // Consider Op invariant, if it or its operands aren't predicated
@@ -6441,7 +6435,6 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
   Type *RetTy = I->getType();
   if (canTruncateToMinimalBitwidth(I, VF))
     RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]);
-  auto *SE = PSE.getSE();
 
   auto HasSingleCopyAfterVectorization = [this](Instruction *I,
                                                 ElementCount VF) -> bool {
@@ -6687,8 +6680,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
   }
   case Instruction::Select: {
     SelectInst *SI = cast<SelectInst>(I);
-    const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());
-    bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop));
+    bool ScalarCond = shouldConsiderInvariant(SI->getCondition());
     const Value *Op0, *Op1;
 
     using namespace llvm::PatternMatch;
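(For reference, the cost-model hunks above funnel invariance queries through shouldConsiderInvariant. Only its signature, its first check, and the leading comment are visible in this diff; the recursion below is an illustrative sketch of the "trivially hoistable" condition, not the verbatim implementation.)

bool LoopVectorizationCostModel::shouldConsiderInvariant(Value *Op) const {
  if (!Legal->isInvariant(Op))
    return false;
  // Consider Op invariant, if it or its operands aren't predicated
  // instructions in the loop.
  auto *OpI = dyn_cast<Instruction>(Op);
  if (!OpI || !TheLoop->contains(OpI))
    return true; // Defined outside the loop: trivially hoistable.
  // Sketch (assumed): a predicated definition, or one computed from
  // predicated values, cannot be hoisted out of the loop, so the cost
  // model does not treat it as invariant.
  return !isPredicatedInst(OpI) &&
         all_of(OpI->operands(),
                [this](Value *V) { return shouldConsiderInvariant(V); });
}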
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
index 305d26d7f3bc1..984e25693e379 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
@@ -7,60 +7,45 @@ define void @test(ptr %p, i64 %a, i8 %b) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[A]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = shl <4 x i64> [[BROADCAST_SPLAT]], splat (i64 48)
-; CHECK-NEXT:    [[TMP3:%.*]] = ashr <4 x i64> [[TMP2]], splat (i64 52)
-; CHECK-NEXT:    [[TMP4:%.*]] = trunc <4 x i64> [[TMP3]] to <4 x i32>
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[B]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP6:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT2]] to <4 x i32>
+; CHECK-NEXT:    [[TMP12:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i32 [[TMP12]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[TMP1]], 1
+; CHECK-NEXT:    [[N_RND_UP:%.*]] = add i32 3, [[TMP2]]
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP1]]
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
+; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    [[TMP4:%.*]] = mul i32 [[TMP3]], 2
+; CHECK-NEXT:    [[TMP5:%.*]] = shl i64 [[A]], 48
+; CHECK-NEXT:    [[TMP6:%.*]] = ashr i64 [[TMP5]], 52
+; CHECK-NEXT:    [[TMP7:%.*]] = trunc i64 [[TMP6]] to i32
+; CHECK-NEXT:    [[TMP8:%.*]] = zext i8 [[B]] to i32
+; CHECK-NEXT:    [[TMP9:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
+; CHECK-NEXT:    [[TMP10:%.*]] = mul <vscale x 2 x i32> [[TMP9]], splat (i32 1)
+; CHECK-NEXT:    [[INDUCTION:%.*]] = add <vscale x 2 x i32> zeroinitializer, [[TMP10]]
+; CHECK-NEXT:    [[TMP11:%.*]] = mul i32 1, [[TMP4]]
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP11]], i64 0
+; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[DOTSPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY1:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE8]] ]
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY1]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY1]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
-; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 3)
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[VEC_IND]], splat (i32 2)
-; CHECK-NEXT:    [[TMP5:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP1]], <4 x i1> zeroinitializer
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[TMP4]]
-; CHECK-NEXT:    [[TMP7:%.*]] = shl <4 x i32> [[PREDPHI]], splat (i32 8)
-; CHECK-NEXT:    [[TMP8:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i8>
-; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 0
-; CHECK-NEXT:    br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[VECTOR_BODY:%.*]]
-; CHECK:       pred.store.if:
-; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i8> [[TMP8]], i32 0
-; CHECK-NEXT:    store i8 [[TMP10]], ptr [[P]], align 1
-; CHECK-NEXT:    br label [[VECTOR_BODY]]
-; CHECK:       pred.store.continue:
-; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 1
-; CHECK-NEXT:    br i1 [[TMP11]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
-; CHECK:       pred.store.if3:
-; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i8> [[TMP8]], i32 1
-; CHECK-NEXT:    store i8 [[TMP12]], ptr [[P]], align 1
-; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE4]]
-; CHECK:       pred.store.continue4:
-; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 2
-; CHECK-NEXT:    br i1 [[TMP13]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
-; CHECK:       pred.store.if5:
-; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x i8> [[TMP8]], i32 2
-; CHECK-NEXT:    store i8 [[TMP14]], ptr [[P]], align 1
-; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE6]]
-; CHECK:       pred.store.continue6:
-; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 3
-; CHECK-NEXT:    br i1 [[TMP15]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8]]
-; CHECK:       pred.store.if7:
-; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <4 x i8> [[TMP8]], i32 3
+; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 [[TMP0]], i32 3)
+; CHECK-NEXT:    [[TMP13:%.*]] = icmp slt <vscale x 2 x i32> [[VEC_IND]], splat (i32 2)
+; CHECK-NEXT:    [[TMP14:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP13]], <vscale x 2 x i1> zeroinitializer
+; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <vscale x 2 x i1> [[TMP14]], i32 0
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select i1 [[TMP15]], i32 [[TMP8]], i32 [[TMP7]]
+; CHECK-NEXT:    [[TMP17:%.*]] = shl i32 [[PREDPHI]], 8
+; CHECK-NEXT:    [[TMP16:%.*]] = trunc i32 [[TMP17]] to i8
 ; CHECK-NEXT:    store i8 [[TMP16]], ptr [[P]], align 1
-; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE8]]
-; CHECK:       pred.store.continue8:
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY1]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP4]]
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT:    [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY1]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 4, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    br label [[FOR_COND:%.*]]
 ; CHECK:       for.cond:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY:%.*]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll
index 68b36f23de4b0..1d95a95c3d8f8 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll
@@ -27,11 +27,12 @@ define void @truncate_to_minimal_bitwidths_widen_cast_recipe(ptr %src) {
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP5]], i32 0
 ; CHECK-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 1 x i8> @llvm.vp.load.nxv1i8.p0(ptr align 1 [[TMP6]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
-; CHECK-NEXT:    [[TMP7:%.*]] = call <vscale x 1 x i16> @llvm.vp.zext.nxv1i16.nxv1i8(<vscale x 1 x i8> [[VP_OP_LOAD]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
-; CHECK-NEXT:    [[VP_OP:%.*]] = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> zeroinitializer, <vscale x 1 x i16> [[TMP7]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
-; CHECK-NEXT:    [[VP_OP1:%.*]] = call <vscale x 1 x i16> @llvm.vp.lshr.nxv1i16(<vscale x 1 x i16> [[VP_OP]], <vscale x 1 x i16> trunc (<vscale x 1 x i32> splat (i32 1) to <vscale x 1 x i16>), <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
-; CHECK-NEXT:    [[TMP8:%.*]] = call <vscale x 1 x i8> @llvm.vp.trunc.nxv1i8.nxv1i16(<vscale x 1 x i16> [[VP_OP1]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
-; CHECK-NEXT:    call void @llvm.vp.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> [[TMP8]], <vscale x 1 x ptr> align 1 zeroinitializer, <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i8> [[VP_OP_LOAD]], i32 0
+; CHECK-NEXT:    [[TMP8:%.*]] = zext i8 [[TMP7]] to i32
+; CHECK-NEXT:    [[TMP12:%.*]] = mul i32 0, [[TMP8]]
+; CHECK-NEXT:    [[TMP13:%.*]] = lshr i32 [[TMP12]], 1
+; CHECK-NEXT:    [[TMP14:%.*]] = trunc i32 [[TMP13]] to i8
+; CHECK-NEXT:    store i8 [[TMP14]], ptr null, align 1
 ; CHECK-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP3]] to i64
 ; CHECK-NEXT:    [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP9]], [[EVL_BASED_IV]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP2]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll b/llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll
index 7de51bc3a8a68..71f7446f30b5c 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll
@@ -45,15 +45,16 @@ define void @type_info_cache_clobber(ptr %dstv, ptr %src, i64 %wide.trip.count)
 ; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[TMP13]], i32 0
 ; CHECK-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr align 1 [[TMP14]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]]), !alias.scope [[META0:![0-9]+]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = call <vscale x 8 x i32> @llvm.vp.zext.nxv8i32.nxv8i8(<vscale x 8 x i8> [[VP_OP_LOAD]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
-; CHECK-NEXT:    [[VP_OP:%.*]] = call <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32> [[TMP15]], <vscale x 8 x i32> zeroinitializer, <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
+; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <vscale x 8 x i32> [[TMP15]], i32 0
+; CHECK-NEXT:    [[TMP23:%.*]] = mul i32 [[TMP19]], 0
 ; CHECK-NEXT:    [[VP_OP2:%.*]] = call <vscale x 8 x i32> @llvm.vp.ashr.nxv8i32(<vscale x 8 x i32> [[TMP15]], <vscale x 8 x i32> zeroinitializer, <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
 ; CHECK-NEXT:    [[VP_OP3:%.*]] = call <vscale x 8 x i32> @llvm.vp.or.nxv8i32(<vscale x 8 x i32> [[VP_OP2]], <vscale x 8 x i32> zeroinitializer, <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
 ; CHECK-NEXT:    [[TMP16:%.*]] = icmp ult <vscale x 8 x i32> [[TMP15]], zeroinitializer
 ; CHECK-NEXT:    [[TMP17:%.*]] = call <vscale x 8 x i32> @llvm.vp.select.nxv8i32(<vscale x 8 x i1> [[TMP16]], <vscale x 8 x i32> [[VP_OP3]], <vscale x 8 x i32> zeroinitializer, i32 [[TMP11]])
 ; CHECK-NEXT:    [[TMP18:%.*]] = call <vscale x 8 x i8> @llvm.vp.trunc.nxv8i8.nxv8i32(<vscale x 8 x i32> [[TMP17]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
 ; CHECK-NEXT:    call void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> [[TMP18]], <vscale x 8 x ptr> align 1 [[BROADCAST_SPLAT]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]]), !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
-; CHECK-NEXT:    [[TMP19:%.*]] = call <vscale x 8 x i16> @llvm.vp.trunc.nxv8i16.nxv8i32(<vscale x 8 x i32> [[VP_OP]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
-; CHECK-NEXT:    call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> [[TMP19]], <vscale x 8 x ptr> align 2 zeroinitializer, <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
+; CHECK-NEXT:    [[TMP24:%.*]] = trunc i32 [[TMP23]] to i16
+; CHECK-NEXT:    store i16 [[TMP24]], ptr null, align 2
 ; CHECK-NEXT:    [[TMP20:%.*]] = zext i32 [[TMP11]] to i64
 ; CHECK-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]]
diff --git a/llvm/test/Transforms/LoopVectorize/debugloc-optimize-vfuf-term.ll b/llvm/test/Transforms/LoopVectorize/debugloc-optimize-vfuf-term.ll
index 0f34f6243f155..3cb01791b98e9 100644
--- a/llvm/test/Transforms/LoopVectorize/debugloc-optimize-vfuf-term.ll
+++ b/llvm/test/Transforms/LoopVectorize/debugloc-optimize-vfuf-term.ll
@@ -9,10 +9,11 @@ define i32 @foo(ptr %p) {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = trunc i64 0 to i8, !dbg [[DBG3:![0-9]+]]
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    store i8 0, ptr [[P]], align 1, !dbg [[DBG3:![0-9]+]]
-; CHECK-NEXT:    br label %[[MIDDLE_BLOCK:.*]], !dbg [[DBG7:![0-9]+]]
+; CHECK-NEXT:    store i8 [[TMP0]], ptr [[P]], align 1, !dbg [[DBG7:![0-9]+]]
+; CHECK-NEXT:    br label %[[MIDDLE_BLOCK:.*]], !dbg [[DBG3]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]], !dbg [[DBG8:![0-9]+]]
 ; CHECK:       [[SCALAR_PH]]:
@@ -20,8 +21,8 @@ define i32 @foo(ptr %p) {
 ; CHECK-NEXT:    br label %[[LOOP:.*]]
 ; CHECK:       [[LOOP]]:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], !dbg [[DBG9]]
-; CHECK-NEXT:    [[CONV:%.*]] = trunc i64 0 to i8, !dbg [[DBG7]]
-; CHECK-NEXT:    store i8 [[CONV]], ptr [[P]], align 1, !dbg [[DBG3]]
+; CHECK-NEXT:    [[CONV:%.*]] = trunc i64 0 to i8, !dbg [[DBG3]]
+; CHECK-NEXT:    store i8 [[CONV]], ptr [[P]], align 1, !dbg [[DBG7]]
 ; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1, !dbg [[DBG10:![0-9]+]]
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV]], 1, !dbg [[DBG11:![0-9]+]]
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]], !dbg [[DBG8]], !llvm.loop [[LOOP12:![0-9]+]]
@@ -61,11 +62,11 @@ exit: ; preds = %loop
 ;.
 ; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: [[META1:![0-9]+]], producer: "{{.*}}clang version {{.*}}", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug)
 ; CHECK: [[META1]] = !DIFile(filename: "test.cpp", directory: {{.*}})
-; CHECK: [[DBG3]] = !DILocation(line: 6, scope: [[META4:![0-9]+]])
+; CHECK: [[DBG3]] = !DILocation(line: 5, scope: [[META4:![0-9]+]])
 ; CHECK: [[META4]] = distinct !DISubprogram(name: "foo", scope: [[META1]], file: [[META1]], line: 11, type: [[META5:![0-9]+]], spFlags: DISPFlagDefinition, unit: [[META0]], retainedNodes: [[META6:![0-9]+]])
 ; CHECK: [[META5]] = distinct !DISubroutineType(types: [[META6]])
 ; CHECK: [[META6]] = !{}
-; CHECK: [[DBG7]] = !DILocation(line: 5, scope: [[META4]])
+; CHECK: [[DBG7]] = !DILocation(line: 6, scope: [[META4]])
 ; CHECK: [[DBG8]] = !DILocation(line: 9, scope: [[META4]])
 ; CHECK: [[DBG9]] = !DILocation(line: 4, scope: [[META4]])
 ; CHECK: [[DBG10]] = !DILocation(line: 7, scope: [[META4]])
diff --git a/llvm/test/Transforms/LoopVectorize/pr47343-expander-lcssa-after-cfg-update.ll b/llvm/test/Transforms/LoopVectorize/pr47343-expander-lcssa-after-cfg-update.ll
index 6aaa44344ae46..f03a8535efdc8 100644
--- a/llvm/test/Transforms/LoopVectorize/pr47343-expander-lcssa-after-cfg-update.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr47343-expander-lcssa-after-cfg-update.ll
@@ -43,10 +43,11 @@ define void @f() {
 ; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
 ; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i1 false to i32
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    store i32 0, ptr @f.e, align 1, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]]
+; CHECK-NEXT:    store i32 [[TMP3]], ptr @f.e, align 1, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]]
 ; CHECK-NEXT:    store i8 10, ptr [[TMP0]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 500
diff --git a/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll b/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll
index 16e071dec9604..ec773f905a2ff 100644
--- a/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll
+++ b/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll
@@ -163,10 +163,9 @@ define void @versioned_sext_use_in_gep(i32 %scale, ptr %dst, i64 %scale.2) {
 ; CHECK-NEXT:    [[IDENT_CHECK:%.*]] = icmp ne i32 [[SCALE]], 1
 ; CHECK-NEXT:    br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[SCALE_2]]
-; CHECK-NEXT:    [[TMP81:%.*]] = getelementptr i8, ptr [[DST]], i64 [[SCALE_2]]
-; CHECK-NEXT:    [[TMP82:%.*]] = getelementptr i8, ptr [[DST]], i64 [[SCALE_2]]
 ; CHECK-NEXT:    [[TMP83:%.*]] = getelementptr i8, ptr [[DST]], i64 [[SCALE_2]]
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP83]], i64 0
+; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x ptr> [[DOTSPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -178,6 +177,7 @@ define void @versioned_sext_use_in_gep(i32 %scale, ptr %dst, i64 %scale.2) {
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP12]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP14]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP16]]
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x ptr> [[DOTSPLAT]], i32 0
 ; CHECK-NEXT:    store ptr [[TMP8]], ptr [[TMP11]], align 8
 ; CHECK-NEXT:    store ptr [[TMP8]], ptr [[TMP13]], align 8
 ; CHECK-NEXT:    store ptr [[TMP8]], ptr [[TMP15]], align 8