[LV] Refine loop-invariance checks #127516

Closed · wants to merge 1 commit
4 changes: 2 additions & 2 deletions llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -2807,8 +2807,8 @@ LoopAccessInfo::recordAnalysis(StringRef RemarkName, const Instruction *I) {

bool LoopAccessInfo::isInvariant(Value *V) const {
auto *SE = PSE->getSE();
// TODO: Is this really what we want? Even without FP SCEV, we may want some
// trivially loop-invariant FP values to be considered invariant.
if (TheLoop->isLoopInvariant(V))
return true;
if (!SE->isSCEVable(V->getType()))
return false;
const SCEV *S = SE->getSCEV(V);
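For context on why the cost-model changes below can route through Legal->isInvariant: LoopAccessInfo::isInvariant (shown above) is a strict superset of Loop::isLoopInvariant — it performs the structural check first and only then falls back to SCEV. A minimal standalone sketch of that behaviour follows; the helper name sketchIsInvariant is hypothetical, and the calls mirror the function above rather than adding anything new.

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;

// Sketch only: mirrors LoopAccessInfo::isInvariant above.
static bool sketchIsInvariant(ScalarEvolution &SE, const Loop *L, Value *V) {
  // Structural check: a value defined outside the loop is trivially
  // invariant; this also covers FP values, which have no SCEV.
  if (L->isLoopInvariant(V))
    return true;
  // Without a SCEV expression nothing more can be said.
  if (!SE.isSCEVable(V->getType()))
    return false;
  // SCEV-based check: the value's expression does not evolve with the loop,
  // even if the instruction computing it sits inside the loop body.
  return SE.isLoopInvariant(SE.getSCEV(V), L);
}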
42 changes: 17 additions & 25 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1567,7 +1567,7 @@ class LoopVectorizationCostModel {

/// Returns true if \p Op should be considered invariant and if it is
/// trivially hoistable.
bool shouldConsiderInvariant(Value *Op);
bool shouldConsiderInvariant(Value *Op) const;

/// Return the value of vscale used for tuning the cost model.
std::optional<unsigned> getVScaleForTuning() const { return VScaleForTuning; }
@@ -1763,8 +1763,7 @@ class LoopVectorizationCostModel {
/// extracted.
bool needsExtract(Value *V, ElementCount VF) const {
Instruction *I = dyn_cast<Instruction>(V);
if (VF.isScalar() || !I || !TheLoop->contains(I) ||
TheLoop->isLoopInvariant(I) ||
if (VF.isScalar() || !I || shouldConsiderInvariant(I) ||
getWideningDecision(I, VF) == CM_Scalarize)
return false;

@@ -3118,7 +3117,7 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
// A helper that returns true if the given value is a getelementptr
// instruction contained in the loop.
auto IsLoopVaryingGEP = [&](Value *V) {
return isa<GetElementPtrInst>(V) && !TheLoop->isLoopInvariant(V);
return isa<GetElementPtrInst>(V) && !shouldConsiderInvariant(V);
};

// A helper that evaluates a memory access's use of a pointer. If the use will
@@ -3346,14 +3345,14 @@ bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const {
// is correct. The easiest form of the later is to require that all values
// stored are the same.
return !(Legal->isInvariant(getLoadStorePointerOperand(I)) &&
TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand()));
Legal->isInvariant(cast<StoreInst>(I)->getValueOperand()));
}
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::SRem:
case Instruction::URem:
// If the divisor is loop-invariant no predication is needed.
return !TheLoop->isLoopInvariant(I->getOperand(1));
return !Legal->isInvariant(I->getOperand(1));
}
}

@@ -3410,7 +3409,7 @@ LoopVectorizationCostModel::getDivRemSpeculationCost(Instruction *I,
Value *Op2 = I->getOperand(1);
auto Op2Info = TTI.getOperandInfo(Op2);
if (Op2Info.Kind == TargetTransformInfo::OK_AnyValue &&
Legal->isInvariant(Op2))
shouldConsiderInvariant(Op2))
Op2Info.Kind = TargetTransformInfo::OK_UniformValue;

SmallVector<const Value *, 4> Operands(I->operand_values());
@@ -3600,7 +3599,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
// assuming aliasing and ordering which have already been checked.
return true;
// Storing the same value on every iteration.
return TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand());
return Legal->isInvariant(cast<StoreInst>(I)->getValueOperand());
};

auto IsUniformDecision = [&](Instruction *I, ElementCount VF) {
@@ -5630,12 +5629,10 @@ static const SCEV *getAddressAccessSCEV(

// We are looking for a gep with all loop invariant indices except for one
// which should be an induction variable.
auto *SE = PSE.getSE();
unsigned NumOperands = Gep->getNumOperands();
for (unsigned Idx = 1; Idx < NumOperands; ++Idx) {
Value *Opd = Gep->getOperand(Idx);
if (!SE->isLoopInvariant(SE->getSCEV(Opd), TheLoop) &&
!Legal->isInductionVariable(Opd))
if (!Legal->isInvariant(Opd) && !Legal->isInductionVariable(Opd))
return nullptr;
}

@@ -5747,9 +5744,8 @@ LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VectorTy, {},
CostKind);
}
StoreInst *SI = cast<StoreInst>(I);

bool IsLoopInvariantStoreValue = Legal->isInvariant(SI->getValueOperand());
bool IsLoopInvariantStoreValue = shouldConsiderInvariant(I);
return TTI.getAddressComputationCost(ValTy) +
TTI.getMemoryOpCost(Instruction::Store, ValTy, Alignment, AS,
CostKind) +
@@ -5900,7 +5896,7 @@ LoopVectorizationCostModel::getReductionPatternCost(Instruction *I,
match(Op0, m_ZExtOrSExt(m_Value())) &&
Op0->getOpcode() == Op1->getOpcode() &&
Op0->getOperand(0)->getType() == Op1->getOperand(0)->getType() &&
!TheLoop->isLoopInvariant(Op0) && !TheLoop->isLoopInvariant(Op1) &&
!shouldConsiderInvariant(Op0) && !shouldConsiderInvariant(Op1) &&
(Op0->getOpcode() == RedOp->getOpcode() || Op0 == Op1)) {

// Matched reduce.add(ext(mul(ext(A), ext(B)))
@@ -5927,7 +5923,7 @@ LoopVectorizationCostModel::getReductionPatternCost(Instruction *I,
RedCost < ExtCost * 2 + MulCost + Ext2Cost + BaseCost)
return I == RetI ? RedCost : 0;
} else if (RedOp && match(RedOp, m_ZExtOrSExt(m_Value())) &&
!TheLoop->isLoopInvariant(RedOp)) {
!shouldConsiderInvariant(RedOp)) {
// Matched reduce(ext(A))
bool IsUnsigned = isa<ZExtInst>(RedOp);
auto *ExtType = VectorType::get(RedOp->getOperand(0)->getType(), VectorTy);
@@ -5943,8 +5939,8 @@ LoopVectorizationCostModel::getReductionPatternCost(Instruction *I,
} else if (RedOp && RdxDesc.getOpcode() == Instruction::Add &&
match(RedOp, m_Mul(m_Instruction(Op0), m_Instruction(Op1)))) {
if (match(Op0, m_ZExtOrSExt(m_Value())) &&
Op0->getOpcode() == Op1->getOpcode() &&
!TheLoop->isLoopInvariant(Op0) && !TheLoop->isLoopInvariant(Op1)) {
Op0->getOpcode() == Op1->getOpcode() && !shouldConsiderInvariant(Op0) &&
!shouldConsiderInvariant(Op1)) {
bool IsUnsigned = isa<ZExtInst>(Op0);
Type *Op0Ty = Op0->getOperand(0)->getType();
Type *Op1Ty = Op1->getOperand(0)->getType();
@@ -6097,8 +6093,7 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {

// A uniform store isn't neccessarily uniform-by-part
// and we can't assume scalarization.
auto &SI = cast<StoreInst>(I);
return TheLoop->isLoopInvariant(SI.getValueOperand());
return shouldConsiderInvariant(&I);
};

const InstructionCost GatherScatterCost =
@@ -6331,8 +6326,7 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
case VFParamKind::OMP_Uniform: {
Value *ScalarParam = CI->getArgOperand(Param.ParamPos);
// Make sure the scalar parameter in the loop is invariant.
if (!PSE.getSE()->isLoopInvariant(PSE.getSCEV(ScalarParam),
TheLoop))
if (!Legal->isInvariant(ScalarParam))
ParamsOk = false;
break;
}
@@ -6405,7 +6399,7 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
}
}

bool LoopVectorizationCostModel::shouldConsiderInvariant(Value *Op) {
bool LoopVectorizationCostModel::shouldConsiderInvariant(Value *Op) const {
if (!Legal->isInvariant(Op))
return false;
// Consider Op invariant, if it or its operands aren't predicated
@@ -6441,7 +6435,6 @@
Type *RetTy = I->getType();
if (canTruncateToMinimalBitwidth(I, VF))
RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]);
auto *SE = PSE.getSE();

auto HasSingleCopyAfterVectorization = [this](Instruction *I,
ElementCount VF) -> bool {
@@ -6687,8 +6680,7 @@
}
case Instruction::Select: {
SelectInst *SI = cast<SelectInst>(I);
const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());
bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop));
bool ScalarCond = shouldConsiderInvariant(SI->getCondition());

const Value *Op0, *Op1;
using namespace llvm::PatternMatch;
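To illustrate the effect of switching the divisor check in isPredicatedInst from TheLoop->isLoopInvariant to Legal->isInvariant, here is a hypothetical IR loop (not one of the PR's test cases): %den is defined inside the loop, so the structural check rejects it, but its SCEV (%a + 42) is loop-invariant, so under this patch the udiv no longer forces predication.

define void @div_by_scev_invariant(ptr %p, i64 %a, i64 %n) {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  ; Defined inside the loop, yet loop-invariant by SCEV: (%a + 42).
  %den = add i64 %a, 42
  %val = udiv i64 %iv, %den
  %gep = getelementptr i64, ptr %p, i64 %iv
  store i64 %val, ptr %gep, align 8
  %iv.next = add i64 %iv, 1
  %ec = icmp eq i64 %iv.next, %n
  br i1 %ec, label %exit, label %loop

exit:
  ret void
}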
79 changes: 32 additions & 47 deletions llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
@@ -7,60 +7,45 @@ define void @test(ptr %p, i64 %a, i8 %b) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[A]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i64> [[BROADCAST_SPLAT]], splat (i64 48)
; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i64> [[TMP2]], splat (i64 52)
; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i64> [[TMP3]] to <4 x i32>
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[B]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT2]] to <4 x i32>
; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[TMP12]], 2
; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], 1
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 3, [[TMP2]]
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP1]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP4:%.*]] = mul i32 [[TMP3]], 2
; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[A]], 48
; CHECK-NEXT: [[TMP6:%.*]] = ashr i64 [[TMP5]], 52
; CHECK-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP6]] to i32
; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[B]] to i32
; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 2 x i32> [[TMP9]], splat (i32 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i32> zeroinitializer, [[TMP10]]
; CHECK-NEXT: [[TMP11:%.*]] = mul i32 1, [[TMP4]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP11]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[DOTSPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY1:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE8]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY1]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY1]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 3)
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[VEC_IND]], splat (i32 2)
; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP1]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[PREDPHI]], splat (i32 8)
; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i8>
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 0
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[VECTOR_BODY:%.*]]
; CHECK: pred.store.if:
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i8> [[TMP8]], i32 0
; CHECK-NEXT: store i8 [[TMP10]], ptr [[P]], align 1
; CHECK-NEXT: br label [[VECTOR_BODY]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 1
; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
; CHECK: pred.store.if3:
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i8> [[TMP8]], i32 1
; CHECK-NEXT: store i8 [[TMP12]], ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
; CHECK: pred.store.continue4:
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 2
; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
; CHECK: pred.store.if5:
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i8> [[TMP8]], i32 2
; CHECK-NEXT: store i8 [[TMP14]], ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
; CHECK: pred.store.continue6:
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 3
; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8]]
; CHECK: pred.store.if7:
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i8> [[TMP8]], i32 3
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 [[TMP0]], i32 3)
; CHECK-NEXT: [[TMP13:%.*]] = icmp slt <vscale x 2 x i32> [[VEC_IND]], splat (i32 2)
; CHECK-NEXT: [[TMP14:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP13]], <vscale x 2 x i1> zeroinitializer
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <vscale x 2 x i1> [[TMP14]], i32 0
; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP15]], i32 [[TMP8]], i32 [[TMP7]]
; CHECK-NEXT: [[TMP17:%.*]] = shl i32 [[PREDPHI]], 8
; CHECK-NEXT: [[TMP16:%.*]] = trunc i32 [[TMP17]] to i8
; CHECK-NEXT: store i8 [[TMP16]], ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
; CHECK: pred.store.continue8:
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY1]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP4]]
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY1]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 4, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_COND:%.*]]
; CHECK: for.cond:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY:%.*]] ]
(additional test file)
@@ -27,11 +27,12 @@ define void @truncate_to_minimal_bitwidths_widen_cast_recipe(ptr %src) {
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[TMP5]], i32 0
; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 1 x i8> @llvm.vp.load.nxv1i8.p0(ptr align 1 [[TMP6]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 1 x i16> @llvm.vp.zext.nxv1i16.nxv1i8(<vscale x 1 x i8> [[VP_OP_LOAD]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
; CHECK-NEXT: [[VP_OP:%.*]] = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> zeroinitializer, <vscale x 1 x i16> [[TMP7]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
; CHECK-NEXT: [[VP_OP1:%.*]] = call <vscale x 1 x i16> @llvm.vp.lshr.nxv1i16(<vscale x 1 x i16> [[VP_OP]], <vscale x 1 x i16> trunc (<vscale x 1 x i32> splat (i32 1) to <vscale x 1 x i16>), <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 1 x i8> @llvm.vp.trunc.nxv1i8.nxv1i16(<vscale x 1 x i16> [[VP_OP1]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
; CHECK-NEXT: call void @llvm.vp.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> [[TMP8]], <vscale x 1 x ptr> align 1 zeroinitializer, <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i8> [[VP_OP_LOAD]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[TMP7]] to i32
; CHECK-NEXT: [[TMP12:%.*]] = mul i32 0, [[TMP8]]
; CHECK-NEXT: [[TMP13:%.*]] = lshr i32 [[TMP12]], 1
; CHECK-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i8
; CHECK-NEXT: store i8 [[TMP14]], ptr null, align 1
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP3]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP9]], [[EVL_BASED_IV]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP2]]
(additional test file)
@@ -45,15 +45,16 @@ define void @type_info_cache_clobber(ptr %dstv, ptr %src, i64 %wide.trip.count)
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP13]], i32 0
; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr align 1 [[TMP14]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]]), !alias.scope [[META0:![0-9]+]]
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 8 x i32> @llvm.vp.zext.nxv8i32.nxv8i8(<vscale x 8 x i8> [[VP_OP_LOAD]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
; CHECK-NEXT: [[VP_OP:%.*]] = call <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32> [[TMP15]], <vscale x 8 x i32> zeroinitializer, <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <vscale x 8 x i32> [[TMP15]], i32 0
; CHECK-NEXT: [[TMP23:%.*]] = mul i32 [[TMP19]], 0
; CHECK-NEXT: [[VP_OP2:%.*]] = call <vscale x 8 x i32> @llvm.vp.ashr.nxv8i32(<vscale x 8 x i32> [[TMP15]], <vscale x 8 x i32> zeroinitializer, <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
; CHECK-NEXT: [[VP_OP3:%.*]] = call <vscale x 8 x i32> @llvm.vp.or.nxv8i32(<vscale x 8 x i32> [[VP_OP2]], <vscale x 8 x i32> zeroinitializer, <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
; CHECK-NEXT: [[TMP16:%.*]] = icmp ult <vscale x 8 x i32> [[TMP15]], zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = call <vscale x 8 x i32> @llvm.vp.select.nxv8i32(<vscale x 8 x i1> [[TMP16]], <vscale x 8 x i32> [[VP_OP3]], <vscale x 8 x i32> zeroinitializer, i32 [[TMP11]])
; CHECK-NEXT: [[TMP18:%.*]] = call <vscale x 8 x i8> @llvm.vp.trunc.nxv8i8.nxv8i32(<vscale x 8 x i32> [[TMP17]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> [[TMP18]], <vscale x 8 x ptr> align 1 [[BROADCAST_SPLAT]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]]), !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
; CHECK-NEXT: [[TMP19:%.*]] = call <vscale x 8 x i16> @llvm.vp.trunc.nxv8i16.nxv8i32(<vscale x 8 x i32> [[VP_OP]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> [[TMP19]], <vscale x 8 x ptr> align 2 zeroinitializer, <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
; CHECK-NEXT: [[TMP24:%.*]] = trunc i32 [[TMP23]] to i16
; CHECK-NEXT: store i16 [[TMP24]], ptr null, align 2
; CHECK-NEXT: [[TMP20:%.*]] = zext i32 [[TMP11]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]]
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]]