Skip to content

Commit 6b78004

Browse files
committed
[LV] Fix MVE regression from llvm#132190
Previously, register pressure was only considered when vector bandwidth was being maximised (enabled either by the target or by user options), but llvm#132190 inadvertently caused high-pressure VFs to be pruned even when max bandwidth wasn't enabled. This PR restores the previous behaviour.
1 parent 79a72c4 commit 6b78004

File tree

1 file changed

+21
-6
lines changed

1 file changed

+21
-6
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -956,6 +956,10 @@ class LoopVectorizationCostModel {
956956
return expectedCost(UserVF).isValid();
957957
}
958958

959+
/// \return True if maximizing vector bandwidth is enabled by the target or
960+
/// user options.
961+
bool useMaxBandwidth(TargetTransformInfo::RegisterKind RegKind);
962+
959963
/// \return The size (in bits) of the smallest and widest types in the code
960964
/// that needs to be vectorized. We ignore values that remain scalar such as
961965
/// 64 bit loop indices.
@@ -3918,6 +3922,14 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
39183922
return FixedScalableVFPair::getNone();
39193923
}
39203924

3925+
// Decide whether maximizing vector bandwidth is in effect for register kind
// RegKind. The result is true when either:
//   * MaximizeBandwidth evaluates to true, or
//   * MaximizeBandwidth.getNumOccurrences() == 0 (presumably: the option was
//     not given explicitly -- confirm against its declaration) and either
//     the target asks for it via TTI.shouldMaximizeVectorBandwidth(RegKind),
//     or UseWiderVFIfCallVariantsPresent is set and the loop has vector call
//     variants according to Legal->hasVectorCallVariants().
// Consequently, a MaximizeBandwidth that is false with a non-zero occurrence
// count yields false regardless of the target hook or call variants.
bool LoopVectorizationCostModel::useMaxBandwidth(
3926+
TargetTransformInfo::RegisterKind RegKind) {
3927+
return MaximizeBandwidth || (MaximizeBandwidth.getNumOccurrences() == 0 &&
3928+
(TTI.shouldMaximizeVectorBandwidth(RegKind) ||
3929+
(UseWiderVFIfCallVariantsPresent &&
3930+
Legal->hasVectorCallVariants())));
3931+
}
3932+
39213933
ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
39223934
unsigned MaxTripCount, unsigned SmallestType, unsigned WidestType,
39233935
ElementCount MaxSafeVF, bool FoldTailByMasking) {
@@ -3983,10 +3995,7 @@ ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
39833995
ComputeScalableMaxVF ? TargetTransformInfo::RGK_ScalableVector
39843996
: TargetTransformInfo::RGK_FixedWidthVector;
39853997
ElementCount MaxVF = MaxVectorElementCount;
3986-
if (MaximizeBandwidth ||
3987-
(MaximizeBandwidth.getNumOccurrences() == 0 &&
3988-
(TTI.shouldMaximizeVectorBandwidth(RegKind) ||
3989-
(UseWiderVFIfCallVariantsPresent && Legal->hasVectorCallVariants())))) {
3998+
if (useMaxBandwidth(RegKind)) {
39903999
auto MaxVectorElementCountMaxBW = ElementCount::get(
39914000
llvm::bit_floor(WidestRegister.getKnownMinValue() / SmallestType),
39924001
ComputeScalableMaxVF);
@@ -4349,7 +4358,10 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
43494358

43504359
/// Don't consider the VF if it exceeds the number of registers for the
43514360
/// target.
4352-
if (RU.exceedsMaxNumRegs(TTI))
4361+
if (CM.useMaxBandwidth(VF.isScalable()
4362+
? TargetTransformInfo::RGK_ScalableVector
4363+
: TargetTransformInfo::RGK_FixedWidthVector) &&
4364+
RU.exceedsMaxNumRegs(TTI))
43534365
continue;
43544366

43554367
InstructionCost C = CM.expectedCost(VF);
@@ -7119,7 +7131,10 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
71197131
InstructionCost Cost = cost(*P, VF);
71207132
VectorizationFactor CurrentFactor(VF, Cost, ScalarCost);
71217133

7122-
if (RU.exceedsMaxNumRegs(TTI)) {
7134+
if (CM.useMaxBandwidth(VF.isScalable()
7135+
? TargetTransformInfo::RGK_ScalableVector
7136+
: TargetTransformInfo::RGK_FixedWidthVector) &&
7137+
RU.exceedsMaxNumRegs(TTI)) {
71237138
LLVM_DEBUG(dbgs() << "LV(REG): Not considering vector loop of width "
71247139
<< VF << " because it uses too many registers\n");
71257140
continue;

0 commit comments

Comments
 (0)