@@ -956,6 +956,10 @@ class LoopVectorizationCostModel {
956
956
return expectedCost (UserVF).isValid ();
957
957
}
958
958
959
+ /// \return True if maximizing vector bandwidth is enabled by the target or
960
+ /// user options. \p RegKind is the register kind forwarded to the target query.
961
+ bool useMaxBandwidth (TargetTransformInfo::RegisterKind RegKind);
962
+
959
963
/// \return The size (in bits) of the smallest and widest types in the code
960
964
/// that needs to be vectorized. We ignore values that remain scalar such as
961
965
/// 64 bit loop indices.
@@ -3918,6 +3922,14 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
3918
3922
return FixedScalableVFPair::getNone ();
3919
3923
}
3920
3924
3925
// Decides whether vector bandwidth should be maximized for register kind
// RegKind.
//
// Returns true when MaximizeBandwidth evaluates to true. Otherwise it returns
// true only if MaximizeBandwidth.getNumOccurrences() is 0 and either
// TTI.shouldMaximizeVectorBandwidth(RegKind) holds, or both
// UseWiderVFIfCallVariantsPresent and Legal->hasVectorCallVariants() hold.
// A MaximizeBandwidth that is false with a non-zero occurrence count therefore
// yields false without consulting the target.
//
// NOTE(review): MaximizeBandwidth and UseWiderVFIfCallVariantsPresent are
// declared outside this view; presumably they are command-line options and a
// zero occurrence count means "not set explicitly by the user" - confirm.
+ bool LoopVectorizationCostModel::useMaxBandwidth (
3926
+ TargetTransformInfo::RegisterKind RegKind) {
3927
+ return MaximizeBandwidth || (MaximizeBandwidth.getNumOccurrences () == 0 &&
3928
+ (TTI.shouldMaximizeVectorBandwidth (RegKind) ||
3929
+ (UseWiderVFIfCallVariantsPresent &&
3930
+ Legal->hasVectorCallVariants ())));
3931
+ }
3932
+
3921
3933
ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget (
3922
3934
unsigned MaxTripCount, unsigned SmallestType, unsigned WidestType,
3923
3935
ElementCount MaxSafeVF, bool FoldTailByMasking) {
@@ -3983,10 +3995,7 @@ ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
3983
3995
ComputeScalableMaxVF ? TargetTransformInfo::RGK_ScalableVector
3984
3996
: TargetTransformInfo::RGK_FixedWidthVector;
3985
3997
ElementCount MaxVF = MaxVectorElementCount;
3986
- if (MaximizeBandwidth ||
3987
- (MaximizeBandwidth.getNumOccurrences () == 0 &&
3988
- (TTI.shouldMaximizeVectorBandwidth (RegKind) ||
3989
- (UseWiderVFIfCallVariantsPresent && Legal->hasVectorCallVariants ())))) {
3998
+ if (useMaxBandwidth (RegKind)) {
3990
3999
auto MaxVectorElementCountMaxBW = ElementCount::get (
3991
4000
llvm::bit_floor (WidestRegister.getKnownMinValue () / SmallestType),
3992
4001
ComputeScalableMaxVF);
@@ -4349,7 +4358,10 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
4349
4358
4350
4359
/// Don't consider the VF if it exceeds the number of registers for the
4351
4360
/// target.
4352
- if (RU.exceedsMaxNumRegs (TTI))
4361
+ if (CM.useMaxBandwidth (VF.isScalable ()
4362
+ ? TargetTransformInfo::RGK_ScalableVector
4363
+ : TargetTransformInfo::RGK_FixedWidthVector) &&
4364
+ RU.exceedsMaxNumRegs (TTI))
4353
4365
continue ;
4354
4366
4355
4367
InstructionCost C = CM.expectedCost (VF);
@@ -7119,7 +7131,10 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
7119
7131
InstructionCost Cost = cost (*P, VF);
7120
7132
VectorizationFactor CurrentFactor (VF, Cost, ScalarCost);
7121
7133
7122
- if (RU.exceedsMaxNumRegs (TTI)) {
7134
+ if (CM.useMaxBandwidth (VF.isScalable ()
7135
+ ? TargetTransformInfo::RGK_ScalableVector
7136
+ : TargetTransformInfo::RGK_FixedWidthVector) &&
7137
+ RU.exceedsMaxNumRegs (TTI)) {
7123
7138
LLVM_DEBUG (dbgs () << " LV(REG): Not considering vector loop of width "
7124
7139
<< VF << " because it uses too many registers\n " );
7125
7140
continue ;
0 commit comments