llvm · david-arm · Jun 13, 2025 · Jun 13, 2025 · lukel97 · Jun 16, 2025
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1443,7 +1443,7 @@ class TargetTransformInfo {
   /// Index = -1 to indicate that there is no information about the index value.
   LLVM_ABI InstructionCost
   getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
-                           unsigned Index, TTI::TargetCostKind CostKind) const;
+                           int Index, TTI::TargetCostKind CostKind) const;
 
   /// \return The expected cost of control-flow related instructions such as
   /// Phi, Ret, Br, Switch.
@@ -1465,14 +1465,21 @@ class TargetTransformInfo {
       OperandValueInfo Op2Info = {OK_AnyValue, OP_None},
       const Instruction *I = nullptr) const;
 
+  enum : int {
+    UnknownIndex = -1,
+    LastIndex = -2,
+  };
+
+  static inline bool isKnownVectorIndex(int Index) { return Index >= 0; }
+
   /// \return The expected cost of vector Insert and Extract.
   /// Use -1 to indicate that there is no information on the index value.
   /// This is used when the instruction is not available; a typical use
   /// case is to provision the cost of vectorization/scalarization in
   /// vectorizer passes.
   LLVM_ABI InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                               TTI::TargetCostKind CostKind,
-                                              unsigned Index = -1,
+                                              int Index = UnknownIndex,
                                               const Value *Op0 = nullptr,
                                               const Value *Op1 = nullptr) const;
 
@@ -1486,7 +1493,7 @@ class TargetTransformInfo {
   /// of the extract(nullptr if user is not known before vectorization) and
   /// 'Idx' being the extract lane.
   LLVM_ABI InstructionCost getVectorInstrCost(
-      unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
+      unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, int Index,
       Value *Scalar,
       ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) const;
 
@@ -1498,7 +1505,7 @@ class TargetTransformInfo {
   /// exists (e.g., from basic blocks during transformation).
   LLVM_ABI InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                               TTI::TargetCostKind CostKind,
-                                              unsigned Index = -1) const;
+                                              int Index = UnknownIndex) const;
 
   /// \return The expected cost of aggregate inserts and extracts. This is
   /// used when the instruction is not available; a typical use case is to

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -758,7 +758,7 @@ class TargetTransformInfoImplBase {
 
   virtual InstructionCost
   getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
-                           unsigned Index, TTI::TargetCostKind CostKind) const {
+                           int Index, TTI::TargetCostKind CostKind) const {
     return 1;
   }
 
@@ -781,7 +781,7 @@ class TargetTransformInfoImplBase {
 
   virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                              TTI::TargetCostKind CostKind,
-                                             unsigned Index, const Value *Op0,
+                                             int Index, const Value *Op0,
                                              const Value *Op1) const {
     return 1;
   }
@@ -791,15 +791,15 @@ class TargetTransformInfoImplBase {
   /// of the extract(nullptr if user is not known before vectorization) and
   /// 'Idx' being the extract lane.
   virtual InstructionCost getVectorInstrCost(
-      unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
+      unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, int Index,
       Value *Scalar,
       ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) const {
     return 1;
   }
 
   virtual InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                              TTI::TargetCostKind CostKind,
-                                             unsigned Index) const {
+                                             int Index) const {
     return 1;
   }
 
@@ -1522,7 +1522,7 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
       auto *IE = dyn_cast<InsertElementInst>(U);
       if (!IE)
         return TTI::TCC_Basic; // FIXME
-      unsigned Idx = -1;
+      int Idx = TargetTransformInfo::UnknownIndex;
       if (auto *CI = dyn_cast<ConstantInt>(Operands[2]))
         if (CI->getValue().getActiveBits() <= 32)
           Idx = CI->getZExtValue();
@@ -1641,7 +1641,7 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
       auto *EEI = dyn_cast<ExtractElementInst>(U);
       if (!EEI)
         return TTI::TCC_Basic; // FIXME
-      unsigned Idx = -1;
+      int Idx = TargetTransformInfo::UnknownIndex;
       if (auto *CI = dyn_cast<ConstantInt>(Operands[1]))
         if (CI->getValue().getActiveBits() <= 32)
           Idx = CI->getZExtValue();

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1341,7 +1341,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
 
   InstructionCost
   getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
-                           unsigned Index,
+                           int Index,
                            TTI::TargetCostKind CostKind) const override {
     return thisT()->getVectorInstrCost(Instruction::ExtractElement, VecTy,
                                        CostKind, Index, nullptr, nullptr) +
@@ -1409,8 +1409,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
   }
 
   InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
-                                     TTI::TargetCostKind CostKind,
-                                     unsigned Index, const Value *Op0,
+                                     TTI::TargetCostKind CostKind, int Index,
+                                     const Value *Op0,
                                      const Value *Op1) const override {
     return getRegUsageForType(Val->getScalarType());
   }
@@ -1420,8 +1420,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
   /// of the extract(nullptr if user is not known before vectorization) and
   /// 'Idx' being the extract lane.
   InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
-                                     TTI::TargetCostKind CostKind,
-                                     unsigned Index, Value *Scalar,
+                                     TTI::TargetCostKind CostKind, int Index,
+                                     Value *Scalar,
                                      ArrayRef<std::tuple<Value *, User *, int>>
                                          ScalarUserAndIdx) const override {
     return thisT()->getVectorInstrCost(Opcode, Val, CostKind, Index, nullptr,
@@ -1430,7 +1430,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
 
   InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                      TTI::TargetCostKind CostKind,
-                                     unsigned Index) const override {
+                                     int Index) const override {
     Value *Op0 = nullptr;
     Value *Op1 = nullptr;
     if (auto *IE = dyn_cast<InsertElementInst>(&I)) {

diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1058,7 +1058,7 @@ InstructionCost TargetTransformInfo::getCastInstrCost(
 }
 
 InstructionCost TargetTransformInfo::getExtractWithExtendCost(
-    unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index,
+    unsigned Opcode, Type *Dst, VectorType *VecTy, int Index,
     TTI::TargetCostKind CostKind) const {
   InstructionCost Cost =
       TTIImpl->getExtractWithExtendCost(Opcode, Dst, VecTy, Index, CostKind);
@@ -1088,7 +1088,7 @@ InstructionCost TargetTransformInfo::getCmpSelInstrCost(
 }
 
 InstructionCost TargetTransformInfo::getVectorInstrCost(
-    unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
+    unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, int Index,
     const Value *Op0, const Value *Op1) const {
   assert((Opcode == Instruction::InsertElement ||
           Opcode == Instruction::ExtractElement) &&
@@ -1100,7 +1100,7 @@ InstructionCost TargetTransformInfo::getVectorInstrCost(
 }
 
 InstructionCost TargetTransformInfo::getVectorInstrCost(
-    unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
+    unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, int Index,
     Value *Scalar,
     ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) const {
   assert((Opcode == Instruction::InsertElement ||
@@ -1115,7 +1115,7 @@ InstructionCost TargetTransformInfo::getVectorInstrCost(
 InstructionCost
 TargetTransformInfo::getVectorInstrCost(const Instruction &I, Type *Val,
                                         TTI::TargetCostKind CostKind,
-                                        unsigned Index) const {
+                                        int Index) const {
   // FIXME: Assert that Opcode is either InsertElement or ExtractElement.
   // This is mentioned in the interface description and respected by all
   // callers, but never asserted upon.

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -3642,7 +3642,7 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
 
 InstructionCost
 AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
-                                         VectorType *VecTy, unsigned Index,
+                                         VectorType *VecTy, int Index,
                                          TTI::TargetCostKind CostKind) const {
 
   // Make sure we were given a valid extend opcode.
@@ -3711,12 +3711,24 @@ InstructionCost AArch64TTIImpl::getCFInstrCost(unsigned Opcode,
 }
 
 InstructionCost AArch64TTIImpl::getVectorInstrCostHelper(
-    unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
+    unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, int Index,
     bool HasRealUse, const Instruction *I, Value *Scalar,
     ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) const {
   assert(Val->isVectorTy() && "This must be a vector type");
 
-  if (Index != -1U) {
+  if (Index == TargetTransformInfo::LastIndex) {
+    if (isa<ScalableVectorType>(Val)) {
+      // This typically requires both while and lastb instructions in order
+      // to extract the last element. If this is in a loop the while
+      // instruction can at least be hoisted out, although it will consume a
+      // predicate register. The cost should be more expensive than the base
+      // extract cost, which is 2 for most CPUs.
+      return CostKind == TTI::TCK_CodeSize ? 2 : 3;
+    }
+    Index = cast<FixedVectorType>(Val)->getNumElements() - 1;
+  }
+
+  if (TargetTransformInfo::isKnownVectorIndex(Index)) {
     // Legalize the type.
     std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Val);
 
@@ -3884,16 +3896,15 @@ InstructionCost AArch64TTIImpl::getVectorInstrCostHelper(
 
 InstructionCost AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
                                                    TTI::TargetCostKind CostKind,
-                                                   unsigned Index,
-                                                   const Value *Op0,
+                                                   int Index, const Value *Op0,
                                                    const Value *Op1) const {
   bool HasRealUse =
       Opcode == Instruction::InsertElement && Op0 && !isa<UndefValue>(Op0);
   return getVectorInstrCostHelper(Opcode, Val, CostKind, Index, HasRealUse);
 }
 
 InstructionCost AArch64TTIImpl::getVectorInstrCost(
-    unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
+    unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, int Index,
     Value *Scalar,
     ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) const {
   return getVectorInstrCostHelper(Opcode, Val, CostKind, Index, false, nullptr,
@@ -3903,7 +3914,7 @@ InstructionCost AArch64TTIImpl::getVectorInstrCost(
 InstructionCost AArch64TTIImpl::getVectorInstrCost(const Instruction &I,
                                                    Type *Val,
                                                    TTI::TargetCostKind CostKind,
-                                                   unsigned Index) const {
+                                                   int Index) const {
   return getVectorInstrCostHelper(I.getOpcode(), Val, CostKind, Index,
                                   true /* HasRealUse */, &I);
 }
@@ -4052,10 +4063,13 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
       // loading the vector from constant pool or in some cases, may also result
       // in scalarization. For now, we are approximating this with the
       // scalarization cost.
-      auto ExtractCost = 2 * getVectorInstrCost(Instruction::ExtractElement, Ty,
-                                                CostKind, -1, nullptr, nullptr);
-      auto InsertCost = getVectorInstrCost(Instruction::InsertElement, Ty,
-                                           CostKind, -1, nullptr, nullptr);
+      auto ExtractCost =
+          2 * getVectorInstrCost(Instruction::ExtractElement, Ty, CostKind,
+                                 TargetTransformInfo::UnknownIndex, nullptr,
+                                 nullptr);
+      auto InsertCost = getVectorInstrCost(
+          Instruction::InsertElement, Ty, CostKind,
+          TargetTransformInfo::UnknownIndex, nullptr, nullptr);
       unsigned NElts = cast<FixedVectorType>(Ty)->getNumElements();
       return ExtractCost + InsertCost +
              NElts * getArithmeticInstrCost(Opcode, Ty->getScalarType(),
@@ -4153,9 +4167,11 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
         // On AArch64, without SVE, vector divisions are expanded
         // into scalar divisions of each pair of elements.
         Cost += getVectorInstrCost(Instruction::ExtractElement, Ty, CostKind,
-                                   -1, nullptr, nullptr);
-        Cost += getVectorInstrCost(Instruction::InsertElement, Ty, CostKind, -1,
-                                   nullptr, nullptr);
+                                   TargetTransformInfo::UnknownIndex, nullptr,
+                                   nullptr);
+        Cost += getVectorInstrCost(Instruction::InsertElement, Ty, CostKind,
+                                   TargetTransformInfo::UnknownIndex, nullptr,
+                                   nullptr);
       }
 
       // TODO: if one of the arguments is scalar, then it's not necessary to
@@ -4186,11 +4202,13 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
       return LT.first;
     return cast<VectorType>(Ty)->getElementCount().getKnownMinValue() *
            (getArithmeticInstrCost(Opcode, Ty->getScalarType(), CostKind) +
-            getVectorInstrCost(Instruction::ExtractElement, Ty, CostKind, -1,
-                               nullptr, nullptr) *
+            getVectorInstrCost(Instruction::ExtractElement, Ty, CostKind,
+                               TargetTransformInfo::UnknownIndex, nullptr,
+                               nullptr) *
                 2 +
-            getVectorInstrCost(Instruction::InsertElement, Ty, CostKind, -1,
-                               nullptr, nullptr));
+            getVectorInstrCost(Instruction::InsertElement, Ty, CostKind,
+                               TargetTransformInfo::UnknownIndex, nullptr,
+                               nullptr));
   case ISD::ADD:
   case ISD::XOR:
   case ISD::OR:

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -73,7 +73,7 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
   /// of the extract(nullptr if user is not known before vectorization) and
   /// 'Idx' being the extract lane.
   InstructionCost getVectorInstrCostHelper(
-      unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
+      unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, int Index,
       bool HasRealUse, const Instruction *I = nullptr, Value *Scalar = nullptr,
       ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx = {}) const;
 
@@ -197,30 +197,30 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
 
   InstructionCost
   getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
-                           unsigned Index,
+                           int Index,
                            TTI::TargetCostKind CostKind) const override;
 
   InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                  const Instruction *I = nullptr) const override;
 
   InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
-                                     TTI::TargetCostKind CostKind,
-                                     unsigned Index, const Value *Op0,
+                                     TTI::TargetCostKind CostKind, int Index,
+                                     const Value *Op0,
                                      const Value *Op1) const override;
 
   /// \param ScalarUserAndIdx encodes the information about extracts from a
   /// vector with 'Scalar' being the value being extracted,'User' being the user
   /// of the extract(nullptr if user is not known before vectorization) and
   /// 'Idx' being the extract lane.
   InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
-                                     TTI::TargetCostKind CostKind,
-                                     unsigned Index, Value *Scalar,
+                                     TTI::TargetCostKind CostKind, int Index,
+                                     Value *Scalar,
                                      ArrayRef<std::tuple<Value *, User *, int>>
                                          ScalarUserAndIdx) const override;
 
   InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                      TTI::TargetCostKind CostKind,
-                                     unsigned Index) const override;
+                                     int Index) const override;
 
   InstructionCost
   getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -834,7 +834,7 @@ GCNTTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
 
 InstructionCost GCNTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
                                                TTI::TargetCostKind CostKind,
-                                               unsigned Index, const Value *Op0,
+                                               int Index, const Value *Op0,
                                                const Value *Op1) const {
   switch (Opcode) {
   case Instruction::ExtractElement:
@@ -853,7 +853,7 @@ InstructionCost GCNTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
     // operations, and we don't have to copy into a different register class.
 
     // Dynamic indexing isn't free and is best avoided.
-    return Index == ~0u ? 2 : 0;
+    return TargetTransformInfo::isKnownVectorIndex(Index) ? 0 : 2;
   }
   default:
     return BaseT::getVectorInstrCost(Opcode, ValTy, CostKind, Index, Op0, Op1);

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -169,8 +169,8 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
 
   using BaseT::getVectorInstrCost;
   InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy,
-                                     TTI::TargetCostKind CostKind,
-                                     unsigned Index, const Value *Op0,
+                                     TTI::TargetCostKind CostKind, int Index,
+                                     const Value *Op0,
                                      const Value *Op1) const override;
 
   bool isReadRegisterSourceOfDivergence(const IntrinsicInst *ReadReg) const;