Skip to content

Commit ea89a78

Browse files
committed
[llvm][AArch64][Assembly]: Add FP8FMA assembly and disassembly.
This patch adds the feature flags FP8FMA and SSVE_FP8FMA and the assembly/disassembly for the following NEON and SVE2 instructions:
* NEON: FMLALBlane, FMLALTlane, FMLALLBBlane, FMLALLBTlane, FMLALLTBlane, FMLALLTTlane, FMLALB, FMLALT, FMLALLBB, FMLALLBT, FMLALLTB, FMLALLTT
* SVE2: FMLALB_ZZZI, FMLALT_ZZZI, FMLALB_ZZZ, FMLALT_ZZZ, FMLALLBB_ZZZI, FMLALLBT_ZZZI, FMLALLTB_ZZZI, FMLALLTT_ZZZI, FMLALLBB_ZZZ, FMLALLBT_ZZZ, FMLALLTB_ZZZ, FMLALLTT_ZZZ
The encodings follow this documentation: https://developer.arm.com/documentation/ddi0602/2023-09
Change-Id: Ie7527ecd3be9de04a1176f41761986bb2e679fab
1 parent f753703 commit ea89a78

22 files changed

+944
-53
lines changed

llvm/include/llvm/TargetParser/AArch64TargetParser.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,8 @@ enum ArchExtKind : unsigned {
162162
AEK_FPMR = 58, // FEAT_FPMR
163163
AEK_FP8 = 59, // FEAT_FP8
164164
AEK_FAMINMAX = 60, // FEAT_FAMINMAX
165+
AEK_FP8FMA = 61, // FEAT_FP8FMA
166+
AEK_SSVE_FP8FMA = 62, // FEAT_SSVE_FP8FMA
165167
AEK_NUM_EXTENSIONS
166168
};
167169
using ExtensionBitset = Bitset<AEK_NUM_EXTENSIONS>;
@@ -273,6 +275,8 @@ inline constexpr ExtensionInfo Extensions[] = {
273275
{"fpmr", AArch64::AEK_FPMR, "+fpmr", "-fpmr", FEAT_INIT, "", 0},
274276
{"fp8", AArch64::AEK_FP8, "+fp8", "-fp8", FEAT_INIT, "+fpmr", 0},
275277
{"faminmax", AArch64::AEK_FAMINMAX, "+faminmax", "-faminmax", FEAT_INIT, "", 0},
278+
{"fp8fma", AArch64::AEK_FP8FMA, "+fp8fma", "-fp8fma", FEAT_INIT, "+fpmr", 0},
279+
{"ssve-fp8fma", AArch64::AEK_SSVE_FP8FMA, "+ssve-fp8fma", "-ssve-fp8fma", FEAT_INIT, "+sme2", 0},
276280
// Special cases
277281
{"none", AArch64::AEK_NONE, {}, {}, FEAT_INIT, "", ExtensionInfo::MaxFMVPriority},
278282
};

llvm/include/llvm/TargetParser/SubtargetFeature.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ namespace llvm {
3131
class raw_ostream;
3232
class Triple;
3333

34-
const unsigned MAX_SUBTARGET_WORDS = 4;
34+
const unsigned MAX_SUBTARGET_WORDS = 5;
3535
const unsigned MAX_SUBTARGET_FEATURES = MAX_SUBTARGET_WORDS * 64;
3636

3737
/// Container class for subtarget features.

llvm/lib/Target/AArch64/AArch64.td

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -517,6 +517,12 @@ def FeatureSME2p1 : SubtargetFeature<"sme2p1", "HasSME2p1", "true",
517517
def FeatureFAMINMAX: SubtargetFeature<"faminmax", "HasFAMINMAX", "true",
518518
"Enable FAMIN and FAMAX instructions (FEAT_FAMINMAX)">;
519519

520+
// Subtarget feature "fp8fma": sets HasFP8FMA to "true" when enabled.
def FeatureFP8FMA : SubtargetFeature<"fp8fma", "HasFP8FMA", "true",
521+
"Enable fp8 multiply-add instructions (FEAT_FP8FMA)">;
522+
523+
// Subtarget feature "ssve-fp8fma": sets HasSSVE_FP8FMA to "true" when enabled.
// FeatureSME2 is listed as a dependent feature of this one.
def FeatureSSVE_FP8FMA : SubtargetFeature<"ssve-fp8fma", "HasSSVE_FP8FMA", "true",
524+
"Enable SVE2 fp8 multiply-add instructions (FEAT_SSVE_FP8FMA)", [FeatureSME2]>;
525+
520526
def FeatureAppleA7SysReg : SubtargetFeature<"apple-a7-sysreg", "HasAppleA7SysReg", "true",
521527
"Apple A7 (the CPU formerly known as Cyclone)">;
522528

@@ -747,7 +753,7 @@ let F = [HasSVE2p1, HasSVE2p1_or_HasSME2, HasSVE2p1_or_HasSME2p1] in
747753
def SVE2p1Unsupported : AArch64Unsupported;
748754

749755
def SVE2Unsupported : AArch64Unsupported {
750-
let F = !listconcat([HasSVE2, HasSVE2orSME,
756+
let F = !listconcat([HasSVE2, HasSVE2orSME, HasSSVE_FP8FMA,
751757
HasSVE2AES, HasSVE2SHA3, HasSVE2SM4, HasSVE2BitPerm],
752758
SVE2p1Unsupported.F);
753759
}
@@ -761,7 +767,7 @@ let F = [HasSME2p1, HasSVE2p1_or_HasSME2p1] in
761767
def SME2p1Unsupported : AArch64Unsupported;
762768

763769
def SME2Unsupported : AArch64Unsupported {
764-
let F = !listconcat([HasSME2, HasSVE2p1_or_HasSME2],
770+
let F = !listconcat([HasSME2, HasSVE2p1_or_HasSME2, HasSSVE_FP8FMA],
765771
SME2p1Unsupported.F);
766772
}
767773

llvm/lib/Target/AArch64/AArch64InstrFormats.td

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6055,6 +6055,15 @@ multiclass SIMDThreeSameVectorFML<bit U, bit b13, bits<3> size, string asm,
60556055
v4f32, v8f16, OpNode>;
60566056
}
60576057

6058+
// Vector-form multiply-add with a ".8h" destination and ".16b" sources.
// Produces a single v8f16 record on V128 registers; Q is forwarded to the
// base class and null_frag is passed as the pattern operator.
multiclass SIMDThreeSameVectorMLA<bit Q, string asm>{
6059+
def v8f16 : BaseSIMDThreeSameVectorDot<Q, 0b0, 0b11, 0b1111, asm, ".8h", ".16b",
6060+
V128, v8f16, v16i8, null_frag>;
6061+
}
6062+
6063+
// Vector-form multiply-add with a ".4s" destination and ".16b" sources.
// Produces a single v4f32 record on V128 registers; Q and sz are forwarded
// to the base class and null_frag is passed as the pattern operator.
multiclass SIMDThreeSameVectorMLAL<bit Q, bits<2> sz, string asm>{
6064+
def v4f32 : BaseSIMDThreeSameVectorDot<Q, 0b0, sz, 0b1000, asm, ".4s", ".16b",
6065+
V128, v4f32, v16i8, null_frag>;
6066+
}
60586067

60596068
// FP8 assembly/disassembly classes
60606069

@@ -8521,6 +8530,31 @@ class BF16ToSinglePrecision<string asm>
85218530
}
85228531
} // End of let mayStore = 0, mayLoad = 0, hasSideEffects = 0
85238532

8533+
//----------------------------------------------------------------------------
8534+
// Base class for by-element instructions whose indexed operand is a byte
// lane: forwards to BaseSIMDIndexedTied with RegType for the first two
// register operands, RegType_lo for the indexed operand, a VectorIndexB
// index, an empty pattern list, and the ".16b" / ".b" operand suffixes.
class BaseSIMDThreeSameVectorIndexB<bit Q, bit U, bits<2> sz, bits<4> opc,
8535+
string asm, string dst_kind,
8536+
RegisterOperand RegType,
8537+
RegisterOperand RegType_lo>
8538+
: BaseSIMDIndexedTied<Q, U, 0b0, sz, opc,
8539+
RegType, RegType, RegType_lo, VectorIndexB,
8540+
asm, "", dst_kind, ".16b", ".b", []> {
8541+
8542+
// idx = H:L:M
// The 4-bit lane index is split across the encoding: idx{3} is placed in
// Inst{11} and idx{2-0} in Inst{21-19}.
// NOTE(review): "H:L:M" names only three fields for a four-bit index; the
// lowest index bit lands in Inst{19} - confirm its field name against the
// Arm instruction encoding.
8543+
bits<4> idx;
8544+
let Inst{11} = idx{3};
8545+
let Inst{21-19} = idx{2-0};
8546+
}
8547+
8548+
// By-element multiply-add with a ".8h" destination. Produces a single v8f16
// record; the indexed operand uses the V128_0to7 register operand.
multiclass SIMDThreeSameVectorMLAIndex<bit Q, string asm> {
8549+
def v8f16 : BaseSIMDThreeSameVectorIndexB<Q, 0b0, 0b11, 0b0000, asm, ".8h",
8550+
V128, V128_0to7>;
8551+
}
8552+
8553+
// By-element multiply-add with a ".4s" destination. Produces a single v4f32
// record; sz is forwarded to the base class and the indexed operand uses the
// V128_0to7 register operand.
multiclass SIMDThreeSameVectorMLALIndex<bit Q, bits<2> sz, string asm> {
8554+
def v4f32 : BaseSIMDThreeSameVectorIndexB<Q, 0b1, sz, 0b1000, asm, ".4s",
8555+
V128, V128_0to7>;
8556+
}
8557+
85248558
//----------------------------------------------------------------------------
85258559
// Armv8.6 Matrix Multiply Extension
85268560
//----------------------------------------------------------------------------

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,13 @@ def HasFP8 : Predicate<"Subtarget->hasFP8()">,
166166
AssemblerPredicateWithAll<(all_of FeatureFP8), "fp8">;
167167
def HasFAMINMAX : Predicate<"Subtarget->hasFAMINMAX()">,
168168
AssemblerPredicateWithAll<(all_of FeatureFAMINMAX), "faminmax">;
169+
// Codegen check: Subtarget->hasFP8FMA(). Assembler check: FeatureFP8FMA,
// reported to the user as "fp8fma".
def HasFP8FMA : Predicate<"Subtarget->hasFP8FMA()">,
170+
AssemblerPredicateWithAll<(all_of FeatureFP8FMA), "fp8fma">;
171+
// The SVE2 fp8 multiply-add instructions are legal with ssve-fp8fma, or with
// sve2 and fp8fma together. The codegen string uses the has*() accessor form
// shared by the neighbouring predicates (hasFP8FMA(), hasSVE2()), i.e.
// hasSSVE_FP8FMA() for the "HasSSVE_FP8FMA" subtarget field, so that both
// halves of the predicate express the same condition.
def HasSSVE_FP8FMA : Predicate<"Subtarget->hasSSVE_FP8FMA() || "
172+
"(Subtarget->hasSVE2() && Subtarget->hasFP8FMA())">,
173+
AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8FMA,
174+
(all_of FeatureSVE2, FeatureFP8FMA)),
175+
"ssve-fp8fma or (sve2 and fp8fma)">;
169176

170177
// A subset of SVE(2) instructions are legal in Streaming SVE execution mode,
171178
// they should be enabled if either has been specified.
@@ -9283,6 +9290,21 @@ let Predicates = [HasFAMINMAX] in {
92839290
defm FAMIN : SIMDThreeSameVectorFP<0b1, 0b1, 0b011, "famin", null_frag>;
92849291
} // End let Predicates = [HasFAMINMAX]
92859292

9293+
// NEON fp8 multiply-add instructions, all gated on HasFP8FMA.
let Predicates = [HasFP8FMA] in {
// By-element (indexed) forms. The first argument is Q; the two-bit argument
// of the *LALL* forms is sz.
9294+
defm FMLALBlane : SIMDThreeSameVectorMLAIndex<0b0, "fmlalb">;
9295+
defm FMLALTlane : SIMDThreeSameVectorMLAIndex<0b1, "fmlalt">;
9296+
defm FMLALLBBlane : SIMDThreeSameVectorMLALIndex<0b0, 0b00, "fmlallbb">;
9297+
defm FMLALLBTlane : SIMDThreeSameVectorMLALIndex<0b0, 0b01, "fmlallbt">;
9298+
defm FMLALLTBlane : SIMDThreeSameVectorMLALIndex<0b1, 0b00, "fmlalltb">;
9299+
defm FMLALLTTlane : SIMDThreeSameVectorMLALIndex<0b1, 0b01, "fmlalltt">;
9300+
9301+
// Vector forms, using the same Q / sz arguments as the indexed forms above.
defm FMLALB : SIMDThreeSameVectorMLA<0b0, "fmlalb">;
9302+
defm FMLALT : SIMDThreeSameVectorMLA<0b1, "fmlalt">;
9303+
defm FMLALLBB : SIMDThreeSameVectorMLAL<0b0, 0b00, "fmlallbb">;
9304+
defm FMLALLBT : SIMDThreeSameVectorMLAL<0b0, 0b01, "fmlallbt">;
9305+
defm FMLALLTB : SIMDThreeSameVectorMLAL<0b1, 0b00, "fmlalltb">;
9306+
defm FMLALLTT : SIMDThreeSameVectorMLAL<0b1, 0b01, "fmlalltt">;
9307+
} // End let Predicates = [HasFP8FMA]
92869308

92879309
include "AArch64InstrAtomics.td"
92889310
include "AArch64SVEInstrInfo.td"

llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -981,6 +981,8 @@ unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
981981
case AArch64::FPR64_loRegClassID:
982982
case AArch64::FPR16_loRegClassID:
983983
return 16;
984+
case AArch64::FPR128_0to7RegClassID:
985+
return 8;
984986
}
985987
}
986988

llvm/lib/Target/AArch64/AArch64RegisterInfo.td

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -467,6 +467,13 @@ def FPR128_lo : RegisterClass<"AArch64",
467467
v8bf16],
468468
128, (trunc FPR128, 16)>;
469469

470+
// The lower 8 vector registers. Some instructions can only take registers
471+
// in this range.
// Defined as the first 8 entries of FPR128 via (trunc FPR128, 8), with
// 128 passed as the argument after the value-type list.
472+
def FPR128_0to7 : RegisterClass<"AArch64",
473+
[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16,
474+
v8bf16],
475+
128, (trunc FPR128, 8)>;
476+
470477
// Pairs, triples, and quads of 64-bit vector registers.
471478
def DSeqPairs : RegisterTuples<[dsub0, dsub1], [(rotl FPR64, 0), (rotl FPR64, 1)]>;
472479
def DSeqTriples : RegisterTuples<[dsub0, dsub1, dsub2],
@@ -534,6 +541,15 @@ def V128_lo : RegisterOperand<FPR128_lo, "printVRegOperand"> {
534541
let ParserMatchClass = VectorRegLoAsmOperand;
535542
}
536543

544+
// Assembler operand class named "VectorReg0to7"; operands are accepted when
// the parser method isNeonVectorReg0to7 returns true.
def VectorReg0to7AsmOperand : AsmOperandClass {
545+
let Name = "VectorReg0to7";
546+
let PredicateMethod = "isNeonVectorReg0to7";
547+
}
548+
549+
// Register operand over FPR128_0to7, printed with printVRegOperand and
// matched in the assembler through VectorReg0to7AsmOperand.
def V128_0to7 : RegisterOperand<FPR128_0to7, "printVRegOperand"> {
550+
let ParserMatchClass = VectorReg0to7AsmOperand;
551+
}
552+
537553
class TypedVecListAsmOperand<int count, string vecty, int lanes, int eltsize>
538554
: AsmOperandClass {
539555
let Name = "TypedVectorList" # count # "_" # lanes # eltsize;

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4029,3 +4029,22 @@ let Predicates = [HasSVE2orSME2, HasFAMINMAX] in {
40294029
defm FAMIN_ZPmZ : sve_fp_2op_p_zds<0b1111, "famin", "", null_frag, DestructiveOther>;
40304030
defm FAMAX_ZPmZ : sve_fp_2op_p_zds<0b1110, "famax", "", null_frag, DestructiveOther>;
40314031
} // End HasSVE2orSME2, HasFAMINMAX
4032+
4033+
// SVE2 fp8 widening multiply-add instructions, all gated on HasSSVE_FP8FMA.
// The "long" vector forms are instantiated with ZPR16 and the "long long"
// vector forms with ZPR32.
let Predicates = [HasSSVE_FP8FMA] in {
4034+
// FP8 Widening Multiply-Add Long - Indexed Group
4035+
def FMLALB_ZZZI : sve2_fp8_mla_long_by_indexed_elem<0b0, "fmlalb">;
4036+
def FMLALT_ZZZI : sve2_fp8_mla_long_by_indexed_elem<0b1, "fmlalt">;
4037+
// FP8 Widening Multiply-Add Long Group
4038+
def FMLALB_ZZZ : sve2_fp8_mla<0b100, ZPR16, "fmlalb">;
4039+
def FMLALT_ZZZ : sve2_fp8_mla<0b101, ZPR16, "fmlalt">;
4040+
// FP8 Widening Multiply-Add Long Long - Indexed Group
4041+
def FMLALLBB_ZZZI : sve2_fp8_mla_long_long_by_indexed_elem<0b00, "fmlallbb">;
4042+
def FMLALLBT_ZZZI : sve2_fp8_mla_long_long_by_indexed_elem<0b01, "fmlallbt">;
4043+
def FMLALLTB_ZZZI : sve2_fp8_mla_long_long_by_indexed_elem<0b10, "fmlalltb">;
4044+
def FMLALLTT_ZZZI : sve2_fp8_mla_long_long_by_indexed_elem<0b11, "fmlalltt">;
4045+
// FP8 Widening Multiply-Add Long Long Group
4046+
def FMLALLBB_ZZZ : sve2_fp8_mla<0b000, ZPR32, "fmlallbb">;
4047+
def FMLALLBT_ZZZ : sve2_fp8_mla<0b001, ZPR32, "fmlallbt">;
4048+
def FMLALLTB_ZZZ : sve2_fp8_mla<0b010, ZPR32, "fmlalltb">;
4049+
def FMLALLTT_ZZZ : sve2_fp8_mla<0b011, ZPR32, "fmlalltt">;
4050+
} // End HasSSVE_FP8FMA

llvm/lib/Target/AArch64/AArch64SchedA64FX.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def A64FXModel : SchedMachineModel {
2323
list<Predicate> UnsupportedFeatures =
2424
[HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, HasSVE2BitPerm, HasPAuth,
2525
HasSVE2orSME, HasMTE, HasMatMulInt8, HasBF16, HasSME2, HasSME2p1, HasSVE2p1,
26-
HasSVE2p1_or_HasSME2p1, HasSMEF16F16];
26+
HasSVE2p1_or_HasSME2p1, HasSMEF16F16, HasSSVE_FP8FMA];
2727

2828
let FullInstRWOverlapCheck = 0;
2929
}

llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp

Lines changed: 38 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1223,6 +1223,12 @@ class AArch64Operand : public MCParsedAsmOperand {
12231223
Reg.RegNum));
12241224
}
12251225

1226+
/// Returns true when this operand is a register of kind NeonVector whose
/// register number is contained in the FPR128_0to7 register class.
bool isNeonVectorReg0to7() const {
1227+
return Kind == k_Register && Reg.Kind == RegKind::NeonVector &&
1228+
(AArch64MCRegisterClasses[AArch64::FPR128_0to7RegClassID].contains(
1229+
Reg.RegNum));
1230+
}
1231+
12261232
bool isMatrix() const { return Kind == k_MatrixRegister; }
12271233
bool isMatrixTileList() const { return Kind == k_MatrixTileList; }
12281234

@@ -1766,6 +1772,11 @@ class AArch64Operand : public MCParsedAsmOperand {
17661772
Inst.addOperand(MCOperand::createReg(getReg()));
17671773
}
17681774

1775+
/// Appends this operand's register (getReg()) to Inst as one register
/// operand. N must be 1; any other value trips the assertion.
void addVectorReg0to7Operands(MCInst &Inst, unsigned N) const {
1776+
assert(N == 1 && "Invalid number of operands!");
1777+
Inst.addOperand(MCOperand::createReg(getReg()));
1778+
}
1779+
17691780
enum VecListIndexType {
17701781
VecListIdx_DReg = 0,
17711782
VecListIdx_QReg = 1,
@@ -2598,31 +2609,31 @@ static std::optional<std::pair<int, int>> parseVectorKind(StringRef Suffix,
25982609

25992610
switch (VectorKind) {
26002611
case RegKind::NeonVector:
2601-
Res =
2602-
StringSwitch<std::pair<int, int>>(Suffix.lower())
2603-
.Case("", {0, 0})
2604-
.Case(".1d", {1, 64})
2605-
.Case(".1q", {1, 128})
2606-
// '.2h' needed for fp16 scalar pairwise reductions
2607-
.Case(".2h", {2, 16})
2608-
.Case(".2s", {2, 32})
2609-
.Case(".2d", {2, 64})
2610-
// '.4b' is another special case for the ARMv8.2a dot product
2611-
// operand
2612-
.Case(".4b", {4, 8})
2613-
.Case(".4h", {4, 16})
2614-
.Case(".4s", {4, 32})
2615-
.Case(".8b", {8, 8})
2616-
.Case(".8h", {8, 16})
2617-
.Case(".16b", {16, 8})
2618-
// Accept the width neutral ones, too, for verbose syntax. If those
2619-
// aren't used in the right places, the token operand won't match so
2620-
// all will work out.
2621-
.Case(".b", {0, 8})
2622-
.Case(".h", {0, 16})
2623-
.Case(".s", {0, 32})
2624-
.Case(".d", {0, 64})
2625-
.Default({-1, -1});
2612+
Res = StringSwitch<std::pair<int, int>>(Suffix.lower())
2613+
.Case("", {0, 0})
2614+
.Case(".1d", {1, 64})
2615+
.Case(".1q", {1, 128})
2616+
// '.2h' needed for fp16 scalar pairwise reductions
2617+
.Case(".2h", {2, 16})
2618+
.Case(".2b", {2, 8})
2619+
.Case(".2s", {2, 32})
2620+
.Case(".2d", {2, 64})
2621+
// '.4b' is another special case for the ARMv8.2a dot product
2622+
// operand
2623+
.Case(".4b", {4, 8})
2624+
.Case(".4h", {4, 16})
2625+
.Case(".4s", {4, 32})
2626+
.Case(".8b", {8, 8})
2627+
.Case(".8h", {8, 16})
2628+
.Case(".16b", {16, 8})
2629+
// Accept the width neutral ones, too, for verbose syntax. If
2630+
// those aren't used in the right places, the token operand won't
2631+
// match so all will work out.
2632+
.Case(".b", {0, 8})
2633+
.Case(".h", {0, 16})
2634+
.Case(".s", {0, 32})
2635+
.Case(".d", {0, 64})
2636+
.Default({-1, -1});
26262637
break;
26272638
case RegKind::SVEPredicateAsCounter:
26282639
case RegKind::SVEPredicateVector:
@@ -3641,6 +3652,8 @@ static const struct Extension {
36413652
{"fpmr", {AArch64::FeatureFPMR}},
36423653
{"fp8", {AArch64::FeatureFP8}},
36433654
{"faminmax", {AArch64::FeatureFAMINMAX}},
3655+
{"fp8fma", {AArch64::FeatureFP8FMA}},
3656+
{"ssve-fp8fma", {AArch64::FeatureSSVE_FP8FMA}},
36443657
};
36453658

36463659
static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {

0 commit comments

Comments
 (0)