Skip to content

Commit be2827c

Browse files
authored
JIT: Added SVE Prefetch* APIs. (#103094)
* Initial APIs * Added SvePrefetch test template. Prefetch APIs now have special codegen. * Minor cleanup * Feedback * Some cleanup * More work * Tests pass * Revert changes * put back * Fix merge * Fixing enum type * Quick test fix * Fix api * Feedback * Feedback * Feedback - testing prefetch types * load fix
1 parent 7d23d61 commit be2827c

File tree

12 files changed

+442
-18
lines changed

12 files changed

+442
-18
lines changed

src/coreclr/jit/gentree.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26916,6 +26916,10 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad(GenTree** pAddr) const
2691626916
case NI_Sve_Load2xVectorAndUnzip:
2691726917
case NI_Sve_Load3xVectorAndUnzip:
2691826918
case NI_Sve_Load4xVectorAndUnzip:
26919+
case NI_Sve_PrefetchBytes:
26920+
case NI_Sve_PrefetchInt16:
26921+
case NI_Sve_PrefetchInt32:
26922+
case NI_Sve_PrefetchInt64:
2691926923
addr = Op(2);
2692026924
break;
2692126925
#endif // TARGET_ARM64

src/coreclr/jit/hwintrinsicarm64.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -448,6 +448,14 @@ void HWIntrinsicInfo::lookupImmBounds(
448448
}
449449
break;
450450

451+
case NI_Sve_PrefetchBytes:
452+
case NI_Sve_PrefetchInt16:
453+
case NI_Sve_PrefetchInt32:
454+
case NI_Sve_PrefetchInt64:
455+
immLowerBound = (int)SVE_PRFOP_PLDL1KEEP;
456+
immUpperBound = (int)SVE_PRFOP_CONST15;
457+
break;
458+
451459
default:
452460
unreached();
453461
}

src/coreclr/jit/hwintrinsiccodegenarm64.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1489,6 +1489,33 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
14891489
break;
14901490
}
14911491

1492+
case NI_Sve_PrefetchBytes:
1493+
case NI_Sve_PrefetchInt16:
1494+
case NI_Sve_PrefetchInt32:
1495+
case NI_Sve_PrefetchInt64:
1496+
{
1497+
assert(hasImmediateOperand);
1498+
assert(HWIntrinsicInfo::HasEnumOperand(intrin.id));
1499+
if (intrin.op3->IsCnsIntOrI())
1500+
{
1501+
GetEmitter()->emitIns_PRFOP_R_R_I(ins, emitSize,
1502+
(insSvePrfop)intrin.op3->AsIntConCommon()->IconValue(), op1Reg,
1503+
op2Reg, 0);
1504+
}
1505+
else
1506+
{
1507+
assert(!intrin.op3->isContainedIntOrIImmed());
1508+
1509+
HWIntrinsicImmOpHelper helper(this, intrin.op3, node);
1510+
for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd())
1511+
{
1512+
const insSvePrfop prfop = (insSvePrfop)helper.ImmValue();
1513+
GetEmitter()->emitIns_PRFOP_R_R_I(ins, emitSize, prfop, op1Reg, op2Reg, 0);
1514+
}
1515+
}
1516+
break;
1517+
}
1518+
14921519
case NI_Vector64_ToVector128:
14931520
GetEmitter()->emitIns_Mov(ins, emitSize, targetReg, op1Reg, /* canSkip */ false);
14941521
break;

src/coreclr/jit/hwintrinsiclistarm64sve.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,10 @@ HARDWARE_INTRINSIC(Sve, Negate,
148148
HARDWARE_INTRINSIC(Sve, Or, -1, -1, false, {INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
149149
HARDWARE_INTRINSIC(Sve, OrAcross, -1, -1, false, {INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
150150
HARDWARE_INTRINSIC(Sve, PopCount, -1, -1, false, {INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
151+
HARDWARE_INTRINSIC(Sve, PrefetchBytes, -1, 3, false, {INS_invalid, INS_sve_prfb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand)
152+
HARDWARE_INTRINSIC(Sve, PrefetchInt16, -1, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_prfh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand)
153+
HARDWARE_INTRINSIC(Sve, PrefetchInt32, -1, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_prfw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand)
154+
HARDWARE_INTRINSIC(Sve, PrefetchInt64, -1, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_prfd, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand)
151155
HARDWARE_INTRINSIC(Sve, ReverseElement, -1, 1, true, {INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
152156
HARDWARE_INTRINSIC(Sve, ReverseElement16, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_revh, INS_sve_revh, INS_sve_revh, INS_sve_revh, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
153157
HARDWARE_INTRINSIC(Sve, ReverseElement32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_revw, INS_sve_revw, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)

src/coreclr/jit/lowerarmarch.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3194,6 +3194,10 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
31943194
case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x2:
31953195
case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x3:
31963196
case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x4:
3197+
case NI_Sve_PrefetchBytes:
3198+
case NI_Sve_PrefetchInt16:
3199+
case NI_Sve_PrefetchInt32:
3200+
case NI_Sve_PrefetchInt64:
31973201
assert(hasImmediateOperand);
31983202
assert(varTypeIsIntegral(intrin.op3));
31993203
if (intrin.op3->IsCnsIntOrI())

src/coreclr/jit/lsraarm64.cpp

Lines changed: 34 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1447,6 +1447,10 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
14471447
case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x2:
14481448
case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x3:
14491449
case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x4:
1450+
case NI_Sve_PrefetchBytes:
1451+
case NI_Sve_PrefetchInt16:
1452+
case NI_Sve_PrefetchInt32:
1453+
case NI_Sve_PrefetchInt64:
14501454
needBranchTargetReg = !intrin.op3->isContainedIntOrIImmed();
14511455
break;
14521456

@@ -1966,28 +1970,40 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
19661970
(argNum == lowVectorOperandNum) ? lowVectorCandidates : RBM_NONE);
19671971
}
19681972
}
1969-
else if (intrin.id == NI_Sve_StoreAndZip)
1970-
{
1971-
srcCount += BuildAddrUses(intrin.op2);
1972-
}
19731973
else
19741974
{
1975-
SingleTypeRegSet candidates = lowVectorOperandNum == 2 ? lowVectorCandidates : RBM_NONE;
1975+
switch (intrin.id)
1976+
{
1977+
case NI_Sve_StoreAndZip:
1978+
case NI_Sve_PrefetchBytes:
1979+
case NI_Sve_PrefetchInt16:
1980+
case NI_Sve_PrefetchInt32:
1981+
case NI_Sve_PrefetchInt64:
1982+
assert(intrinsicTree->OperIsMemoryLoadOrStore());
1983+
srcCount += BuildAddrUses(intrin.op2);
1984+
break;
19761985

1977-
if (intrin.op2->gtType == TYP_MASK)
1978-
{
1979-
assert(lowVectorOperandNum != 2);
1980-
candidates = RBM_ALLMASK.GetPredicateRegSet();
1981-
}
1986+
default:
1987+
{
1988+
SingleTypeRegSet candidates = lowVectorOperandNum == 2 ? lowVectorCandidates : RBM_NONE;
19821989

1983-
if (forceOp2DelayFree)
1984-
{
1985-
srcCount += BuildDelayFreeUses(intrin.op2, nullptr, candidates);
1986-
}
1987-
else
1988-
{
1989-
srcCount += isRMW ? BuildDelayFreeUses(intrin.op2, intrin.op1, candidates)
1990-
: BuildOperandUses(intrin.op2, candidates);
1990+
if (intrin.op2->gtType == TYP_MASK)
1991+
{
1992+
assert(lowVectorOperandNum != 2);
1993+
candidates = RBM_ALLMASK.GetPredicateRegSet();
1994+
}
1995+
1996+
if (forceOp2DelayFree)
1997+
{
1998+
srcCount += BuildDelayFreeUses(intrin.op2, nullptr, candidates);
1999+
}
2000+
else
2001+
{
2002+
srcCount += isRMW ? BuildDelayFreeUses(intrin.op2, intrin.op1, candidates)
2003+
: BuildOperandUses(intrin.op2, candidates);
2004+
}
2005+
}
2006+
break;
19912007
}
19922008
}
19932009

src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Enums.cs

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,4 +92,67 @@ public enum SveMaskPattern : byte
9292
/// </summary>
9393
All = 31 // All available (implicitly a multiple of two).
9494
}
95+
96+
/// <summary>
/// Selects the prefetch operation passed as the <c>prefetchType</c> argument of the Sve.Prefetch* methods.
/// Member names spell out the access kind (Load/Store), the cache level (L1-L3) and the
/// policy (Temporal/NonTemporal); each member's summary gives the matching prefetch-operation mnemonic.
/// Values 6, 7, 14 and 15 have no named member in this enum.
/// </summary>
public enum SvePrefetchType : byte
97+
{
98+
/// <summary>
99+
/// PLDL1KEEP: prefetch for load, L1 cache, temporal (KEEP).
100+
/// </summary>
101+
LoadL1Temporal = 0,
102+
103+
/// <summary>
104+
/// PLDL1STRM: prefetch for load, L1 cache, non-temporal (STRM).
105+
/// </summary>
106+
LoadL1NonTemporal = 1,
107+
108+
/// <summary>
109+
/// PLDL2KEEP: prefetch for load, L2 cache, temporal (KEEP).
110+
/// </summary>
111+
LoadL2Temporal = 2,
112+
113+
/// <summary>
114+
/// PLDL2STRM: prefetch for load, L2 cache, non-temporal (STRM).
115+
/// </summary>
116+
LoadL2NonTemporal = 3,
117+
118+
/// <summary>
119+
/// PLDL3KEEP: prefetch for load, L3 cache, temporal (KEEP).
120+
/// </summary>
121+
LoadL3Temporal = 4,
122+
123+
/// <summary>
124+
/// PLDL3STRM: prefetch for load, L3 cache, non-temporal (STRM).
125+
/// </summary>
126+
LoadL3NonTemporal = 5,
127+
128+
/// <summary>
129+
/// PSTL1KEEP: prefetch for store, L1 cache, temporal (KEEP).
130+
/// </summary>
131+
StoreL1Temporal = 8,
132+
133+
/// <summary>
134+
/// PSTL1STRM: prefetch for store, L1 cache, non-temporal (STRM).
135+
/// </summary>
136+
StoreL1NonTemporal = 9,
137+
138+
/// <summary>
139+
/// PSTL2KEEP: prefetch for store, L2 cache, temporal (KEEP).
140+
/// </summary>
141+
StoreL2Temporal = 10,
142+
143+
/// <summary>
144+
/// PSTL2STRM: prefetch for store, L2 cache, non-temporal (STRM).
145+
/// </summary>
146+
StoreL2NonTemporal = 11,
147+
148+
/// <summary>
149+
/// PSTL3KEEP: prefetch for store, L3 cache, temporal (KEEP).
150+
/// </summary>
151+
StoreL3Temporal = 12,
152+
153+
/// <summary>
154+
/// PSTL3STRM: prefetch for store, L3 cache, non-temporal (STRM).
155+
/// </summary>
156+
StoreL3NonTemporal = 13
157+
};
95158
}

src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3376,6 +3376,30 @@ internal Arm64() { }
33763376
/// </summary>
33773377
public static unsafe Vector<ulong> PopCount(Vector<ulong> value) { throw new PlatformNotSupportedException(); }
33783378

3379+
/// <summary>
3380+
/// void svprfb(svbool_t pg, const void *base, enum svprfop op)
3381+
/// PRFB op, Pg, [Xbase, #0, MUL VL]
3382+
/// </summary>
/// <param name="mask">Corresponds to <c>pg</c> in the native signature above.</param>
/// <param name="address">Corresponds to <c>base</c> in the native signature above.</param>
/// <param name="prefetchType">Corresponds to <c>op</c> in the native signature above; marked <c>[ConstantExpected]</c>.</param>
/// <exception cref="PlatformNotSupportedException">Always thrown by this implementation.</exception>
3383+
public static unsafe void PrefetchBytes(Vector<byte> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) { throw new PlatformNotSupportedException(); }
3384+
3385+
/// <summary>
3386+
/// void svprfh(svbool_t pg, const void *base, enum svprfop op)
3387+
/// PRFH op, Pg, [Xbase, #0, MUL VL]
3388+
/// </summary>
/// <param name="mask">Corresponds to <c>pg</c> in the native signature above.</param>
/// <param name="address">Corresponds to <c>base</c> in the native signature above.</param>
/// <param name="prefetchType">Corresponds to <c>op</c> in the native signature above; marked <c>[ConstantExpected]</c>.</param>
/// <exception cref="PlatformNotSupportedException">Always thrown by this implementation.</exception>
3389+
public static unsafe void PrefetchInt16(Vector<ushort> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) { throw new PlatformNotSupportedException(); }
3390+
3391+
/// <summary>
3392+
/// void svprfw(svbool_t pg, const void *base, enum svprfop op)
3393+
/// PRFW op, Pg, [Xbase, #0, MUL VL]
3394+
/// </summary>
/// <param name="mask">Corresponds to <c>pg</c> in the native signature above.</param>
/// <param name="address">Corresponds to <c>base</c> in the native signature above.</param>
/// <param name="prefetchType">Corresponds to <c>op</c> in the native signature above; marked <c>[ConstantExpected]</c>.</param>
/// <exception cref="PlatformNotSupportedException">Always thrown by this implementation.</exception>
3395+
public static unsafe void PrefetchInt32(Vector<uint> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) { throw new PlatformNotSupportedException(); }
3396+
3397+
/// <summary>
3398+
/// void svprfd(svbool_t pg, const void *base, enum svprfop op)
3399+
/// PRFD op, Pg, [Xbase, #0, MUL VL]
3400+
/// </summary>
/// <param name="mask">Corresponds to <c>pg</c> in the native signature above.</param>
/// <param name="address">Corresponds to <c>base</c> in the native signature above.</param>
/// <param name="prefetchType">Corresponds to <c>op</c> in the native signature above; marked <c>[ConstantExpected]</c>.</param>
/// <exception cref="PlatformNotSupportedException">Always thrown by this implementation.</exception>
3401+
public static unsafe void PrefetchInt64(Vector<ulong> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) { throw new PlatformNotSupportedException(); }
3402+
33793403

33803404
/// Reverse all elements
33813405

src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3432,6 +3432,29 @@ internal Arm64() { }
34323432
/// </summary>
34333433
public static unsafe Vector<ulong> PopCount(Vector<ulong> value) => PopCount(value);
34343434

3435+
/// <summary>
3436+
/// void svprfb(svbool_t pg, const void *base, enum svprfop op)
3437+
/// PRFB op, Pg, [Xbase, #0, MUL VL]
3438+
/// </summary>
/// <param name="mask">Corresponds to <c>pg</c> in the native signature above.</param>
/// <param name="address">Corresponds to <c>base</c> in the native signature above.</param>
/// <param name="prefetchType">Corresponds to <c>op</c> in the native signature above; marked <c>[ConstantExpected]</c>.</param>
// NOTE(review): the body forwards to this same method, as other intrinsics in this file do;
// presumably the JIT substitutes the PRFB instruction for the call - confirm against the JIT intrinsic table.
3439+
public static unsafe void PrefetchBytes(Vector<byte> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) => PrefetchBytes(mask, address, prefetchType);
3440+
3441+
/// <summary>
3442+
/// void svprfh(svbool_t pg, const void *base, enum svprfop op)
3443+
/// PRFH op, Pg, [Xbase, #0, MUL VL]
3444+
/// </summary>
/// <param name="mask">Corresponds to <c>pg</c> in the native signature above.</param>
/// <param name="address">Corresponds to <c>base</c> in the native signature above.</param>
/// <param name="prefetchType">Corresponds to <c>op</c> in the native signature above; marked <c>[ConstantExpected]</c>.</param>
// NOTE(review): the body forwards to this same method, as other intrinsics in this file do;
// presumably the JIT substitutes the PRFH instruction for the call - confirm against the JIT intrinsic table.
3445+
public static unsafe void PrefetchInt16(Vector<ushort> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) => PrefetchInt16(mask, address, prefetchType);
3446+
3447+
/// <summary>
3448+
/// void svprfw(svbool_t pg, const void *base, enum svprfop op)
3449+
/// PRFW op, Pg, [Xbase, #0, MUL VL]
3450+
/// </summary>
/// <param name="mask">Corresponds to <c>pg</c> in the native signature above.</param>
/// <param name="address">Corresponds to <c>base</c> in the native signature above.</param>
/// <param name="prefetchType">Corresponds to <c>op</c> in the native signature above; marked <c>[ConstantExpected]</c>.</param>
// NOTE(review): the body forwards to this same method, as other intrinsics in this file do;
// presumably the JIT substitutes the PRFW instruction for the call - confirm against the JIT intrinsic table.
3451+
public static unsafe void PrefetchInt32(Vector<uint> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) => PrefetchInt32(mask, address, prefetchType);
3452+
3453+
/// <summary>
3454+
/// void svprfd(svbool_t pg, const void *base, enum svprfop op)
3455+
/// PRFD op, Pg, [Xbase, #0, MUL VL]
3456+
/// </summary>
/// <param name="mask">Corresponds to <c>pg</c> in the native signature above.</param>
/// <param name="address">Corresponds to <c>base</c> in the native signature above.</param>
/// <param name="prefetchType">Corresponds to <c>op</c> in the native signature above; marked <c>[ConstantExpected]</c>.</param>
// NOTE(review): the body forwards to this same method, as other intrinsics in this file do;
// presumably the JIT substitutes the PRFD instruction for the call - confirm against the JIT intrinsic table.
3457+
public static unsafe void PrefetchInt64(Vector<ulong> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) => PrefetchInt64(mask, address, prefetchType);
34353458

34363459
/// Reverse all elements
34373460

src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4649,6 +4649,11 @@ internal Arm64() { }
46494649
public static System.Numerics.Vector<ulong> PopCount(System.Numerics.Vector<long> value) { throw null; }
46504650
public static System.Numerics.Vector<ulong> PopCount(System.Numerics.Vector<ulong> value) { throw null; }
46514651

4652+
public static unsafe void PrefetchBytes(System.Numerics.Vector<byte> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) { throw null; }
4653+
public static unsafe void PrefetchInt16(System.Numerics.Vector<ushort> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) { throw null; }
4654+
public static unsafe void PrefetchInt32(System.Numerics.Vector<uint> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) { throw null; }
4655+
public static unsafe void PrefetchInt64(System.Numerics.Vector<ulong> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) { throw null; }
4656+
46524657
public static System.Numerics.Vector<byte> ReverseElement(System.Numerics.Vector<byte> value) { throw null; }
46534658
public static System.Numerics.Vector<double> ReverseElement(System.Numerics.Vector<double> value) { throw null; }
46544659
public static System.Numerics.Vector<short> ReverseElement(System.Numerics.Vector<short> value) { throw null; }
@@ -4981,6 +4986,22 @@ public enum SveMaskPattern : byte
49814986
LargestMultipleOf3 = 30, // The largest multiple of 3.
49824987
All = 31 // All available (implicitly a multiple of two).
49834988
};
4989+
4990+
// Prefetch operation selector for the Sve.Prefetch* methods; trailing comments give the matching mnemonic.
public enum SvePrefetchType : byte
4991+
{
4992+
LoadL1Temporal = 0, // PLDL1KEEP
4993+
LoadL1NonTemporal = 1, // PLDL1STRM
4994+
LoadL2Temporal = 2, // PLDL2KEEP
4995+
LoadL2NonTemporal = 3, // PLDL2STRM
4996+
LoadL3Temporal = 4, // PLDL3KEEP
4997+
LoadL3NonTemporal = 5, // PLDL3STRM
4998+
StoreL1Temporal = 8, // PSTL1KEEP
4999+
StoreL1NonTemporal = 9, // PSTL1STRM
5000+
StoreL2Temporal = 10, // PSTL2KEEP
5001+
StoreL2NonTemporal = 11, // PSTL2STRM
5002+
StoreL3Temporal = 12, // PSTL3KEEP
5003+
StoreL3NonTemporal = 13 // PSTL3STRM
5004+
};
49845005
}
49855006
namespace System.Runtime.Intrinsics.X86
49865007
{

0 commit comments

Comments
 (0)