Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/coreclr/jit/hwintrinsicarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,14 @@ void HWIntrinsicInfo::lookupImmBounds(
}
break;

// SVE prefetch intrinsics: the immediate is a prefetch-operation value, so the bounds are
// taken from the SVE_PRFOP_* constants (SVE_PRFOP_PLDL1KEEP as lower, SVE_PRFOP_CONST15 as upper).
case NI_Sve_PrefetchBytes:
case NI_Sve_PrefetchInt16:
case NI_Sve_PrefetchInt32:
case NI_Sve_PrefetchInt64:
immLowerBound = (int)SVE_PRFOP_PLDL1KEEP;
immUpperBound = (int)SVE_PRFOP_CONST15;
break;

default:
unreached();
}
Expand Down
27 changes: 27 additions & 0 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1489,6 +1489,33 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
break;
}

case NI_Sve_PrefetchBytes:
case NI_Sve_PrefetchInt16:
case NI_Sve_PrefetchInt32:
case NI_Sve_PrefetchInt64:
{
    // The third operand selects the prefetch operation and is an enum-typed immediate.
    assert(hasImmediateOperand);
    assert(HWIntrinsicInfo::HasEnumOperand(intrin.id));

    if (!intrin.op3->IsCnsIntOrI())
    {
        // The operation is not a constant node: let the immediate-operand helper drive
        // emission, issuing the prefetch once for each value the helper reports.
        assert(!intrin.op3->isContainedIntOrIImmed());

        HWIntrinsicImmOpHelper immHelper(this, intrin.op3, node);
        immHelper.EmitBegin();
        while (!immHelper.Done())
        {
            const insSvePrfop casePrfop = (insSvePrfop)immHelper.ImmValue();
            GetEmitter()->emitIns_PRFOP_R_R_I(ins, emitSize, casePrfop, op1Reg, op2Reg, 0);
            immHelper.EmitCaseEnd();
        }
    }
    else
    {
        // Constant operation: encode it directly into a single prefetch instruction.
        const insSvePrfop constPrfop = (insSvePrfop)intrin.op3->AsIntConCommon()->IconValue();
        GetEmitter()->emitIns_PRFOP_R_R_I(ins, emitSize, constPrfop, op1Reg, op2Reg, 0);
    }
    break;
}

case NI_Vector64_ToVector128:
GetEmitter()->emitIns_Mov(ins, emitSize, targetReg, op1Reg, /* canSkip */ false);
break;
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,10 @@ HARDWARE_INTRINSIC(Sve, Negate,
HARDWARE_INTRINSIC(Sve, Or, -1, -1, false, {INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, OrAcross, -1, -1, false, {INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, PopCount, -1, -1, false, {INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, PrefetchBytes, -1, 3, false, {INS_invalid, INS_sve_prfb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand)
HARDWARE_INTRINSIC(Sve, PrefetchInt16, -1, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_prfh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand)
HARDWARE_INTRINSIC(Sve, PrefetchInt32, -1, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_prfw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand)
HARDWARE_INTRINSIC(Sve, PrefetchInt64, -1, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_prfd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand)
HARDWARE_INTRINSIC(Sve, ReverseElement, -1, 1, true, {INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve, ReverseElement16, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_revh, INS_sve_revh, INS_sve_revh, INS_sve_revh, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, ReverseElement32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_revw, INS_sve_revw, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/lowerarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3194,6 +3194,10 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x2:
case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x3:
case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x4:
case NI_Sve_PrefetchBytes:
case NI_Sve_PrefetchInt16:
case NI_Sve_PrefetchInt32:
case NI_Sve_PrefetchInt64:
assert(hasImmediateOperand);
assert(varTypeIsIntegral(intrin.op3));
if (intrin.op3->IsCnsIntOrI())
Expand Down
51 changes: 33 additions & 18 deletions src/coreclr/jit/lsraarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1447,6 +1447,10 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x2:
case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x3:
case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x4:
case NI_Sve_PrefetchBytes:
case NI_Sve_PrefetchInt16:
case NI_Sve_PrefetchInt32:
case NI_Sve_PrefetchInt64:
needBranchTargetReg = !intrin.op3->isContainedIntOrIImmed();
break;

Expand Down Expand Up @@ -1965,28 +1969,39 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
(argNum == lowVectorOperandNum) ? lowVectorCandidates : RBM_NONE);
}
}
else if (intrin.id == NI_Sve_StoreAndZip)
{
srcCount += BuildAddrUses(intrin.op2);
}
else
{
SingleTypeRegSet candidates = lowVectorOperandNum == 2 ? lowVectorCandidates : RBM_NONE;
switch (intrin.id)
{
case NI_Sve_StoreAndZip:
case NI_Sve_PrefetchBytes:
case NI_Sve_PrefetchInt16:
case NI_Sve_PrefetchInt32:
case NI_Sve_PrefetchInt64:
srcCount += BuildAddrUses(intrin.op2);
break;

if (intrin.op2->gtType == TYP_MASK)
{
assert(lowVectorOperandNum != 2);
candidates = RBM_ALLMASK;
}
default:
{
SingleTypeRegSet candidates = lowVectorOperandNum == 2 ? lowVectorCandidates : RBM_NONE;

if (forceOp2DelayFree)
{
srcCount += BuildDelayFreeUses(intrin.op2, nullptr, candidates);
}
else
{
srcCount += isRMW ? BuildDelayFreeUses(intrin.op2, intrin.op1, candidates)
: BuildOperandUses(intrin.op2, candidates);
if (intrin.op2->gtType == TYP_MASK)
{
assert(lowVectorOperandNum != 2);
candidates = RBM_ALLMASK;
}

if (forceOp2DelayFree)
{
srcCount += BuildDelayFreeUses(intrin.op2, nullptr, candidates);
}
else
{
srcCount += isRMW ? BuildDelayFreeUses(intrin.op2, intrin.op1, candidates)
: BuildOperandUses(intrin.op2, candidates);
}
}
break;
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,4 +92,20 @@ public enum SveMaskPattern : byte
/// </summary>
All = 31 // All available (implicitly a multiple of two).
}

/// <summary>
/// Prefetch operation selector; this is the type of the <c>prefetchType</c> parameter of the Sve Prefetch* methods.
/// </summary>
/// <remarks>
/// The numbering is not contiguous: values 6 and 7 have no named member.
/// NOTE(review): the SV_* member names were questioned during review, and a rename to the approved API names was requested - confirm before relying on these spellings.
/// </remarks>
public enum SvePrefetchType : byte
{
SV_PLDL1KEEP = 0, // Temporal fetch of the addressed location for reading, into the L1 cache.
SV_PLDL1STRM = 1, // Streaming fetch of the addressed location for reading, into the L1 cache.
SV_PLDL2KEEP = 2, // Temporal fetch of the addressed location for reading, into the L2 cache.
SV_PLDL2STRM = 3, // Streaming fetch of the addressed location for reading, into the L2 cache.
SV_PLDL3KEEP = 4, // Temporal fetch of the addressed location for reading, into the L3 cache.
SV_PLDL3STRM = 5, // Streaming fetch of the addressed location for reading, into the L3 cache.
SV_PSTL1KEEP = 8, // Temporal fetch of the addressed location for writing, into the L1 cache.
SV_PSTL1STRM = 9, // Streaming fetch of the addressed location for writing, into the L1 cache.
SV_PSTL2KEEP = 10, // Temporal fetch of the addressed location for writing, into the L2 cache.
SV_PSTL2STRM = 11, // Streaming fetch of the addressed location for writing, into the L2 cache.
SV_PSTL3KEEP = 12, // Temporal fetch of the addressed location for writing, into the L3 cache.
SV_PSTL3STRM = 13 // Streaming fetch of the addressed location for writing, into the L3 cache.
};
}
Original file line number Diff line number Diff line change
Expand Up @@ -3132,6 +3132,30 @@ internal Arm64() { }
/// </summary>
public static unsafe Vector<ulong> PopCount(Vector<ulong> value) { throw new PlatformNotSupportedException(); }

/// <summary>
/// void svprfb(svbool_t pg, const void *base, enum svprfop op)
/// PRFB op, Pg, [Xbase, #0, MUL VL]
/// </summary>
/// <exception cref="PlatformNotSupportedException">Always thrown by this implementation.</exception>
public static unsafe void PrefetchBytes(Vector<byte> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) { throw new PlatformNotSupportedException(); }

/// <summary>
/// void svprfh(svbool_t pg, const void *base, enum svprfop op)
/// PRFH op, Pg, [Xbase, #0, MUL VL]
/// </summary>
/// <exception cref="PlatformNotSupportedException">Always thrown by this implementation.</exception>
public static unsafe void PrefetchInt16(Vector<ushort> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) { throw new PlatformNotSupportedException(); }

/// <summary>
/// void svprfw(svbool_t pg, const void *base, enum svprfop op)
/// PRFW op, Pg, [Xbase, #0, MUL VL]
/// </summary>
/// <exception cref="PlatformNotSupportedException">Always thrown by this implementation.</exception>
public static unsafe void PrefetchInt32(Vector<uint> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) { throw new PlatformNotSupportedException(); }

/// <summary>
/// void svprfd(svbool_t pg, const void *base, enum svprfop op)
/// PRFD op, Pg, [Xbase, #0, MUL VL]
/// </summary>
/// <exception cref="PlatformNotSupportedException">Always thrown by this implementation.</exception>
public static unsafe void PrefetchInt64(Vector<ulong> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) { throw new PlatformNotSupportedException(); }


/// Reverse all elements

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3188,6 +3188,29 @@ internal Arm64() { }
/// </summary>
public static unsafe Vector<ulong> PopCount(Vector<ulong> value) => PopCount(value);

/// <summary>
/// void svprfb(svbool_t pg, const void *base, enum svprfop op)
/// PRFB op, Pg, [Xbase, #0, MUL VL]
/// </summary>
/// <remarks>
/// The body forwards to this same method with unchanged arguments.
/// NOTE(review): presumably the JIT replaces the call with the instruction shown above - confirm.
/// </remarks>
public static unsafe void PrefetchBytes(Vector<byte> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) => PrefetchBytes(mask, address, prefetchType);

/// <summary>
/// void svprfh(svbool_t pg, const void *base, enum svprfop op)
/// PRFH op, Pg, [Xbase, #0, MUL VL]
/// </summary>
/// <remarks>
/// The body forwards to this same method with unchanged arguments.
/// NOTE(review): presumably the JIT replaces the call with the instruction shown above - confirm.
/// </remarks>
public static unsafe void PrefetchInt16(Vector<ushort> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) => PrefetchInt16(mask, address, prefetchType);

/// <summary>
/// void svprfw(svbool_t pg, const void *base, enum svprfop op)
/// PRFW op, Pg, [Xbase, #0, MUL VL]
/// </summary>
/// <remarks>
/// The body forwards to this same method with unchanged arguments.
/// NOTE(review): presumably the JIT replaces the call with the instruction shown above - confirm.
/// </remarks>
public static unsafe void PrefetchInt32(Vector<uint> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) => PrefetchInt32(mask, address, prefetchType);

/// <summary>
/// void svprfd(svbool_t pg, const void *base, enum svprfop op)
/// PRFD op, Pg, [Xbase, #0, MUL VL]
/// </summary>
/// <remarks>
/// The body forwards to this same method with unchanged arguments.
/// NOTE(review): presumably the JIT replaces the call with the instruction shown above - confirm.
/// </remarks>
public static unsafe void PrefetchInt64(Vector<ulong> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) => PrefetchInt64(mask, address, prefetchType);

/// Reverse all elements

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4608,6 +4608,11 @@ internal Arm64() { }
public static System.Numerics.Vector<ulong> PopCount(System.Numerics.Vector<long> value) { throw null; }
public static System.Numerics.Vector<ulong> PopCount(System.Numerics.Vector<ulong> value) { throw null; }

// Sve Prefetch* declarations: each takes a mask vector, an address and a constant-expected SvePrefetchType; the bodies are `throw null` placeholders.
public static unsafe void PrefetchBytes(System.Numerics.Vector<byte> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) { throw null; }
public static unsafe void PrefetchInt16(System.Numerics.Vector<ushort> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) { throw null; }
public static unsafe void PrefetchInt32(System.Numerics.Vector<uint> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) { throw null; }
public static unsafe void PrefetchInt64(System.Numerics.Vector<ulong> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) { throw null; }

public static System.Numerics.Vector<byte> ReverseElement(System.Numerics.Vector<byte> value) { throw null; }
public static System.Numerics.Vector<double> ReverseElement(System.Numerics.Vector<double> value) { throw null; }
public static System.Numerics.Vector<short> ReverseElement(System.Numerics.Vector<short> value) { throw null; }
Expand Down Expand Up @@ -4940,6 +4945,22 @@ public enum SveMaskPattern : byte
LargestMultipleOf3 = 30, // The largest multiple of 3.
All = 31 // All available (implicitly a multiple of two).
};

public enum SvePrefetchType : byte
Copy link
Contributor Author

@TIHan TIHan Jun 5, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kunalspathak @tannergooding was this enum type approved? I ask because of the SV_PLDL1KEEP names.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see a note here saying they were not approved. @tannergooding, can you confirm?

#94006 (comment)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#97831 approved the Prefetch* APIs.

The enum should have been approved as part of that, but wasn't looked at directly.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The enum names are different and were approved in #94007 (comment)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@TIHan - can you update the names accordingly?

{
SV_PLDL1KEEP = 0, // Temporal fetch the addressed location for reading, to L1 cache.
SV_PLDL1STRM = 1, // Streaming fetch the addressed location for reading, to L1 cache.
SV_PLDL2KEEP = 2, // Temporal fetch the addressed location for reading, to L2 cache.
SV_PLDL2STRM = 3, // Streaming fetch the addressed location for reading, to L2 cache.
SV_PLDL3KEEP = 4, // Temporal fetch the addressed location for reading, to L3 cache.
SV_PLDL3STRM = 5, // Streaming fetch the addressed location for reading, to L3 cache.
SV_PSTL1KEEP = 8, // Temporal fetch the addressed location for writing, to L1 cache.
SV_PSTL1STRM = 9, // Streaming fetch the addressed location for writing, to L1 cache.
SV_PSTL2KEEP = 10, // Temporal fetch the addressed location for writing, to L2 cache.
SV_PSTL2STRM = 11, // Streaming fetch the addressed location for writing, to L2 cache.
SV_PSTL3KEEP = 12, // Temporal fetch the addressed location for writing, to L3 cache.
SV_PSTL3STRM = 13 // Streaming fetch the addressed location for writing, to L3 cache.
};
}
namespace System.Runtime.Intrinsics.X86
{
Expand Down
Loading