Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/coreclr/jit/fgdiagnostic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3459,6 +3459,10 @@ void Compiler::fgDebugCheckFlags(GenTree* tree, BasicBlock* block)

#if defined(TARGET_ARM64)
case NI_ArmBase_Yield:
case NI_Sve_PrefetchBytes:
case NI_Sve_PrefetchInt16:
case NI_Sve_PrefetchInt32:
case NI_Sve_PrefetchInt64:
{
assert(tree->OperRequiresCallFlag(this));
expectedFlags |= GTF_GLOB_REF;
Expand Down
8 changes: 8 additions & 0 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27315,6 +27315,10 @@ bool GenTreeHWIntrinsic::OperRequiresCallFlag() const

#if defined(TARGET_ARM64)
case NI_ArmBase_Yield:
case NI_Sve_PrefetchBytes:
case NI_Sve_PrefetchInt16:
case NI_Sve_PrefetchInt32:
case NI_Sve_PrefetchInt64:
{
return true;
}
Expand Down Expand Up @@ -27497,6 +27501,10 @@ void GenTreeHWIntrinsic::Initialize(NamedIntrinsic intrinsicId)

#if defined(TARGET_ARM64)
case NI_ArmBase_Yield:
case NI_Sve_PrefetchBytes:
case NI_Sve_PrefetchInt16:
case NI_Sve_PrefetchInt32:
case NI_Sve_PrefetchInt64:
{
// Mark as a call and global reference, much as is done for GT_KEEPALIVE
gtFlags |= (GTF_CALL | GTF_GLOB_REF);
Expand Down
8 changes: 8 additions & 0 deletions src/coreclr/jit/hwintrinsicarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,14 @@ void HWIntrinsicInfo::lookupImmBounds(
}
break;

case NI_Sve_PrefetchBytes:
case NI_Sve_PrefetchInt16:
case NI_Sve_PrefetchInt32:
case NI_Sve_PrefetchInt64:
immLowerBound = (int)SVE_PRFOP_PLDL1KEEP;
immUpperBound = (int)SVE_PRFOP_CONST15;
break;

default:
unreached();
}
Expand Down
29 changes: 27 additions & 2 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -306,8 +306,6 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
}
else if (intrin.category == HW_Category_Special)
{
assert(intrin.id == NI_ArmBase_Yield);

emitSize = EA_UNKNOWN;
opt = INS_OPTS_NONE;
}
Expand Down Expand Up @@ -1489,6 +1487,33 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
break;
}

case NI_Sve_PrefetchBytes:
case NI_Sve_PrefetchInt16:
case NI_Sve_PrefetchInt32:
case NI_Sve_PrefetchInt64:
{
assert(hasImmediateOperand);
assert(HWIntrinsicInfo::HasEnumOperand(intrin.id));
if (intrin.op3->IsCnsIntOrI())
{
GetEmitter()->emitIns_PRFOP_R_R_I(ins, emitSize,
(insSvePrfop)intrin.op3->AsIntConCommon()->IconValue(), op1Reg,
op2Reg, 0);
}
else
{
assert(!intrin.op3->isContainedIntOrIImmed());

HWIntrinsicImmOpHelper helper(this, intrin.op3, node);
for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd())
{
const insSvePrfop prfop = (insSvePrfop)helper.ImmValue();
GetEmitter()->emitIns_PRFOP_R_R_I(ins, emitSize, prfop, op1Reg, op2Reg, 0);
}
}
break;
}

case NI_Vector64_ToVector128:
GetEmitter()->emitIns_Mov(ins, emitSize, targetReg, op1Reg, /* canSkip */ false);
break;
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,10 @@ HARDWARE_INTRINSIC(Sve, Negate,
HARDWARE_INTRINSIC(Sve, Or, -1, -1, false, {INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, OrAcross, -1, -1, false, {INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, PopCount, -1, -1, false, {INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, PrefetchBytes, -1, 3, false, {INS_invalid, INS_sve_prfb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand|HW_Flag_SpecialSideEffect_Other)
HARDWARE_INTRINSIC(Sve, PrefetchInt16, -1, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_prfh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand|HW_Flag_SpecialSideEffect_Other)
HARDWARE_INTRINSIC(Sve, PrefetchInt32, -1, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_prfw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand|HW_Flag_SpecialSideEffect_Other)
HARDWARE_INTRINSIC(Sve, PrefetchInt64, -1, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_prfd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand|HW_Flag_SpecialSideEffect_Other)
HARDWARE_INTRINSIC(Sve, ReverseElement, -1, 1, true, {INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve, ReverseElement16, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_revh, INS_sve_revh, INS_sve_revh, INS_sve_revh, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, ReverseElement32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_revw, INS_sve_revw, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
Expand Down
1 change: 0 additions & 1 deletion src/coreclr/jit/instr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1718,7 +1718,6 @@ instruction CodeGen::ins_Move_Extend(var_types srcType, bool srcInReg)
#if defined(TARGET_XARCH)
return INS_kmovq_msk;
#elif defined(TARGET_ARM64)
unreached(); // TODO-SVE: This needs testing
return INS_sve_mov;
#endif
}
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/lowerarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3194,6 +3194,10 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x2:
case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x3:
case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x4:
case NI_Sve_PrefetchBytes:
case NI_Sve_PrefetchInt16:
case NI_Sve_PrefetchInt32:
case NI_Sve_PrefetchInt64:
assert(hasImmediateOperand);
assert(varTypeIsIntegral(intrin.op3));
if (intrin.op3->IsCnsIntOrI())
Expand Down
51 changes: 33 additions & 18 deletions src/coreclr/jit/lsraarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1447,6 +1447,10 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x2:
case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x3:
case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x4:
case NI_Sve_PrefetchBytes:
case NI_Sve_PrefetchInt16:
case NI_Sve_PrefetchInt32:
case NI_Sve_PrefetchInt64:
needBranchTargetReg = !intrin.op3->isContainedIntOrIImmed();
break;

Expand Down Expand Up @@ -1965,28 +1969,39 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
(argNum == lowVectorOperandNum) ? lowVectorCandidates : RBM_NONE);
}
}
else if (intrin.id == NI_Sve_StoreAndZip)
{
srcCount += BuildAddrUses(intrin.op2);
}
else
{
SingleTypeRegSet candidates = lowVectorOperandNum == 2 ? lowVectorCandidates : RBM_NONE;
switch (intrin.id)
{
case NI_Sve_StoreAndZip:
case NI_Sve_PrefetchBytes:
case NI_Sve_PrefetchInt16:
case NI_Sve_PrefetchInt32:
case NI_Sve_PrefetchInt64:
srcCount += BuildAddrUses(intrin.op2);
break;

if (intrin.op2->gtType == TYP_MASK)
{
assert(lowVectorOperandNum != 2);
candidates = RBM_ALLMASK;
}
default:
{
SingleTypeRegSet candidates = lowVectorOperandNum == 2 ? lowVectorCandidates : RBM_NONE;

if (forceOp2DelayFree)
{
srcCount += BuildDelayFreeUses(intrin.op2, nullptr, candidates);
}
else
{
srcCount += isRMW ? BuildDelayFreeUses(intrin.op2, intrin.op1, candidates)
: BuildOperandUses(intrin.op2, candidates);
if (intrin.op2->gtType == TYP_MASK)
{
assert(lowVectorOperandNum != 2);
candidates = RBM_ALLMASK;
}

if (forceOp2DelayFree)
{
srcCount += BuildDelayFreeUses(intrin.op2, nullptr, candidates);
}
else
{
srcCount += isRMW ? BuildDelayFreeUses(intrin.op2, intrin.op1, candidates)
: BuildOperandUses(intrin.op2, candidates);
}
}
break;
}
}

Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/jit/morph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2330,7 +2330,7 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call

bool isBackFilled = false;
unsigned nextFltArgRegNum = fltArgRegNum; // This is the next floating-point argument register number to use
bool isStructArg = varTypeIsStruct(argSigType);
bool isStructArg = varTypeIsStruct(argSigType) && !varTypeIsMask(argx->gtEffectiveVal());
var_types structBaseType = TYP_STRUCT;
unsigned structSize = 0;
bool passStructByRef = false;
Expand Down Expand Up @@ -3178,7 +3178,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
argx->gtType = TYP_I_IMPL;
}

bool isStructArg = varTypeIsStruct(arg.GetSignatureType());
bool isStructArg = varTypeIsStruct(arg.GetSignatureType()) && !varTypeIsMask(arg.GetSignatureType());
GenTree* argObj = argx->gtEffectiveVal();
bool makeOutArgCopy = false;

Expand Down
6 changes: 5 additions & 1 deletion src/coreclr/jit/rationalize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,11 @@ void Rationalizer::RewriteNodeAsCall(GenTree** use,
assert(!operand->OperIsFieldList());

sigTyp = comp->impNormStructType(clsHnd);
arg = NewCallArg::Struct(operand, sigTyp, clsHnd);
if (varTypeIsMask(operand) && varTypeIsSIMD(sigTyp))
{
sigTyp = TYP_MASK;
}
arg = NewCallArg::Struct(operand, sigTyp, clsHnd);
}
else
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,4 +92,20 @@ public enum SveMaskPattern : byte
/// </summary>
All = 31 // All available (implicitly a multiple of two).
}

/// <summary>
/// Selects the prefetch operation passed as the <c>prefetchType</c> argument of the
/// Sve.PrefetchBytes / PrefetchInt16 / PrefetchInt32 / PrefetchInt64 methods.
/// Per the member descriptions below, each name combines the access kind
/// (PLD = for reading, PST = for writing), the target cache level (L1, L2, L3)
/// and the policy (KEEP = temporal, STRM = streaming).
/// Values 6, 7, 14 and 15 have no named member in this enum.
/// </summary>
public enum SvePrefetchType : byte
{
SV_PLDL1KEEP = 0, // Temporal fetch of the addressed location for reading, to L1 cache.
SV_PLDL1STRM = 1, // Streaming fetch of the addressed location for reading, to L1 cache.
SV_PLDL2KEEP = 2, // Temporal fetch of the addressed location for reading, to L2 cache.
SV_PLDL2STRM = 3, // Streaming fetch of the addressed location for reading, to L2 cache.
SV_PLDL3KEEP = 4, // Temporal fetch of the addressed location for reading, to L3 cache.
SV_PLDL3STRM = 5, // Streaming fetch of the addressed location for reading, to L3 cache.
SV_PSTL1KEEP = 8, // Temporal fetch of the addressed location for writing, to L1 cache.
SV_PSTL1STRM = 9, // Streaming fetch of the addressed location for writing, to L1 cache.
SV_PSTL2KEEP = 10, // Temporal fetch of the addressed location for writing, to L2 cache.
SV_PSTL2STRM = 11, // Streaming fetch of the addressed location for writing, to L2 cache.
SV_PSTL3KEEP = 12, // Temporal fetch of the addressed location for writing, to L3 cache.
SV_PSTL3STRM = 13 // Streaming fetch of the addressed location for writing, to L3 cache.
};
}
Original file line number Diff line number Diff line change
Expand Up @@ -3238,6 +3238,30 @@ internal Arm64() { }
/// </summary>
public static unsafe Vector<ulong> PopCount(Vector<ulong> value) { throw new PlatformNotSupportedException(); }

/// <summary>
/// void svprfb(svbool_t pg, const void *base, enum svprfop op)
/// PRFB op, Pg, [Xbase, #0, MUL VL]
/// </summary>
/// <param name="mask">Corresponds to <c>pg</c> in the native signature above.</param>
/// <param name="address">Corresponds to <c>base</c> in the native signature above.</param>
/// <param name="prefetchType">Corresponds to <c>op</c> in the native signature above; annotated as a constant-expected argument.</param>
/// <exception cref="PlatformNotSupportedException">Always thrown by this implementation.</exception>
public static unsafe void PrefetchBytes(Vector<byte> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) { throw new PlatformNotSupportedException(); }

/// <summary>
/// void svprfh(svbool_t pg, const void *base, enum svprfop op)
/// PRFH op, Pg, [Xbase, #0, MUL VL]
/// </summary>
/// <param name="mask">Corresponds to <c>pg</c> in the native signature above.</param>
/// <param name="address">Corresponds to <c>base</c> in the native signature above.</param>
/// <param name="prefetchType">Corresponds to <c>op</c> in the native signature above; annotated as a constant-expected argument.</param>
/// <exception cref="PlatformNotSupportedException">Always thrown by this implementation.</exception>
public static unsafe void PrefetchInt16(Vector<ushort> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) { throw new PlatformNotSupportedException(); }

/// <summary>
/// void svprfw(svbool_t pg, const void *base, enum svprfop op)
/// PRFW op, Pg, [Xbase, #0, MUL VL]
/// </summary>
/// <param name="mask">Corresponds to <c>pg</c> in the native signature above.</param>
/// <param name="address">Corresponds to <c>base</c> in the native signature above.</param>
/// <param name="prefetchType">Corresponds to <c>op</c> in the native signature above; annotated as a constant-expected argument.</param>
/// <exception cref="PlatformNotSupportedException">Always thrown by this implementation.</exception>
public static unsafe void PrefetchInt32(Vector<uint> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) { throw new PlatformNotSupportedException(); }

/// <summary>
/// void svprfd(svbool_t pg, const void *base, enum svprfop op)
/// PRFD op, Pg, [Xbase, #0, MUL VL]
/// </summary>
/// <param name="mask">Corresponds to <c>pg</c> in the native signature above.</param>
/// <param name="address">Corresponds to <c>base</c> in the native signature above.</param>
/// <param name="prefetchType">Corresponds to <c>op</c> in the native signature above; annotated as a constant-expected argument.</param>
/// <exception cref="PlatformNotSupportedException">Always thrown by this implementation.</exception>
public static unsafe void PrefetchInt64(Vector<ulong> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) { throw new PlatformNotSupportedException(); }


/// Reverse all elements

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3294,6 +3294,29 @@ internal Arm64() { }
/// </summary>
public static unsafe Vector<ulong> PopCount(Vector<ulong> value) => PopCount(value);

/// <summary>
/// void svprfb(svbool_t pg, const void *base, enum svprfop op)
/// PRFB op, Pg, [Xbase, #0, MUL VL]
/// </summary>
/// <param name="mask">Corresponds to <c>pg</c> in the native signature above.</param>
/// <param name="address">Corresponds to <c>base</c> in the native signature above.</param>
/// <param name="prefetchType">Corresponds to <c>op</c> in the native signature above; annotated as a constant-expected argument.</param>
/// <remarks>
/// The body forwards to this same method with the same arguments.
/// NOTE(review): presumably the JIT replaces the call with the PRFB instruction rather than recursing - confirm.
/// </remarks>
public static unsafe void PrefetchBytes(Vector<byte> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) => PrefetchBytes(mask, address, prefetchType);

/// <summary>
/// void svprfh(svbool_t pg, const void *base, enum svprfop op)
/// PRFH op, Pg, [Xbase, #0, MUL VL]
/// </summary>
/// <param name="mask">Corresponds to <c>pg</c> in the native signature above.</param>
/// <param name="address">Corresponds to <c>base</c> in the native signature above.</param>
/// <param name="prefetchType">Corresponds to <c>op</c> in the native signature above; annotated as a constant-expected argument.</param>
/// <remarks>
/// The body forwards to this same method with the same arguments.
/// NOTE(review): presumably the JIT replaces the call with the PRFH instruction rather than recursing - confirm.
/// </remarks>
public static unsafe void PrefetchInt16(Vector<ushort> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) => PrefetchInt16(mask, address, prefetchType);

/// <summary>
/// void svprfw(svbool_t pg, const void *base, enum svprfop op)
/// PRFW op, Pg, [Xbase, #0, MUL VL]
/// </summary>
/// <param name="mask">Corresponds to <c>pg</c> in the native signature above.</param>
/// <param name="address">Corresponds to <c>base</c> in the native signature above.</param>
/// <param name="prefetchType">Corresponds to <c>op</c> in the native signature above; annotated as a constant-expected argument.</param>
/// <remarks>
/// The body forwards to this same method with the same arguments.
/// NOTE(review): presumably the JIT replaces the call with the PRFW instruction rather than recursing - confirm.
/// </remarks>
public static unsafe void PrefetchInt32(Vector<uint> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) => PrefetchInt32(mask, address, prefetchType);

/// <summary>
/// void svprfd(svbool_t pg, const void *base, enum svprfop op)
/// PRFD op, Pg, [Xbase, #0, MUL VL]
/// </summary>
/// <param name="mask">Corresponds to <c>pg</c> in the native signature above.</param>
/// <param name="address">Corresponds to <c>base</c> in the native signature above.</param>
/// <param name="prefetchType">Corresponds to <c>op</c> in the native signature above; annotated as a constant-expected argument.</param>
/// <remarks>
/// The body forwards to this same method with the same arguments.
/// NOTE(review): presumably the JIT replaces the call with the PRFD instruction rather than recursing - confirm.
/// </remarks>
public static unsafe void PrefetchInt64(Vector<ulong> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) => PrefetchInt64(mask, address, prefetchType);

/// Reverse all elements

Expand Down
Loading