From 8644798fc476f7b205354ebbc9a1c14562c5e588 Mon Sep 17 00:00:00 2001 From: Jacob Crawley Date: Thu, 14 Aug 2025 15:54:23 +0000 Subject: [PATCH 01/11] Implement SVE2 ConvertToSingleOdd and ConvertToSingleRoundToOdd Co-authored-by: @jacob-crawley --- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 27 ++++++- src/coreclr/jit/hwintrinsiclistarm64sve.h | 2 + .../Arm/Sve2.PlatformNotSupported.cs | 20 +++++ .../src/System/Runtime/Intrinsics/Arm/Sve2.cs | 20 +++++ .../ref/System.Runtime.Intrinsics.cs | 2 + .../GenerateHWIntrinsicTests/Arm/Sve2Tests.cs | 8 +- .../HardwareIntrinsics/Arm/Shared/Helpers.cs | 76 ++++++++++++------- 7 files changed, 124 insertions(+), 31 deletions(-) diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 13c7163c4fcbb1..9f41e93e8f5aa7 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -722,6 +722,11 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) assert(intrin.op3->IsVectorZero()); break; + case NI_Sve2_ConvertToSingleOdd: + case NI_Sve2_ConvertToSingleOddRoundToOdd: + embOpt = INS_OPTS_D_TO_S; + break; + default: break; } @@ -783,6 +788,11 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) emitInsHelper(targetReg, maskReg, embMaskOp2Reg); break; + case NI_Sve2_ConvertToSingleOdd: + case NI_Sve2_ConvertToSingleOddRoundToOdd: + emitInsHelper(targetReg, maskReg, embMaskOp2Reg); + break; + default: assert(targetReg != embMaskOp2Reg); @@ -810,12 +820,23 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) break; } } + // If `targetReg` and `falseReg` are not same, then we need to move it to `targetReg` first + // so the `insEmbMask` operation can be merged on top of it. else if (targetReg != falseReg) { - // If `targetReg` and `falseReg` are not same, then we need to move it to `targetReg` first - // so the `insEmbMask` operation can be merged on top of it. - if (falseReg != embMaskOp1Reg) + if ((intrinEmbMask.id == NI_Sve2_ConvertToSingleOdd) || + (intrinEmbMask.id == NI_Sve2_ConvertToSingleOddRoundToOdd)) + { + // For these intrinsics we cannot use movprfx instruction to populate `targetReg` with + // `embMaskOp1Reg`. Thus, we need to perform move before the operation, and then "sel" to + // select the active lanes. + GetEmitter()->emitIns_Mov(INS_fmov, EA_4BYTE, targetReg, embMaskOp1Reg, /* canSkip */ true); + emitInsHelper(targetReg, maskReg, embMaskOp2Reg); + GetEmitter()->emitIns_R_R_R_R(INS_sve_sel, emitSize, targetReg, maskReg, targetReg, + falseReg, opt); + } + else if (falseReg != embMaskOp1Reg) { // At the point, targetReg != embMaskOp1Reg != falseReg if (HWIntrinsicInfo::IsOptionalEmbeddedMaskedOperation(intrinEmbMask.id)) diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 56cc4d3c2d9e3e..4fc73e41aa168c 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -342,6 +342,8 @@ HARDWARE_INTRINSIC(Sve2, BitwiseSelectLeftInverted, HARDWARE_INTRINSIC(Sve2, BitwiseSelectRightInverted, -1, 3, {INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(Sve2, ConvertToDoubleOdd, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcvtlt, INS_sve_fcvtlt}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve2, ConvertToSingleEvenRoundToOdd, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcvtx, INS_sve_fcvtx}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve2, ConvertToSingleOdd, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcvtnt, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, ConvertToSingleOddRoundToOdd, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcvtxnt, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(Sve2, DotProductRotateComplex, -1, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_cdot, INS_invalid, INS_sve_cdot, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_HasImmediateOperand) HARDWARE_INTRINSIC(Sve2, DotProductRotateComplexBySelectedIndex, -1, 5, {INS_sve_cdot, INS_invalid, INS_sve_cdot, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation|HW_Flag_SpecialImport|HW_Flag_BaseTypeFromSecondArg) HARDWARE_INTRINSIC(Sve2, FusedAddHalving, -1, -1, {INS_sve_shadd, INS_sve_uhadd, INS_sve_shadd, INS_sve_uhadd, INS_sve_shadd, INS_sve_uhadd, INS_sve_shadd, INS_sve_uhadd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve2.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve2.PlatformNotSupported.cs index bcab1359869df7..7dc075973e1246 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve2.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve2.PlatformNotSupported.cs @@ -1196,6 +1196,26 @@ internal Arm64() { } /// public static Vector BitwiseSelectRightInverted(Vector select, Vector left, Vector right) { throw new PlatformNotSupportedException(); } + // Down convert and narrow (top) + + /// + /// svfloat32_t svcvtnt_f32[_f64]_m(svfloat32_t even, svbool_t pg, svfloat64_t op) + /// svfloat32_t svcvtnt_f32[_f64]_x(svfloat32_t even, svbool_t pg, svfloat64_t op) + /// FCVTNT Ztied.S, Pg/M, Zop.D + /// FCVTNT Ztied.S, Pg/M, Zop.D + /// + public static Vector ConvertToSingleOdd(Vector even, Vector value) { throw new PlatformNotSupportedException(); } + + // Down convert, rounding to odd (top) + + /// + /// svfloat32_t svcvtxnt_f32[_f64]_m(svfloat32_t even, svbool_t pg, svfloat64_t op) + /// svfloat32_t svcvtxnt_f32[_f64]_x(svfloat32_t even, svbool_t pg, svfloat64_t op) + /// FCVTXNT Ztied.S, Pg/M, Zop.D + /// FCVTXNT Ztied.S, Pg/M, Zop.D + /// + public static Vector ConvertToSingleOddRoundToOdd(Vector even, Vector value) { throw new PlatformNotSupportedException(); } + // Complex dot product /// diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve2.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve2.cs index 815909fa63c7fc..c8e9696c0ed8be 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve2.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve2.cs @@ -1196,6 +1196,26 @@ internal Arm64() { } /// public static Vector BitwiseSelectRightInverted(Vector select, Vector left, Vector right) => BitwiseSelectRightInverted(select, left, right); + // Down convert and narrow (top) + + /// + /// svfloat32_t svcvtnt_f32[_f64]_m(svfloat32_t even, svbool_t pg, svfloat64_t op) + /// svfloat32_t svcvtnt_f32[_f64]_x(svfloat32_t even, svbool_t pg, svfloat64_t op) + /// FCVTNT Ztied.S, Pg/M, Zop.D + /// FCVTNT Ztied.S, Pg/M, Zop.D + /// + public static Vector ConvertToSingleOdd(Vector even, Vector value) => ConvertToSingleOdd(even, value); + + // Down convert, rounding to odd (top) + + /// + /// svfloat32_t svcvtxnt_f32[_f64]_m(svfloat32_t even, svbool_t pg, svfloat64_t op) + /// svfloat32_t svcvtxnt_f32[_f64]_x(svfloat32_t even, svbool_t pg, svfloat64_t op) + /// FCVTXNT Ztied.S, Pg/M, Zop.D + /// FCVTXNT Ztied.S, Pg/M, Zop.D + /// + public static Vector ConvertToSingleOddRoundToOdd(Vector even, Vector value) => ConvertToSingleOddRoundToOdd(even, value); + // Complex dot product /// diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index 0775004609bc4b..aa591b4d1ea80d 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -6354,6 +6354,8 @@ internal Arm64() { } public static System.Numerics.Vector BitwiseSelectRightInverted(System.Numerics.Vector select, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } public static System.Numerics.Vector ConvertToDoubleOdd(System.Numerics.Vector value) { throw null; } public static System.Numerics.Vector ConvertToSingleEvenRoundToOdd(System.Numerics.Vector value) { throw null; } + public static System.Numerics.Vector ConvertToSingleOdd(System.Numerics.Vector even, System.Numerics.Vector value) { throw null; } + public static System.Numerics.Vector ConvertToSingleOddRoundToOdd(System.Numerics.Vector even, System.Numerics.Vector value) { throw null; } public static System.Numerics.Vector DotProductRotateComplex(System.Numerics.Vector op1, System.Numerics.Vector op2, System.Numerics.Vector op3, [ConstantExpected(Min = 0, Max = (byte)(3))] byte rotation) { throw null; } public static System.Numerics.Vector DotProductRotateComplex(System.Numerics.Vector op1, System.Numerics.Vector op2, System.Numerics.Vector op3, [ConstantExpected(Min = 0, Max = (byte)(3))] byte rotation) { throw null; } public static System.Numerics.Vector DotProductRotateComplexBySelectedIndex(System.Numerics.Vector op1, System.Numerics.Vector op2, System.Numerics.Vector op3, [ConstantExpected(Min = 0, Max = (byte)(3))] byte imm_index, [ConstantExpected(Min = 0, Max = (byte)(3))] byte rotation) { throw null; } diff --git a/src/tests/Common/GenerateHWIntrinsicTests/Arm/Sve2Tests.cs b/src/tests/Common/GenerateHWIntrinsicTests/Arm/Sve2Tests.cs index bfe40fee207c69..f96ddad6b2b180 100644 --- a/src/tests/Common/GenerateHWIntrinsicTests/Arm/Sve2Tests.cs +++ b/src/tests/Common/GenerateHWIntrinsicTests/Arm/Sve2Tests.cs @@ -222,9 +222,13 @@ public static (string templateFileName, Dictionary templateData) ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve2_BitwiseSelectRightInverted_uint", ["Isa"] = "Sve2", ["LoadIsa"] = "Sve2", ["Method"] = "BitwiseSelectRightInverted", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "UInt32", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "UInt32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt32()", ["ConvertFunc"] = "", ["ValidateIterResult"] = "result[i] != Helpers.BitwiseSelectRightInverted(firstOp[i], secondOp[i], thirdOp[i])", ["GetIterResult"] = "Helpers.BitwiseSelectRightInverted(firstOp[i], secondOp[i], thirdOp[i])"}), ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve2_BitwiseSelectRightInverted_ulong", ["Isa"] = "Sve2", ["LoadIsa"] = "Sve2", ["Method"] = "BitwiseSelectRightInverted", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "UInt64", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "UInt64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt64()", ["ConvertFunc"] = "", ["ValidateIterResult"] = "result[i] != Helpers.BitwiseSelectRightInverted(firstOp[i], secondOp[i], thirdOp[i])", ["GetIterResult"] = "Helpers.BitwiseSelectRightInverted(firstOp[i], secondOp[i], thirdOp[i])"}), - ("SveSimpleVecOpDiffRetTypeTest.template", new Dictionary {["TestName"] = "Sve2_ConvertToDoubleOdd_double_float", ["Isa"] = "Sve2", ["LoadIsa"] = "Sve2", ["Method"] = "ConvertToDoubleOdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ConvertFunc"] = "", ["ValidateIterResult"] = "Helpers.ConvertToDouble(firstOp[i * 2 + 1]) != result[i]", ["GetIterResult"] = "Helpers.ConvertToDouble(left[i * 2 + 1])"}), + ("SveSimpleVecOpDiffRetTypeTest.template", new Dictionary {["TestName"] = "Sve2_ConvertToDoubleOdd_double_float", ["Isa"] = "Sve2", ["LoadIsa"] = "Sve2", ["Method"] = "ConvertToDoubleOdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ConvertFunc"] = "", ["ValidateIterResult"] = "Helpers.ConvertToDouble(firstOp[i * 2 + 1]) != result[i]", ["GetIterResult"] = "Helpers.ConvertToDouble(left[i * 2 + 1])"}), - ("SveSimpleVecOpDiffRetTypeTest.template", new Dictionary {["TestName"] = "Sve2_ConvertToSingleEvenRoundToOdd_float_double", ["Isa"] = "Sve2", ["LoadIsa"] = "Sve2", ["Method"] = "ConvertToSingleEvenRoundToOdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ConvertFunc"] = "", ["ValidateIterResult"] = "Helpers.ConvertToSingleEvenRoundToOdd(firstOp, i) != result[i]", ["GetIterResult"] = "Helpers.ConvertToSingleEvenRoundToOdd(left, i)"}), + ("SveSimpleVecOpDiffRetTypeTest.template", new Dictionary {["TestName"] = "Sve2_ConvertToSingleEvenRoundToOdd_float_double", ["Isa"] = "Sve2", ["LoadIsa"] = "Sve2", ["Method"] = "ConvertToSingleEvenRoundToOdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ConvertFunc"] = "", ["ValidateIterResult"] = "Helpers.ConvertToSingleEvenRoundToOdd(firstOp, i) != result[i]", ["GetIterResult"] = "Helpers.ConvertToSingleEvenRoundToOdd(left, i)"}), + + ("SveVecBinOpDifferentRetType.template", new Dictionary { ["TestName"] = "Sve2_ConvertToSingleOdd_float_double", ["Isa"] = "Sve2", ["LoadIsa"] = "Sve2", ["Method"] = "ConvertToSingleOdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ConvertFunc"] = "", ["ValidateIterResult"] = "Helpers.ConvertToSingleOdd(left, right, i) != result[i]", ["GetIterResult"] = "Helpers.ConvertToSingleOdd(left, right, i)"}), + + ("SveVecBinOpDifferentRetType.template", new Dictionary {["TestName"] = "Sve2_ConvertToSingleOddRoundToOdd_float_double", ["Isa"] = "Sve2", ["LoadIsa"] = "Sve2", ["Method"] = "ConvertToSingleOddRoundToOdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ConvertFunc"] = "", ["ValidateIterResult"] = "Helpers.ConvertToSingleOddRoundToOdd(left, right, i) != result[i]", ["GetIterResult"] = "Helpers.ConvertToSingleOddRoundToOdd(left, right, i)"}), ("SveVecImmTernOpFirstArgTest.template", new Dictionary { ["TestName"] = "Sve2_DotProductRotateComplex_int_sbyte_0", ["Isa"] = "Sve2", ["LoadIsa"] = "Sve2", ["Method"] = "DotProductRotateComplex", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "SByte", ["Op4BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()", ["Imm"] = "0", ["InvalidImm"] = "4", ["ConvertFunc"] = "", ["ValidateIterResult"] = "Helpers.DotProductRotateComplex(first[i], second, 4 * i, third, Imm) != result[i]", ["GetIterResult"] = "Helpers.DotProductRotateComplex(first[i], second, 4 * i, third, Imm)"}), ("SveVecImmTernOpFirstArgTest.template", new Dictionary { ["TestName"] = "Sve2_DotProductRotateComplex_int_sbyte_1", ["Isa"] = "Sve2", ["LoadIsa"] = "Sve2", ["Method"] = "DotProductRotateComplex", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "SByte", ["Op4BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()", ["Imm"] = "1", ["InvalidImm"] = "4", ["ConvertFunc"] = "", ["ValidateIterResult"] = "Helpers.DotProductRotateComplex(first[i], second, 4 * i, third, Imm) != result[i]", ["GetIterResult"] = "Helpers.DotProductRotateComplex(first[i], second, 4 * i, third, Imm)"}), diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs index ab0388d112bb92..7191d56b878526 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs @@ -4328,43 +4328,57 @@ private static (ulong val, bool ovf) ShiftOvf(ulong value, int shift) public static float AbsoluteDifference(float op1, float op2) => MathF.Abs(op1 - op2); - public static float ConvertToSingleEvenRoundToOdd(double[] value, int i) + private static float ConvertToSingleRoundToOdd(double val) { - if (i % 2 == 0) - { - double val = value[i / 2]; - float floatVal = (float)val; + float floatVal = (float)val; - float f = (float)val; + float f = (float)val; - // If val is NaN or Inf there’s nothing else to do - if (double.IsNaN(val) || double.IsInfinity(val)) - return f; + // If val is NaN or Inf there’s nothing else to do + if (double.IsNaN(val) || double.IsInfinity(val)) + return f; - // Detect the cases where the default cast rounded away from zero - if ((val > 0 && (double)f > val) || - (val < 0 && (double)f < val)) - { - // Move toward zero to get truncate() behaviour. - int bits = BitConverter.SingleToInt32Bits(f); - bits += (val > 0) ? -1 : +1; - f = BitConverter.Int32BitsToSingle(bits); - } + // Detect the cases where the default cast rounded away from zero + if ((val > 0 && (double)f > val) || + (val < 0 && (double)f < val)) + { + // Move toward zero to get truncate() behaviour. + int bits = BitConverter.SingleToInt32Bits(f); + bits += (val > 0) ? -1 : +1; + f = BitConverter.Int32BitsToSingle(bits); + } - // Round to odd, force the last bit of the mantissa to 1 if the conversion was inexact - if (val != (double)f) - { - int bits = BitConverter.SingleToInt32Bits(f); - bits |= 0x1; - f = BitConverter.Int32BitsToSingle(bits); - } + // Round to odd, force the last bit of the mantissa to 1 if the conversion was inexact + if (val != (double)f) + { + int bits = BitConverter.SingleToInt32Bits(f); + bits |= 0x1; + f = BitConverter.Int32BitsToSingle(bits); + } - return f; + return f; + } + public static float ConvertToSingleEvenRoundToOdd(double[] value, int i) + { + if (i % 2 == 0) + { + return ConvertToSingleRoundToOdd(value[i / 2]); } return 0f; } + + public static float ConvertToSingleOddRoundToOdd(float[] even, double[] op, int i) + { + if (i % 2 != 0) + { + return ConvertToSingleRoundToOdd(op[(i - 1) / 2]); + } + + return even[i]; + } + public static float FusedMultiplyAdd(float op1, float op2, float op3) => MathF.FusedMultiplyAdd(op2, op3, op1); public static float FusedMultiplyAddNegated(float op1, float op2, float op3) => MathF.FusedMultiplyAdd(-op2, op3, -op1); @@ -5874,6 +5888,16 @@ public static float[] ConvertToSingle(ulong[] op1) return result; } + public static float ConvertToSingleOdd(float[] even, double[] op, int i) + { + if (i % 2 == 0) + { + return even[i]; + } + + return (float)op[(i - 1) / 2]; + } + public static float ConvertToSingleUpper(float[] op1, double[] op2, int i) => i < op1.Length ? op1[i] : ConvertToSingle(op2[i - op1.Length]); public static double ConvertToDouble(float op1) => op1; From 3eb0933b8c350ae217cf4540f105c8b7850f0b74 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Thu, 21 Aug 2025 23:11:55 +0100 Subject: [PATCH 02/11] Remove redundant allocation --- patch | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 patch diff --git a/patch b/patch new file mode 100644 index 00000000000000..b7273c0119ef7b --- /dev/null +++ b/patch @@ -0,0 +1,13 @@ +diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs +index 7191d56b878..2c61a28b1b8 100644 +--- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs ++++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs +@@ -4330,8 +4330,6 @@ private static (ulong val, bool ovf) ShiftOvf(ulong value, int shift) + + private static float ConvertToSingleRoundToOdd(double val) + { +- float floatVal = (float)val; +- + float f = (float)val; + + // If val is NaN or Inf there’s nothing else to do From 88e0f99b43ea5274a4c43009434a4a8eb5330bf1 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Fri, 22 Aug 2025 12:55:46 +0100 Subject: [PATCH 03/11] Remove a redundant file --- patch | 13 ------------- 1 file changed, 13 deletions(-) delete mode 100644 patch diff --git a/patch b/patch deleted file mode 100644 index b7273c0119ef7b..00000000000000 --- a/patch +++ /dev/null @@ -1,13 +0,0 @@ -diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs -index 7191d56b878..2c61a28b1b8 100644 ---- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs -+++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs -@@ -4330,8 +4330,6 @@ private static (ulong val, bool ovf) ShiftOvf(ulong value, int shift) - - private static float ConvertToSingleRoundToOdd(double val) - { -- float floatVal = (float)val; -- - float f = (float)val; - - // If val is NaN or Inf there’s nothing else to do From 50d31ff4bc42f510c73cf08dbfc3fa154768a764 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Fri, 22 Aug 2025 12:57:37 +0100 Subject: [PATCH 04/11] Enable movprfx for the group containing fcvtxnt --- src/coreclr/jit/emitarm64sve.cpp | 1 + src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 19 ++++--------------- .../HardwareIntrinsics/Arm/Shared/Helpers.cs | 2 -- 3 files changed, 5 insertions(+), 17 deletions(-) diff --git a/src/coreclr/jit/emitarm64sve.cpp b/src/coreclr/jit/emitarm64sve.cpp index 83e367392ac9c7..dd0fd071e62035 100644 --- a/src/coreclr/jit/emitarm64sve.cpp +++ b/src/coreclr/jit/emitarm64sve.cpp @@ -18565,6 +18565,7 @@ void emitter::emitInsPairSanityCheck(instrDesc* firstId, instrDesc* secondId) case IF_SVE_CU_3A: // ., /M, . case IF_SVE_ES_3A: // ., /M, . case IF_SVE_EQ_3A: // ., /M, . + case IF_SVE_GQ_3A: // ., /M, . case IF_SVE_HO_3A: // .H, /M, .S case IF_SVE_HO_3B: // .D, /M, .S case IF_SVE_HO_3C: // .S, /M, .D diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 9f41e93e8f5aa7..657b9db26a8c8b 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -790,7 +790,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) case NI_Sve2_ConvertToSingleOdd: case NI_Sve2_ConvertToSingleOddRoundToOdd: - emitInsHelper(targetReg, maskReg, embMaskOp2Reg); + emitInsMovPrfxHelper(targetReg, maskReg, embMaskOp1Reg, embMaskOp2Reg); break; default: @@ -820,23 +820,12 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) break; } } - // If `targetReg` and `falseReg` are not same, then we need to move it to `targetReg` first - // so the `insEmbMask` operation can be merged on top of it. else if (targetReg != falseReg) { + // If `targetReg` and `falseReg` are not same, then we need to move it to `targetReg` first + // so the `insEmbMask` operation can be merged on top of it. - if ((intrinEmbMask.id == NI_Sve2_ConvertToSingleOdd) || - (intrinEmbMask.id == NI_Sve2_ConvertToSingleOddRoundToOdd)) - { - // For these intrinsics we cannot use movprfx instruction to populate `targetReg` with - // `embMaskOp1Reg`. Thus, we need to perform move before the operation, and then "sel" to - // select the active lanes. - GetEmitter()->emitIns_Mov(INS_fmov, EA_4BYTE, targetReg, embMaskOp1Reg, /* canSkip */ true); - emitInsHelper(targetReg, maskReg, embMaskOp2Reg); - GetEmitter()->emitIns_R_R_R_R(INS_sve_sel, emitSize, targetReg, maskReg, targetReg, - falseReg, opt); - } - else if (falseReg != embMaskOp1Reg) + if (falseReg != embMaskOp1Reg) { // At the point, targetReg != embMaskOp1Reg != falseReg if (HWIntrinsicInfo::IsOptionalEmbeddedMaskedOperation(intrinEmbMask.id)) diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs index 7191d56b878526..2c61a28b1b8fbc 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs @@ -4330,8 +4330,6 @@ private static (ulong val, bool ovf) ShiftOvf(ulong value, int shift) private static float ConvertToSingleRoundToOdd(double val) { - float floatVal = (float)val; - float f = (float)val; // If val is NaN or Inf there’s nothing else to do From a25d143513fee4fe465f2d3711c21e6f8a90abef Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Thu, 28 Aug 2025 13:14:48 +0100 Subject: [PATCH 05/11] Avoid using movprfx and use explicit copy instead --- src/coreclr/jit/emitarm64sve.cpp | 1 - src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 24 +++++++++++++++++---- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/emitarm64sve.cpp b/src/coreclr/jit/emitarm64sve.cpp index dd0fd071e62035..83e367392ac9c7 100644 --- a/src/coreclr/jit/emitarm64sve.cpp +++ b/src/coreclr/jit/emitarm64sve.cpp @@ -18565,7 +18565,6 @@ void emitter::emitInsPairSanityCheck(instrDesc* firstId, instrDesc* secondId) case IF_SVE_CU_3A: // ., /M, . case IF_SVE_ES_3A: // ., /M, . case IF_SVE_EQ_3A: // ., /M, . - case IF_SVE_GQ_3A: // ., /M, . case IF_SVE_HO_3A: // .H, /M, .S case IF_SVE_HO_3B: // .D, /M, .S case IF_SVE_HO_3C: // .S, /M, .D diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 657b9db26a8c8b..9a8665dffb7429 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -790,7 +790,11 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) case NI_Sve2_ConvertToSingleOdd: case NI_Sve2_ConvertToSingleOddRoundToOdd: - emitInsMovPrfxHelper(targetReg, maskReg, embMaskOp1Reg, embMaskOp2Reg); + // TODO-SVE: Optimise away the explicit copying of `embMaskOp1Reg` to `targetReg`. + // For these intrinsics we cannot use movprfx instruction to populate `targetReg` with + // `embMaskOp1Reg`. Thus, we need to perform move before the operation. + GetEmitter()->emitIns_R_R_R(INS_sve_orr, EA_SCALABLE, targetReg, embMaskOp1Reg, embMaskOp1Reg, INS_OPTS_SCALABLE_S); + emitInsHelper(targetReg, maskReg, embMaskOp2Reg); break; default: @@ -820,12 +824,24 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) break; } } + // If `targetReg` and `falseReg` are not same, then we need to move it to `targetReg` first + // so the `insEmbMask` operation can be merged on top of it. else if (targetReg != falseReg) { - // If `targetReg` and `falseReg` are not same, then we need to move it to `targetReg` first - // so the `insEmbMask` operation can be merged on top of it. - if (falseReg != embMaskOp1Reg) + if ((intrinEmbMask.id == NI_Sve2_ConvertToSingleOdd) || + (intrinEmbMask.id == NI_Sve2_ConvertToSingleOddRoundToOdd)) + { + // TODO-SVE: Optimise away the explicit copying of `embMaskOp1Reg` to `targetReg`. + // For these intrinsics we cannot use movprfx instruction to populate `targetReg` with + // `embMaskOp1Reg`. Thus, we need to perform move before the operation, and then "sel" to + // select the active lanes. + GetEmitter()->emitIns_R_R_R(INS_sve_orr, EA_SCALABLE, targetReg, embMaskOp1Reg, embMaskOp1Reg, INS_OPTS_SCALABLE_S); + emitInsHelper(targetReg, maskReg, embMaskOp2Reg); + GetEmitter()->emitIns_R_R_R_R(INS_sve_sel, emitSize, targetReg, maskReg, targetReg, + falseReg, opt); + } + else if (falseReg != embMaskOp1Reg) { // At the point, targetReg != embMaskOp1Reg != falseReg if (HWIntrinsicInfo::IsOptionalEmbeddedMaskedOperation(intrinEmbMask.id)) From 423fac98cc1aa7757d68b18796d61b5c9f70c798 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Fri, 29 Aug 2025 09:50:24 +0100 Subject: [PATCH 06/11] Fix formatting --- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 9a8665dffb7429..ad6d62413e3077 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -793,7 +793,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) // TODO-SVE: Optimise away the explicit copying of `embMaskOp1Reg` to `targetReg`. // For these intrinsics we cannot use movprfx instruction to populate `targetReg` with // `embMaskOp1Reg`. Thus, we need to perform move before the operation. - GetEmitter()->emitIns_R_R_R(INS_sve_orr, EA_SCALABLE, targetReg, embMaskOp1Reg, embMaskOp1Reg, INS_OPTS_SCALABLE_S); + GetEmitter()->emitIns_R_R_R(INS_sve_orr, EA_SCALABLE, targetReg, embMaskOp1Reg, + embMaskOp1Reg, INS_OPTS_SCALABLE_S); emitInsHelper(targetReg, maskReg, embMaskOp2Reg); break; @@ -836,7 +837,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) // For these intrinsics we cannot use movprfx instruction to populate `targetReg` with // `embMaskOp1Reg`. Thus, we need to perform move before the operation, and then "sel" to // select the active lanes. - GetEmitter()->emitIns_R_R_R(INS_sve_orr, EA_SCALABLE, targetReg, embMaskOp1Reg, embMaskOp1Reg, INS_OPTS_SCALABLE_S); + GetEmitter()->emitIns_R_R_R(INS_sve_orr, EA_SCALABLE, targetReg, embMaskOp1Reg, + embMaskOp1Reg, INS_OPTS_SCALABLE_S); emitInsHelper(targetReg, maskReg, embMaskOp2Reg); GetEmitter()->emitIns_R_R_R_R(INS_sve_sel, emitSize, targetReg, maskReg, targetReg, falseReg, opt); From ee0c54ed3f70d0cdb92a954fddf7e1e285ba5527 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Tue, 2 Sep 2025 11:13:25 +0100 Subject: [PATCH 07/11] Use mov instead of orr --- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index ad6d62413e3077..1d9ae4e57edddb 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -793,8 +793,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) // TODO-SVE: Optimise away the explicit copying of `embMaskOp1Reg` to `targetReg`. // For these intrinsics we cannot use movprfx instruction to populate `targetReg` with // `embMaskOp1Reg`. Thus, we need to perform move before the operation. - GetEmitter()->emitIns_R_R_R(INS_sve_orr, EA_SCALABLE, targetReg, embMaskOp1Reg, - embMaskOp1Reg, INS_OPTS_SCALABLE_S); + GetEmitter()->emitIns_Mov(INS_sve_mov, emitSize, targetReg, embMaskOp1Reg, + /* canSkip */ true); emitInsHelper(targetReg, maskReg, embMaskOp2Reg); break; @@ -837,8 +837,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) // For these intrinsics we cannot use movprfx instruction to populate `targetReg` with // `embMaskOp1Reg`. Thus, we need to perform move before the operation, and then "sel" to // select the active lanes. - GetEmitter()->emitIns_R_R_R(INS_sve_orr, EA_SCALABLE, targetReg, embMaskOp1Reg, - embMaskOp1Reg, INS_OPTS_SCALABLE_S); + GetEmitter()->emitIns_Mov(INS_sve_mov, emitSize, targetReg, embMaskOp1Reg, + /* canSkip */ true); emitInsHelper(targetReg, maskReg, embMaskOp2Reg); GetEmitter()->emitIns_R_R_R_R(INS_sve_sel, emitSize, targetReg, maskReg, targetReg, falseReg, opt); From 30748fc3f0505d13fa4a60914f5f160f612d244e Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Tue, 2 Sep 2025 13:47:30 +0100 Subject: [PATCH 08/11] Fix failing tests --- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 1d9ae4e57edddb..9ff0f3f7777464 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -794,7 +794,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) // For these intrinsics we cannot use movprfx instruction to populate `targetReg` with // `embMaskOp1Reg`. Thus, we need to perform move before the operation. GetEmitter()->emitIns_Mov(INS_sve_mov, emitSize, targetReg, embMaskOp1Reg, - /* canSkip */ true); + /* canSkip */ true, INS_OPTS_SCALABLE_S); emitInsHelper(targetReg, maskReg, embMaskOp2Reg); break; @@ -838,7 +838,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) // `embMaskOp1Reg`. Thus, we need to perform move before the operation, and then "sel" to // select the active lanes. GetEmitter()->emitIns_Mov(INS_sve_mov, emitSize, targetReg, embMaskOp1Reg, - /* canSkip */ true); + /* canSkip */ true, INS_OPTS_SCALABLE_S); emitInsHelper(targetReg, maskReg, embMaskOp2Reg); GetEmitter()->emitIns_R_R_R_R(INS_sve_sel, emitSize, targetReg, maskReg, targetReg, falseReg, opt); From fa11f81133d5aed6b6f503006c2ef816d8648075 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Thu, 9 Oct 2025 10:02:05 +0100 Subject: [PATCH 09/11] Incorporate review comments --- .../Common/GenerateHWIntrinsicTests/Arm/Sve2Tests.cs | 8 ++++---- src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/tests/Common/GenerateHWIntrinsicTests/Arm/Sve2Tests.cs b/src/tests/Common/GenerateHWIntrinsicTests/Arm/Sve2Tests.cs index f96ddad6b2b180..6ad73360c19b5c 100644 --- a/src/tests/Common/GenerateHWIntrinsicTests/Arm/Sve2Tests.cs +++ b/src/tests/Common/GenerateHWIntrinsicTests/Arm/Sve2Tests.cs @@ -222,13 +222,13 @@ public static (string templateFileName, Dictionary templateData) ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve2_BitwiseSelectRightInverted_uint", ["Isa"] = "Sve2", ["LoadIsa"] = "Sve2", ["Method"] = "BitwiseSelectRightInverted", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "UInt32", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "UInt32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt32()", ["ConvertFunc"] = "", ["ValidateIterResult"] = "result[i] != Helpers.BitwiseSelectRightInverted(firstOp[i], secondOp[i], thirdOp[i])", ["GetIterResult"] = "Helpers.BitwiseSelectRightInverted(firstOp[i], secondOp[i], thirdOp[i])"}), ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve2_BitwiseSelectRightInverted_ulong", ["Isa"] = "Sve2", ["LoadIsa"] = "Sve2", ["Method"] = "BitwiseSelectRightInverted", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "UInt64", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "UInt64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt64()", ["ConvertFunc"] = "", ["ValidateIterResult"] = "result[i] != Helpers.BitwiseSelectRightInverted(firstOp[i], secondOp[i], thirdOp[i])", ["GetIterResult"] = "Helpers.BitwiseSelectRightInverted(firstOp[i], secondOp[i], thirdOp[i])"}), - ("SveSimpleVecOpDiffRetTypeTest.template", new Dictionary {["TestName"] = "Sve2_ConvertToDoubleOdd_double_float", ["Isa"] = "Sve2", ["LoadIsa"] = "Sve2", ["Method"] = "ConvertToDoubleOdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ConvertFunc"] = "", ["ValidateIterResult"] = "Helpers.ConvertToDouble(firstOp[i * 2 + 1]) != result[i]", ["GetIterResult"] = "Helpers.ConvertToDouble(left[i * 2 + 1])"}), + ("SveSimpleVecOpDiffRetTypeTest.template", new Dictionary { ["TestName"] = "Sve2_ConvertToDoubleOdd_double_float", ["Isa"] = "Sve2", ["LoadIsa"] = "Sve2", ["Method"] = "ConvertToDoubleOdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ConvertFunc"] = "", ["ValidateIterResult"] = "Helpers.ConvertToDouble(firstOp[i * 2 + 1]) != result[i]", ["GetIterResult"] = "Helpers.ConvertToDouble(left[i * 2 + 1])"}), - ("SveSimpleVecOpDiffRetTypeTest.template", new Dictionary {["TestName"] = "Sve2_ConvertToSingleEvenRoundToOdd_float_double", ["Isa"] = "Sve2", ["LoadIsa"] = "Sve2", ["Method"] = "ConvertToSingleEvenRoundToOdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ConvertFunc"] = "", ["ValidateIterResult"] = "Helpers.ConvertToSingleEvenRoundToOdd(firstOp, i) != result[i]", ["GetIterResult"] = "Helpers.ConvertToSingleEvenRoundToOdd(left, i)"}), + ("SveSimpleVecOpDiffRetTypeTest.template", new Dictionary { ["TestName"] = "Sve2_ConvertToSingleEvenRoundToOdd_float_double", ["Isa"] = "Sve2", ["LoadIsa"] = "Sve2", ["Method"] = "ConvertToSingleEvenRoundToOdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ConvertFunc"] = "", ["ValidateIterResult"] = "Helpers.ConvertToSingleEvenRoundToOdd(firstOp, i) != result[i]", ["GetIterResult"] = "Helpers.ConvertToSingleEvenRoundToOdd(left, i)"}), - ("SveVecBinOpDifferentRetType.template", new Dictionary { ["TestName"] = "Sve2_ConvertToSingleOdd_float_double", ["Isa"] = "Sve2", ["LoadIsa"] = "Sve2", ["Method"] = "ConvertToSingleOdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ConvertFunc"] = "", ["ValidateIterResult"] = "Helpers.ConvertToSingleOdd(left, right, i) != result[i]", ["GetIterResult"] = "Helpers.ConvertToSingleOdd(left, right, i)"}), + ("SveVecBinOpDifferentRetType.template", new Dictionary { ["TestName"] = "Sve2_ConvertToSingleOdd_float_double", ["Isa"] = "Sve2", ["LoadIsa"] = "Sve2", ["Method"] = "ConvertToSingleOdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ConvertFunc"] = "", ["ValidateIterResult"] = "Helpers.ConvertToSingleOdd(left, right, i) != result[i]", ["GetIterResult"] = "Helpers.ConvertToSingleOdd(left, right, i)"}), - ("SveVecBinOpDifferentRetType.template", new Dictionary {["TestName"] = "Sve2_ConvertToSingleOddRoundToOdd_float_double", ["Isa"] = "Sve2", ["LoadIsa"] = "Sve2", ["Method"] = "ConvertToSingleOddRoundToOdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ConvertFunc"] = "", ["ValidateIterResult"] = "Helpers.ConvertToSingleOddRoundToOdd(left, right, i) != result[i]", ["GetIterResult"] = "Helpers.ConvertToSingleOddRoundToOdd(left, right, i)"}), + ("SveVecBinOpDifferentRetType.template", new Dictionary { ["TestName"] = "Sve2_ConvertToSingleOddRoundToOdd_float_double", ["Isa"] = "Sve2", ["LoadIsa"] = "Sve2", ["Method"] = "ConvertToSingleOddRoundToOdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ConvertFunc"] = "", ["ValidateIterResult"] = "Helpers.ConvertToSingleOddRoundToOdd(left, right, i) != result[i]", ["GetIterResult"] = "Helpers.ConvertToSingleOddRoundToOdd(left, right, i)"}), ("SveVecImmTernOpFirstArgTest.template", new Dictionary { ["TestName"] = "Sve2_DotProductRotateComplex_int_sbyte_0", ["Isa"] = "Sve2", ["LoadIsa"] = "Sve2", ["Method"] = "DotProductRotateComplex", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "SByte", ["Op4BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()", ["Imm"] = "0", ["InvalidImm"] = "4", ["ConvertFunc"] = "", ["ValidateIterResult"] = "Helpers.DotProductRotateComplex(first[i], second, 4 * i, third, Imm) != result[i]", ["GetIterResult"] = "Helpers.DotProductRotateComplex(first[i], second, 4 * i, third, Imm)"}), ("SveVecImmTernOpFirstArgTest.template", new Dictionary { ["TestName"] = "Sve2_DotProductRotateComplex_int_sbyte_1", ["Isa"] = "Sve2", ["LoadIsa"] = "Sve2", ["Method"] = "DotProductRotateComplex", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "SByte", ["Op4BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()", ["Imm"] = "1", ["InvalidImm"] = "4", ["ConvertFunc"] = "", ["ValidateIterResult"] = "Helpers.DotProductRotateComplex(first[i], second, 4 * i, third, Imm) != result[i]", ["GetIterResult"] = "Helpers.DotProductRotateComplex(first[i], second, 4 * i, third, Imm)"}), diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs index 2c61a28b1b8fbc..30b877de4b5575 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs @@ -4356,6 +4356,7 @@ private static float ConvertToSingleRoundToOdd(double val) return f; } + public static float ConvertToSingleEvenRoundToOdd(double[] value, int i) { if (i % 2 == 0) @@ -4366,12 +4367,11 @@ public static float ConvertToSingleEvenRoundToOdd(double[] value, int i) return 0f; } - public static float ConvertToSingleOddRoundToOdd(float[] even, double[] op, int i) { if (i % 2 != 0) { - return ConvertToSingleRoundToOdd(op[(i - 1) / 2]); + return ConvertToSingleRoundToOdd(op[(i - 1) / 2]); } return even[i]; From d8ae927044d29ff04f61e681bd089e44c676e03b Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Wed, 22 Oct 2025 15:03:15 +0100 Subject: [PATCH 10/11] Add assert to detect overwrite to input op2 --- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 5bef408e9f0131..5216606a44edb5 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -852,6 +852,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) // For these intrinsics we cannot use movprfx instruction to populate `targetReg` with // `embMaskOp1Reg`. Thus, we need to perform move before the operation, and then "sel" to // select the active lanes. + assert(targetReg != embMaskOp2Reg); GetEmitter()->emitIns_Mov(INS_sve_mov, emitSize, targetReg, embMaskOp1Reg, /* canSkip */ true, INS_OPTS_SCALABLE_S); emitInsHelper(targetReg, maskReg, embMaskOp2Reg); From da4933aeaea3f78924e77835d403029283dc23e6 Mon Sep 17 00:00:00 2001 From: SwapnilGaikwad Date: Wed, 22 Oct 2025 16:45:49 +0100 Subject: [PATCH 11/11] Improve assert check Co-authored-by: Tanner Gooding --- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 5216606a44edb5..9e90f6e182e7f5 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -852,7 +852,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) // For these intrinsics we cannot use movprfx instruction to populate `targetReg` with // `embMaskOp1Reg`. Thus, we need to perform move before the operation, and then "sel" to // select the active lanes. - assert(targetReg != embMaskOp2Reg); + assert((targetReg != embMaskOp2Reg) || (embMaskOp1Reg == targetReg)); GetEmitter()->emitIns_Mov(INS_sve_mov, emitSize, targetReg, embMaskOp1Reg, /* canSkip */ true, INS_OPTS_SCALABLE_S); emitInsHelper(targetReg, maskReg, embMaskOp2Reg);