Skip to content

Commit 7d23d61

Browse files
Adding support for Avx512Vbmi.MultiShift (#103310)
1 parent 85a70c4 commit 7d23d61

File tree

93 files changed

+1079
-2231
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

93 files changed

+1079
-2231
lines changed

src/coreclr/jit/emitxarch.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19665,6 +19665,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
1966519665
case INS_vpermt2pd:
1966619666
case INS_vpermt2ps:
1966719667
case INS_vpermt2q:
19668+
case INS_vpmultishiftqb:
1966819669
case INS_vshuff32x4:
1966919670
case INS_vshuff64x2:
1967019671
case INS_vshufi32x4:

src/coreclr/jit/hwintrinsiclistxarch.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1155,6 +1155,7 @@ HARDWARE_INTRINSIC(AVX512DQ_VL, MultiplyLow,
11551155
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
11561156
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
11571157
// AVX512VBMI Intrinsics
1158+
HARDWARE_INTRINSIC(AVX512VBMI, MultiShift, 64, 2, false, {INS_vpmultishiftqb, INS_vpmultishiftqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
11581159
HARDWARE_INTRINSIC(AVX512VBMI, PermuteVar64x8, 64, 2, false, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible)
11591160
HARDWARE_INTRINSIC(AVX512VBMI, PermuteVar64x8x2, 64, 3, false, {INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbMaskingCompatible)
11601161

@@ -1163,6 +1164,7 @@ HARDWARE_INTRINSIC(AVX512VBMI, PermuteVar64x8x2,
11631164
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
11641165
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
11651166
// AVX512VBMI.VL Intrinsics
1167+
HARDWARE_INTRINSIC(AVX512VBMI_VL, MultiShift, -1, 2, false, {INS_vpmultishiftqb, INS_vpmultishiftqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
11661168
HARDWARE_INTRINSIC(AVX512VBMI_VL, PermuteVar16x8, 16, 2, false, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible)
11671169
HARDWARE_INTRINSIC(AVX512VBMI_VL, PermuteVar16x8x2, 16, 3, false, {INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbMaskingCompatible)
11681170
HARDWARE_INTRINSIC(AVX512VBMI_VL, PermuteVar32x8, 32, 2, false, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible)
@@ -1243,6 +1245,7 @@ HARDWARE_INTRINSIC(AVX10v1, MultiplySubtractAdd,
12431245
HARDWARE_INTRINSIC(AVX10v1, MultiplySubtractNegated, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ps, INS_vfnmsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
12441246
HARDWARE_INTRINSIC(AVX10v1, MultiplySubtractNegatedScalar, 16, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ss, INS_vfnmsub213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits)
12451247
HARDWARE_INTRINSIC(AVX10v1, MultiplySubtractScalar, 16, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ss, INS_vfmsub213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits)
1248+
HARDWARE_INTRINSIC(AVX10v1, MultiShift, -1, 2, false, {INS_vpmultishiftqb, INS_vpmultishiftqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
12461249
HARDWARE_INTRINSIC(AVX10v1, PermuteVar16x16, 32, 2, false, {INS_invalid, INS_invalid, INS_vpermw, INS_vpermw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible)
12471250
HARDWARE_INTRINSIC(AVX10v1, PermuteVar16x16x2, 32, 3, false, {INS_invalid, INS_invalid, INS_vpermt2w, INS_vpermt2w, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbMaskingCompatible)
12481251
HARDWARE_INTRINSIC(AVX10v1, PermuteVar16x8, 16, 2, false, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible)
@@ -1305,6 +1308,7 @@ HARDWARE_INTRINSIC(AVX10v1_V512, InsertVector128,
13051308
HARDWARE_INTRINSIC(AVX10v1_V512, InsertVector256, 64, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vinserti32x8, INS_vinserti32x8, INS_invalid, INS_invalid, INS_vinsertf32x8, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible)
13061309
HARDWARE_INTRINSIC(AVX10v1_V512, LeadingZeroCount, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vplzcntd, INS_vplzcntd, INS_vplzcntq, INS_vplzcntq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
13071310
HARDWARE_INTRINSIC(AVX10v1_V512, MultiplyLow, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmullq, INS_vpmullq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
1311+
HARDWARE_INTRINSIC(AVX10v1_V512, MultiShift, 64, 2, false, {INS_vpmultishiftqb, INS_vpmultishiftqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
13081312
HARDWARE_INTRINSIC(AVX10v1_V512, Or, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_orps, INS_orpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
13091313
HARDWARE_INTRINSIC(AVX10v1_V512, PermuteVar64x8, 64, 2, false, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible)
13101314
HARDWARE_INTRINSIC(AVX10v1_V512, PermuteVar64x8x2, 64, 3, false, {INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbMaskingCompatible)

src/coreclr/jit/instrsxarch.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -877,6 +877,7 @@ INST3(vpmullq, "pmullq", IUM_WR, BAD_CODE, BAD_
877877
INST3(vpermb, "permb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x8D), INS_TT_FULL_MEM, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Permute Packed Byte Elements
878878
INST3(vpermi2b, "permi2b", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x75), INS_TT_FULL_MEM, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute of Bytes from Two Tables Overwriting the Index
879879
INST3(vpermt2b, "permt2b", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x7D), INS_TT_FULL_MEM, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute of Bytes from Two Tables Overwriting one Table
880+
INST3(vpmultishiftqb, "pmultishiftqb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x83), INS_TT_FULL_MEM, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute of Bytes from Two Tables Overwriting one Table
880881

881882
INST3(LAST_AVX512_INSTRUCTION, "LAST_AVX512_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None)
882883

src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx10v1.PlatformNotSupported.cs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1876,6 +1876,28 @@ internal Avx10v1() { }
18761876
/// </summary>
18771877
public static Vector128<float> MultiplySubtractScalar(Vector128<float> a, Vector128<float> b, Vector128<float> c) { throw new PlatformNotSupportedException(); }
18781878

1879+
/// <summary>
1880+
/// __m128i _mm_multishift_epi64_epi8(__m128i a, __m128i b)
1881+
/// VPMULTISHIFTQB xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
1882+
/// </summary>
1883+
public static Vector128<byte> MultiShift(Vector128<byte> control, Vector128<ulong> value) { throw new PlatformNotSupportedException(); }
1884+
/// <summary>
1885+
/// __m128i _mm_multishift_epi64_epi8(__m128i a, __m128i b)
1886+
/// VPMULTISHIFTQB xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
1887+
/// </summary>
1888+
public static Vector128<sbyte> MultiShift(Vector128<sbyte> control, Vector128<long> value) { throw new PlatformNotSupportedException(); }
1889+
1890+
/// <summary>
1891+
/// __m256i _mm256_multishift_epi64_epi8(__m256i a, __m256i b)
1892+
/// VPMULTISHIFTQB ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
1893+
/// </summary>
1894+
public static Vector256<byte> MultiShift(Vector256<byte> control, Vector256<ulong> value) { throw new PlatformNotSupportedException(); }
1895+
/// <summary>
1896+
/// __m256i _mm256_multishift_epi64_epi8(__m256i a, __m256i b)
1897+
/// VPMULTISHIFTQB ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
1898+
/// </summary>
1899+
public static Vector256<sbyte> MultiShift(Vector256<sbyte> control, Vector256<long> value) { throw new PlatformNotSupportedException(); }
1900+
18791901
/// <summary>
18801902
/// __m256i _mm256_permutevar16x16_epi16 (__m256i a, __m256i b)
18811903
/// VPERMW ymm1 {k1}{z}, ymm2, ymm3/m256
@@ -3299,6 +3321,17 @@ internal V512() { }
32993321
/// </summary>
33003322
public static Vector512<ulong> MultiplyLow(Vector512<ulong> left, Vector512<ulong> right) { throw new PlatformNotSupportedException(); }
33013323

3324+
/// <summary>
3325+
/// __m512i _mm512_multishift_epi64_epi8( __m512i a, __m512i b)
3326+
/// VPMULTISHIFTQB zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst
3327+
/// </summary>
3328+
public static Vector512<byte> MultiShift(Vector512<byte> control, Vector512<ulong> value) { throw new PlatformNotSupportedException(); }
3329+
/// <summary>
3330+
/// __m512i _mm512_multishift_epi64_epi8( __m512i a, __m512i b)
3331+
/// VPMULTISHIFTQB zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst
3332+
/// </summary>
3333+
public static Vector512<sbyte> MultiShift(Vector512<sbyte> control, Vector512<long> value) { throw new PlatformNotSupportedException(); }
3334+
33023335
/// <summary>
33033336
/// __m512 _mm512_or_ps (__m512 a, __m512 b)
33043337
/// VORPS zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst

src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx10v1.cs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1875,6 +1875,28 @@ internal Avx10v1() { }
18751875
/// </summary>
18761876
public static Vector128<float> MultiplySubtractScalar(Vector128<float> a, Vector128<float> b, Vector128<float> c) => MultiplySubtractScalar(a, b, c);
18771877

1878+
/// <summary>
1879+
/// __m128i _mm_multishift_epi64_epi8(__m128i a, __m128i b)
1880+
/// VPMULTISHIFTQB xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
1881+
/// </summary>
1882+
public static Vector128<byte> MultiShift(Vector128<byte> control, Vector128<ulong> value) => MultiShift(control, value);
1883+
/// <summary>
1884+
/// __m128i _mm_multishift_epi64_epi8(__m128i a, __m128i b)
1885+
/// VPMULTISHIFTQB xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
1886+
/// </summary>
1887+
public static Vector128<sbyte> MultiShift(Vector128<sbyte> control, Vector128<long> value) => MultiShift(control, value);
1888+
1889+
/// <summary>
1890+
/// __m256i _mm256_multishift_epi64_epi8(__m256i a, __m256i b)
1891+
/// VPMULTISHIFTQB ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
1892+
/// </summary>
1893+
public static Vector256<byte> MultiShift(Vector256<byte> control, Vector256<ulong> value) => MultiShift(control, value);
1894+
/// <summary>
1895+
/// __m256i _mm256_multishift_epi64_epi8(__m256i a, __m256i b)
1896+
/// VPMULTISHIFTQB ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
1897+
/// </summary>
1898+
public static Vector256<sbyte> MultiShift(Vector256<sbyte> control, Vector256<long> value) => MultiShift(control, value);
1899+
18781900
/// <summary>
18791901
/// __m256i _mm256_permutevar16x16_epi16 (__m256i a, __m256i b)
18801902
/// VPERMW ymm1 {k1}{z}, ymm2, ymm3/m256
@@ -3289,6 +3311,17 @@ internal V512() { }
32893311
/// </summary>
32903312
public static Vector512<ulong> MultiplyLow(Vector512<ulong> left, Vector512<ulong> right) => MultiplyLow(left, right);
32913313

3314+
/// <summary>
3315+
/// __m512i _mm512_multishift_epi64_epi8( __m512i a, __m512i b)
3316+
/// VPMULTISHIFTQB zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst
3317+
/// </summary>
3318+
public static Vector512<byte> MultiShift(Vector512<byte> control, Vector512<ulong> value) => MultiShift(control, value);
3319+
/// <summary>
3320+
/// __m512i _mm512_multishift_epi64_epi8( __m512i a, __m512i b)
3321+
/// VPMULTISHIFTQB zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst
3322+
/// </summary>
3323+
public static Vector512<sbyte> MultiShift(Vector512<sbyte> control, Vector512<long> value) => MultiShift(control, value);
3324+
32923325
/// <summary>
32933326
/// __m512 _mm512_or_ps (__m512 a, __m512 b)
32943327
/// VORPS zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst

0 commit comments

Comments
 (0)