diff --git a/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile b/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile index fe5fe474ca..838017febb 100644 --- a/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile +++ b/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile @@ -8,7 +8,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ wget \ xz-utils -RUN wget http://ci-mirrors.rust-lang.org/stdarch/sde-external-9.48.0-2024-11-25-lin.tar.xz -O sde.tar.xz +RUN wget http://ci-mirrors.rust-lang.org/stdarch/sde-external-9.53.0-2025-03-16-lin.tar.xz -O sde.tar.xz RUN mkdir intel-sde RUN tar -xJf sde.tar.xz --strip-components=1 -C intel-sde ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/intel-sde/sde64 \ diff --git a/ci/docker/x86_64-unknown-linux-gnu-emulated/cpuid.def b/ci/docker/x86_64-unknown-linux-gnu-emulated/cpuid.def index 4d0924c5a1..4cce9d7a3c 100644 --- a/ci/docker/x86_64-unknown-linux-gnu-emulated/cpuid.def +++ b/ci/docker/x86_64-unknown-linux-gnu-emulated/cpuid.def @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2024 Intel Corporation. +# Copyright (C) 2024-2024 Intel Corporation. # # This software and the related documents are Intel copyrighted materials, and your # use of them is governed by the express license under which they were provided to @@ -9,27 +9,33 @@ # This software and the related documents are provided as is, with no express or # implied warranties, other than those that are expressly stated in the License. # +# The CPUID information in this file is for software enabling purposes only and +# it is not a full and accurate representation of the CPU under development which +# it represents. +# The CPUID information in this file is not a guarantee of the availability of +# features or characteristics in the final released CPU. 
+# # CPUID_VERSION = 1.0 # Input => Output # EAX ECX => EAX EBX ECX EDX 00000000 ******** => 00000024 68747541 444d4163 69746e65 -00000001 ******** => 000806f0 00100800 7ffaf3ff bfebfbff +00000001 ******** => 000d06f0 00100800 7ffaf3ff bfebfbff 00000002 ******** => 76035a01 00f0b6ff 00000000 00c10000 00000003 ******** => 00000000 00000000 00000000 00000000 -00000004 00000000 => 7c004121 01c0003f 0000003f 00000000 #Deterministic Cache +00000004 00000000 => 7c004121 02c0003f 0000003f 00000000 #Deterministic Cache 00000004 00000001 => 7c004122 01c0003f 0000003f 00000000 -00000004 00000002 => 7c004143 03c0003f 000003ff 00000000 -00000004 00000003 => 7c0fc163 0280003f 0000dfff 00000004 +00000004 00000002 => 7c004143 03c0003f 000007ff 00000000 +00000004 00000003 => 7c0fc163 04c0003f 0005ffff 00000004 00000004 00000004 => 00000000 00000000 00000000 00000000 00000005 ******** => 00000040 00000040 00000003 00042120 #MONITOR/MWAIT 00000006 ******** => 00000077 00000002 00000001 00000000 #Thermal and Power -00000007 00000000 => 00000001 f3bfbfbf bac05ffe 03d54130 #Extended Features -00000007 00000001 => 18ee00bf 00000002 00000000 1d29cd3e +00000007 00000000 => 00000001 f3bfbfbf bbc05ffe 03d55130 #Extended Features +00000007 00000001 => 88ee00bf 00000002 00000000 1d29cd3e 00000008 ******** => 00000000 00000000 00000000 00000000 00000009 ******** => 00000000 00000000 00000000 00000000 #Direct Cache 0000000a ******** => 07300403 00000000 00000000 00000603 -0000000b 00000000 => 00000001 00000002 00000100 00000000 #Extended Topology -0000000b 00000001 => 00000004 00000002 00000201 00000000 +0000000b 00000000 => 00000001 00000002 00000100 0000001e #Extended Topology +0000000b 00000001 => 00000004 00000002 00000201 0000001e 0000000c ******** => 00000000 00000000 00000000 00000000 0000000d 00000000 => 000e02e7 00002b00 00002b00 00000000 #xcr0 0000000d 00000001 => 0000001f 00000240 00000100 00000000 @@ -44,7 +50,11 @@ 00000019 ******** => 00000000 00000005 00000000 00000000 #Key 
Locker 0000001d 00000000 => 00000001 00000000 00000000 00000000 #AMX Tile 0000001d 00000001 => 04002000 00080040 00000010 00000000 #AMX Palette1 -0000001e 00000000 => 00000000 00004010 00000000 00000000 #AMX Tmul +0000001e 00000000 => 00000001 00004010 00000000 00000000 #AMX Tmul +0000001e 00000001 => 000001ff 00000000 00000000 00000000 +0000001f 00000000 => 00000001 00000002 00000100 0000001e +0000001f 00000001 => 00000007 00000070 00000201 0000001e +0000001f 00000002 => 00000000 00000000 00000002 0000001e 00000024 00000000 => 00000000 00070002 00000000 00000000 #AVX10 80000000 ******** => 80000008 00000000 00000000 00000000 80000001 ******** => 00000000 00000000 00200961 2c100000 @@ -56,6 +66,6 @@ 80000007 ******** => 00000000 00000000 00000000 00000100 80000008 ******** => 00003028 00000200 00000200 00000000 -# This file was copied from intel-sde/misc/cpuid/future/cpuid.def, and modified to -# use "AuthenticAMD" as the vendor and the support for `XOP`, `SSE4a`, `TBM` and -# `AVX512_VP2INTERSECT` was added in the CPUID. +# This file was copied from intel-sde/misc/cpuid/dmr/cpuid.def, and modified to +# use "AuthenticAMD" as the vendor and the support for `XOP`, `SSE4a`, `TBM`, +# `AVX512_VP2INTERSECT` and the VEX variants of AVX512 was added in the CPUID. diff --git a/crates/std_detect/src/detect/arch/x86.rs b/crates/std_detect/src/detect/arch/x86.rs index 6c45c76e9d..7fdb72bb4a 100644 --- a/crates/std_detect/src/detect/arch/x86.rs +++ b/crates/std_detect/src/detect/arch/x86.rs @@ -89,6 +89,11 @@ features! { /// * `"amx-bf16"` /// * `"amx-fp16"` /// * `"amx-complex"` + /// * `"amx-avx512"` + /// * `"amx-fp8"` + /// * `"amx-movrs"` + /// * `"amx-tf32"` + /// * `"amx-transpose"` /// * `"f16c"` /// * `"fma"` /// * `"bmi1"` @@ -109,6 +114,8 @@ features! 
{ /// * `"rtm"` /// * `"movbe"` /// * `"ermsb"` + /// * `"movrs"` + /// * `"xop"` /// /// [docs]: https://software.intel.com/sites/landingpage/IntrinsicsGuide #[stable(feature = "simd_x86", since = "1.27.0")] @@ -177,8 +184,7 @@ features! { @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vbmi: "avx512vbmi"; /// AVX-512 VBMI (Vector Byte Manipulation Instructions) @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vpopcntdq: "avx512vpopcntdq"; - /// AVX-512 VPOPCNTDQ (Vector Population Count Doubleword and - /// Quadword) + /// AVX-512 VPOPCNTDQ (Vector Population Count Doubleword and Quadword) @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vbmi2: "avx512vbmi2"; /// AVX-512 VBMI2 (Additional byte, word, dword and qword capabilities) @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] gfni: "gfni"; @@ -217,6 +223,16 @@ features! { /// AMX-FP16 (Float16 Operations) @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_complex: "amx-complex"; /// AMX-COMPLEX (Complex number Operations) + @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_avx512: "amx-avx512"; + /// AMX-AVX512 (AVX512 operations extended to matrices) + @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_fp8: "amx-fp8"; + /// AMX-FP8 (Float8 Operations) + @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_movrs: "amx-movrs"; + /// AMX-MOVRS (Matrix MOVRS operations) + @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_tf32: "amx-tf32"; + /// AMX-TF32 (TensorFloat32 Operations) + @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_transpose: "amx-transpose"; + /// AMX-TRANSPOSE (Matrix Transpose Operations) @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] f16c: "f16c"; /// F16C (Conversions between IEEE-754 `binary16` and `binary32` formats) @FEATURE: #[stable(feature = "simd_x86", since =
"1.27.0")] fma: "fma"; @@ -253,6 +269,8 @@ features! { /// RTM, Intel (Restricted Transactional Memory) @FEATURE: #[stable(feature = "movbe_target_feature", since = "1.67.0")] movbe: "movbe"; /// MOVBE (Move Data After Swapping Bytes) + @FEATURE: #[unstable(feature = "movrs_target_feature", issue = "137976")] movrs: "movrs"; + /// MOVRS (Move data with the read-shared hint) @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] ermsb: "ermsb"; /// ERMSB, Enhanced REP MOVSB and STOSB @FEATURE: #[unstable(feature = "xop_target_feature", issue = "127208")] xop: "xop"; diff --git a/crates/std_detect/src/detect/os/x86.rs b/crates/std_detect/src/detect/os/x86.rs index bb6a44b643..e48d04ad00 100644 --- a/crates/std_detect/src/detect/os/x86.rs +++ b/crates/std_detect/src/detect/os/x86.rs @@ -141,6 +141,8 @@ pub(crate) fn detect_features() -> cache::Initializer { enable(extended_features_ebx, 9, Feature::ermsb); + enable(extended_features_eax_leaf_1, 31, Feature::movrs); + // Detect if CPUID.19h available if bit::test(extended_features_ecx as usize, 23) { let CpuidResult { ebx, .. } = unsafe { __cpuid(0x19) }; @@ -250,14 +252,27 @@ pub(crate) fn detect_features() -> cache::Initializer { enable(extended_features_edx, 8, Feature::avx512vp2intersect); enable(extended_features_edx, 23, Feature::avx512fp16); enable(extended_features_eax_leaf_1, 5, Feature::avx512bf16); + } + } + + if os_amx_support { + enable(extended_features_edx, 24, Feature::amx_tile); + enable(extended_features_edx, 25, Feature::amx_int8); + enable(extended_features_edx, 22, Feature::amx_bf16); + enable(extended_features_eax_leaf_1, 21, Feature::amx_fp16); + enable(extended_features_edx_leaf_1, 8, Feature::amx_complex); + + if max_basic_leaf >= 0x1e { + let CpuidResult { + eax: amx_feature_flags_eax, + .. 
+ } = unsafe { __cpuid_count(0x1e_u32, 1) }; - if os_amx_support { - enable(extended_features_edx, 24, Feature::amx_tile); - enable(extended_features_edx, 25, Feature::amx_int8); - enable(extended_features_edx, 22, Feature::amx_bf16); - enable(extended_features_eax_leaf_1, 21, Feature::amx_fp16); - enable(extended_features_edx_leaf_1, 8, Feature::amx_complex); - } + enable(amx_feature_flags_eax, 4, Feature::amx_fp8); + enable(amx_feature_flags_eax, 5, Feature::amx_transpose); + enable(amx_feature_flags_eax, 6, Feature::amx_tf32); + enable(amx_feature_flags_eax, 7, Feature::amx_avx512); + enable(amx_feature_flags_eax, 8, Feature::amx_movrs); } } } diff --git a/crates/std_detect/tests/x86-specific.rs b/crates/std_detect/tests/x86-specific.rs index 5f4441f101..f41f400c10 100644 --- a/crates/std_detect/tests/x86-specific.rs +++ b/crates/std_detect/tests/x86-specific.rs @@ -6,7 +6,8 @@ sha512_sm_x86, x86_amx_intrinsics, xop_target_feature, - keylocker_x86 + keylocker_x86, + movrs_target_feature )] extern crate cupid; @@ -97,6 +98,15 @@ fn dump() { println!("xop: {:?}", is_x86_feature_detected!("xop")); println!("kl: {:?}", is_x86_feature_detected!("kl")); println!("widekl: {:?}", is_x86_feature_detected!("widekl")); + println!("movrs: {:?}", is_x86_feature_detected!("movrs")); + println!("amx-fp8: {:?}", is_x86_feature_detected!("amx-fp8")); + println!( + "amx-transpose: {:?}", + is_x86_feature_detected!("amx-transpose") + ); + println!("amx-tf32: {:?}", is_x86_feature_detected!("amx-tf32")); + println!("amx-avx512: {:?}", is_x86_feature_detected!("amx-avx512")); + println!("amx-movrs: {:?}", is_x86_feature_detected!("amx-movrs")); } #[cfg(feature = "std_detect_env_override")]