Skip to content

Commit fefcb9e

Browse files
committed
AVX512FP16 Part 2: COMI intrinsics
the LLVM CMP intrinsics need `i1`
1 parent 60abdea commit fefcb9e

File tree

2 files changed

+277
-14
lines changed

2 files changed

+277
-14
lines changed

crates/core_arch/missing-x86.md

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -574,14 +574,6 @@
574574
* [ ] [`_mm_cmp_sh_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_sh_mask)
575575
* [ ] [`_mm_cmul_round_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmul_round_sch)
576576
* [ ] [`_mm_cmul_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmul_sch)
577-
* [ ] [`_mm_comi_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comi_round_sh)
578-
* [ ] [`_mm_comi_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comi_sh)
579-
* [ ] [`_mm_comieq_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comieq_sh)
580-
* [ ] [`_mm_comige_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comige_sh)
581-
* [ ] [`_mm_comigt_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comigt_sh)
582-
* [ ] [`_mm_comile_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comile_sh)
583-
* [ ] [`_mm_comilt_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comilt_sh)
584-
* [ ] [`_mm_comineq_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comineq_sh)
585577
* [ ] [`_mm_cvt_roundi32_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi32_sh)
586578
* [ ] [`_mm_cvt_roundi64_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi64_sh)
587579
* [ ] [`_mm_cvt_roundsd_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_sh)
@@ -780,12 +772,6 @@
780772
* [ ] [`_mm_store_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_sh)
781773
* [ ] [`_mm_sub_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_round_sh)
782774
* [ ] [`_mm_sub_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_sh)
783-
* [ ] [`_mm_ucomieq_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomieq_sh)
784-
* [ ] [`_mm_ucomige_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomige_sh)
785-
* [ ] [`_mm_ucomigt_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomigt_sh)
786-
* [ ] [`_mm_ucomile_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomile_sh)
787-
* [ ] [`_mm_ucomilt_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomilt_sh)
788-
* [ ] [`_mm_ucomineq_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomineq_sh)
789775
</p></details>
790776

791777

crates/core_arch/src/x86/avx512fp16.rs

Lines changed: 277 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -632,6 +632,170 @@ pub unsafe fn _mm512_zextph256_ph512(a: __m256h) -> __m512h {
632632
)
633633
}
634634

635+
/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
636+
/// operand specified by imm8, and return the boolean result (0 or 1).
637+
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
638+
///
639+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comi_round_sh)
640+
#[inline]
641+
#[target_feature(enable = "avx512fp16")]
642+
#[rustc_legacy_const_generics(2, 3)]
643+
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
644+
pub unsafe fn _mm_comi_round_sh<const IMM8: i32, const SAE: i32>(a: __m128h, b: __m128h) -> i32 {
645+
static_assert_sae!(SAE);
646+
vcomish(a, b, IMM8, SAE)
647+
}
648+
649+
/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
650+
/// operand specified by imm8, and return the boolean result (0 or 1).
651+
///
652+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comi_sh)
653+
#[inline]
654+
#[target_feature(enable = "avx512fp16")]
655+
#[rustc_legacy_const_generics(2)]
656+
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
657+
pub unsafe fn _mm_comi_sh<const IMM8: i32>(a: __m128h, b: __m128h) -> i32 {
658+
_mm_comi_round_sh::<IMM8, _MM_FROUND_CUR_DIRECTION>(a, b)
659+
}
660+
661+
/// Compare the lower half-precision (16-bit) floating-point elements in a and b for equality, and return
662+
/// the boolean result (0 or 1).
663+
///
664+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comieq_sh)
665+
#[inline]
666+
#[target_feature(enable = "avx512fp16")]
667+
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
668+
pub unsafe fn _mm_comieq_sh(a: __m128h, b: __m128h) -> i32 {
669+
_mm_comi_sh::<_CMP_EQ_OS>(a, b)
670+
}
671+
672+
/// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than-or-equal,
673+
/// and return the boolean result (0 or 1).
674+
///
675+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comige_sh)
676+
#[inline]
677+
#[target_feature(enable = "avx512fp16")]
678+
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
679+
pub unsafe fn _mm_comige_sh(a: __m128h, b: __m128h) -> i32 {
680+
_mm_comi_sh::<_CMP_GE_OS>(a, b)
681+
}
682+
683+
/// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than, and return
684+
/// the boolean result (0 or 1).
685+
///
686+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comigt_sh)
687+
#[inline]
688+
#[target_feature(enable = "avx512fp16")]
689+
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
690+
pub unsafe fn _mm_comigt_sh(a: __m128h, b: __m128h) -> i32 {
691+
_mm_comi_sh::<_CMP_GT_OS>(a, b)
692+
}
693+
694+
/// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than-or-equal, and
695+
/// return the boolean result (0 or 1).
696+
///
697+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comile_sh)
698+
#[inline]
699+
#[target_feature(enable = "avx512fp16")]
700+
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
701+
pub unsafe fn _mm_comile_sh(a: __m128h, b: __m128h) -> i32 {
702+
_mm_comi_sh::<_CMP_LE_OS>(a, b)
703+
}
704+
705+
/// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than, and return
706+
/// the boolean result (0 or 1).
707+
///
708+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comilt_sh)
709+
#[inline]
710+
#[target_feature(enable = "avx512fp16")]
711+
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
712+
pub unsafe fn _mm_comilt_sh(a: __m128h, b: __m128h) -> i32 {
713+
_mm_comi_sh::<_CMP_LT_OS>(a, b)
714+
}
715+
716+
/// Compare the lower half-precision (16-bit) floating-point elements in a and b for not-equal, and return
717+
/// the boolean result (0 or 1).
718+
///
719+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comineq_sh)
720+
#[inline]
721+
#[target_feature(enable = "avx512fp16")]
722+
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
723+
pub unsafe fn _mm_comineq_sh(a: __m128h, b: __m128h) -> i32 {
724+
_mm_comi_sh::<_CMP_NEQ_OS>(a, b)
725+
}
726+
727+
/// Compare the lower half-precision (16-bit) floating-point elements in a and b for equality, and
728+
/// return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
729+
///
730+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomieq_sh)
731+
#[inline]
732+
#[target_feature(enable = "avx512fp16")]
733+
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
734+
pub unsafe fn _mm_ucomieq_sh(a: __m128h, b: __m128h) -> i32 {
735+
_mm_comi_sh::<_CMP_EQ_OQ>(a, b)
736+
}
737+
738+
/// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than-or-equal,
739+
/// and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
740+
///
741+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomige_sh)
742+
#[inline]
743+
#[target_feature(enable = "avx512fp16")]
744+
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
745+
pub unsafe fn _mm_ucomige_sh(a: __m128h, b: __m128h) -> i32 {
746+
_mm_comi_sh::<_CMP_GE_OQ>(a, b)
747+
}
748+
749+
/// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than, and return
750+
/// the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
751+
///
752+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomigt_sh)
753+
#[inline]
754+
#[target_feature(enable = "avx512fp16")]
755+
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
756+
pub unsafe fn _mm_ucomigt_sh(a: __m128h, b: __m128h) -> i32 {
757+
_mm_comi_sh::<_CMP_GT_OQ>(a, b)
758+
}
759+
760+
/// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than-or-equal, and
761+
/// return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
762+
///
763+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomile_sh)
764+
#[inline]
765+
#[target_feature(enable = "avx512fp16")]
766+
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
767+
pub unsafe fn _mm_ucomile_sh(a: __m128h, b: __m128h) -> i32 {
768+
_mm_comi_sh::<_CMP_LE_OQ>(a, b)
769+
}
770+
771+
/// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than, and return
772+
/// the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
773+
///
774+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomilt_sh)
775+
#[inline]
776+
#[target_feature(enable = "avx512fp16")]
777+
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
778+
pub unsafe fn _mm_ucomilt_sh(a: __m128h, b: __m128h) -> i32 {
779+
_mm_comi_sh::<_CMP_LT_OQ>(a, b)
780+
}
781+
782+
/// Compare the lower half-precision (16-bit) floating-point elements in a and b for not-equal, and return
783+
/// the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
784+
///
785+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomineq_sh)
786+
#[inline]
787+
#[target_feature(enable = "avx512fp16")]
788+
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
789+
pub unsafe fn _mm_ucomineq_sh(a: __m128h, b: __m128h) -> i32 {
790+
_mm_comi_sh::<_CMP_NEQ_OQ>(a, b)
791+
}
792+
793+
#[allow(improper_ctypes)]
794+
extern "C" {
795+
#[link_name = "llvm.x86.avx512fp16.vcomi.sh"]
796+
fn vcomish(a: __m128h, b: __m128h, imm8: i32, sae: i32) -> i32;
797+
}
798+
635799
#[cfg(test)]
636800
mod tests {
637801
use crate::core_arch::x86::*;
@@ -992,4 +1156,117 @@ mod tests {
9921156
);
9931157
assert_eq_m512h(r, e);
9941158
}
1159+
1160+
#[simd_test(enable = "avx512fp16")]
1161+
unsafe fn test_mm_comi_round_sh() {
1162+
let a = _mm_set_sh(1.0);
1163+
let b = _mm_set_sh(1.0);
1164+
let r = _mm_comi_round_sh::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
1165+
assert_eq!(r, 1);
1166+
}
1167+
1168+
#[simd_test(enable = "avx512fp16")]
1169+
unsafe fn test_mm_comi_sh() {
1170+
let a = _mm_set_sh(1.0);
1171+
let b = _mm_set_sh(1.0);
1172+
let r = _mm_comi_sh::<_CMP_EQ_OQ>(a, b);
1173+
assert_eq!(r, 1);
1174+
}
1175+
1176+
#[simd_test(enable = "avx512fp16")]
1177+
unsafe fn test_mm_comieq_sh() {
1178+
let a = _mm_set_sh(1.0);
1179+
let b = _mm_set_sh(1.0);
1180+
let r = _mm_comieq_sh(a, b);
1181+
assert_eq!(r, 1);
1182+
}
1183+
1184+
#[simd_test(enable = "avx512fp16")]
1185+
unsafe fn test_mm_comige_sh() {
1186+
let a = _mm_set_sh(2.0);
1187+
let b = _mm_set_sh(1.0);
1188+
let r = _mm_comige_sh(a, b);
1189+
assert_eq!(r, 1);
1190+
}
1191+
1192+
#[simd_test(enable = "avx512fp16")]
1193+
unsafe fn test_mm_comigt_sh() {
1194+
let a = _mm_set_sh(2.0);
1195+
let b = _mm_set_sh(1.0);
1196+
let r = _mm_comigt_sh(a, b);
1197+
assert_eq!(r, 1);
1198+
}
1199+
1200+
#[simd_test(enable = "avx512fp16")]
1201+
unsafe fn test_mm_comile_sh() {
1202+
let a = _mm_set_sh(1.0);
1203+
let b = _mm_set_sh(2.0);
1204+
let r = _mm_comile_sh(a, b);
1205+
assert_eq!(r, 1);
1206+
}
1207+
1208+
#[simd_test(enable = "avx512fp16")]
1209+
unsafe fn test_mm_comilt_sh() {
1210+
let a = _mm_set_sh(1.0);
1211+
let b = _mm_set_sh(2.0);
1212+
let r = _mm_comilt_sh(a, b);
1213+
assert_eq!(r, 1);
1214+
}
1215+
1216+
#[simd_test(enable = "avx512fp16")]
1217+
unsafe fn test_mm_comineq_sh() {
1218+
let a = _mm_set_sh(1.0);
1219+
let b = _mm_set_sh(2.0);
1220+
let r = _mm_comineq_sh(a, b);
1221+
assert_eq!(r, 1);
1222+
}
1223+
1224+
#[simd_test(enable = "avx512fp16")]
1225+
unsafe fn test_mm_ucomieq_sh() {
1226+
let a = _mm_set_sh(1.0);
1227+
let b = _mm_set_sh(1.0);
1228+
let r = _mm_ucomieq_sh(a, b);
1229+
assert_eq!(r, 1);
1230+
}
1231+
1232+
#[simd_test(enable = "avx512fp16")]
1233+
unsafe fn test_mm_ucomige_sh() {
1234+
let a = _mm_set_sh(2.0);
1235+
let b = _mm_set_sh(1.0);
1236+
let r = _mm_ucomige_sh(a, b);
1237+
assert_eq!(r, 1);
1238+
}
1239+
1240+
#[simd_test(enable = "avx512fp16")]
1241+
unsafe fn test_mm_ucomigt_sh() {
1242+
let a = _mm_set_sh(2.0);
1243+
let b = _mm_set_sh(1.0);
1244+
let r = _mm_ucomigt_sh(a, b);
1245+
assert_eq!(r, 1);
1246+
}
1247+
1248+
#[simd_test(enable = "avx512fp16")]
1249+
unsafe fn test_mm_ucomile_sh() {
1250+
let a = _mm_set_sh(1.0);
1251+
let b = _mm_set_sh(2.0);
1252+
let r = _mm_ucomile_sh(a, b);
1253+
assert_eq!(r, 1);
1254+
}
1255+
1256+
#[simd_test(enable = "avx512fp16")]
1257+
unsafe fn test_mm_ucomilt_sh() {
1258+
let a = _mm_set_sh(1.0);
1259+
let b = _mm_set_sh(2.0);
1260+
let r = _mm_ucomilt_sh(a, b);
1261+
assert_eq!(r, 1);
1262+
}
1263+
1264+
#[simd_test(enable = "avx512fp16")]
1265+
unsafe fn test_mm_ucomineq_sh() {
1266+
let a = _mm_set_sh(1.0);
1267+
let b = _mm_set_sh(2.0);
1268+
let r = _mm_ucomineq_sh(a, b);
1269+
assert_eq!(r, 1);
1270+
}
1271+
9951272
}

0 commit comments

Comments
 (0)