diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs
index 3a39d5f845..b2233929b8 100644
--- a/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/crates/core_arch/src/aarch64/neon/generated.rs
@@ -4646,6 +4646,19 @@ pub unsafe fn vmull_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
     vmull_u32(a, b)
 }
 
+/// Polynomial multiply long
+#[inline]
+#[target_feature(enable = "neon,crypto")]
+#[cfg_attr(test, assert_instr(pmull))]
+pub unsafe fn vmull_p64(a: p64, b: p64) -> p128 {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.pmull64")]
+        fn vmull_p64_(a: p64, b: p64) -> int8x16_t;
+    }
+    transmute(vmull_p64_(a, b))
+}
+
 /// Polynomial multiply long
 #[inline]
 #[target_feature(enable = "neon")]
@@ -4656,6 +4669,14 @@ pub unsafe fn vmull_high_p8(a: poly8x16_t, b: poly8x16_t) -> poly16x8_t {
     vmull_p8(a, b)
 }
 
+/// Polynomial multiply long
+#[inline]
+#[target_feature(enable = "neon,crypto")]
+#[cfg_attr(test, assert_instr(pmull))]
+pub unsafe fn vmull_high_p64(a: poly64x2_t, b: poly64x2_t) -> p128 {
+    vmull_p64(simd_extract(a, 1), simd_extract(b, 1))
+}
+
 /// Multiply long
 #[inline]
 #[target_feature(enable = "neon")]
@@ -12448,6 +12469,15 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon,crypto")]
+    unsafe fn test_vmull_p64() {
+        let a: p64 = 15;
+        let b: p64 = 3;
+        let e: p128 = 17;
+        let r: p128 = transmute(vmull_p64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vmull_high_p8() {
         let a: i8x16 = i8x16::new(1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16);
@@ -12457,6 +12487,15 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon,crypto")]
+    unsafe fn test_vmull_high_p64() {
+        let a: i64x2 = i64x2::new(1, 15);
+        let b: i64x2 = i64x2::new(1, 3);
+        let e: p128 = 17;
+        let r: p128 = transmute(vmull_high_p64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vmull_high_n_s16() {
         let a: i16x8 = i16x8::new(1, 2, 9, 10, 9, 10, 11, 12);
diff --git a/crates/stdarch-gen/neon.spec b/crates/stdarch-gen/neon.spec
index 586a4fbe37..21588f621c 100644
--- a/crates/stdarch-gen/neon.spec
+++ b/crates/stdarch-gen/neon.spec
@@ -2214,9 +2214,10 @@ target = crypto
 
 aarch64 = pmull
 link-aarch64 = pmull64:p64:p64:p64:int8x16_t
-arm = vmull
-link-arm = vmullp.v2i64:int64x1_t:int64x1_t:int64x1_t:int64x2_t
-//generate p64:p64:p128
+// vmull_p64 is currently generated for aarch64 only because of the state of LLVM support; the arm entries below stay disabled for now
+// arm = vmull
+// link-arm = vmullp.v2i64:int64x1_t:int64x1_t:int64x1_t:int64x2_t
+generate p64:p64:p128
 
 
 /// Polynomial multiply long
@@ -2242,8 +2243,8 @@ b = 1, 3
 validate 17
 target = crypto
 
-aarch64 = pmull2
-//generate poly64x2_t:poly64x2_t:p128
+aarch64 = pmull
+generate poly64x2_t:poly64x2_t:p128
 
 /// Vector long multiply with scalar
 name = vmull