Skip to content

Add vmull_p64 and vmull_high_p64 for aarch64 #1157

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions crates/core_arch/src/aarch64/neon/generated.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4646,6 +4646,19 @@ pub unsafe fn vmull_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
vmull_u32(a, b)
}

/// Polynomial multiply long
///
/// Multiplies the two 64-bit polynomials `a` and `b` and returns the
/// 128-bit polynomial result.
///
/// # Safety
///
/// This function is compiled with the `neon` and `crypto` target features
/// enabled; the caller must ensure both are available on the executing CPU.
#[inline]
#[target_feature(enable = "neon,crypto")]
#[cfg_attr(test, assert_instr(pmull))]
pub unsafe fn vmull_p64(a: p64, b: p64) -> p128 {
    // NOTE(review): the lint is presumably silenced because the declaration
    // below binds an LLVM intrinsic (see `link_name`) that returns a SIMD
    // vector type rather than a C-compatible type -- confirm.
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.pmull64")]
        fn vmull_p64_(a: p64, b: p64) -> int8x16_t;
    }
    // The intrinsic hands the product back as a 16-byte vector; reinterpret
    // those 128 bits as the scalar `p128` this function returns.
    let product: int8x16_t = vmull_p64_(a, b);
    transmute(product)
}

/// Polynomial multiply long
#[inline]
#[target_feature(enable = "neon")]
Expand All @@ -4656,6 +4669,14 @@ pub unsafe fn vmull_high_p8(a: poly8x16_t, b: poly8x16_t) -> poly16x8_t {
vmull_p8(a, b)
}

/// Polynomial multiply long
#[inline]
#[target_feature(enable = "neon,crypto")]
#[cfg_attr(test, assert_instr(pmull))]
pub unsafe fn vmull_high_p64(a: poly64x2_t, b: poly64x2_t) -> p128 {
vmull_p64(simd_extract(a, 1), simd_extract(b, 1))
}

/// Multiply long
#[inline]
#[target_feature(enable = "neon")]
Expand Down Expand Up @@ -12448,6 +12469,15 @@ mod test {
assert_eq!(r, e);
}

// Checks the polynomial (carry-less) product
// 0b1111 * 0b11 = 0b1111 ^ 0b1_1110 = 0b1_0001 = 17.
//
// `vmull_p64` is declared with `target_feature(enable = "neon,crypto")`, so
// the test must require `crypto` as well as `neon`; enabling only `neon`
// would let the call run without the feature the intrinsic depends on.
#[simd_test(enable = "neon,crypto")]
unsafe fn test_vmull_p64() {
    let a: p64 = 15;
    let b: p64 = 3;
    let e: p128 = 17;
    // Arguments and result already have the exact types the intrinsic uses,
    // so no `transmute` is needed.
    let r: p128 = vmull_p64(a, b);
    assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vmull_high_p8() {
let a: i8x16 = i8x16::new(1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16);
Expand All @@ -12457,6 +12487,15 @@ mod test {
assert_eq!(r, e);
}

// Checks that only the high lanes take part in the product: lane 1 holds
// 15 and 3, whose polynomial (carry-less) product is
// 0b1111 ^ 0b1_1110 = 0b1_0001 = 17; lane 0 (1 and 1) must be ignored.
//
// `vmull_high_p64` is declared with `target_feature(enable = "neon,crypto")`,
// so the test must require `crypto` as well as `neon`; enabling only `neon`
// would let the call run without the feature the intrinsic depends on.
#[simd_test(enable = "neon,crypto")]
unsafe fn test_vmull_high_p64() {
    let a: i64x2 = i64x2::new(1, 15);
    let b: i64x2 = i64x2::new(1, 3);
    let e: p128 = 17;
    // The inputs are reinterpreted as the vector type the intrinsic takes;
    // the result is already a `p128`, so it needs no `transmute`.
    let r: p128 = vmull_high_p64(transmute(a), transmute(b));
    assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vmull_high_n_s16() {
let a: i16x8 = i16x8::new(1, 2, 9, 10, 9, 10, 11, 12);
Expand Down
11 changes: 6 additions & 5 deletions crates/stdarch-gen/neon.spec
Original file line number Diff line number Diff line change
Expand Up @@ -2214,9 +2214,10 @@ target = crypto

aarch64 = pmull
link-aarch64 = pmull64:p64:p64:p64:int8x16_t
arm = vmull
link-arm = vmullp.v2i64:int64x1_t:int64x1_t:int64x1_t:int64x2_t
//generate p64:p64:p128
// Due to the current state of LLVM support, vmull_p64 is only available on aarch64 for now
// arm = vmull
// link-arm = vmullp.v2i64:int64x1_t:int64x1_t:int64x1_t:int64x2_t
generate p64:p64:p128


/// Polynomial multiply long
Expand All @@ -2242,8 +2243,8 @@ b = 1, 3
validate 17
target = crypto

aarch64 = pmull2
//generate poly64x2_t:poly64x2_t:p128
aarch64 = pmull
generate poly64x2_t:poly64x2_t:p128

/// Vector long multiply with scalar
name = vmull
Expand Down