Skip to content

[AArch64][SVE] Refactor getPTrue to return splat(1) when pattern=all. #139236

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
2 commits merged on May 12, 2025 (source and target branch names were lost in extraction)
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion clang/test/CodeGen/AArch64/sve-vector-bits-codegen.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ void func(int *restrict a, int *restrict b) {
// CHECK256-COUNT-8: str
// CHECK512-COUNT-4: str
// CHECK1024-COUNT-2: str
// CHECK2048-COUNT-1: st1w
// CHECK2048-COUNT-1: str
#pragma clang loop vectorize(enable)
for (int i = 0; i < 64; ++i)
a[i] += b[i];
Expand Down
8 changes: 4 additions & 4 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5725,8 +5725,8 @@ SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {

static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT,
int Pattern) {
if (VT == MVT::nxv1i1 && Pattern == AArch64SVEPredPattern::all)
return DAG.getConstant(1, DL, MVT::nxv1i1);
if (Pattern == AArch64SVEPredPattern::all)
return DAG.getConstant(1, DL, VT);
return DAG.getNode(AArch64ISD::PTRUE, DL, VT,
DAG.getTargetConstant(Pattern, DL, MVT::i32));
}
Expand Down Expand Up @@ -25030,7 +25030,7 @@ static SDValue foldCSELofLASTB(SDNode *Op, SelectionDAG &DAG) {
if (AnyPred.getOpcode() == AArch64ISD::REINTERPRET_CAST)
AnyPred = AnyPred.getOperand(0);

if (TruePred != AnyPred && TruePred.getOpcode() != AArch64ISD::PTRUE)
if (TruePred != AnyPred && !isAllActivePredicate(DAG, TruePred))
return SDValue();

SDValue LastB = Op->getOperand(0);
Expand Down Expand Up @@ -28568,7 +28568,7 @@ static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT) {
}
}

// Return a PTRUE with active lanes corresponding to the extent of VT.
// Return a predicate with active lanes corresponding to the extent of VT.
static SDValue getPredicateForFixedLengthVector(SelectionDAG &DAG, SDLoc &DL,
EVT VT) {
assert(VT.isFixedLengthVector() &&
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -208,13 +208,8 @@ define <vscale x 2 x i32> @vec_scalable_subvec_fixed_idx_nonzero_large_i32(ptr %
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ptrue p1.d, vl8
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT: str z0, [sp]
; CHECK-NEXT: ld1w { z0.d }, p1/z, [x1]
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: ldr z0, [sp]
; CHECK-NEXT: ptrue p0.d, vl8
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x1]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
Expand Down
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
Original file line number Diff line number Diff line change
Expand Up @@ -331,8 +331,7 @@ define void @extract_fixed_v4i64_nxv2i64(<vscale x 2 x i64> %vec, ptr %p) nounwi
; CHECK-LABEL: extract_fixed_v4i64_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #32
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: str z0, [x0]
; CHECK-NEXT: ret
%retval = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> %vec, i64 4)
store <4 x i64> %retval, ptr %p
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/sve-fixed-ld2-alloca.ll
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ define void @st1d_fixed(ptr %ptr) #0 {
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld2d { z0.d, z1.d }, p0/z, [x20]
; CHECK-NEXT: ldr x30, [sp, #128] // 8-byte Folded Reload
; CHECK-NEXT: st1d { z0.d }, p0, [x19]
; CHECK-NEXT: str z0, [x19]
; CHECK-NEXT: ldp x20, x19, [sp, #144] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #160
; CHECK-NEXT: ret
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -544,11 +544,10 @@ define void @extract_subvector_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
define void @extract_subvector_legalization_v8i32() vscale_range(2,2) #0 {
; CHECK-LABEL: extract_subvector_legalization_v8i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: adrp x8, .LCPI40_0
; CHECK-NEXT: add x8, x8, :lo12:.LCPI40_0
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8]
; CHECK-NEXT: ldr z0, [x8]
; CHECK-NEXT: mov z1.d, z0.d
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-fp-convert.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@ target triple = "aarch64-unknown-linux-gnu"
define void @fp_convert_combine_crash(ptr %a, ptr %b) #0 {
; CHECK-LABEL: fp_convert_combine_crash:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov z0.s, #8.00000000
; CHECK-NEXT: ldr z1, [x0]
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fmov z1.s, #8.00000000
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: fmul z0.s, z0.s, z1.s
; CHECK-NEXT: fmul z0.s, z1.s, z0.s
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: str z0, [x1]
; CHECK-NEXT: ret
%f = load <8 x float>, ptr %a
%mul.i = fmul <8 x float> %f, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00,
Expand Down
25 changes: 12 additions & 13 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests-crash.ll
Original file line number Diff line number Diff line change
Expand Up @@ -20,32 +20,31 @@ define dso_local void @func1(ptr %v1, ptr %v2, ptr %v3, ptr %v4, ptr %v5, ptr %v
; CHECK-NEXT: .cfi_offset w21, -24
; CHECK-NEXT: .cfi_offset w22, -32
; CHECK-NEXT: .cfi_offset w29, -48
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: add x10, sp, #176
; CHECK-NEXT: add x8, sp, #48
; CHECK-NEXT: add x9, sp, #144
; CHECK-NEXT: add x20, sp, #176
; CHECK-NEXT: ldr x15, [sp, #104]
; CHECK-NEXT: ld1d { z3.d }, p0/z, [x10]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8]
; CHECK-NEXT: ldr z3, [x10]
; CHECK-NEXT: ldr z0, [x8]
; CHECK-NEXT: add x8, sp, #112
; CHECK-NEXT: ld1d { z2.d }, p0/z, [x9]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x8]
; CHECK-NEXT: ldur q4, [sp, #88]
; CHECK-NEXT: ldr z2, [x9]
; CHECK-NEXT: ldr z1, [x8]
; CHECK-NEXT: add x20, sp, #176
; CHECK-NEXT: ldp x9, x8, [sp, #328]
; CHECK-NEXT: ldr x19, [sp, #272]
; CHECK-NEXT: ldr x15, [sp, #104]
; CHECK-NEXT: ldp x11, x10, [sp, #312]
; CHECK-NEXT: ldur q4, [sp, #88]
; CHECK-NEXT: ldp x13, x12, [sp, #296]
; CHECK-NEXT: ldr x19, [sp, #272]
; CHECK-NEXT: ldp x18, x14, [sp, #280]
; CHECK-NEXT: ldp x16, x17, [sp, #208]
; CHECK-NEXT: ldp x21, x22, [sp, #352]
; CHECK-NEXT: st1d { z3.d }, p0, [x20]
; CHECK-NEXT: str z3, [x20]
; CHECK-NEXT: add x20, sp, #144
; CHECK-NEXT: st1d { z2.d }, p0, [x20]
; CHECK-NEXT: str z2, [x20]
; CHECK-NEXT: add x20, sp, #112
; CHECK-NEXT: st1d { z1.d }, p0, [x20]
; CHECK-NEXT: str z1, [x20]
; CHECK-NEXT: add x20, sp, #48
; CHECK-NEXT: st1d { z0.d }, p0, [x20]
; CHECK-NEXT: str z0, [x20]
; CHECK-NEXT: stp x21, x22, [sp, #352]
; CHECK-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: stp x19, x18, [sp, #272]
Expand Down
13 changes: 6 additions & 7 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -debug-only=isel < %s 2>&1 | FileCheck %s

; REQUIRES: asserts
Expand All @@ -9,16 +9,15 @@ target triple = "aarch64-unknown-linux-gnu"
; accessing fixed width objects.
define void @foo(ptr %a) #0 {
; CHECK-LABEL: foo:
; CHECK: SelectionDAG has 15 nodes:
; CHECK: SelectionDAG has 13 nodes:
; CHECK-NEXT: t0: ch,glue = EntryToken
; CHECK-NEXT: t12: nxv2i1 = PTRUE_D TargetConstant:i32<31>
; CHECK-NEXT: t2: i64,ch = CopyFromReg t0, Register:i64 %0
; CHECK-NEXT: t18: nxv2i64,ch = LD1D_IMM<Mem:(volatile load (s512) from %ir.a)> t12, t2, TargetConstant:i64<0>, t0
; CHECK-NEXT: t21: nxv2i64,ch = LDR_ZXI<Mem:(volatile load (<vscale x 1 x s128>) from %ir.a, align 64)> t2, TargetConstant:i64<0>, t0
; CHECK-NEXT: t8: i64 = ADDXri TargetFrameIndex:i64<1>, TargetConstant:i32<0>, TargetConstant:i32<0>
; CHECK-NEXT: t6: i64 = ADDXri TargetFrameIndex:i64<0>, TargetConstant:i32<0>, TargetConstant:i32<0>
; CHECK-NEXT: t17: ch = ST1D_IMM<Mem:(volatile store (s512) into %ir.r0)> t18, t12, t6, TargetConstant:i64<0>, t18:1
; CHECK-NEXT: t16: ch = ST1D_IMM<Mem:(volatile store (s512) into %ir.r1)> t18, t12, t8, TargetConstant:i64<0>, t17
; CHECK-NEXT: t10: ch = RET_ReallyLR t16
; CHECK-NEXT: t22: ch = STR_ZXI<Mem:(volatile store (<vscale x 1 x s128>) into %ir.r0, align 64)> t21, t6, TargetConstant:i64<0>, t21:1
; CHECK-NEXT: t23: ch = STR_ZXI<Mem:(volatile store (<vscale x 1 x s128>) into %ir.r1, align 64)> t21, t8, TargetConstant:i64<0>, t22
; CHECK-NEXT: t10: ch = RET_ReallyLR t23
; CHECK-EMPTY:
entry:
%r0 = alloca <8 x i64>
Expand Down
12 changes: 5 additions & 7 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-offsets.ll
Original file line number Diff line number Diff line change
Expand Up @@ -380,11 +380,10 @@ define void @v8i32(ptr %ldptr, ptr %stptr) {
;
; CHECK-256-LABEL: v8i32:
; CHECK-256: // %bb.0:
; CHECK-256-NEXT: ptrue p0.s
; CHECK-256-NEXT: ld1w { z0.s }, p0/z, [x0, #2, mul vl]
; CHECK-256-NEXT: ld1w { z1.s }, p0/z, [x0, #1, mul vl]
; CHECK-256-NEXT: st1w { z0.s }, p0, [x1, #2, mul vl]
; CHECK-256-NEXT: st1w { z1.s }, p0, [x1, #1, mul vl]
; CHECK-256-NEXT: ldr z0, [x0, #2, mul vl]
; CHECK-256-NEXT: ldr z1, [x0, #1, mul vl]
; CHECK-256-NEXT: str z0, [x1, #2, mul vl]
; CHECK-256-NEXT: str z1, [x1, #1, mul vl]
; CHECK-256-NEXT: ret
;
; CHECK-512-LABEL: v8i32:
Expand Down Expand Up @@ -437,8 +436,7 @@ define void @v8i32_vscale(ptr %0) {
; CHECK-256-LABEL: v8i32_vscale:
; CHECK-256: // %bb.0:
; CHECK-256-NEXT: mov z0.s, #1 // =0x1
; CHECK-256-NEXT: ptrue p0.s
; CHECK-256-NEXT: st1w { z0.s }, p0, [x0, #2, mul vl]
; CHECK-256-NEXT: str z0, [x0, #2, mul vl]
; CHECK-256-NEXT: ret
;
; CHECK-512-LABEL: v8i32_vscale:
Expand Down
43 changes: 19 additions & 24 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-optimize-ptrue.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,10 @@ target triple = "aarch64-unknown-linux-gnu"
define void @add_v64i8(ptr %a, ptr %b) #0 {
; CHECK-LABEL: add_v64i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: ldr z1, [x1]
; CHECK-NEXT: add z0.b, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: str z0, [x0]
; CHECK-NEXT: ret
%op1 = load <64 x i8>, ptr %a
%op2 = load <64 x i8>, ptr %b
Expand All @@ -22,11 +21,10 @@ define void @add_v64i8(ptr %a, ptr %b) #0 {
define void @add_v32i16(ptr %a, ptr %b, ptr %c) #0 {
; CHECK-LABEL: add_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: ldr z1, [x1]
; CHECK-NEXT: add z0.h, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: str z0, [x0]
; CHECK-NEXT: ret
%op1 = load <32 x i16>, ptr %a
%op2 = load <32 x i16>, ptr %b
Expand All @@ -38,10 +36,10 @@ define void @add_v32i16(ptr %a, ptr %b, ptr %c) #0 {
define void @abs_v16i32(ptr %a) #0 {
; CHECK-LABEL: abs_v16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: abs z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: str z0, [x0]
; CHECK-NEXT: ret
%op1 = load <16 x i32>, ptr %a
%res = call <16 x i32> @llvm.abs.v16i32(<16 x i32> %op1, i1 false)
Expand All @@ -52,10 +50,10 @@ define void @abs_v16i32(ptr %a) #0 {
define void @abs_v8i64(ptr %a) #0 {
; CHECK-LABEL: abs_v8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: abs z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: str z0, [x0]
; CHECK-NEXT: ret
%op1 = load <8 x i64>, ptr %a
%res = call <8 x i64> @llvm.abs.v8i64(<8 x i64> %op1, i1 false)
Expand All @@ -66,11 +64,10 @@ define void @abs_v8i64(ptr %a) #0 {
define void @fadd_v32f16(ptr %a, ptr %b) #0 {
; CHECK-LABEL: fadd_v32f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: ldr z1, [x1]
; CHECK-NEXT: fadd z0.h, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: str z0, [x0]
; CHECK-NEXT: ret
%op1 = load <32 x half>, ptr %a
%op2 = load <32 x half>, ptr %b
Expand All @@ -82,11 +79,10 @@ define void @fadd_v32f16(ptr %a, ptr %b) #0 {
define void @fadd_v16f32(ptr %a, ptr %b) #0 {
; CHECK-LABEL: fadd_v16f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: ldr z1, [x1]
; CHECK-NEXT: fadd z0.s, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: str z0, [x0]
; CHECK-NEXT: ret
%op1 = load <16 x float>, ptr %a
%op2 = load <16 x float>, ptr %b
Expand All @@ -98,11 +94,10 @@ define void @fadd_v16f32(ptr %a, ptr %b) #0 {
define void @fadd_v8f64(ptr %a, ptr %b) #0 {
; CHECK-LABEL: fadd_v8f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: ldr z1, [x1]
; CHECK-NEXT: fadd z0.d, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: str z0, [x0]
; CHECK-NEXT: ret
%op1 = load <8 x double>, ptr %a
%op2 = load <8 x double>, ptr %b
Expand Down
Loading
Loading