From 7ba35bb7bd2bae6f0d442cc187cb6210d08019ea Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Mon, 24 Feb 2025 10:45:38 +0000 Subject: [PATCH 1/4] Add tests --- .../AArch64/single_early_exit_unsafe_ptrs.ll | 442 ++++++++++++++++++ .../RISCV/single_early_exit_unsafe_ptrs.ll | 398 ++++++++++++++++ .../single_early_exit_unsafe_ptrs.ll | 42 +- 3 files changed, 881 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/single_early_exit_unsafe_ptrs.ll create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/single_early_exit_unsafe_ptrs.ll diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/single_early_exit_unsafe_ptrs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/single_early_exit_unsafe_ptrs.ll new file mode 100644 index 0000000000000..4f7b120643763 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/single_early_exit_unsafe_ptrs.ll @@ -0,0 +1,442 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 5 +; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +declare void @init_mem(ptr, i64); + + +define i64 @same_exit_block_no_live_outs_one_faulting_ptr(ptr %p1) #0 { +; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr( +; CHECK-SAME: ptr [[P1:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ] +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX1]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3 +; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]] +; CHECK: [[LOOP_INC]]: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX1]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]] +; CHECK: [[LOOP_END]]: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, %[[LOOP]] ], [ 0, %[[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %cmp3 = icmp eq i8 %ld1, 3 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_pre_inc_use1_too_small_allocas() #0 { +; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_allocas( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[P1:%.*]] = alloca [42 x i8], align 4 +; CHECK-NEXT: [[P2:%.*]] = alloca [42 x i8], align 4 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; 
CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]] +; CHECK: [[LOOP_INC]]: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]] +; CHECK: [[LOOP_END]]: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], %[[LOOP]] ], [ 67, %[[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [42 x i8] + %p2 = alloca [42 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs(ptr dereferenceable(42) %p1, ptr dereferenceable(42) %p2) #0 { +; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs( +; CHECK-SAME: ptr dereferenceable(42) [[P1:%.*]], ptr dereferenceable(42) [[P2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]] +; CHECK: [[LOOP_INC]]: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]] +; CHECK: [[LOOP_END]]: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], %[[LOOP]] ], [ 67, %[[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_pre_inc_use1_unknown_ptrs(ptr %p1, ptr %p2) #0 { +; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_unknown_ptrs( +; CHECK-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 
[[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]] +; CHECK: [[LOOP_INC]]: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]] +; CHECK: [[LOOP_END]]: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], %[[LOOP]] ], [ 67, %[[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride2(ptr %p1) #0 { +; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride2( +; CHECK-SAME: ptr [[P1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ] +; CHECK-NEXT: [[STRIDE2:%.*]] = mul i64 [[INDEX]], 2 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[STRIDE2]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3 +; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]] +; CHECK: [[LOOP_INC]]: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]] +; CHECK: [[LOOP_END]]: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, %[[LOOP]] ], [ 0, %[[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %stride2 = mul i64 %index, 2 + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %stride2 + %ld1 = load i8, ptr %arrayidx, align 1 + %cmp3 = icmp eq i8 %ld1, 3 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride_unknown(ptr %p1, i64 %stride) #0 { +; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride_unknown( +; CHECK-SAME: ptr [[P1:%.*]], i64 [[STRIDE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ] +; CHECK-NEXT: [[MUL:%.*]] = 
mul i64 [[INDEX]], [[STRIDE]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[MUL]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3 +; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]] +; CHECK: [[LOOP_INC]]: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]] +; CHECK: [[LOOP_END]]: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, %[[LOOP]] ], [ 0, %[[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %mul = mul i64 %index, %stride + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %mul + %ld1 = load i8, ptr %arrayidx, align 1 + %cmp3 = icmp eq i8 %ld1, 3 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_no_live_outs_one_faulting_ptr_no_vscale_range(ptr %p1, i64 %stride) #1 { +; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_no_vscale_range( +; CHECK-SAME: ptr [[P1:%.*]], i64 [[STRIDE:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3 +; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]] +; CHECK: [[LOOP_INC]]: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]] +; CHECK: [[LOOP_END]]: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, %[[LOOP]] ], [ 0, %[[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %cmp3 = icmp eq i8 %ld1, 3 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride_too_low(ptr %p1) #0 { +; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride_too_low( +; CHECK-SAME: ptr [[P1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i32 [[LD1]], 3 +; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]] +; CHECK: [[LOOP_INC]]: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: 
[[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]] +; CHECK: [[LOOP_END]]: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, %[[LOOP]] ], [ 0, %[[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i32, ptr %arrayidx, align 4 + %cmp3 = icmp eq i32 %ld1, 3 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_no_live_outs_two_faulting_ptrs(ptr %p1, ptr %p2) #0 { +; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_two_faulting_ptrs( +; CHECK-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]] +; CHECK: [[LOOP_INC]]: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]] +; CHECK: [[LOOP_END]]: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, %[[LOOP]] ], [ 0, %[[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx2 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx2, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ] + ret i64 %retval +} + + +define i8 @same_exit_block_no_live_outs_faulting_load_after_early_exit(ptr %p1, ptr %p2) #0 { +; CHECK-LABEL: define i8 @same_exit_block_no_live_outs_faulting_load_after_early_exit( +; CHECK-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3 +; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]] +; CHECK: [[LOOP_INC]]: +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 
[[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]] +; CHECK: [[LOOP_END]]: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i8 [ 1, %[[LOOP]] ], [ [[LD2]], %[[LOOP_INC]] ] +; CHECK-NEXT: ret i8 [[RETVAL]] +; +entry: + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %cmp3 = icmp eq i8 %ld1, 3 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %arrayidx2 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx2, align 1 + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i8 [ 1, %loop ], [ %ld2, %loop.inc ] + ret i8 %retval +} + + +attributes #0 = { "target-features"="+sve" vscale_range(1,16) } +attributes #1 = { "target-features"="+sve" } diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/single_early_exit_unsafe_ptrs.ll b/llvm/test/Transforms/LoopVectorize/RISCV/single_early_exit_unsafe_ptrs.ll new file mode 100644 index 0000000000000..929cc5b84d922 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/single_early_exit_unsafe_ptrs.ll @@ -0,0 +1,398 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization -passes=loop-vectorize \ +; RUN: -scalable-vectorization=on -mtriple riscv64-linux-gnu -mattr=+v,+f | FileCheck %s + +target triple = "riscv64" + +declare void @init_mem(ptr, i64); + + +define i64 @same_exit_block_pre_inc_use1_too_small_allocas() #0 { +; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_allocas( +; CHECK-SAME: ) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [42 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [42 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [42 x i8] + %p2 = alloca [42 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 
%index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs(ptr dereferenceable(42) %p1, ptr dereferenceable(42) %p2) #0 { +; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs( +; CHECK-SAME: ptr dereferenceable(42) [[P1:%.*]], ptr dereferenceable(42) [[P2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_pre_inc_use1_unknown_ptrs(ptr %p1, ptr %p2) #0 { +; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_unknown_ptrs( +; CHECK-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + 
%cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_no_live_outs_one_faulting_ptr(ptr %p1) #0 { +; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr( +; CHECK-SAME: ptr [[P1:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP1:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3 +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP1]] ], [ 0, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %cmp3 = icmp eq i8 %ld1, 3 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride2(ptr %p1) #0 { +; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride2( +; CHECK-SAME: ptr [[P1:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[STRIDE2:%.*]] = mul i64 [[INDEX]], 2 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[STRIDE2]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3 +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP]] ], [ 0, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %stride2 = mul i64 %index, 2 + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %stride2 + %ld1 = load i8, ptr %arrayidx, align 1 + %cmp3 = icmp eq i8 %ld1, 3 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride_unknown(ptr %p1, i64 %stride) #0 { +; CHECK-LABEL: define i64 
@same_exit_block_no_live_outs_one_faulting_ptr_stride_unknown( +; CHECK-SAME: ptr [[P1:%.*]], i64 [[STRIDE:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[INDEX]], [[STRIDE]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[MUL]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3 +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP]] ], [ 0, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %mul = mul i64 %index, %stride + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %mul + %ld1 = load i8, ptr %arrayidx, align 1 + %cmp3 = icmp eq i8 %ld1, 3 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_no_live_outs_one_faulting_ptr_no_vscale_range(ptr %p1, i64 %stride) { +; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_no_vscale_range( +; CHECK-SAME: ptr [[P1:%.*]], i64 [[STRIDE:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3 +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP]] ], [ 0, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %cmp3 = icmp eq i8 %ld1, 3 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride_too_low(ptr %p1) #0 { +; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride_too_low( +; CHECK-SAME: ptr [[P1:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] 
+; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i32 [[LD1]], 3 +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP]] ], [ 0, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i32, ptr %arrayidx, align 4 + %cmp3 = icmp eq i32 %ld1, 3 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_no_live_outs_two_faulting_ptrs(ptr %p1, ptr %p2) #0 { +; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_two_faulting_ptrs( +; CHECK-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP]] ], [ 0, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx2 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx2, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ] + ret i64 %retval +} + + +attributes #0 = { vscale_range(2,1024) } diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit_unsafe_ptrs.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit_unsafe_ptrs.ll index c68eeac19c9ec..23362004b1b2a 100644 --- a/llvm/test/Transforms/LoopVectorize/single_early_exit_unsafe_ptrs.ll +++ b/llvm/test/Transforms/LoopVectorize/single_early_exit_unsafe_ptrs.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 -; RUN: opt -S < %s -p loop-vectorize | FileCheck %s +; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization | FileCheck %s declare void @init_mem(ptr, i64); @@ -141,3 +141,43 @@ loop.end: %retval = phi i64 [ %index, %loop ], [ 
67, %loop.inc ] ret i64 %retval } + + +define i64 @same_exit_block_no_live_outs_one_faulting_ptr(ptr %p1) { +; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr( +; CHECK-SAME: ptr [[P1:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3 +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP]] ], [ 0, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %cmp3 = icmp eq i8 %ld1, 3 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ] + ret i64 %retval +} From d47bfd7120aacac1729fedf9f5a767188f45b70d Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Mon, 24 Feb 2025 10:46:01 +0000 Subject: [PATCH 2/4] [LoopVectorize] Perform loop versioning for some early exit loops When attempting to vectorise a loop with an uncountable early exit, we attempt to discover if all the loads in the loop are known to be dereferenceable. If at least one load could potentially fault then we abandon vectorisation. This patch adds support for vectorising loops with one potentially faulting load by versioning the loop based on the load pointer alignment. It is required that the vector load must always fault on the first lane, i.e. the load should not straddle a page boundary. Doing so ensures that the behaviour of the vector and scalar loops is identical, i.e. if a load does fault it will fault at the same scalar iteration. Such vectorisation depends on the following conditions being met: 1. The max vector width must not exceed the minimum page size. This is done by adding a getMaxSafeVectorWidthInBits wrapper that checks if we have an uncountable early exit. For scalable vectors we must be able to determine the maximum possible value of vscale. 2. The size of the loaded type must be a power of 2. This is checked during legalisation. 3. The VF must be a power of two (so that the vector width can divide wholly into the page size which is also power of 2). For fixed-width vectors this is always true, and for scalable vectors we query the TTI hook isVScaleKnownToBeAPowerOfTwo. If the effective runtime VF could change during the loop then this cannot be vectorised via loop versioning. 4. The load pointer must be aligned to a multiple of the vector width. (NOTE: interleaving is currently disabled for these early exit loops.) We add a runtime check to ensure this is true. 
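For illustration, the shape of loop this patch targets and a rough sketch of
the versioning condition are shown below. The names in this example are
invented purely for illustration and are not taken from the patch or the
tests; the real check is built as a SCEV predicate in
addPointerAlignmentChecks (see the LoopVectorize.cpp changes).

  #include <cstdint>

  // A read-only search loop with an uncountable early exit; the load of
  // p[i] is the only potentially faulting access in the loop.
  long find_first_eq3(const char *p, long n) {
    for (long i = 0; i < n; i++)
      if (p[i] == 3) // uncountable early exit
        return i;
    return -1;
  }

  // Rough equivalent of the runtime alignment check used for versioning:
  // the address of the first loaded element must be a multiple of the whole
  // vector access size (VF * IC elements). Together with the restrictions
  // above (access size a power of two and no larger than the minimum page
  // size), this guarantees a vector load never straddles a page boundary
  // and, if it faults at all, it faults on the first lane.
  bool alignment_check(const char *p, unsigned VF, unsigned IC) {
    unsigned long long AccessSize =
        (unsigned long long)VF * IC * sizeof(*p); // element size is 1 byte
    return (reinterpret_cast<uintptr_t>(p) % AccessSize) == 0;
  }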
--- llvm/include/llvm/Analysis/Loads.h | 6 - .../Vectorize/LoopVectorizationLegality.h | 33 +- llvm/lib/Analysis/Loads.cpp | 15 - .../Vectorize/LoopVectorizationLegality.cpp | 85 ++- .../Transforms/Vectorize/LoopVectorize.cpp | 70 +- .../AArch64/single_early_exit_unsafe_ptrs.ll | 722 +++++++++++++++--- .../RISCV/single_early_exit_unsafe_ptrs.ll | 97 ++- .../single_early_exit_unsafe_ptrs.ll | 2 +- 8 files changed, 871 insertions(+), 159 deletions(-) diff --git a/llvm/include/llvm/Analysis/Loads.h b/llvm/include/llvm/Analysis/Loads.h index 639070c07897b..224c936bf161e 100644 --- a/llvm/include/llvm/Analysis/Loads.h +++ b/llvm/include/llvm/Analysis/Loads.h @@ -88,12 +88,6 @@ bool isDereferenceableAndAlignedInLoop( AssumptionCache *AC = nullptr, SmallVectorImpl *Predicates = nullptr); -/// Return true if the loop \p L cannot fault on any iteration and only -/// contains read-only memory accesses. -bool isDereferenceableReadOnlyLoop( - Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, - SmallVectorImpl *Predicates = nullptr); - /// Return true if we know that executing a load from this value cannot trap. /// /// If DT and ScanFrom are specified this method performs context-sensitive diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index e959d93b57275..b4bf528472c5c 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -382,11 +382,18 @@ class LoopVectorizationLegality { const LoopAccessInfo *getLAI() const { return LAI; } bool isSafeForAnyVectorWidth() const { - return LAI->getDepChecker().isSafeForAnyVectorWidth(); + return LAI->getDepChecker().isSafeForAnyVectorWidth() && + (!hasUncountableEarlyExit() || !getNumPotentiallyFaultingPointers()); } uint64_t getMaxSafeVectorWidthInBits() const { - return LAI->getDepChecker().getMaxSafeVectorWidthInBits(); + uint64_t MaxSafeVectorWidth = + LAI->getDepChecker().getMaxSafeVectorWidthInBits(); + // The legalizer bails out if getMinPageSize does not return a value. + if (hasUncountableEarlyExit() && getNumPotentiallyFaultingPointers()) + MaxSafeVectorWidth = + std::min(MaxSafeVectorWidth, uint64_t(*TTI->getMinPageSize()) * 8); + return MaxSafeVectorWidth; } /// Returns true if the loop has exactly one uncountable early exit, i.e. an @@ -419,6 +426,19 @@ class LoopVectorizationLegality { unsigned getNumStores() const { return LAI->getNumStores(); } unsigned getNumLoads() const { return LAI->getNumLoads(); } + /// Return the number of pointers in the loop that could potentially fault in + /// a loop with uncountable early exits. + unsigned getNumPotentiallyFaultingPointers() const { + return PotentiallyFaultingPtrs.size(); + } + + /// Return a vector of all potentially faulting pointers in a loop with + /// uncountable early exits. + const SmallVectorImpl> * + getPotentiallyFaultingPointers() const { + return &PotentiallyFaultingPtrs; + } + /// Returns a HistogramInfo* for the given instruction if it was determined /// to be part of a load -> update -> store sequence where multiple lanes /// may be working on the same memory address. @@ -524,6 +544,11 @@ class LoopVectorizationLegality { /// additional cases safely. bool isVectorizableEarlyExitLoop(); + /// Returns true if all loads in the loop contained in \p Loads can be + /// analyzed as potentially faulting. 
Any loads that may fault are added to + /// the member variable PotentiallyFaultingPtrs. + bool analyzePotentiallyFaultingLoads(SmallVectorImpl *Loads); + /// Return true if all of the instructions in the block can be speculatively /// executed, and record the loads/stores that require masking. /// \p SafePtrs is a list of addresses that are known to be legal and we know @@ -642,6 +667,10 @@ class LoopVectorizationLegality { /// Keep track of the loop edge to an uncountable exit, comprising a pair /// of (Exiting, Exit) blocks, if there is exactly one early exit. std::optional> UncountableEdge; + + /// Keep a record of all potentially faulting pointers in loops with + /// uncountable early exits. + SmallVector, 4> PotentiallyFaultingPtrs; }; } // namespace llvm diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp index b461c41d29e84..304bdcd1fba25 100644 --- a/llvm/lib/Analysis/Loads.cpp +++ b/llvm/lib/Analysis/Loads.cpp @@ -816,18 +816,3 @@ bool llvm::canReplacePointersIfEqual(const Value *From, const Value *To, return isPointerAlwaysReplaceable(From, To, DL); } - -bool llvm::isDereferenceableReadOnlyLoop( - Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, - SmallVectorImpl *Predicates) { - for (BasicBlock *BB : L->blocks()) { - for (Instruction &I : *BB) { - if (auto *LI = dyn_cast(&I)) { - if (!isDereferenceableAndAlignedInLoop(LI, L, *SE, *DT, AC, Predicates)) - return false; - } else if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow()) - return false; - } - } - return true; -} diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index 420cbc5384ce4..ff3954d556e39 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -1602,6 +1602,43 @@ bool LoopVectorizationLegality::canVectorizeLoopNestCFG( return Result; } +bool LoopVectorizationLegality::analyzePotentiallyFaultingLoads( + SmallVectorImpl *Loads) { + LLVM_DEBUG(dbgs() << "LV: Looking for potentially faulting loads in loop " + "with uncountable early exit:\n"); + for (LoadInst *LI : *Loads) { + LLVM_DEBUG(dbgs() << "LV: Load: " << *LI << '\n'); + Value *Ptr = LI->getPointerOperand(); + if (!Ptr) + return false; + const SCEV *PtrExpr = PSE.getSCEV(Ptr); + const SCEVAddRecExpr *AR = dyn_cast(PtrExpr); + // TODO: Deal with loop invariant pointers. + if (!AR || AR->getLoop() != TheLoop || !AR->isAffine()) + return false; + auto Step = dyn_cast(AR->getStepRecurrence(*PSE.getSE())); + if (!Step) + return false; + const SCEV *Start = AR->getStart(); + + // Make sure the step is positive and matches the object size in memory. + // TODO: Extend this to cover more cases. + auto &DL = LI->getDataLayout(); + APInt EltSize(DL.getIndexTypeSizeInBits(Ptr->getType()), + DL.getTypeStoreSize(LI->getType()).getFixedValue()); + + // Also discard element sizes that are not a power of 2, since the loop + // vectorizer can only perform loop versioning with pointer alignment + // checks for vector loads that are power-of-2 in size. 
+ if (EltSize != Step->getAPInt() || !EltSize.isPowerOf2()) + return false; + + LLVM_DEBUG(dbgs() << "LV: SCEV for Load Ptr: " << *Start << '\n'); + PotentiallyFaultingPtrs.push_back({Start, LI->getType()}); + } + return true; +} + bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() { BasicBlock *LatchBB = TheLoop->getLoopLatch(); if (!LatchBB) { @@ -1706,6 +1743,8 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() { } }; + Predicates.clear(); + SmallVector NonDerefLoads; for (auto *BB : TheLoop->blocks()) for (auto &I : *BB) { if (I.mayWriteToMemory()) { @@ -1715,30 +1754,52 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() { "Cannot vectorize early exit loop with writes to memory", "WritesInEarlyExitLoop", ORE, TheLoop); return false; - } else if (!IsSafeOperation(&I)) { + } else if (I.mayThrow() || !IsSafeOperation(&I)) { reportVectorizationFailure("Early exit loop contains operations that " "cannot be speculatively executed", "UnsafeOperationsEarlyExitLoop", ORE, TheLoop); return false; + } else if (I.mayReadFromMemory()) { + auto *LI = dyn_cast(&I); + bool UnsafeRead = false; + if (!LI) + UnsafeRead = true; + else if (!isDereferenceableAndAlignedInLoop(LI, TheLoop, *PSE.getSE(), + *DT, AC, &Predicates)) { + if (LI->getParent() != TheLoop->getHeader()) + UnsafeRead = true; + else + NonDerefLoads.push_back(LI); + } + + if (UnsafeRead) { + reportVectorizationFailure( + "Loop may fault", + "Cannot vectorize potentially faulting early exit loop", + "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop); + return false; + } } } + if (!NonDerefLoads.empty()) { + if (!TTI->getMinPageSize() || + !analyzePotentiallyFaultingLoads(&NonDerefLoads)) { + PotentiallyFaultingPtrs.clear(); + reportVectorizationFailure( + "Loop may fault", + "Cannot vectorize potentially faulting early exit loop", + "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop); + return false; + } + LLVM_DEBUG(dbgs() << "We can vectorize the loop with runtime checks.\n"); + } + // The vectoriser cannot handle loads that occur after the early exit block. assert(LatchBB->getUniquePredecessor() == SingleUncountableEdge->first && "Expected latch predecessor to be the early exiting block"); - // TODO: Handle loops that may fault. - Predicates.clear(); - if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC, - &Predicates)) { - reportVectorizationFailure( - "Loop may fault", - "Cannot vectorize potentially faulting early exit loop", - "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop); - return false; - } - [[maybe_unused]] const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount(); // Since we have an exact exit count for the latch and the early exit diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index e8a5db28ea0a4..fd38fb4e0a42f 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -401,6 +401,12 @@ static cl::opt EnableEarlyExitVectorization( cl::desc( "Enable vectorization of early exit loops with uncountable exits.")); +static cl::opt MaxNumPotentiallyFaultingPointers( + "max-num-faulting-pointers", cl::init(0), cl::Hidden, + cl::desc( + "The maximum number of potentially faulting pointers we permit when " + "vectorizing loops with uncountable exits.")); + // Likelyhood of bypassing the vectorized loop because assumptions about SCEV // variables not overflowing do not hold. See `emitSCEVChecks`. 
static constexpr uint32_t SCEVCheckBypassWeights[] = {1, 127}; @@ -2163,6 +2169,27 @@ class GeneratedRTChecks { }; } // namespace +static void addPointerAlignmentChecks( + const SmallVectorImpl> *Ptrs, Function *F, + PredicatedScalarEvolution &PSE, TargetTransformInfo *TTI, ElementCount VF, + unsigned IC) { + ScalarEvolution *SE = PSE.getSE(); + const DataLayout &DL = SE->getDataLayout(); + + for (auto Ptr : *Ptrs) { + Type *PtrIntType = DL.getIntPtrType(Ptr.first->getType()); + APInt EltSize(PtrIntType->getScalarSizeInBits(), + DL.getTypeStoreSize(Ptr.second).getFixedValue()); + const SCEV *Start = SE->getPtrToIntExpr(Ptr.first, PtrIntType); + const SCEV *ScevEC = SE->getElementCount(PtrIntType, VF * IC); + const SCEV *Align = + SE->getMulExpr(ScevEC, SE->getConstant(EltSize), + (SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW)); + const SCEV *Rem = SE->getURemExpr(Start, Align); + PSE.addPredicate(*(SE->getEqualPredicate(Rem, SE->getZero(PtrIntType)))); + } +} + static bool useActiveLaneMask(TailFoldingStyle Style) { return Style == TailFoldingStyle::Data || Style == TailFoldingStyle::DataAndControlFlow || @@ -3842,6 +3869,15 @@ bool LoopVectorizationCostModel::isScalableVectorizationAllowed() { return false; } + if (Legal->hasUncountableEarlyExit() && + Legal->getNumPotentiallyFaultingPointers() && + !TTI.isVScaleKnownToBeAPowerOfTwo()) { + reportVectorizationInfo("Cannot vectorize potentially faulting early exit " + "loop with scalable vectors.", + "ScalableVFUnfeasible", ORE, TheLoop); + return false; + } + IsScalableVectorizationAllowed = true; return true; } @@ -10508,11 +10544,25 @@ bool LoopVectorizePass::processLoop(Loop *L) { return false; } - if (LVL.hasUncountableEarlyExit() && !EnableEarlyExitVectorization) { - reportVectorizationFailure("Auto-vectorization of loops with uncountable " - "early exit is not enabled", - "UncountableEarlyExitLoopsDisabled", ORE, L); - return false; + if (LVL.hasUncountableEarlyExit()) { + if (!EnableEarlyExitVectorization) { + reportVectorizationFailure("Auto-vectorization of loops with uncountable " + "early exit is not enabled", + "UncountableEarlyExitLoopsDisabled", ORE, L); + return false; + } + + unsigned NumPotentiallyFaultingPointers = + LVL.getNumPotentiallyFaultingPointers(); + if (NumPotentiallyFaultingPointers > MaxNumPotentiallyFaultingPointers) { + reportVectorizationFailure("Not worth vectorizing loop with uncountable " + "early exit, due to number of potentially " + "faulting loads", + "UncountableEarlyExitMayFault", ORE, L); + return false; + } else if (NumPotentiallyFaultingPointers) + LLVM_DEBUG(dbgs() << "LV: Need to version early-exit vector loop with " + << "pointer alignment checks.\n"); } // Entrance to the VPlan-native vectorization path. Outer loops are processed @@ -10663,8 +10713,16 @@ bool LoopVectorizePass::processLoop(Loop *L) { unsigned SelectedIC = std::max(IC, UserIC); // Optimistically generate runtime checks if they are needed. Drop them if // they turn out to not be profitable. - if (VF.Width.isVector() || SelectedIC > 1) + if (VF.Width.isVector() || SelectedIC > 1) { + if (LVL.getNumPotentiallyFaultingPointers()) { + assert(!CM.foldTailWithEVL() && + "Explicit vector length unsupported for early exit loops and " + "potentially faulting loads"); + addPointerAlignmentChecks(LVL.getPotentiallyFaultingPointers(), F, PSE, + TTI, VF.Width, SelectedIC); + } Checks.create(L, *LVL.getLAI(), PSE.getPredicate(), VF.Width, SelectedIC); + } // Check if it is profitable to vectorize with runtime checks. 
bool ForceVectorization = diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/single_early_exit_unsafe_ptrs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/single_early_exit_unsafe_ptrs.ll index 4f7b120643763..2163df92c83ad 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/single_early_exit_unsafe_ptrs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/single_early_exit_unsafe_ptrs.ll @@ -1,5 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 5 -; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization | FileCheck %s +; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization -max-num-faulting-pointers=1 \ +; RUN: | FileCheck %s --check-prefixes=CHECK,MAX1 +; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization -max-num-faulting-pointers=2 \ +; RUN: | FileCheck %s --check-prefixes=CHECK,MAX2 target triple = "aarch64-unknown-linux-gnu" @@ -10,19 +13,68 @@ define i64 @same_exit_block_no_live_outs_one_faulting_ptr(ptr %p1) #0 { ; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr( ; CHECK-SAME: ptr [[P1:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[P11:%.*]] = ptrtoint ptr [[P1]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 16 +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]]) +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 64, [[TMP2]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] +; CHECK: [[VECTOR_SCEVCHECK]]: +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[P11]], 3 +; CHECK-NEXT: [[TMP4:%.*]] = add nuw i64 [[P11]], 3 +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 4 +; CHECK-NEXT: [[TMP7:%.*]] = udiv i64 [[TMP4]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], [[TMP5]] +; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[TMP8]], 4 +; CHECK-NEXT: [[TMP10:%.*]] = sub i64 [[TMP3]], [[TMP9]] +; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP10]], 0 +; CHECK-NEXT: br i1 [[IDENT_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 16 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP12]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 16 +; CHECK-NEXT: [[TMP15:%.*]] = add i64 3, [[N_VEC]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX2]] +; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP17]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP18]], align 1 +; CHECK-NEXT: [[TMP19:%.*]] = icmp eq [[WIDE_LOAD]], splat (i8 3) +; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX2]], [[TMP14]] +; CHECK-NEXT: [[TMP20:%.*]] = xor [[TMP19]], splat (i1 true) +; CHECK-NEXT: [[TMP21:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1( [[TMP20]]) +; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT3]], [[N_VEC]] +; CHECK-NEXT: [[TMP23:%.*]] = or i1 
[[TMP21]], [[TMP22]] +; CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_SPLIT]]: +; CHECK-NEXT: br i1 [[TMP21]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[LOOP_END:.*]], label %[[SCALAR_PH]] +; CHECK: [[VECTOR_EARLY_EXIT]]: +; CHECK-NEXT: br label %[[LOOP_END]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP15]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ], [ 3, %[[VECTOR_SCEVCHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ] -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX1]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3 -; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]] +; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END]] ; CHECK: [[LOOP_INC]]: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX1]], 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]] +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[LOOP_END]]: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, %[[LOOP]] ], [ 0, %[[LOOP_INC]] ] +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, %[[LOOP]] ], [ 0, %[[LOOP_INC]] ], [ 0, %[[MIDDLE_BLOCK]] ], [ 1, %[[VECTOR_EARLY_EXIT]] ] ; CHECK-NEXT: ret i64 [[RETVAL]] ; entry: @@ -47,29 +99,121 @@ loop.end: define i64 @same_exit_block_pre_inc_use1_too_small_allocas() #0 { -; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_allocas( -; CHECK-SAME: ) #[[ATTR0]] { -; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: [[P1:%.*]] = alloca [42 x i8], align 4 -; CHECK-NEXT: [[P2:%.*]] = alloca [42 x i8], align 4 -; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) -; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]] -; CHECK: [[LOOP_INC]]: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]] -; CHECK: [[LOOP_END]]: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], %[[LOOP]] ], [ 67, %[[LOOP_INC]] ] -; CHECK-NEXT: ret i64 [[RETVAL]] +; MAX1-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_allocas( +; 
MAX1-SAME: ) #[[ATTR0]] { +; MAX1-NEXT: [[ENTRY:.*]]: +; MAX1-NEXT: [[P1:%.*]] = alloca [42 x i8], align 4 +; MAX1-NEXT: [[P2:%.*]] = alloca [42 x i8], align 4 +; MAX1-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; MAX1-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; MAX1-NEXT: br label %[[LOOP:.*]] +; MAX1: [[LOOP]]: +; MAX1-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ] +; MAX1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; MAX1-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; MAX1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; MAX1-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; MAX1-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; MAX1-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]] +; MAX1: [[LOOP_INC]]: +; MAX1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; MAX1-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; MAX1-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]] +; MAX1: [[LOOP_END]]: +; MAX1-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], %[[LOOP]] ], [ 67, %[[LOOP_INC]] ] +; MAX1-NEXT: ret i64 [[RETVAL]] +; +; MAX2-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_allocas( +; MAX2-SAME: ) #[[ATTR0]] { +; MAX2-NEXT: [[ENTRY:.*]]: +; MAX2-NEXT: [[P1:%.*]] = alloca [42 x i8], align 4 +; MAX2-NEXT: [[P11:%.*]] = ptrtoint ptr [[P1]] to i64 +; MAX2-NEXT: [[P2:%.*]] = alloca [42 x i8], align 4 +; MAX2-NEXT: [[P22:%.*]] = ptrtoint ptr [[P2]] to i64 +; MAX2-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; MAX2-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; MAX2-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; MAX2-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 16 +; MAX2-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]]) +; MAX2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 64, [[TMP2]] +; MAX2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] +; MAX2: [[VECTOR_SCEVCHECK]]: +; MAX2-NEXT: [[TMP3:%.*]] = add i64 [[P11]], 3 +; MAX2-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[P11]], 3 +; MAX2-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; MAX2-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 4 +; MAX2-NEXT: [[TMP7:%.*]] = udiv i64 [[TMP4]], [[TMP6]] +; MAX2-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], [[TMP5]] +; MAX2-NEXT: [[TMP9:%.*]] = shl i64 [[TMP8]], 4 +; MAX2-NEXT: [[TMP10:%.*]] = sub i64 [[TMP3]], [[TMP9]] +; MAX2-NEXT: [[TMP11:%.*]] = add i64 [[P22]], 3 +; MAX2-NEXT: [[TMP12:%.*]] = add nuw nsw i64 [[P22]], 3 +; MAX2-NEXT: [[TMP13:%.*]] = udiv i64 [[TMP12]], [[TMP6]] +; MAX2-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], [[TMP5]] +; MAX2-NEXT: [[TMP15:%.*]] = shl i64 [[TMP14]], 4 +; MAX2-NEXT: [[TMP16:%.*]] = sub i64 [[TMP11]], [[TMP15]] +; MAX2-NEXT: br i1 true, label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; MAX2: [[VECTOR_PH]]: +; MAX2-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() +; MAX2-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 16 +; MAX2-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP18]] +; MAX2-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]] +; MAX2-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64() +; MAX2-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 16 +; MAX2-NEXT: [[TMP21:%.*]] = add i64 3, [[N_VEC]] +; MAX2-NEXT: [[TMP22:%.*]] = call @llvm.stepvector.nxv16i64() +; MAX2-NEXT: [[TMP23:%.*]] = mul [[TMP22]], splat (i64 1) +; MAX2-NEXT: [[INDUCTION:%.*]] = add splat (i64 3), [[TMP23]] +; MAX2-NEXT: [[TMP24:%.*]] = mul i64 1, [[TMP20]] +; MAX2-NEXT: 
[[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP24]], i64 0 +; MAX2-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; MAX2-NEXT: br label %[[VECTOR_BODY:.*]] +; MAX2: [[VECTOR_BODY]]: +; MAX2-NEXT: [[INDEX3:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT5:%.*]], %[[VECTOR_BODY]] ] +; MAX2-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; MAX2-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX3]] +; MAX2-NEXT: [[TMP25:%.*]] = add i64 [[OFFSET_IDX]], 0 +; MAX2-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP25]] +; MAX2-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[TMP26]], i32 0 +; MAX2-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP27]], align 1 +; MAX2-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[TMP25]] +; MAX2-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[TMP28]], i32 0 +; MAX2-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP29]], align 1 +; MAX2-NEXT: [[TMP30:%.*]] = icmp eq [[WIDE_LOAD]], [[WIDE_LOAD4]] +; MAX2-NEXT: [[INDEX_NEXT5]] = add nuw i64 [[INDEX3]], [[TMP20]] +; MAX2-NEXT: [[TMP31:%.*]] = xor [[TMP30]], splat (i1 true) +; MAX2-NEXT: [[TMP32:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1( [[TMP31]]) +; MAX2-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT5]], [[N_VEC]] +; MAX2-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; MAX2-NEXT: [[TMP34:%.*]] = or i1 [[TMP32]], [[TMP33]] +; MAX2-NEXT: br i1 [[TMP34]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; MAX2: [[MIDDLE_SPLIT]]: +; MAX2-NEXT: br i1 [[TMP32]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] +; MAX2: [[MIDDLE_BLOCK]]: +; MAX2-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, [[N_VEC]] +; MAX2-NEXT: br i1 [[CMP_N]], label %[[LOOP_END:.*]], label %[[SCALAR_PH]] +; MAX2: [[VECTOR_EARLY_EXIT]]: +; MAX2-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP31]], i1 true) +; MAX2-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]] +; MAX2-NEXT: br label %[[LOOP_END]] +; MAX2: [[SCALAR_PH]]: +; MAX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP21]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ], [ 3, %[[VECTOR_SCEVCHECK]] ] +; MAX2-NEXT: br label %[[LOOP:.*]] +; MAX2: [[LOOP]]: +; MAX2-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; MAX2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; MAX2-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; MAX2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; MAX2-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; MAX2-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; MAX2-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END]] +; MAX2: [[LOOP_INC]]: +; MAX2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; MAX2-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; MAX2-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]], !llvm.loop [[LOOP5:![0-9]+]] +; MAX2: [[LOOP_END]]: +; MAX2-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], %[[LOOP]] ], [ 67, %[[LOOP_INC]] ], [ 67, %[[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], %[[VECTOR_EARLY_EXIT]] ] +; MAX2-NEXT: ret i64 [[RETVAL]] ; entry: %p1 = alloca [42 x i8] @@ -99,25 +243,116 @@ loop.end: define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs(ptr dereferenceable(42) %p1, ptr dereferenceable(42) %p2) #0 { -; 
CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs( -; CHECK-SAME: ptr dereferenceable(42) [[P1:%.*]], ptr dereferenceable(42) [[P2:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]] -; CHECK: [[LOOP_INC]]: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]] -; CHECK: [[LOOP_END]]: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], %[[LOOP]] ], [ 67, %[[LOOP_INC]] ] -; CHECK-NEXT: ret i64 [[RETVAL]] +; MAX1-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs( +; MAX1-SAME: ptr dereferenceable(42) [[P1:%.*]], ptr dereferenceable(42) [[P2:%.*]]) #[[ATTR0]] { +; MAX1-NEXT: [[ENTRY:.*]]: +; MAX1-NEXT: br label %[[LOOP:.*]] +; MAX1: [[LOOP]]: +; MAX1-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ] +; MAX1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; MAX1-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; MAX1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; MAX1-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; MAX1-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; MAX1-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]] +; MAX1: [[LOOP_INC]]: +; MAX1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; MAX1-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; MAX1-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]] +; MAX1: [[LOOP_END]]: +; MAX1-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], %[[LOOP]] ], [ 67, %[[LOOP_INC]] ] +; MAX1-NEXT: ret i64 [[RETVAL]] +; +; MAX2-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs( +; MAX2-SAME: ptr dereferenceable(42) [[P1:%.*]], ptr dereferenceable(42) [[P2:%.*]]) #[[ATTR0]] { +; MAX2-NEXT: [[ENTRY:.*]]: +; MAX2-NEXT: [[P22:%.*]] = ptrtoint ptr [[P2]] to i64 +; MAX2-NEXT: [[P11:%.*]] = ptrtoint ptr [[P1]] to i64 +; MAX2-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; MAX2-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 16 +; MAX2-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]]) +; MAX2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 64, [[TMP2]] +; MAX2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] +; MAX2: [[VECTOR_SCEVCHECK]]: +; MAX2-NEXT: [[TMP3:%.*]] = add i64 [[P11]], 3 +; MAX2-NEXT: [[TMP4:%.*]] = add nuw i64 [[P11]], 3 +; MAX2-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; MAX2-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 4 +; MAX2-NEXT: [[TMP7:%.*]] = udiv i64 [[TMP4]], [[TMP6]] +; MAX2-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], [[TMP5]] +; MAX2-NEXT: [[TMP9:%.*]] = shl i64 [[TMP8]], 4 +; MAX2-NEXT: [[TMP10:%.*]] = sub i64 [[TMP3]], [[TMP9]] +; MAX2-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP10]], 0 +; MAX2-NEXT: [[TMP11:%.*]] = add i64 [[P22]], 3 +; MAX2-NEXT: 
[[TMP12:%.*]] = add nuw i64 [[P22]], 3 +; MAX2-NEXT: [[TMP13:%.*]] = udiv i64 [[TMP12]], [[TMP6]] +; MAX2-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], [[TMP5]] +; MAX2-NEXT: [[TMP15:%.*]] = shl i64 [[TMP14]], 4 +; MAX2-NEXT: [[TMP16:%.*]] = sub i64 [[TMP11]], [[TMP15]] +; MAX2-NEXT: [[IDENT_CHECK3:%.*]] = icmp ne i64 [[TMP16]], 0 +; MAX2-NEXT: [[TMP17:%.*]] = or i1 [[IDENT_CHECK]], [[IDENT_CHECK3]] +; MAX2-NEXT: br i1 [[TMP17]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; MAX2: [[VECTOR_PH]]: +; MAX2-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; MAX2-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 16 +; MAX2-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP19]] +; MAX2-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]] +; MAX2-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() +; MAX2-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 16 +; MAX2-NEXT: [[TMP22:%.*]] = add i64 3, [[N_VEC]] +; MAX2-NEXT: [[TMP23:%.*]] = call @llvm.stepvector.nxv16i64() +; MAX2-NEXT: [[TMP24:%.*]] = mul [[TMP23]], splat (i64 1) +; MAX2-NEXT: [[INDUCTION:%.*]] = add splat (i64 3), [[TMP24]] +; MAX2-NEXT: [[TMP25:%.*]] = mul i64 1, [[TMP21]] +; MAX2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP25]], i64 0 +; MAX2-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; MAX2-NEXT: br label %[[VECTOR_BODY:.*]] +; MAX2: [[VECTOR_BODY]]: +; MAX2-NEXT: [[INDEX4:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT6:%.*]], %[[VECTOR_BODY]] ] +; MAX2-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; MAX2-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX4]] +; MAX2-NEXT: [[TMP26:%.*]] = add i64 [[OFFSET_IDX]], 0 +; MAX2-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP26]] +; MAX2-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i32 0 +; MAX2-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP28]], align 1 +; MAX2-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[TMP26]] +; MAX2-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[TMP29]], i32 0 +; MAX2-NEXT: [[WIDE_LOAD5:%.*]] = load , ptr [[TMP30]], align 1 +; MAX2-NEXT: [[TMP31:%.*]] = icmp eq [[WIDE_LOAD]], [[WIDE_LOAD5]] +; MAX2-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], [[TMP21]] +; MAX2-NEXT: [[TMP32:%.*]] = xor [[TMP31]], splat (i1 true) +; MAX2-NEXT: [[TMP33:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1( [[TMP32]]) +; MAX2-NEXT: [[TMP34:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC]] +; MAX2-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; MAX2-NEXT: [[TMP35:%.*]] = or i1 [[TMP33]], [[TMP34]] +; MAX2-NEXT: br i1 [[TMP35]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; MAX2: [[MIDDLE_SPLIT]]: +; MAX2-NEXT: br i1 [[TMP33]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] +; MAX2: [[MIDDLE_BLOCK]]: +; MAX2-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, [[N_VEC]] +; MAX2-NEXT: br i1 [[CMP_N]], label %[[LOOP_END:.*]], label %[[SCALAR_PH]] +; MAX2: [[VECTOR_EARLY_EXIT]]: +; MAX2-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP32]], i1 true) +; MAX2-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]] +; MAX2-NEXT: br label %[[LOOP_END]] +; MAX2: [[SCALAR_PH]]: +; MAX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP22]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ], [ 3, %[[VECTOR_SCEVCHECK]] ] +; MAX2-NEXT: br label %[[LOOP:.*]] +; MAX2: [[LOOP]]: +; MAX2-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], 
%[[LOOP_INC:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; MAX2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; MAX2-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; MAX2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; MAX2-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; MAX2-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; MAX2-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END]] +; MAX2: [[LOOP_INC]]: +; MAX2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; MAX2-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; MAX2-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]], !llvm.loop [[LOOP7:![0-9]+]] +; MAX2: [[LOOP_END]]: +; MAX2-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], %[[LOOP]] ], [ 67, %[[LOOP_INC]] ], [ 67, %[[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], %[[VECTOR_EARLY_EXIT]] ] +; MAX2-NEXT: ret i64 [[RETVAL]] ; entry: br label %loop @@ -143,25 +378,116 @@ loop.end: define i64 @same_exit_block_pre_inc_use1_unknown_ptrs(ptr %p1, ptr %p2) #0 { -; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_unknown_ptrs( -; CHECK-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]] -; CHECK: [[LOOP_INC]]: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]] -; CHECK: [[LOOP_END]]: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], %[[LOOP]] ], [ 67, %[[LOOP_INC]] ] -; CHECK-NEXT: ret i64 [[RETVAL]] +; MAX1-LABEL: define i64 @same_exit_block_pre_inc_use1_unknown_ptrs( +; MAX1-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] { +; MAX1-NEXT: [[ENTRY:.*]]: +; MAX1-NEXT: br label %[[LOOP:.*]] +; MAX1: [[LOOP]]: +; MAX1-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ] +; MAX1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; MAX1-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; MAX1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; MAX1-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; MAX1-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; MAX1-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]] +; MAX1: [[LOOP_INC]]: +; MAX1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; MAX1-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; MAX1-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]] +; MAX1: [[LOOP_END]]: +; MAX1-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], %[[LOOP]] ], [ 67, %[[LOOP_INC]] ] +; MAX1-NEXT: ret i64 [[RETVAL]] +; +; MAX2-LABEL: define i64 @same_exit_block_pre_inc_use1_unknown_ptrs( +; MAX2-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] { +; MAX2-NEXT: [[ENTRY:.*]]: +; MAX2-NEXT: [[P22:%.*]] = ptrtoint ptr [[P2]] to i64 +; MAX2-NEXT: [[P11:%.*]] = ptrtoint ptr [[P1]] to 
i64 +; MAX2-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; MAX2-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 16 +; MAX2-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]]) +; MAX2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 64, [[TMP2]] +; MAX2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] +; MAX2: [[VECTOR_SCEVCHECK]]: +; MAX2-NEXT: [[TMP3:%.*]] = add i64 [[P11]], 3 +; MAX2-NEXT: [[TMP4:%.*]] = add nuw i64 [[P11]], 3 +; MAX2-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; MAX2-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 4 +; MAX2-NEXT: [[TMP7:%.*]] = udiv i64 [[TMP4]], [[TMP6]] +; MAX2-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], [[TMP5]] +; MAX2-NEXT: [[TMP9:%.*]] = shl i64 [[TMP8]], 4 +; MAX2-NEXT: [[TMP10:%.*]] = sub i64 [[TMP3]], [[TMP9]] +; MAX2-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP10]], 0 +; MAX2-NEXT: [[TMP11:%.*]] = add i64 [[P22]], 3 +; MAX2-NEXT: [[TMP12:%.*]] = add nuw i64 [[P22]], 3 +; MAX2-NEXT: [[TMP13:%.*]] = udiv i64 [[TMP12]], [[TMP6]] +; MAX2-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], [[TMP5]] +; MAX2-NEXT: [[TMP15:%.*]] = shl i64 [[TMP14]], 4 +; MAX2-NEXT: [[TMP16:%.*]] = sub i64 [[TMP11]], [[TMP15]] +; MAX2-NEXT: [[IDENT_CHECK3:%.*]] = icmp ne i64 [[TMP16]], 0 +; MAX2-NEXT: [[TMP17:%.*]] = or i1 [[IDENT_CHECK]], [[IDENT_CHECK3]] +; MAX2-NEXT: br i1 [[TMP17]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; MAX2: [[VECTOR_PH]]: +; MAX2-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; MAX2-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 16 +; MAX2-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP19]] +; MAX2-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]] +; MAX2-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() +; MAX2-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 16 +; MAX2-NEXT: [[TMP22:%.*]] = add i64 3, [[N_VEC]] +; MAX2-NEXT: [[TMP23:%.*]] = call @llvm.stepvector.nxv16i64() +; MAX2-NEXT: [[TMP24:%.*]] = mul [[TMP23]], splat (i64 1) +; MAX2-NEXT: [[INDUCTION:%.*]] = add splat (i64 3), [[TMP24]] +; MAX2-NEXT: [[TMP25:%.*]] = mul i64 1, [[TMP21]] +; MAX2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP25]], i64 0 +; MAX2-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; MAX2-NEXT: br label %[[VECTOR_BODY:.*]] +; MAX2: [[VECTOR_BODY]]: +; MAX2-NEXT: [[INDEX4:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT6:%.*]], %[[VECTOR_BODY]] ] +; MAX2-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; MAX2-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX4]] +; MAX2-NEXT: [[TMP26:%.*]] = add i64 [[OFFSET_IDX]], 0 +; MAX2-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP26]] +; MAX2-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i32 0 +; MAX2-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP28]], align 1 +; MAX2-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[TMP26]] +; MAX2-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[TMP29]], i32 0 +; MAX2-NEXT: [[WIDE_LOAD5:%.*]] = load , ptr [[TMP30]], align 1 +; MAX2-NEXT: [[TMP31:%.*]] = icmp eq [[WIDE_LOAD]], [[WIDE_LOAD5]] +; MAX2-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], [[TMP21]] +; MAX2-NEXT: [[TMP32:%.*]] = xor [[TMP31]], splat (i1 true) +; MAX2-NEXT: [[TMP33:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1( [[TMP32]]) +; MAX2-NEXT: [[TMP34:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC]] +; MAX2-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; MAX2-NEXT: [[TMP35:%.*]] = or i1 [[TMP33]], [[TMP34]] +; 
MAX2-NEXT: br i1 [[TMP35]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; MAX2: [[MIDDLE_SPLIT]]: +; MAX2-NEXT: br i1 [[TMP33]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] +; MAX2: [[MIDDLE_BLOCK]]: +; MAX2-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, [[N_VEC]] +; MAX2-NEXT: br i1 [[CMP_N]], label %[[LOOP_END:.*]], label %[[SCALAR_PH]] +; MAX2: [[VECTOR_EARLY_EXIT]]: +; MAX2-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP32]], i1 true) +; MAX2-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]] +; MAX2-NEXT: br label %[[LOOP_END]] +; MAX2: [[SCALAR_PH]]: +; MAX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP22]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ], [ 3, %[[VECTOR_SCEVCHECK]] ] +; MAX2-NEXT: br label %[[LOOP:.*]] +; MAX2: [[LOOP]]: +; MAX2-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; MAX2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; MAX2-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; MAX2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; MAX2-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; MAX2-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; MAX2-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END]] +; MAX2: [[LOOP_INC]]: +; MAX2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; MAX2-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; MAX2-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]], !llvm.loop [[LOOP9:![0-9]+]] +; MAX2: [[LOOP_END]]: +; MAX2-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], %[[LOOP]] ], [ 67, %[[LOOP_INC]] ], [ 67, %[[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], %[[VECTOR_EARLY_EXIT]] ] +; MAX2-NEXT: ret i64 [[RETVAL]] ; entry: br label %loop @@ -271,23 +597,105 @@ loop.end: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_no_vscale_range(ptr %p1, i64 %stride) #1 { -; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_no_vscale_range( -; CHECK-SAME: ptr [[P1:%.*]], i64 [[STRIDE:%.*]]) #[[ATTR1:[0-9]+]] { -; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3 -; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]] -; CHECK: [[LOOP_INC]]: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]] -; CHECK: [[LOOP_END]]: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, %[[LOOP]] ], [ 0, %[[LOOP_INC]] ] -; CHECK-NEXT: ret i64 [[RETVAL]] +; MAX1-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_no_vscale_range( +; MAX1-SAME: ptr [[P1:%.*]], i64 [[STRIDE:%.*]]) #[[ATTR1:[0-9]+]] { +; MAX1-NEXT: [[ENTRY:.*]]: +; MAX1-NEXT: [[P11:%.*]] = ptrtoint ptr [[P1]] to i64 +; MAX1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] +; MAX1: [[VECTOR_SCEVCHECK]]: +; MAX1-NEXT: [[TMP0:%.*]] = trunc i64 [[P11]] to i4 +; MAX1-NEXT: [[TMP1:%.*]] = add i4 [[TMP0]], 3 +; MAX1-NEXT: [[TMP2:%.*]] = zext i4 [[TMP1]] to i64 +; MAX1-NEXT: [[IDENT_CHECK:%.*]] 
= icmp ne i64 [[TMP2]], 0 +; MAX1-NEXT: br i1 [[IDENT_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; MAX1: [[VECTOR_PH]]: +; MAX1-NEXT: br label %[[VECTOR_BODY:.*]] +; MAX1: [[VECTOR_BODY]]: +; MAX1-NEXT: [[INDEX2:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], %[[VECTOR_BODY]] ] +; MAX1-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX2]] +; MAX1-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0 +; MAX1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP3]] +; MAX1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0 +; MAX1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 +; MAX1-NEXT: [[TMP6:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], splat (i8 3) +; MAX1-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX2]], 16 +; MAX1-NEXT: [[TMP7:%.*]] = xor <16 x i1> [[TMP6]], splat (i1 true) +; MAX1-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP7]]) +; MAX1-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 +; MAX1-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]] +; MAX1-NEXT: br i1 [[TMP10]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; MAX1: [[MIDDLE_SPLIT]]: +; MAX1-NEXT: br i1 [[TMP8]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] +; MAX1: [[MIDDLE_BLOCK]]: +; MAX1-NEXT: br i1 true, label %[[LOOP_END:.*]], label %[[SCALAR_PH]] +; MAX1: [[VECTOR_EARLY_EXIT]]: +; MAX1-NEXT: br label %[[LOOP_END]] +; MAX1: [[SCALAR_PH]]: +; MAX1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ], [ 3, %[[VECTOR_SCEVCHECK]] ] +; MAX1-NEXT: br label %[[LOOP:.*]] +; MAX1: [[LOOP]]: +; MAX1-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; MAX1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; MAX1-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; MAX1-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3 +; MAX1-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END]] +; MAX1: [[LOOP_INC]]: +; MAX1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; MAX1-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; MAX1-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]], !llvm.loop [[LOOP5:![0-9]+]] +; MAX1: [[LOOP_END]]: +; MAX1-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, %[[LOOP]] ], [ 0, %[[LOOP_INC]] ], [ 0, %[[MIDDLE_BLOCK]] ], [ 1, %[[VECTOR_EARLY_EXIT]] ] +; MAX1-NEXT: ret i64 [[RETVAL]] +; +; MAX2-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_no_vscale_range( +; MAX2-SAME: ptr [[P1:%.*]], i64 [[STRIDE:%.*]]) #[[ATTR1:[0-9]+]] { +; MAX2-NEXT: [[ENTRY:.*]]: +; MAX2-NEXT: [[P11:%.*]] = ptrtoint ptr [[P1]] to i64 +; MAX2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] +; MAX2: [[VECTOR_SCEVCHECK]]: +; MAX2-NEXT: [[TMP0:%.*]] = trunc i64 [[P11]] to i4 +; MAX2-NEXT: [[TMP1:%.*]] = add i4 [[TMP0]], 3 +; MAX2-NEXT: [[TMP2:%.*]] = zext i4 [[TMP1]] to i64 +; MAX2-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP2]], 0 +; MAX2-NEXT: br i1 [[IDENT_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; MAX2: [[VECTOR_PH]]: +; MAX2-NEXT: br label %[[VECTOR_BODY:.*]] +; MAX2: [[VECTOR_BODY]]: +; MAX2-NEXT: [[INDEX2:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], %[[VECTOR_BODY]] ] +; MAX2-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX2]] +; MAX2-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0 +; MAX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP3]] +; MAX2-NEXT: [[TMP5:%.*]] = 
getelementptr inbounds i8, ptr [[TMP4]], i32 0 +; MAX2-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 +; MAX2-NEXT: [[TMP6:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], splat (i8 3) +; MAX2-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX2]], 16 +; MAX2-NEXT: [[TMP7:%.*]] = xor <16 x i1> [[TMP6]], splat (i1 true) +; MAX2-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP7]]) +; MAX2-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 +; MAX2-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]] +; MAX2-NEXT: br i1 [[TMP10]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; MAX2: [[MIDDLE_SPLIT]]: +; MAX2-NEXT: br i1 [[TMP8]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] +; MAX2: [[MIDDLE_BLOCK]]: +; MAX2-NEXT: br i1 true, label %[[LOOP_END:.*]], label %[[SCALAR_PH]] +; MAX2: [[VECTOR_EARLY_EXIT]]: +; MAX2-NEXT: br label %[[LOOP_END]] +; MAX2: [[SCALAR_PH]]: +; MAX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ], [ 3, %[[VECTOR_SCEVCHECK]] ] +; MAX2-NEXT: br label %[[LOOP:.*]] +; MAX2: [[LOOP]]: +; MAX2-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; MAX2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; MAX2-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; MAX2-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3 +; MAX2-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END]] +; MAX2: [[LOOP_INC]]: +; MAX2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; MAX2-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; MAX2-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]], !llvm.loop [[LOOP11:![0-9]+]] +; MAX2: [[LOOP_END]]: +; MAX2-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, %[[LOOP]] ], [ 0, %[[LOOP_INC]] ], [ 0, %[[MIDDLE_BLOCK]] ], [ 1, %[[VECTOR_EARLY_EXIT]] ] +; MAX2-NEXT: ret i64 [[RETVAL]] ; entry: br label %loop @@ -351,25 +759,106 @@ loop.end: define i64 @same_exit_block_no_live_outs_two_faulting_ptrs(ptr %p1, ptr %p2) #0 { -; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_two_faulting_ptrs( -; CHECK-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]] -; CHECK: [[LOOP_INC]]: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]] -; CHECK: [[LOOP_END]]: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, %[[LOOP]] ], [ 0, %[[LOOP_INC]] ] -; CHECK-NEXT: ret i64 [[RETVAL]] +; MAX1-LABEL: define i64 @same_exit_block_no_live_outs_two_faulting_ptrs( +; MAX1-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] { +; MAX1-NEXT: [[ENTRY:.*]]: +; MAX1-NEXT: br label %[[LOOP:.*]] +; MAX1: [[LOOP]]: +; MAX1-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ] +; MAX1-NEXT: [[ARRAYIDX:%.*]] = getelementptr 
inbounds i8, ptr [[P1]], i64 [[INDEX]] +; MAX1-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; MAX1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; MAX1-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 +; MAX1-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; MAX1-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]] +; MAX1: [[LOOP_INC]]: +; MAX1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; MAX1-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; MAX1-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]] +; MAX1: [[LOOP_END]]: +; MAX1-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, %[[LOOP]] ], [ 0, %[[LOOP_INC]] ] +; MAX1-NEXT: ret i64 [[RETVAL]] +; +; MAX2-LABEL: define i64 @same_exit_block_no_live_outs_two_faulting_ptrs( +; MAX2-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] { +; MAX2-NEXT: [[ENTRY:.*]]: +; MAX2-NEXT: [[P22:%.*]] = ptrtoint ptr [[P2]] to i64 +; MAX2-NEXT: [[P11:%.*]] = ptrtoint ptr [[P1]] to i64 +; MAX2-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; MAX2-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 16 +; MAX2-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]]) +; MAX2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 64, [[TMP2]] +; MAX2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] +; MAX2: [[VECTOR_SCEVCHECK]]: +; MAX2-NEXT: [[TMP3:%.*]] = add i64 [[P11]], 3 +; MAX2-NEXT: [[TMP4:%.*]] = add nuw i64 [[P11]], 3 +; MAX2-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; MAX2-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 4 +; MAX2-NEXT: [[TMP7:%.*]] = udiv i64 [[TMP4]], [[TMP6]] +; MAX2-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], [[TMP5]] +; MAX2-NEXT: [[TMP9:%.*]] = shl i64 [[TMP8]], 4 +; MAX2-NEXT: [[TMP10:%.*]] = sub i64 [[TMP3]], [[TMP9]] +; MAX2-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP10]], 0 +; MAX2-NEXT: [[TMP11:%.*]] = add i64 [[P22]], 3 +; MAX2-NEXT: [[TMP12:%.*]] = add nuw i64 [[P22]], 3 +; MAX2-NEXT: [[TMP13:%.*]] = udiv i64 [[TMP12]], [[TMP6]] +; MAX2-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], [[TMP5]] +; MAX2-NEXT: [[TMP15:%.*]] = shl i64 [[TMP14]], 4 +; MAX2-NEXT: [[TMP16:%.*]] = sub i64 [[TMP11]], [[TMP15]] +; MAX2-NEXT: [[IDENT_CHECK3:%.*]] = icmp ne i64 [[TMP16]], 0 +; MAX2-NEXT: [[TMP17:%.*]] = or i1 [[IDENT_CHECK]], [[IDENT_CHECK3]] +; MAX2-NEXT: br i1 [[TMP17]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; MAX2: [[VECTOR_PH]]: +; MAX2-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; MAX2-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 16 +; MAX2-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP19]] +; MAX2-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]] +; MAX2-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() +; MAX2-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 16 +; MAX2-NEXT: [[TMP22:%.*]] = add i64 3, [[N_VEC]] +; MAX2-NEXT: br label %[[VECTOR_BODY:.*]] +; MAX2: [[VECTOR_BODY]]: +; MAX2-NEXT: [[INDEX4:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT6:%.*]], %[[VECTOR_BODY]] ] +; MAX2-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX4]] +; MAX2-NEXT: [[TMP23:%.*]] = add i64 [[OFFSET_IDX]], 0 +; MAX2-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP23]] +; MAX2-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[TMP24]], i32 0 +; MAX2-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP25]], align 1 +; MAX2-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[TMP23]] +; MAX2-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[TMP26]], i32 0 +; MAX2-NEXT: [[WIDE_LOAD5:%.*]] = load , ptr [[TMP27]], align 1 
+; MAX2-NEXT: [[TMP28:%.*]] = icmp eq [[WIDE_LOAD]], [[WIDE_LOAD5]] +; MAX2-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], [[TMP21]] +; MAX2-NEXT: [[TMP29:%.*]] = xor [[TMP28]], splat (i1 true) +; MAX2-NEXT: [[TMP30:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1( [[TMP29]]) +; MAX2-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC]] +; MAX2-NEXT: [[TMP32:%.*]] = or i1 [[TMP30]], [[TMP31]] +; MAX2-NEXT: br i1 [[TMP32]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; MAX2: [[MIDDLE_SPLIT]]: +; MAX2-NEXT: br i1 [[TMP30]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] +; MAX2: [[MIDDLE_BLOCK]]: +; MAX2-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, [[N_VEC]] +; MAX2-NEXT: br i1 [[CMP_N]], label %[[LOOP_END:.*]], label %[[SCALAR_PH]] +; MAX2: [[VECTOR_EARLY_EXIT]]: +; MAX2-NEXT: br label %[[LOOP_END]] +; MAX2: [[SCALAR_PH]]: +; MAX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP22]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ], [ 3, %[[VECTOR_SCEVCHECK]] ] +; MAX2-NEXT: br label %[[LOOP:.*]] +; MAX2: [[LOOP]]: +; MAX2-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; MAX2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; MAX2-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; MAX2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; MAX2-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 +; MAX2-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; MAX2-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END]] +; MAX2: [[LOOP_INC]]: +; MAX2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; MAX2-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; MAX2-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]], !llvm.loop [[LOOP13:![0-9]+]] +; MAX2: [[LOOP_END]]: +; MAX2-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, %[[LOOP]] ], [ 0, %[[LOOP_INC]] ], [ 0, %[[MIDDLE_BLOCK]] ], [ 1, %[[VECTOR_EARLY_EXIT]] ] +; MAX2-NEXT: ret i64 [[RETVAL]] ; entry: br label %loop @@ -440,3 +929,26 @@ loop.end: attributes #0 = { "target-features"="+sve" vscale_range(1,16) } attributes #1 = { "target-features"="+sve" } +;. +; MAX1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; MAX1: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; MAX1: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; MAX1: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]} +; MAX1: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; MAX1: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]} +;. +; MAX2: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; MAX2: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; MAX2: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; MAX2: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]} +; MAX2: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; MAX2: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]} +; MAX2: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} +; MAX2: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]} +; MAX2: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} +; MAX2: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]]} +; MAX2: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]} +; MAX2: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]]} +; MAX2: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]} +; MAX2: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]]} +;. 
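The AArch64 RUN lines above exercise the new -max-num-faulting-pointers option with values of 1 and 2: the single-load loop vectorizes under both prefixes, while the two-pointer loops keep scalar MAX1 checks and only gain vector MAX2 checks. As a rough illustration of how such a threshold is typically wired up (a sketch only; the option name comes from the RUN lines, but the variable name, default value and description text here are assumptions, not this patch's code):

#include "llvm/Support/CommandLine.h"

// Hypothetical declaration of the threshold consulted by the legality check.
static llvm::cl::opt<unsigned> MaxNumPotentiallyFaultingPtrs(
    "max-num-faulting-pointers", llvm::cl::init(1), llvm::cl::Hidden,
    llvm::cl::desc("Maximum number of potentially faulting pointers allowed "
                   "when vectorizing a loop with an uncountable early exit"));

// Sketch of how the limit would gate the analysis (assumed, not verbatim):
// if (PotentiallyFaultingPtrs.size() > MaxNumPotentiallyFaultingPtrs)
//   return false;
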
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/single_early_exit_unsafe_ptrs.ll b/llvm/test/Transforms/LoopVectorize/RISCV/single_early_exit_unsafe_ptrs.ll index 929cc5b84d922..d2e3cc9cdb018 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/single_early_exit_unsafe_ptrs.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/single_early_exit_unsafe_ptrs.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 ; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization -passes=loop-vectorize \ -; RUN: -scalable-vectorization=on -mtriple riscv64-linux-gnu -mattr=+v,+f | FileCheck %s +; RUN: -scalable-vectorization=on -max-num-faulting-pointers=1 -mtriple riscv64-linux-gnu \ +; RUN: -mattr=+v,+f | FileCheck %s target triple = "riscv64" @@ -151,19 +152,51 @@ define i64 @same_exit_block_no_live_outs_one_faulting_ptr(ptr %p1) #0 { ; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr( ; CHECK-SAME: ptr [[P1:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[P11:%.*]] = ptrtoint ptr [[P1]] to i64 +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] +; CHECK: vector.scevcheck: +; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[P11]] to i5 +; CHECK-NEXT: [[TMP1:%.*]] = add i5 [[TMP0]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = zext i5 [[TMP1]] to i64 +; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: ; CHECK-NEXT: br label [[LOOP1:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[LOOP1]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX2]] +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD]], splat (i8 3) +; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX2]], 32 +; CHECK-NEXT: [[TMP7:%.*]] = xor <32 x i1> [[TMP6]], splat (i1 true) +; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> [[TMP7]]) +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 +; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP1]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.split: +; CHECK-NEXT: br i1 [[TMP8]], label [[LOOP_END:%.*]], label [[MIDDLE_BLOCK:%.*]] +; CHECK: middle.block: +; CHECK-NEXT: br i1 true, label [[LOOP_END1:%.*]], label [[SCALAR_PH]] +; CHECK: vector.early.exit: +; CHECK-NEXT: br label [[LOOP_END1]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ], [ 3, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] ; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3 -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK-NEXT: br i1 [[CMP3]], label 
[[LOOP_INC]], label [[LOOP_END1]] ; CHECK: loop.inc: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END1]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP1]] ], [ 0, [[LOOP_INC]] ] +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP]] ], [ 0, [[LOOP_INC]] ], [ 0, [[MIDDLE_BLOCK]] ], [ 1, [[LOOP_END]] ] ; CHECK-NEXT: ret i64 [[RETVAL]] ; entry: @@ -275,19 +308,51 @@ define i64 @same_exit_block_no_live_outs_one_faulting_ptr_no_vscale_range(ptr %p ; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_no_vscale_range( ; CHECK-SAME: ptr [[P1:%.*]], i64 [[STRIDE:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[P11:%.*]] = ptrtoint ptr [[P1]] to i64 +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] +; CHECK: vector.scevcheck: +; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[P11]] to i5 +; CHECK-NEXT: [[TMP1:%.*]] = add i5 [[TMP0]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = zext i5 [[TMP1]] to i64 +; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX2]] +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD]], splat (i8 3) +; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX2]], 32 +; CHECK-NEXT: [[TMP7:%.*]] = xor <32 x i1> [[TMP6]], splat (i1 true) +; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> [[TMP7]]) +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 +; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: middle.split: +; CHECK-NEXT: br i1 [[TMP8]], label [[LOOP_END:%.*]], label [[MIDDLE_BLOCK:%.*]] +; CHECK: middle.block: +; CHECK-NEXT: br i1 true, label [[LOOP_END1:%.*]], label [[SCALAR_PH]] +; CHECK: vector.early.exit: +; CHECK-NEXT: br label [[LOOP_END1]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ], [ 3, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: br label [[LOOP1:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] ; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3 -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END1]] ; CHECK: loop.inc: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 
[[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END1]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP]] ], [ 0, [[LOOP_INC]] ] +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP1]] ], [ 0, [[LOOP_INC]] ], [ 0, [[MIDDLE_BLOCK]] ], [ 1, [[LOOP_END]] ] ; CHECK-NEXT: ret i64 [[RETVAL]] ; entry: @@ -315,7 +380,7 @@ define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride_too_low(ptr %p1 ; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride_too_low( ; CHECK-SAME: ptr [[P1:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK-NEXT: br label [[LOOP1:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] @@ -325,9 +390,9 @@ define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride_too_low(ptr %p1 ; CHECK: loop.inc: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]] ; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP]] ], [ 0, [[LOOP_INC]] ] +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP1]] ], [ 0, [[LOOP_INC]] ] ; CHECK-NEXT: ret i64 [[RETVAL]] ; entry: @@ -396,3 +461,11 @@ loop.end: attributes #0 = { vscale_range(2,1024) } +;. +; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]} +; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]} +;. 
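One detail worth noting across the two targets: the vector.scevcheck blocks truncate the pointer to i4 in the AArch64 no-vscale-range case (a <16 x i8> access, so a 16-byte alignment check) and to i5 on RISC-V above (a <32 x i8> access, so a 32-byte check). A minimal sketch of that relationship, under the assumption that the check is simply "address mod access-size == 0" and therefore only needs the low log2(access-size) bits:

#include <cassert>
#include <cstdint>

// Number of low address bits an "addr % AccessBytes == 0" check needs,
// assuming AccessBytes is a power of two (16 -> i4, 32 -> i5).
static unsigned bitsForAlignmentCheck(uint64_t AccessBytes) {
  assert(AccessBytes && (AccessBytes & (AccessBytes - 1)) == 0);
  unsigned Bits = 0;
  while ((uint64_t(1) << Bits) < AccessBytes)
    ++Bits;
  return Bits;
}

bitsForAlignmentCheck(16) returns 4 and bitsForAlignmentCheck(32) returns 5, matching the i4 and i5 truncations in the generated SCEV checks.
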
diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit_unsafe_ptrs.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit_unsafe_ptrs.ll
index 23362004b1b2a..69477d6256491 100644
--- a/llvm/test/Transforms/LoopVectorize/single_early_exit_unsafe_ptrs.ll
+++ b/llvm/test/Transforms/LoopVectorize/single_early_exit_unsafe_ptrs.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
-; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization | FileCheck %s
+; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization -max-num-faulting-pointers=1 | FileCheck %s
 
 declare void @init_mem(ptr, i64);
 

From 4884adc0e5a8d3d8c31dd36772b506ae834e56fc Mon Sep 17 00:00:00 2001
From: David Sherwood
Date: Mon, 24 Feb 2025 10:46:35 +0000
Subject: [PATCH 3/4] Address review comments

---
 .../Vectorize/LoopVectorizationLegality.h       |  6 +++---
 .../Vectorize/LoopVectorizationLegality.cpp     | 13 ++++++++-----
 llvm/lib/Transforms/Vectorize/LoopVectorize.cpp |  6 +++---
 3 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index b4bf528472c5c..6c0a3846cd946 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -434,9 +434,9 @@ class LoopVectorizationLegality {
 
   /// Return a vector of all potentially faulting pointers in a loop with
   /// uncountable early exits.
-  const SmallVectorImpl<std::pair<const SCEV *, Type *>> *
+  ArrayRef<std::pair<const SCEV *, Type *>>
   getPotentiallyFaultingPointers() const {
-    return &PotentiallyFaultingPtrs;
+    return PotentiallyFaultingPtrs;
   }
 
   /// Returns a HistogramInfo* for the given instruction if it was determined
@@ -547,7 +547,7 @@ class LoopVectorizationLegality {
   /// Returns true if all loads in the loop contained in \p Loads can be
   /// analyzed as potentially faulting. Any loads that may fault are added to
   /// the member variable PotentiallyFaultingPtrs.
-  bool analyzePotentiallyFaultingLoads(SmallVectorImpl<LoadInst *> *Loads);
+  bool analyzePotentiallyFaultingLoads(SmallVectorImpl<LoadInst *> &Loads);
 
   /// Return true if all of the instructions in the block can be speculatively
   /// executed, and record the loads/stores that require masking.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index ff3954d556e39..daf15646ee45a 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1603,17 +1603,20 @@ bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
 }
 
 bool LoopVectorizationLegality::analyzePotentiallyFaultingLoads(
-    SmallVectorImpl<LoadInst *> *Loads) {
+    SmallVectorImpl<LoadInst *> &Loads) {
   LLVM_DEBUG(dbgs() << "LV: Looking for potentially faulting loads in loop "
                        "with uncountable early exit:\n");
-  for (LoadInst *LI : *Loads) {
+  for (LoadInst *LI : Loads) {
     LLVM_DEBUG(dbgs() << "LV: Load: " << *LI << '\n');
-    Value *Ptr = LI->getPointerOperand();
-    if (!Ptr)
+    if (LI->getPointerAddressSpace())
       return false;
+
+    Value *Ptr = LI->getPointerOperand();
     const SCEV *PtrExpr = PSE.getSCEV(Ptr);
     const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrExpr);
     // TODO: Deal with loop invariant pointers.
+    // NOTE: The reasoning below is only safe if the load executes at least
+    // once.
     if (!AR || AR->getLoop() != TheLoop || !AR->isAffine())
       return false;
 
     auto Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*PSE.getSE()));
@@ -1785,7 +1788,7 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
 
   if (!NonDerefLoads.empty()) {
     if (!TTI->getMinPageSize() ||
-        !analyzePotentiallyFaultingLoads(&NonDerefLoads)) {
+        !analyzePotentiallyFaultingLoads(NonDerefLoads)) {
       PotentiallyFaultingPtrs.clear();
       reportVectorizationFailure(
           "Loop may fault",
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index fd38fb4e0a42f..358603289ec2d 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2170,13 +2170,13 @@ class GeneratedRTChecks {
 } // namespace
 
 static void addPointerAlignmentChecks(
-    const SmallVectorImpl<std::pair<const SCEV *, Type *>> *Ptrs, Function *F,
+    ArrayRef<std::pair<const SCEV *, Type *>> Ptrs, Function *F,
     PredicatedScalarEvolution &PSE, TargetTransformInfo *TTI, ElementCount VF,
     unsigned IC) {
   ScalarEvolution *SE = PSE.getSE();
   const DataLayout &DL = SE->getDataLayout();
-  for (auto Ptr : *Ptrs) {
+  for (auto Ptr : Ptrs) {
     Type *PtrIntType = DL.getIntPtrType(Ptr.first->getType());
     APInt EltSize(PtrIntType->getScalarSizeInBits(),
                   DL.getTypeStoreSize(Ptr.second).getFixedValue());
@@ -2186,7 +2186,7 @@ static void addPointerAlignmentChecks(
         SE->getMulExpr(ScevEC, SE->getConstant(EltSize),
                        (SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW));
     const SCEV *Rem = SE->getURemExpr(Start, Align);
-    PSE.addPredicate(*(SE->getEqualPredicate(Rem, SE->getZero(PtrIntType))));
+    PSE.addPredicate(*SE->getEqualPredicate(Rem, SE->getZero(PtrIntType)));
   }
 }
 

From 10d7a812e4db0bf88240baf5570f6756aaf3085b Mon Sep 17 00:00:00 2001
From: David Sherwood
Date: Mon, 24 Feb 2025 11:02:38 +0000
Subject: [PATCH 4/4] Fix formatting issues

---
 llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 358603289ec2d..7f9ffffc16efe 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2169,10 +2169,11 @@ class GeneratedRTChecks {
 };
 } // namespace
 
-static void addPointerAlignmentChecks(
-    ArrayRef<std::pair<const SCEV *, Type *>> Ptrs, Function *F,
-    PredicatedScalarEvolution &PSE, TargetTransformInfo *TTI, ElementCount VF,
-    unsigned IC) {
+static void
+addPointerAlignmentChecks(ArrayRef<std::pair<const SCEV *, Type *>> Ptrs,
+                          Function *F, PredicatedScalarEvolution &PSE,
+                          TargetTransformInfo *TTI, ElementCount VF,
+                          unsigned IC) {
   ScalarEvolution *SE = PSE.getSE();
   const DataLayout &DL = SE->getDataLayout();
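Taken together, the addPointerAlignmentChecks predicate and the TTI->getMinPageSize() guard encode one runtime condition per potentially faulting pointer. A minimal sketch of that condition follows; it is an interpretation of the URem predicate built above, not code from the patch, and the function and parameter names are assumptions:

#include <cstdint>

// Sketch: the URem predicate corresponds to requiring that the pointer's
// start address is a multiple of the bytes accessed per vector iteration
// (VF * IC * element size). Combined with the requirement that this amount
// does not exceed the target's minimum page size, an aligned vector load
// cannot cross a page boundary, so it stays on the same page as its first
// (scalar) element.
static bool startIsSuitablyAligned(uint64_t StartAddr, uint64_t EltSizeBytes,
                                   uint64_t VF, uint64_t IC) {
  uint64_t BytesPerStep = VF * IC * EltSizeBytes;
  return StartAddr % BytesPerStep == 0;
}

This reading is consistent with the NOTE added in analyzePotentiallyFaultingLoads: the argument only holds if the scalar loop would execute the load at least once.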