From a399f06147a6dbab80e3a4d4d24a2f92b840b0fd Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Thu, 27 Feb 2025 10:11:53 +0000 Subject: [PATCH 1/3] [LV] Pre-commit tests for const-folder --- .../LoopVectorize/constantfolder.ll | 437 ++++++++++++++++++ 1 file changed, 437 insertions(+) create mode 100644 llvm/test/Transforms/LoopVectorize/constantfolder.ll diff --git a/llvm/test/Transforms/LoopVectorize/constantfolder.ll b/llvm/test/Transforms/LoopVectorize/constantfolder.ll new file mode 100644 index 0000000000000..737f2c22da1d8 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/constantfolder.ll @@ -0,0 +1,437 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5 +; RUN: opt -passes=loop-vectorize -force-vector-width=4 -S %s | FileCheck %s + +define void @const_fold_ptradd(ptr %dst, i64 %d) { +; CHECK-LABEL: define void @const_fold_ptradd( +; CHECK-SAME: ptr [[DST:%.*]], i64 [[D:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 true, i64 0, i64 [[D]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i16, ptr [[DST]], i64 [[PREDPHI]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: store i16 0, ptr [[TMP0]], align 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; CHECK-NEXT: br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK: [[LOOP_HEADER]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: br i1 true, label %[[LOOP_LATCH]], label %[[ELSE:.*]] +; CHECK: [[ELSE]]: +; CHECK-NEXT: br label %[[LOOP_LATCH]] +; CHECK: [[LOOP_LATCH]]: +; CHECK-NEXT: [[CONST_0:%.*]] = phi i64 [ [[D]], %[[ELSE]] ], [ 0, %[[LOOP_HEADER]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[DST]], i64 [[CONST_0]] +; CHECK-NEXT: store i16 0, ptr [[GEP]], align 2 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 100 +; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop.header + +loop.header: ; preds = %loop.latch, %entry + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + br i1 true, label %loop.latch, label %else + +else: + br label %loop.latch + +loop.latch: + %const.0 = phi i64 [ %d, %else ], [ 0, %loop.header ] + %gep = getelementptr i16, ptr %dst, i64 %const.0 + store i16 0, ptr %gep, align 2 + %iv.next = add i64 %iv, 1 + %cmp = icmp ult i64 %iv.next, 100 + br i1 %cmp, label %loop.header, label %exit + +exit: + ret void +} + +define void @const_fold_inbounds_ptradd(ptr %dst, i64 %d) { +; CHECK-LABEL: define void @const_fold_inbounds_ptradd( +; CHECK-SAME: ptr [[DST:%.*]], i64 [[D:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: 
[[PREDPHI:%.*]] = select i1 true, i64 0, i64 [[D]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[PREDPHI]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: store i16 0, ptr [[TMP0]], align 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; CHECK-NEXT: br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK: [[LOOP_HEADER]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: br i1 true, label %[[LOOP_LATCH]], label %[[ELSE:.*]] +; CHECK: [[ELSE]]: +; CHECK-NEXT: br label %[[LOOP_LATCH]] +; CHECK: [[LOOP_LATCH]]: +; CHECK-NEXT: [[CONST_0:%.*]] = phi i64 [ [[D]], %[[ELSE]] ], [ 0, %[[LOOP_HEADER]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[CONST_0]] +; CHECK-NEXT: store i16 0, ptr [[GEP]], align 2 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 100 +; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop.header + +loop.header: ; preds = %loop.latch, %entry + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + br i1 true, label %loop.latch, label %else + +else: + br label %loop.latch + +loop.latch: + %const.0 = phi i64 [ %d, %else ], [ 0, %loop.header ] + %gep = getelementptr inbounds i16, ptr %dst, i64 %const.0 + store i16 0, ptr %gep, align 2 + %iv.next = add i64 %iv, 1 + %cmp = icmp ult i64 %iv.next, 100 + br i1 %cmp, label %loop.header, label %exit + +exit: + ret void +} + +define void @const_fold_select(ptr %dst, i64 %d) { +; CHECK-LABEL: define void @const_fold_select( +; CHECK-SAME: ptr [[DST:%.*]], i64 [[D:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[D]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> splat (i1 true), <4 x i64> splat (i64 1), <4 x i64> [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP0:%.*]] = or <4 x i64> [[BROADCAST_SPLAT]], [[PREDPHI]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i64> [[TMP0]], i32 0 +; CHECK-NEXT: store i64 [[TMP3]], ptr [[DST]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, 
%[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK: [[LOOP_HEADER]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: br i1 true, label %[[LOOP_LATCH]], label %[[ELSE:.*]] +; CHECK: [[ELSE]]: +; CHECK-NEXT: br label %[[LOOP_LATCH]] +; CHECK: [[LOOP_LATCH]]: +; CHECK-NEXT: [[CONST_1:%.*]] = phi i64 [ [[D]], %[[ELSE]] ], [ 1, %[[LOOP_HEADER]] ] +; CHECK-NEXT: [[OR:%.*]] = or i64 [[D]], [[CONST_1]] +; CHECK-NEXT: store i64 [[OR]], ptr [[DST]], align 8 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 100 +; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop.header + +loop.header: ; preds = %loop.latch, %entry + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + br i1 true, label %loop.latch, label %else + +else: + br label %loop.latch + +loop.latch: + %const.1 = phi i64 [ %d, %else ], [ 1, %loop.header ] + %or = or i64 %d, %const.1 + store i64 %or, ptr %dst, align 8 + %iv.next = add i64 %iv, 1 + %cmp = icmp ult i64 %iv.next, 100 + br i1 %cmp, label %loop.header, label %exit + +exit: + ret void +} + +define void @const_fold_add_sub_mul_ashr_lshr(ptr %dst, i64 %d) { +; CHECK-LABEL: define void @const_fold_add_sub_mul_ashr_lshr( +; CHECK-SAME: ptr [[DST:%.*]], i64 [[D:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[D]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> splat (i1 true), <4 x i64> splat (i64 1), <4 x i64> [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP0:%.*]] = add <4 x i64> splat (i64 2), [[PREDPHI]] +; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i64> [[TMP0]], [[PREDPHI]] +; CHECK-NEXT: [[TMP2:%.*]] = ashr <4 x i64> [[TMP1]], [[PREDPHI]] +; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i64> [[TMP2]], splat (i64 3) +; CHECK-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], [[PREDPHI]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr [[DST]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK: [[LOOP_HEADER]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: br i1 true, label %[[LOOP_LATCH]], label %[[ELSE:.*]] +; CHECK: [[ELSE]]: +; CHECK-NEXT: br label %[[LOOP_LATCH]] +; CHECK: [[LOOP_LATCH]]: +; CHECK-NEXT: [[CONST_1:%.*]] = phi i64 [ [[D]], %[[ELSE]] ], [ 1, %[[LOOP_HEADER]] ] +; CHECK-NEXT: [[ADD:%.*]] = add i64 2, [[CONST_1]] +; CHECK-NEXT: 
[[SUB:%.*]] = sub i64 [[ADD]], [[CONST_1]] +; CHECK-NEXT: [[ASHR:%.*]] = ashr i64 [[SUB]], [[CONST_1]] +; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[ASHR]], 3 +; CHECK-NEXT: [[LSHR:%.*]] = lshr i64 [[MUL]], [[CONST_1]] +; CHECK-NEXT: store i64 [[LSHR]], ptr [[DST]], align 8 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 100 +; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop.header + +loop.header: ; preds = %loop.latch, %entry + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + br i1 true, label %loop.latch, label %else + +else: + br label %loop.latch + +loop.latch: + %const.1 = phi i64 [ %d, %else ], [ 1, %loop.header ] + %add = add i64 2, %const.1 + %sub = sub i64 %add, %const.1 + %ashr = ashr i64 %sub, %const.1 + %mul = mul i64 %ashr, 3 + %lshr = lshr i64 %mul, %const.1 + store i64 %lshr, ptr %dst, align 8 + %iv.next = add i64 %iv, 1 + %cmp = icmp ult i64 %iv.next, 100 + br i1 %cmp, label %loop.header, label %exit + +exit: + ret void +} + +define void @const_fold_and_or_xor(ptr %dst, i64 %d) { +; CHECK-LABEL: define void @const_fold_and_or_xor( +; CHECK-SAME: ptr [[DST:%.*]], i64 [[D:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[D]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> splat (i1 true), <4 x i64> splat (i64 1), <4 x i64> [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP0:%.*]] = or <4 x i64> splat (i64 2), [[PREDPHI]] +; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i64> [[TMP0]], [[PREDPHI]] +; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i64> [[TMP1]], [[PREDPHI]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i64> [[TMP2]], i32 0 +; CHECK-NEXT: store i64 [[TMP3]], ptr [[DST]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK: [[LOOP_HEADER]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: br i1 true, label %[[LOOP_LATCH]], label %[[ELSE:.*]] +; CHECK: [[ELSE]]: +; CHECK-NEXT: br label %[[LOOP_LATCH]] +; CHECK: [[LOOP_LATCH]]: +; CHECK-NEXT: [[CONST_1:%.*]] = phi i64 [ [[D]], %[[ELSE]] ], [ 1, %[[LOOP_HEADER]] ] +; CHECK-NEXT: [[OR:%.*]] = or i64 2, [[CONST_1]] +; CHECK-NEXT: [[AND:%.*]] = and i64 [[OR]], [[CONST_1]] +; CHECK-NEXT: [[XOR:%.*]] = and i64 [[AND]], [[CONST_1]] +; CHECK-NEXT: store i64 [[XOR]], ptr [[DST]], align 8 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 100 +; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]], 
!llvm.loop [[LOOP11:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop.header + +loop.header: ; preds = %loop.latch, %entry + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + br i1 true, label %loop.latch, label %else + +else: + br label %loop.latch + +loop.latch: + %const.1 = phi i64 [ %d, %else ], [ 1, %loop.header ] + %or = or i64 2, %const.1 + %and = and i64 %or, %const.1 + %xor = and i64 %and, %const.1 + store i64 %xor, ptr %dst, align 8 + %iv.next = add i64 %iv, 1 + %cmp = icmp ult i64 %iv.next, 100 + br i1 %cmp, label %loop.header, label %exit + +exit: + ret void +} + +define void @const_fold_cmp_zext(ptr %dst, i64 %d) { +; CHECK-LABEL: define void @const_fold_cmp_zext( +; CHECK-SAME: ptr [[DST:%.*]], i64 [[D:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[D]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> splat (i1 true), <4 x i64> splat (i64 1), <4 x i64> [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt <4 x i64> splat (i64 2), [[PREDPHI]] +; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i8> +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i8> [[TMP1]], i32 0 +; CHECK-NEXT: store i8 [[TMP2]], ptr [[DST]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK: [[LOOP_HEADER]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: br i1 true, label %[[LOOP_LATCH]], label %[[ELSE:.*]] +; CHECK: [[ELSE]]: +; CHECK-NEXT: br label %[[LOOP_LATCH]] +; CHECK: [[LOOP_LATCH]]: +; CHECK-NEXT: [[CONST_1:%.*]] = phi i64 [ [[D]], %[[ELSE]] ], [ 1, %[[LOOP_HEADER]] ] +; CHECK-NEXT: [[VAL:%.*]] = icmp ugt i64 2, [[CONST_1]] +; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[VAL]] to i8 +; CHECK-NEXT: store i8 [[ZEXT]], ptr [[DST]], align 1 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 100 +; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop.header + +loop.header: ; preds = %loop.latch, %entry + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + br i1 true, label %loop.latch, label %else + +else: + br label %loop.latch + +loop.latch: + %const.1 = phi i64 [ %d, %else ], [ 1, %loop.header ] + %val = icmp ugt i64 2, %const.1 + %zext = zext i1 %val to i8 + store i8 %zext, ptr %dst, align 1 + %iv.next = add i64 %iv, 1 + %cmp = icmp ult i64 %iv.next, 100 + br i1 %cmp, label %loop.header, label %exit + +exit: + ret void +} + +define void @const_fold_trunc(ptr %dst, i64 
%d) { +; CHECK-LABEL: define void @const_fold_trunc( +; CHECK-SAME: ptr [[DST:%.*]], i64 [[D:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[D]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> splat (i1 true), <4 x i64> zeroinitializer, <4 x i64> [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP0:%.*]] = trunc <4 x i64> [[PREDPHI]] to <4 x i16> +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i16> [[TMP0]], i32 0 +; CHECK-NEXT: store i16 [[TMP1]], ptr [[DST]], align 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; CHECK-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK: [[LOOP_HEADER]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: br i1 true, label %[[LOOP_LATCH]], label %[[ELSE:.*]] +; CHECK: [[ELSE]]: +; CHECK-NEXT: br label %[[LOOP_LATCH]] +; CHECK: [[LOOP_LATCH]]: +; CHECK-NEXT: [[CONST_0:%.*]] = phi i64 [ [[D]], %[[ELSE]] ], [ 0, %[[LOOP_HEADER]] ] +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[CONST_0]] to i16 +; CHECK-NEXT: store i16 [[TRUNC]], ptr [[DST]], align 2 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 100 +; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop.header + +loop.header: ; preds = %loop.latch, %entry + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + br i1 true, label %loop.latch, label %else + +else: + br label %loop.latch + +loop.latch: + %const.0 = phi i64 [ %d, %else ], [ 0, %loop.header ] + %trunc = trunc i64 %const.0 to i16 + store i16 %trunc, ptr %dst, align 2 + %iv.next = add i64 %iv, 1 + %cmp = icmp ult i64 %iv.next, 100 + br i1 %cmp, label %loop.header, label %exit + +exit: + ret void +} From 711a37ea954e626ea9584a37172c035678f3dfe8 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Tue, 6 May 2025 10:10:01 +0100 Subject: [PATCH 2/3] VPlan: implement VPlan-level constant-folding Introduce VPlanConstantFolder, a variation of ConstantFolder for VPlan, and use it in VPBuilder to constant-fold when all the underlying IR values passed into the API are constants. 
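
In this revision the folding is driven from simplifyRecipe: a
TargetFolder-based helper (tryToConstantFold) folds a recipe whenever all of
its operands are constant live-ins, and users are rewired to the folded
live-in. As a rough before/after sketch of the effect on the pre-committed
tests (vector preheader of the const-fold tests, where predication leaves a
select on a known-true mask feeding a GEP index):

  ; before VPlan-level constant folding
  %predphi = select i1 true, i64 0, i64 %d
  %gep = getelementptr i16, ptr %dst, i64 %predphi

  ; after folding the select to 0
  %gep = getelementptr i16, ptr %dst, i64 0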
--- .../Transforms/Vectorize/VPlanTransforms.cpp | 78 +++++++++ ...eave-to-widen-memory-remove-loop-region.ll | 3 +- .../RISCV/blocks-with-dead-instructions.ll | 48 +----- .../truncate-to-minimal-bitwidth-cost.ll | 6 +- .../LoopVectorize/SystemZ/pr47665.ll | 33 ++-- .../X86/drop-poison-generating-flags.ll | 92 +++++++++- ...licate-recipe-with-only-first-lane-used.ll | 162 +++++++++++++++--- .../constantfolder-infer-correct-gepty.ll | 63 +++++++ .../LoopVectorize/constantfolder.ll | 47 +---- .../debugloc-optimize-vfuf-term.ll | 18 +- .../LoopVectorize/first-order-recurrence.ll | 7 +- .../interleave-and-scalarize-only.ll | 3 +- 12 files changed, 406 insertions(+), 154 deletions(-) create mode 100644 llvm/test/Transforms/LoopVectorize/constantfolder-infer-correct-gepty.ll diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 8c8297bb1ae94..cbae568d8f149 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -28,6 +28,7 @@ #include "llvm/ADT/TypeSwitch.h" #include "llvm/Analysis/IVDescriptors.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/TargetFolder.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/PatternMatch.h" @@ -938,10 +939,87 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) { } } +/// Try to fold \p R using TargetFolder to a constant. Will succeed for a +/// handled \p Opcode if all \p Operands are constant. +static Value *tryToConstantFold(const VPRecipeBase &R, unsigned Opcode, + ArrayRef Operands, + const DataLayout &DL, + VPTypeAnalysis &TypeInfo) { + SmallVector Ops; + for (VPValue *Op : Operands) { + if (!Op->isLiveIn() || !Op->getLiveInIRValue()) + return nullptr; + Ops.push_back(Op->getLiveInIRValue()); + } + + TargetFolder Folder(DL); + if (Instruction::isBinaryOp(Opcode)) + return Folder.FoldBinOp(static_cast(Opcode), Ops[0], + Ops[1]); + if (Instruction::isCast(Opcode)) + return Folder.FoldCast(static_cast(Opcode), Ops[0], + TypeInfo.inferScalarType(R.getVPSingleValue())); + switch (Opcode) { + case VPInstruction::LogicalAnd: + return Folder.FoldSelect(Ops[0], Ops[1], + ConstantInt::getNullValue(Ops[1]->getType())); + case VPInstruction::Not: + return Folder.FoldBinOp(Instruction::BinaryOps::Xor, Ops[0], + Constant::getAllOnesValue(Ops[0]->getType())); + case Instruction::Select: + return Folder.FoldSelect(Ops[0], Ops[1], Ops[2]); + case Instruction::ICmp: + case Instruction::FCmp: + return Folder.FoldCmp(cast(R).getPredicate(), Ops[0], + Ops[1]); + case Instruction::GetElementPtr: { + auto &RFlags = cast(R); + auto *GEP = cast(RFlags.getUnderlyingInstr()); + return Folder.FoldGEP(GEP->getSourceElementType(), Ops[0], drop_begin(Ops), + RFlags.getGEPNoWrapFlags()); + } + case VPInstruction::PtrAdd: + return Folder.FoldGEP(IntegerType::getInt8Ty(TypeInfo.getContext()), Ops[0], + Ops[1], + cast(R).getGEPNoWrapFlags()); + case Instruction::InsertElement: + return Folder.FoldInsertElement(Ops[0], Ops[1], Ops[2]); + case Instruction::ExtractElement: + return Folder.FoldExtractElement(Ops[0], Ops[1]); + } + return nullptr; +} + /// Try to simplify recipe \p R. static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { using namespace llvm::VPlanPatternMatch; + // Constant folding. 
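+  // Fold recipes whose operands are all constant live-ins into a single
+  // live-in Value via tryToConstantFold; on success, rewire all users to the
+  // folded live-in and skip any further simplification of this recipe.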
+ if (TypeSwitch(&R) + .Case([&](auto *I) { + VPlan *Plan = R.getParent()->getPlan(); + const DataLayout &DL = + Plan->getScalarHeader()->getIRBasicBlock()->getDataLayout(); + Value *V = tryToConstantFold(*I, I->getOpcode(), I->operands(), DL, + TypeInfo); + if (V) + I->replaceAllUsesWith(Plan->getOrAddLiveIn(V)); + return V; + }) + .Default([](auto *) { return false; })) + return; + + // Fold PredPHI constant -> constant. + if (auto *PredPHI = dyn_cast(&R)) { + VPlan *Plan = R.getParent()->getPlan(); + VPValue *Op = PredPHI->getOperand(0); + if (!Op->isLiveIn() || !Op->getLiveInIRValue()) + return; + if (auto *C = dyn_cast(Op->getLiveInIRValue())) + PredPHI->replaceAllUsesWith(Plan->getOrAddLiveIn(C)); + } + // VPScalarIVSteps can only be simplified after unrolling. VPScalarIVSteps for // part 0 can be replaced by their start value, if only the first lane is // demanded. diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll index 25e1e2d4cab1e..220e450cf30d5 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll @@ -13,8 +13,7 @@ define void @load_store_interleave_group_tc_2(ptr noalias %data) { ; VF2: [[VECTOR_PH]]: ; VF2-NEXT: br label %[[VECTOR_BODY:.*]] ; VF2: [[VECTOR_BODY]]: -; VF2-NEXT: [[TMP0:%.*]] = shl nsw i64 0, 1 -; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP0]] +; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 0 ; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8 ; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> ; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll index a1a5deb93f6c7..ea8df1669288d 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll @@ -47,14 +47,10 @@ define void @block_with_dead_inst_1(ptr %src, i64 %N) #0 { ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP22:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP23:%.*]] = mul i32 [[TMP22]], 8 -; CHECK-NEXT: [[TMP24:%.*]] = sub i32 [[TMP23]], 1 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement zeroinitializer, i32 [[TMP24]] ; CHECK-NEXT: br label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] @@ -141,14 +137,10 @@ define void 
@block_with_dead_inst_2(ptr %src) #0 { ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP20:%.*]] = mul i32 [[TMP19]], 4 -; CHECK-NEXT: [[TMP21:%.*]] = sub i32 [[TMP20]], 1 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement zeroinitializer, i32 [[TMP21]] ; CHECK-NEXT: br label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] @@ -235,14 +227,10 @@ define void @multiple_blocks_with_dead_insts_3(ptr %src) #0 { ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP20:%.*]] = mul i32 [[TMP19]], 4 -; CHECK-NEXT: [[TMP21:%.*]] = sub i32 [[TMP20]], 1 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement zeroinitializer, i32 [[TMP21]] ; CHECK-NEXT: br label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] @@ -339,14 +327,10 @@ define void @multiple_blocks_with_dead_insts_4(ptr %src, i64 %N) #0 { ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP22:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP23:%.*]] = mul i32 [[TMP22]], 8 -; CHECK-NEXT: [[TMP24:%.*]] = sub i32 [[TMP23]], 1 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement zeroinitializer, i32 [[TMP24]] ; CHECK-NEXT: br label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] @@ -445,14 +429,10 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_5(ptr %src) #0 { ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop 
[[LOOP10:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP20:%.*]] = mul i32 [[TMP19]], 4 -; CHECK-NEXT: [[TMP21:%.*]] = sub i32 [[TMP20]], 1 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement zeroinitializer, i32 [[TMP21]] ; CHECK-NEXT: br label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] @@ -559,14 +539,10 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_6(ptr %src, i1 % ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP22:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP23:%.*]] = mul i32 [[TMP22]], 8 -; CHECK-NEXT: [[TMP24:%.*]] = sub i32 [[TMP23]], 1 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement zeroinitializer, i32 [[TMP24]] ; CHECK-NEXT: br label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] @@ -663,15 +639,11 @@ define void @empty_block_with_phi_1(ptr %src, i64 %N) #0 { ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 8 -; CHECK-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], 1 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement zeroinitializer, i32 [[TMP16]] ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] @@ -751,15 +723,11 @@ define void @empty_block_with_phi_2(ptr %src, i64 %N) #0 { ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP16:%.*]] = 
mul i32 [[TMP15]], 8 -; CHECK-NEXT: [[TMP17:%.*]] = sub i32 [[TMP16]], 1 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement zeroinitializer, i32 [[TMP17]] ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll index 61706fe4d55c0..f83b5782d5ad3 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll @@ -165,10 +165,6 @@ define void @truncate_to_i1_used_by_branch(i8 %x, ptr %dst) #0 { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP10]], 4 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i8 [[X]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = trunc [[BROADCAST_SPLAT]] to -; CHECK-NEXT: [[TMP8:%.*]] = or splat (i1 true), [[TMP7]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, ptr [[DST]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -181,7 +177,7 @@ define void @truncate_to_i1_used_by_branch(i8 %x, ptr %dst) #0 { ; CHECK-NEXT: [[VEC_IV:%.*]] = add [[BROADCAST_SPLAT4]], [[TMP14]] ; CHECK-NEXT: [[TMP15:%.*]] = extractelement [[VEC_IV]], i32 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i32(i32 [[TMP15]], i32 9) -; CHECK-NEXT: [[TMP11:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = select [[ACTIVE_LANE_MASK]], splat (i1 true), zeroinitializer ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i8.nxv4p0( zeroinitializer, [[BROADCAST_SPLAT2]], i32 1, [[TMP11]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll index bb96c166f894c..02a876a3fda67 100644 --- a/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll +++ b/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll @@ -7,87 +7,86 @@ define void @test(ptr %p, i40 %a) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i1 true, false ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1 +; CHECK-NEXT: store i1 false, ptr [[P]], align 1 ; CHECK-NEXT: 
br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: ; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]] ; CHECK: pred.store.if1: -; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1 +; CHECK-NEXT: store i1 false, ptr [[P]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] ; CHECK: pred.store.continue2: ; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] ; CHECK: pred.store.if3: -; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1 +; CHECK-NEXT: store i1 false, ptr [[P]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] ; CHECK: pred.store.continue4: ; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] ; CHECK: pred.store.if5: -; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1 +; CHECK-NEXT: store i1 false, ptr [[P]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] ; CHECK: pred.store.continue6: ; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] ; CHECK: pred.store.if7: -; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1 +; CHECK-NEXT: store i1 false, ptr [[P]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]] ; CHECK: pred.store.continue8: ; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]] ; CHECK: pred.store.if9: -; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1 +; CHECK-NEXT: store i1 false, ptr [[P]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]] ; CHECK: pred.store.continue10: ; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]] ; CHECK: pred.store.if11: -; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1 +; CHECK-NEXT: store i1 false, ptr [[P]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]] ; CHECK: pred.store.continue12: ; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]] ; CHECK: pred.store.if13: -; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1 +; CHECK-NEXT: store i1 false, ptr [[P]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]] ; CHECK: pred.store.continue14: ; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16:%.*]] ; CHECK: pred.store.if15: -; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1 +; CHECK-NEXT: store i1 false, ptr [[P]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE16]] ; CHECK: pred.store.continue16: ; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF17:%.*]], label [[PRED_STORE_CONTINUE18:%.*]] ; CHECK: pred.store.if17: -; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1 +; CHECK-NEXT: store i1 false, ptr [[P]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE18]] ; CHECK: pred.store.continue18: ; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF19:%.*]], label [[PRED_STORE_CONTINUE20:%.*]] ; CHECK: pred.store.if19: -; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1 +; CHECK-NEXT: store i1 false, ptr [[P]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE20]] ; CHECK: pred.store.continue20: ; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF21:%.*]], label [[PRED_STORE_CONTINUE22:%.*]] ; CHECK: pred.store.if21: -; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1 +; CHECK-NEXT: store i1 false, ptr [[P]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE22]] ; CHECK: pred.store.continue22: ; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF23:%.*]], label [[PRED_STORE_CONTINUE24:%.*]] ; CHECK: pred.store.if23: -; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1 +; 
CHECK-NEXT: store i1 false, ptr [[P]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE24]] ; CHECK: pred.store.continue24: ; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF25:%.*]], label [[PRED_STORE_CONTINUE26:%.*]] ; CHECK: pred.store.if25: -; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1 +; CHECK-NEXT: store i1 false, ptr [[P]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE26]] ; CHECK: pred.store.continue26: ; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF27:%.*]], label [[PRED_STORE_CONTINUE28:%.*]] ; CHECK: pred.store.if27: -; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1 +; CHECK-NEXT: store i1 false, ptr [[P]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE28]] ; CHECK: pred.store.continue28: ; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF29:%.*]], label [[PRED_STORE_CONTINUE30:%.*]] ; CHECK: pred.store.if29: -; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1 +; CHECK-NEXT: store i1 false, ptr [[P]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE30]] ; CHECK: pred.store.continue30: ; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll index 207f3e8ba915e..38b58fbfd1021 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll @@ -673,30 +673,24 @@ define void @pr70590_recipe_without_underlying_instr(i64 %n, ptr noalias %dst) { ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 ; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_SREM_IF:.*]], label %[[PRED_SREM_CONTINUE:.*]] ; CHECK: [[PRED_SREM_IF]]: -; CHECK-NEXT: [[TMP3:%.*]] = srem i64 3, 0 ; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE]] ; CHECK: [[PRED_SREM_CONTINUE]]: -; CHECK-NEXT: [[TMP4:%.*]] = phi i64 [ poison, %[[VECTOR_BODY]] ], [ [[TMP3]], %[[PRED_SREM_IF]] ] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1 ; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_SREM_IF1:.*]], label %[[PRED_SREM_CONTINUE2:.*]] ; CHECK: [[PRED_SREM_IF1]]: -; CHECK-NEXT: [[TMP6:%.*]] = srem i64 3, 0 ; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE2]] ; CHECK: [[PRED_SREM_CONTINUE2]]: ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2 ; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_SREM_IF3:.*]], label %[[PRED_SREM_CONTINUE4:.*]] ; CHECK: [[PRED_SREM_IF3]]: -; CHECK-NEXT: [[TMP8:%.*]] = srem i64 3, 0 ; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE4]] ; CHECK: [[PRED_SREM_CONTINUE4]]: ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3 ; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_SREM_IF5:.*]], label %[[PRED_SREM_CONTINUE6]] ; CHECK: [[PRED_SREM_IF5]]: -; CHECK-NEXT: [[TMP10:%.*]] = srem i64 3, 0 ; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE6]] ; CHECK: [[PRED_SREM_CONTINUE6]]: -; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP4]], -3 -; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], poison ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr [5 x i8], ptr @c, i64 0, i64 [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP13]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP14]], align 1 @@ -738,6 +732,88 @@ exit: ret void } +; Variation of the above test with the poison value being used in all lanes. 
+define void @recipe_without_underlying_instr_lanes_used(i64 %n, ptr noalias %dst, ptr noalias %aux) { +; CHECK-LABEL: define void @recipe_without_underlying_instr_lanes_used( +; CHECK-SAME: i64 [[N:%.*]], ptr noalias [[DST:%.*]], ptr noalias [[AUX:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 false, [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[N]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_SREM_CONTINUE6:.*]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_SREM_CONTINUE6]] ] +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 +; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_SREM_IF:.*]], label %[[PRED_SREM_CONTINUE:.*]] +; CHECK: [[PRED_SREM_IF]]: +; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE]] +; CHECK: [[PRED_SREM_CONTINUE]]: +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1 +; CHECK-NEXT: br i1 [[TMP3]], label %[[PRED_SREM_IF1:.*]], label %[[PRED_SREM_CONTINUE2:.*]] +; CHECK: [[PRED_SREM_IF1]]: +; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE2]] +; CHECK: [[PRED_SREM_CONTINUE2]]: +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2 +; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_SREM_IF3:.*]], label %[[PRED_SREM_CONTINUE4:.*]] +; CHECK: [[PRED_SREM_IF3]]: +; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE4]] +; CHECK: [[PRED_SREM_CONTINUE4]]: +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3 +; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_SREM_IF5:.*]], label %[[PRED_SREM_CONTINUE6]] +; CHECK: [[PRED_SREM_IF5]]: +; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE6]] +; CHECK: [[PRED_SREM_CONTINUE6]]: +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], poison +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr [5 x i8], ptr @c, i64 0, i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP8]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i8> zeroinitializer, <4 x i8> [[WIDE_LOAD]] +; CHECK-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> zeroinitializer, <4 x i64> poison +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[PREDPHI7]], i32 3 +; CHECK-NEXT: store i64 [[TMP12]], ptr [[AUX]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP10]], i32 0 +; CHECK-NEXT: store <4 x i8> [[PREDPHI]], ptr [[TMP11]], align 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; + +entry: + br label %loop.header + +loop.header: + %iv = phi i64 [ 0, %entry ], [ %inc, %loop.latch ] + %cmp = icmp eq i64 %iv, %n + br i1 %cmp, label %loop.latch, label %then + +then: + %rem = srem i64 3, 0 + %add3 = add i64 %rem, -3 + %add5 = add i64 %iv, %add3 + %gep 
= getelementptr [5 x i8], ptr @c, i64 0, i64 %add5 + %l = load i8, ptr %gep, align 1 + br label %loop.latch + +loop.latch: + %sr = phi i8 [ 0, %loop.header ], [ %l , %then ] + %p = phi i64 [ 0, %loop.header ], [ %rem, %then ] + store i64 %p, ptr %aux + %gep.dst = getelementptr i8, ptr %dst, i64 %iv + store i8 %sr, ptr %gep.dst, align 4 + %inc = add i64 %iv, 1 + %exitcond.not = icmp eq i64 %inc, 4 + br i1 %exitcond.not, label %exit, label %loop.header + +exit: + ret void +} + ; %B.gep.0 and pointers based on it can preserve inbounds, as the inbounds ; versionused unconditionally in the store in the latch. ; FIXME: at the moment, inbounds is dropped from both the GEP feeding the vector load ans tore @@ -763,7 +839,7 @@ define void @Bgep_inbounds_unconditionally_due_to_store(ptr noalias %B, ptr read ; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP6]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 -; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll b/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll index e6c9ce3381f73..85bcd97d50c7d 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll @@ -12,60 +12,166 @@ define void @replicate_udiv_with_only_first_lane_used(i32 %x, ptr %dst, i64 %d) ; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X]], 10 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[DST]], i64 0 +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: store i16 0, ptr [[TMP11]], align 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96 +; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 96, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK: [[LOOP_HEADER]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: br i1 true, label %[[LOOP_LATCH]], label %[[ELSE:.*]] +; CHECK: [[ELSE]]: +; CHECK-NEXT: [[DIV_I:%.*]] = udiv i64 99, [[D]] +; CHECK-NEXT: br label %[[LOOP_LATCH]] +; CHECK: [[LOOP_LATCH]]: +; CHECK-NEXT: [[RETVAL_0_I:%.*]] = phi i64 [ [[DIV_I]], %[[ELSE]] ], [ 0, %[[LOOP_HEADER]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[DST]], i64 [[RETVAL_0_I]] +; CHECK-NEXT: store i16 0, ptr [[GEP]], align 2 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 101 +; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + %c = icmp eq i32 %x, 10 + br 
label %loop.header + +loop.header: ; preds = %loop.latch, %entry + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + br i1 true, label %loop.latch, label %else + +else: + %div.i = udiv i64 99, %d + br label %loop.latch + +loop.latch: + %retval.0.i = phi i64 [ %div.i, %else ], [ 0, %loop.header ] + %gep = getelementptr i16, ptr %dst, i64 %retval.0.i + store i16 0, ptr %gep, align 2 + %iv.next = add i64 %iv, 1 + %cmp = icmp ult i64 %iv.next, 101 + br i1 %cmp, label %loop.header, label %exit + +exit: + ret void +} + +; Variant where ptradd cannot be const-folded. +define void @replicate_udiv_with_only_first_lane_used2(i32 %x, ptr %dst, i64 %d) { +; CHECK-LABEL: define void @replicate_udiv_with_only_first_lane_used2( +; CHECK-SAME: i32 [[X:%.*]], ptr [[DST:%.*]], i64 [[D:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X]], 10 +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_UDIV_CONTINUE14:.*]] ] -; CHECK-NEXT: br i1 false, label %[[PRED_UDIV_IF:.*]], label %[[PRED_UDIV_CONTINUE:.*]] +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 +; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_UDIV_IF:.*]], label %[[PRED_UDIV_CONTINUE:.*]] ; CHECK: [[PRED_UDIV_IF]]: -; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 99, [[D]] +; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 99, [[D]] +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> poison, i64 [[TMP2]], i32 0 ; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE]] ; CHECK: [[PRED_UDIV_CONTINUE]]: -; CHECK-NEXT: [[TMP1:%.*]] = phi i64 [ poison, %[[VECTOR_BODY]] ], [ [[TMP0]], %[[PRED_UDIV_IF]] ] -; CHECK-NEXT: br i1 false, label %[[PRED_UDIV_IF1:.*]], label %[[PRED_UDIV_CONTINUE2:.*]] +; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i64> [ poison, %[[VECTOR_BODY]] ], [ [[TMP3]], %[[PRED_UDIV_IF]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 +; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_UDIV_IF1:.*]], label %[[PRED_UDIV_CONTINUE2:.*]] ; CHECK: [[PRED_UDIV_IF1]]: -; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 99, [[D]] +; CHECK-NEXT: [[TMP6:%.*]] = udiv i64 99, [[D]] +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i64> [[TMP4]], i64 [[TMP6]], i32 1 ; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE2]] ; CHECK: [[PRED_UDIV_CONTINUE2]]: -; CHECK-NEXT: br i1 false, label %[[PRED_UDIV_IF3:.*]], label %[[PRED_UDIV_CONTINUE4:.*]] +; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i64> [ [[TMP4]], %[[PRED_UDIV_CONTINUE]] ], [ [[TMP7]], %[[PRED_UDIV_IF1]] ] +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 +; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_UDIV_IF3:.*]], label %[[PRED_UDIV_CONTINUE4:.*]] ; CHECK: [[PRED_UDIV_IF3]]: -; CHECK-NEXT: [[TMP3:%.*]] = udiv i64 99, [[D]] +; CHECK-NEXT: [[TMP10:%.*]] = udiv i64 99, [[D]] +; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x i64> [[TMP8]], i64 [[TMP10]], i32 2 ; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE4]] ; CHECK: [[PRED_UDIV_CONTINUE4]]: -; CHECK-NEXT: br i1 false, label %[[PRED_UDIV_IF5:.*]], label %[[PRED_UDIV_CONTINUE6:.*]] +; CHECK-NEXT: [[TMP49:%.*]] = phi <4 x 
i64> [ [[TMP8]], %[[PRED_UDIV_CONTINUE2]] ], [ [[TMP34]], %[[PRED_UDIV_IF3]] ] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 +; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_UDIV_IF5:.*]], label %[[PRED_UDIV_CONTINUE6:.*]] ; CHECK: [[PRED_UDIV_IF5]]: -; CHECK-NEXT: [[TMP4:%.*]] = udiv i64 99, [[D]] +; CHECK-NEXT: [[TMP14:%.*]] = udiv i64 99, [[D]] +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i64> [[TMP49]], i64 [[TMP14]], i32 3 ; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE6]] ; CHECK: [[PRED_UDIV_CONTINUE6]]: -; CHECK-NEXT: br i1 false, label %[[PRED_UDIV_IF7:.*]], label %[[PRED_UDIV_CONTINUE8:.*]] +; CHECK-NEXT: [[TMP16:%.*]] = phi <4 x i64> [ [[TMP49]], %[[PRED_UDIV_CONTINUE4]] ], [ [[TMP15]], %[[PRED_UDIV_IF5]] ] +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 +; CHECK-NEXT: br i1 [[TMP17]], label %[[PRED_UDIV_IF7:.*]], label %[[PRED_UDIV_CONTINUE8:.*]] ; CHECK: [[PRED_UDIV_IF7]]: -; CHECK-NEXT: [[TMP5:%.*]] = udiv i64 99, [[D]] +; CHECK-NEXT: [[TMP18:%.*]] = udiv i64 99, [[D]] +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> poison, i64 [[TMP18]], i32 0 ; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE8]] ; CHECK: [[PRED_UDIV_CONTINUE8]]: -; CHECK-NEXT: [[TMP6:%.*]] = phi i64 [ poison, %[[PRED_UDIV_CONTINUE6]] ], [ [[TMP5]], %[[PRED_UDIV_IF7]] ] -; CHECK-NEXT: br i1 false, label %[[PRED_UDIV_IF9:.*]], label %[[PRED_UDIV_CONTINUE10:.*]] +; CHECK-NEXT: [[TMP20:%.*]] = phi <4 x i64> [ poison, %[[PRED_UDIV_CONTINUE6]] ], [ [[TMP19]], %[[PRED_UDIV_IF7]] ] +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 +; CHECK-NEXT: br i1 [[TMP21]], label %[[PRED_UDIV_IF9:.*]], label %[[PRED_UDIV_CONTINUE10:.*]] ; CHECK: [[PRED_UDIV_IF9]]: -; CHECK-NEXT: [[TMP7:%.*]] = udiv i64 99, [[D]] +; CHECK-NEXT: [[TMP22:%.*]] = udiv i64 99, [[D]] +; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP22]], i32 1 ; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE10]] ; CHECK: [[PRED_UDIV_CONTINUE10]]: -; CHECK-NEXT: br i1 false, label %[[PRED_UDIV_IF11:.*]], label %[[PRED_UDIV_CONTINUE12:.*]] +; CHECK-NEXT: [[TMP24:%.*]] = phi <4 x i64> [ [[TMP20]], %[[PRED_UDIV_CONTINUE8]] ], [ [[TMP23]], %[[PRED_UDIV_IF9]] ] +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 +; CHECK-NEXT: br i1 [[TMP25]], label %[[PRED_UDIV_IF11:.*]], label %[[PRED_UDIV_CONTINUE12:.*]] ; CHECK: [[PRED_UDIV_IF11]]: -; CHECK-NEXT: [[TMP8:%.*]] = udiv i64 99, [[D]] +; CHECK-NEXT: [[TMP26:%.*]] = udiv i64 99, [[D]] +; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i64> [[TMP24]], i64 [[TMP26]], i32 2 ; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE12]] ; CHECK: [[PRED_UDIV_CONTINUE12]]: -; CHECK-NEXT: br i1 false, label %[[PRED_UDIV_IF13:.*]], label %[[PRED_UDIV_CONTINUE14]] +; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i64> [ [[TMP24]], %[[PRED_UDIV_CONTINUE10]] ], [ [[TMP27]], %[[PRED_UDIV_IF11]] ] +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 +; CHECK-NEXT: br i1 [[TMP29]], label %[[PRED_UDIV_IF13:.*]], label %[[PRED_UDIV_CONTINUE14]] ; CHECK: [[PRED_UDIV_IF13]]: -; CHECK-NEXT: [[TMP9:%.*]] = udiv i64 99, [[D]] +; CHECK-NEXT: [[TMP30:%.*]] = udiv i64 99, [[D]] +; CHECK-NEXT: [[TMP31:%.*]] = insertelement <4 x i64> [[TMP28]], i64 [[TMP30]], i32 3 ; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE14]] ; CHECK: [[PRED_UDIV_CONTINUE14]]: -; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 true, i64 0, i64 [[TMP1]] -; CHECK-NEXT: [[PREDPHI15:%.*]] = select i1 true, i64 0, i64 [[TMP6]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[DST]], 
i64 [[PREDPHI]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[DST]], i64 [[PREDPHI15]] -; CHECK-NEXT: store i16 0, ptr [[TMP10]], align 2 +; CHECK-NEXT: [[TMP32:%.*]] = phi <4 x i64> [ [[TMP28]], %[[PRED_UDIV_CONTINUE12]] ], [ [[TMP31]], %[[PRED_UDIV_IF13]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i64> zeroinitializer, <4 x i64> [[TMP16]] +; CHECK-NEXT: [[PREDPHI15:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i64> zeroinitializer, <4 x i64> [[TMP32]] +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <4 x i64> [[PREDPHI]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP33]] +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <4 x i64> [[PREDPHI]], i32 1 +; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP35]] +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <4 x i64> [[PREDPHI]], i32 2 +; CHECK-NEXT: [[TMP38:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP37]] +; CHECK-NEXT: [[TMP39:%.*]] = extractelement <4 x i64> [[PREDPHI]], i32 3 +; CHECK-NEXT: [[TMP40:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP39]] +; CHECK-NEXT: [[TMP41:%.*]] = extractelement <4 x i64> [[PREDPHI15]], i32 0 +; CHECK-NEXT: [[TMP42:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP41]] +; CHECK-NEXT: [[TMP43:%.*]] = extractelement <4 x i64> [[PREDPHI15]], i32 1 +; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP43]] +; CHECK-NEXT: [[TMP45:%.*]] = extractelement <4 x i64> [[PREDPHI15]], i32 2 +; CHECK-NEXT: [[TMP46:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP45]] +; CHECK-NEXT: [[TMP47:%.*]] = extractelement <4 x i64> [[PREDPHI15]], i32 3 +; CHECK-NEXT: [[TMP48:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP47]] ; CHECK-NEXT: store i16 0, ptr [[TMP11]], align 2 +; CHECK-NEXT: store i16 0, ptr [[TMP36]], align 2 +; CHECK-NEXT: store i16 0, ptr [[TMP38]], align 2 +; CHECK-NEXT: store i16 0, ptr [[TMP40]], align 2 +; CHECK-NEXT: store i16 0, ptr [[TMP42]], align 2 +; CHECK-NEXT: store i16 0, ptr [[TMP44]], align 2 +; CHECK-NEXT: store i16 0, ptr [[TMP46]], align 2 +; CHECK-NEXT: store i16 0, ptr [[TMP48]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96 -; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: @@ -73,7 +179,7 @@ define void @replicate_udiv_with_only_first_lane_used(i32 %x, ptr %dst, i64 %d) ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: br i1 true, label %[[LOOP_LATCH]], label %[[ELSE:.*]] +; CHECK-NEXT: br i1 [[C]], label %[[LOOP_LATCH]], label %[[ELSE:.*]] ; CHECK: [[ELSE]]: ; CHECK-NEXT: [[DIV_I:%.*]] = udiv i64 99, [[D]] ; CHECK-NEXT: br label %[[LOOP_LATCH]] @@ -83,7 +189,7 @@ define void @replicate_udiv_with_only_first_lane_used(i32 %x, ptr %dst, i64 %d) ; CHECK-NEXT: store i16 0, ptr [[GEP]], align 2 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 101 -; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]], 
!llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -93,7 +199,7 @@ entry: loop.header: ; preds = %loop.latch, %entry %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] - br i1 true, label %loop.latch, label %else + br i1 %c, label %loop.latch, label %else else: %div.i = udiv i64 99, %d @@ -115,4 +221,6 @@ exit: ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} ; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} ;. diff --git a/llvm/test/Transforms/LoopVectorize/constantfolder-infer-correct-gepty.ll b/llvm/test/Transforms/LoopVectorize/constantfolder-infer-correct-gepty.ll new file mode 100644 index 0000000000000..e6eaa0edbf3bd --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/constantfolder-infer-correct-gepty.ll @@ -0,0 +1,63 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5 +; RUN: opt -passes=loop-vectorize -force-vector-width=8 -S %s | FileCheck %s + +@postscale = external constant [64 x float] + +define void @test(ptr %data) { +; CHECK-LABEL: define void @test( +; CHECK-SAME: ptr [[DATA:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr getelementptr inbounds nuw (i8, ptr @postscale, i64 4), align 4, !tbaa [[TBAA0:![0-9]+]] +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i64> @llvm.lrint.v8i64.v8f32(<8 x float> [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP3:%.*]] = trunc <8 x i64> [[TMP2]] to <8 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i16> [[TMP3]], i32 7 +; CHECK-NEXT: store i16 [[TMP4]], ptr [[DATA]], align 2, !tbaa [[TBAA4:![0-9]+]] +; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br i1 true, label %[[END:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 8, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[OR_IV_1:%.*]] = or disjoint i64 [[IV]], 1 +; CHECK-NEXT: [[GEP_POSTSCALE:%.*]] = getelementptr [64 x float], ptr @postscale, i64 0, i64 [[OR_IV_1]] +; CHECK-NEXT: [[LOAD_POSTSCALE:%.*]] = load float, ptr [[GEP_POSTSCALE]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[LRINT:%.*]] = tail call i64 @llvm.lrint.i64.f32(float [[LOAD_POSTSCALE]]) +; CHECK-NEXT: [[LRINT_TRUNC:%.*]] = trunc i64 [[LRINT]] to i16 +; CHECK-NEXT: store i16 [[LRINT_TRUNC]], ptr [[DATA]], align 2, !tbaa [[TBAA4]] +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], 8 +; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[END]], label %[[LOOP]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: [[END]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %or.iv.1 = or disjoint i64 %iv, 1 + %gep.postscale = getelementptr [64 x float], ptr @postscale, i64 0, i64 %or.iv.1 + %load.postscale = load float, ptr %gep.postscale, align 4, !tbaa !0 + %lrint = tail call i64 @llvm.lrint.i64.f32(float %load.postscale) + %lrint.trunc = 
trunc i64 %lrint to i16 + store i16 %lrint.trunc, ptr %data, align 2, !tbaa !4 + %iv.next = add i64 %iv, 1 + %exit.cond = icmp eq i64 %iv.next, 8 + br i1 %exit.cond, label %end, label %loop + +end: ; preds = %loop + ret void +} + +!0 = !{!1, !1, i64 0} +!1 = !{!"float", !2, i64 0} +!2 = !{!"omnipotent char", !3, i64 0} +!3 = !{!"Simple C/C++ TBAA"} +!4 = !{!5, !5, i64 0} +!5 = !{!"short", !2, i64 0} diff --git a/llvm/test/Transforms/LoopVectorize/constantfolder.ll b/llvm/test/Transforms/LoopVectorize/constantfolder.ll index 737f2c22da1d8..c4281360ee802 100644 --- a/llvm/test/Transforms/LoopVectorize/constantfolder.ll +++ b/llvm/test/Transforms/LoopVectorize/constantfolder.ll @@ -7,8 +7,7 @@ define void @const_fold_ptradd(ptr %dst, i64 %d) { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 true, i64 0, i64 [[D]] -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i16, ptr [[DST]], i64 [[PREDPHI]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i16, ptr [[DST]], i64 0 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -64,8 +63,7 @@ define void @const_fold_inbounds_ptradd(ptr %dst, i64 %d) { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 true, i64 0, i64 [[D]] -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[PREDPHI]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 0 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -121,14 +119,10 @@ define void @const_fold_select(ptr %dst, i64 %d) { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[D]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> splat (i1 true), <4 x i64> splat (i64 1), <4 x i64> [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP0:%.*]] = or <4 x i64> [[BROADCAST_SPLAT]], [[PREDPHI]] +; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[D]], 1 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i64> [[TMP0]], i32 0 ; CHECK-NEXT: store i64 [[TMP3]], ptr [[DST]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 @@ -181,19 +175,10 @@ define void @const_fold_add_sub_mul_ashr_lshr(ptr %dst, i64 %d) { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[D]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> splat (i1 true), <4 x i64> splat (i64 1), <4 x i64> [[BROADCAST_SPLAT]] -; CHECK-NEXT: 
[[TMP0:%.*]] = add <4 x i64> splat (i64 2), [[PREDPHI]] -; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i64> [[TMP0]], [[PREDPHI]] -; CHECK-NEXT: [[TMP2:%.*]] = ashr <4 x i64> [[TMP1]], [[PREDPHI]] -; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i64> [[TMP2]], splat (i64 3) -; CHECK-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], [[PREDPHI]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP4]], i32 0 -; CHECK-NEXT: store i64 [[TMP5]], ptr [[DST]], align 8 +; CHECK-NEXT: store i64 1, ptr [[DST]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -253,17 +238,10 @@ define void @const_fold_and_or_xor(ptr %dst, i64 %d) { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[D]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> splat (i1 true), <4 x i64> splat (i64 1), <4 x i64> [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP0:%.*]] = or <4 x i64> splat (i64 2), [[PREDPHI]] -; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i64> [[TMP0]], [[PREDPHI]] -; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i64> [[TMP1]], [[PREDPHI]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i64> [[TMP2]], i32 0 -; CHECK-NEXT: store i64 [[TMP3]], ptr [[DST]], align 8 +; CHECK-NEXT: store i64 1, ptr [[DST]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -319,16 +297,10 @@ define void @const_fold_cmp_zext(ptr %dst, i64 %d) { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[D]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> splat (i1 true), <4 x i64> splat (i64 1), <4 x i64> [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt <4 x i64> splat (i64 2), [[PREDPHI]] -; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i8> ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i8> [[TMP1]], i32 0 -; CHECK-NEXT: store i8 [[TMP2]], ptr [[DST]], align 1 +; CHECK-NEXT: store i8 1, ptr [[DST]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] @@ -382,15 +354,10 @@ define void 
@const_fold_trunc(ptr %dst, i64 %d) { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[D]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> splat (i1 true), <4 x i64> zeroinitializer, <4 x i64> [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP0:%.*]] = trunc <4 x i64> [[PREDPHI]] to <4 x i16> ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i16> [[TMP0]], i32 0 -; CHECK-NEXT: store i16 [[TMP1]], ptr [[DST]], align 2 +; CHECK-NEXT: store i16 0, ptr [[DST]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/debugloc-optimize-vfuf-term.ll b/llvm/test/Transforms/LoopVectorize/debugloc-optimize-vfuf-term.ll index 0f34f6243f155..913a13410f527 100644 --- a/llvm/test/Transforms/LoopVectorize/debugloc-optimize-vfuf-term.ll +++ b/llvm/test/Transforms/LoopVectorize/debugloc-optimize-vfuf-term.ll @@ -12,19 +12,19 @@ define i32 @foo(ptr %p) { ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: store i8 0, ptr [[P]], align 1, !dbg [[DBG3:![0-9]+]] -; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]], !dbg [[DBG7:![0-9]+]] +; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]], !dbg [[DBG8:![0-9]+]] +; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]], !dbg [[DBG7:![0-9]+]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 2, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], !dbg [[DBG9:![0-9]+]] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 2, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], !dbg [[DBG8:![0-9]+]] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], !dbg [[DBG9]] -; CHECK-NEXT: [[CONV:%.*]] = trunc i64 0 to i8, !dbg [[DBG7]] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], !dbg [[DBG8]] +; CHECK-NEXT: [[CONV:%.*]] = trunc i64 0 to i8, !dbg [[DBG9:![0-9]+]] ; CHECK-NEXT: store i8 [[CONV]], ptr [[P]], align 1, !dbg [[DBG3]] ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1, !dbg [[DBG10:![0-9]+]] ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV]], 1, !dbg [[DBG11:![0-9]+]] -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]], !dbg [[DBG8]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]], !dbg [[DBG7]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret i32 0 ; @@ -65,9 +65,9 @@ exit: ; preds = %loop ; CHECK: [[META4]] = distinct !DISubprogram(name: "foo", scope: [[META1]], file: [[META1]], line: 11, type: [[META5:![0-9]+]], spFlags: DISPFlagDefinition, unit: [[META0]], retainedNodes: [[META6:![0-9]+]]) ; CHECK: [[META5]] = distinct !DISubroutineType(types: [[META6]]) ; CHECK: [[META6]] = !{} -; CHECK: [[DBG7]] = !DILocation(line: 
5, scope: [[META4]])
-; CHECK: [[DBG8]] = !DILocation(line: 9, scope: [[META4]])
-; CHECK: [[DBG9]] = !DILocation(line: 4, scope: [[META4]])
+; CHECK: [[DBG7]] = !DILocation(line: 9, scope: [[META4]])
+; CHECK: [[DBG8]] = !DILocation(line: 4, scope: [[META4]])
+; CHECK: [[DBG9]] = !DILocation(line: 5, scope: [[META4]])
 ; CHECK: [[DBG10]] = !DILocation(line: 7, scope: [[META4]])
 ; CHECK: [[DBG11]] = !DILocation(line: 8, scope: [[META4]])
 ; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META13:![0-9]+]], [[META14:![0-9]+]]}
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
index c622315a47178..594b8ff70feb8 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
@@ -1254,11 +1254,10 @@ define i64 @constant_folded_previous_value() {
 ; UNROLL-NO-VF-NEXT: entry:
 ; UNROLL-NO-VF-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; UNROLL-NO-VF: vector.ph:
-; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add i64 0, 1
 ; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]]
 ; UNROLL-NO-VF: vector.body:
 ; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[TMP0]], [[VECTOR_BODY]] ]
+; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ 1, [[VECTOR_BODY]] ]
 ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
 ; UNROLL-NO-VF-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
@@ -1266,7 +1265,7 @@ define i64 @constant_folded_previous_value() {
 ; UNROLL-NO-VF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; UNROLL-NO-VF: scalar.ph:
 ; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[TMP0]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 1, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
 ; UNROLL-NO-VF-NEXT: br label [[SCALAR_BODY:%.*]]
 ; UNROLL-NO-VF: scalar.body:
 ; UNROLL-NO-VF-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
@@ -1276,7 +1275,7 @@ define i64 @constant_folded_previous_value() {
 ; UNROLL-NO-VF-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], 1000
 ; UNROLL-NO-VF-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
 ; UNROLL-NO-VF: for.end:
-; UNROLL-NO-VF-NEXT: [[VAR2_LCSSA:%.*]] = phi i64 [ [[VAR2]], [[SCALAR_BODY]] ], [ [[TMP0]], [[MIDDLE_BLOCK]] ]
+; UNROLL-NO-VF-NEXT: [[VAR2_LCSSA:%.*]] = phi i64 [ [[VAR2]], [[SCALAR_BODY]] ], [ 1, [[MIDDLE_BLOCK]] ]
 ; UNROLL-NO-VF-NEXT: ret i64 [[VAR2_LCSSA]]
 ;
 ; SINK-AFTER-LABEL: @constant_folded_previous_value(
diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
index bf40d269e805e..fe59b022e1dd7 100644
--- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
@@ -202,7 +202,6 @@ exit:
 ; DBG-NEXT: Successor(s): vector.ph
 ; DBG-EMPTY:
 ; DBG-NEXT: vector.ph:
-; DBG-NEXT: EMIT vp<[[CAST:%.+]]> = trunc ir<1> to i32
 ; DBG-NEXT: Successor(s): vector loop
 ; DBG-EMPTY:
 ; DBG-NEXT: vector loop: {
@@ -210,7 +209,7 @@ exit:
 ; DBG-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; DBG-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for> = phi ir<0>, vp<[[SCALAR_STEPS:.+]]>
 ; DBG-NEXT: EMIT vp<[[TRUNC_IV:%.+]]> = trunc vp<[[CAN_IV]]> to i32
-; DBG-NEXT: vp<[[SCALAR_STEPS]]> = SCALAR-STEPS vp<[[TRUNC_IV]]>, vp<[[CAST]]>, vp<[[VF]]
+; DBG-NEXT: vp<[[SCALAR_STEPS]]> = SCALAR-STEPS vp<[[TRUNC_IV]]>, ir<1>, vp<[[VF]]
 ; DBG-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%for>, vp<[[SCALAR_STEPS]]>
 ; DBG-NEXT: CLONE store vp<[[SPLICE]]>, ir<%dst>
 ; DBG-NEXT: EMIT vp<[[IV_INC:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>

From 1c28a313ebd278fa17662ab61a8d7c6b9421fb8f Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra
Date: Tue, 20 May 2025 14:14:49 +0100
Subject: [PATCH 3/3] [VPlanTransforms] Tweak comment

---
 llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index cbae568d8f149..bc007589e1539 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -939,8 +939,8 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
   }
 }
 
-/// Try to fold \p R using TargetFolder to a constant. Will succeed for a
-/// handled \p Opcode if all \p Operands are constant.
+/// Try to fold \p R using TargetFolder to a constant. Will succeed and return a
+/// non-nullptr Value for a handled \p Opcode if all \p Operands are constant.
 static Value *tryToConstantFold(const VPRecipeBase &R, unsigned Opcode,
                                 ArrayRef<VPValue *> Operands,
                                 const DataLayout &DL,