From 170cfa2c576a3c568037cb2901615b4608515aa8 Mon Sep 17 00:00:00 2001 From: csstormq Date: Thu, 6 Jun 2024 12:04:50 +0800 Subject: [PATCH 1/8] [InstCombine] Preserve the nsw flag for (X | Op01C) + Op1C --> X + (Op01C + Op1C) --- .../InstCombine/InstCombineAddSub.cpp | 13 +++++++++++-- .../InstCombine/sadd-with-overflow.ll | 2 +- .../Transforms/InstCombine/sdiv-simplify.ll | 15 +++++++++++++++ .../AArch64/matrix-extract-insert.ll | 18 +++++++++--------- 4 files changed, 36 insertions(+), 12 deletions(-) create mode 100644 llvm/test/Transforms/InstCombine/sdiv-simplify.ll diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 8205b49dfbe2f..b2c1cfcd1148c 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -905,8 +905,17 @@ Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) { // (X | Op01C) + Op1C --> X + (Op01C + Op1C) iff the `or` is actually an `add` Constant *Op01C; - if (match(Op0, m_DisjointOr(m_Value(X), m_ImmConstant(Op01C)))) - return BinaryOperator::CreateAdd(X, ConstantExpr::getAdd(Op01C, Op1C)); + if (match(Op0, m_DisjointOr(m_Value(X), m_ImmConstant(Op01C)))) { + bool HasNSW = Add.hasNoSignedWrap(); + BinaryOperator *NewAdd = + BinaryOperator::CreateAdd(X, ConstantExpr::getAdd(Op01C, Op1C)); + // Preserve the nsw flag so that there is a chance to make some other + // transformations. + // For some cases, sdiv can be converted to udiv when the newly created add + // carrying the nsw flag is one of its operands. 
+ NewAdd->setHasNoSignedWrap(HasNSW); + return NewAdd; + } // (X | C2) + C --> (X | C2) ^ C2 iff (C2 == -C) const APInt *C2; diff --git a/llvm/test/Transforms/InstCombine/sadd-with-overflow.ll b/llvm/test/Transforms/InstCombine/sadd-with-overflow.ll index 729ca03ddfd15..e4dd2d10637d3 100644 --- a/llvm/test/Transforms/InstCombine/sadd-with-overflow.ll +++ b/llvm/test/Transforms/InstCombine/sadd-with-overflow.ll @@ -125,7 +125,7 @@ define { i32, i1 } @fold_sub_simple(i32 %x) { define { i32, i1 } @fold_with_distjoin_or(i32 %x) { ; CHECK-LABEL: @fold_with_distjoin_or( -; CHECK-NEXT: [[B:%.*]] = add i32 [[X:%.*]], 6 +; CHECK-NEXT: [[B:%.*]] = add nsw i32 [[X:%.*]], 6 ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i32, i1 } { i32 poison, i1 false }, i32 [[B]], 0 ; CHECK-NEXT: ret { i32, i1 } [[TMP1]] ; diff --git a/llvm/test/Transforms/InstCombine/sdiv-simplify.ll b/llvm/test/Transforms/InstCombine/sdiv-simplify.ll new file mode 100644 index 0000000000000..91d648e9093a7 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/sdiv-simplify.ll @@ -0,0 +1,15 @@ +; RUN: opt < %s -passes=instcombine -S | FileCheck %s + +define i32 @sdiv_to_udiv(i32 %arg0, i32 %arg1) { +; CHECK-LABEL: @sdiv_to_udiv( +; CHECK-NEXT: [[T0:%.*]] = shl nuw nsw i32 [[ARG0:%.*]], 8 +; CHECK-NEXT: [[T2:%.*]] = add nuw nsw i32 [[T0:%.*]], 6242049 +; CHECK-NEXT: [[T3:%.*]] = udiv i32 [[T2]], 192 +; CHECK-NEXT: ret i32 [[T3]] +; + %t0 = shl nuw nsw i32 %arg0, 8 + %t1 = or disjoint i32 %t0, 1 + %t2 = add nuw nsw i32 %t1, 6242048 + %t3 = sdiv i32 %t2, 192 + ret i32 %t3 +} diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll index 5cbf50e06fbe8..c4cd2379a7be2 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll @@ -182,11 +182,11 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; 
CHECK: vector.body.1: ; CHECK-NEXT: [[INDEX_1:%.*]] = phi i64 [ 0, [[VECTOR_PH_1]] ], [ [[INDEX_NEXT_1:%.*]], [[VECTOR_BODY_1]] ] ; CHECK-NEXT: [[TMP33:%.*]] = add nuw nsw i64 [[INDEX_1]], 15 -; CHECK-NEXT: [[TMP34:%.*]] = add i64 [[INDEX_1]], 16 +; CHECK-NEXT: [[TMP34:%.*]] = add nsw i64 [[INDEX_1]], 16 ; CHECK-NEXT: [[TMP35:%.*]] = insertelement <2 x i64> poison, i64 [[TMP33]], i64 0 ; CHECK-NEXT: [[TMP36:%.*]] = insertelement <2 x i64> [[TMP35]], i64 [[TMP34]], i64 1 -; CHECK-NEXT: [[TMP37:%.*]] = add i64 [[INDEX_1]], 17 -; CHECK-NEXT: [[TMP38:%.*]] = add i64 [[INDEX_1]], 18 +; CHECK-NEXT: [[TMP37:%.*]] = add nsw i64 [[INDEX_1]], 17 +; CHECK-NEXT: [[TMP38:%.*]] = add nsw i64 [[INDEX_1]], 18 ; CHECK-NEXT: [[TMP39:%.*]] = insertelement <2 x i64> poison, i64 [[TMP37]], i64 0 ; CHECK-NEXT: [[TMP40:%.*]] = insertelement <2 x i64> [[TMP39]], i64 [[TMP38]], i64 1 ; CHECK-NEXT: [[TMP41:%.*]] = icmp ult <2 x i64> [[TMP36]], @@ -259,11 +259,11 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK: vector.body.2: ; CHECK-NEXT: [[INDEX_2:%.*]] = phi i64 [ 0, [[VECTOR_PH_2]] ], [ [[INDEX_NEXT_2:%.*]], [[VECTOR_BODY_2]] ] ; CHECK-NEXT: [[TMP64:%.*]] = add nuw nsw i64 [[INDEX_2]], 30 -; CHECK-NEXT: [[TMP65:%.*]] = add i64 [[INDEX_2]], 31 +; CHECK-NEXT: [[TMP65:%.*]] = add nsw i64 [[INDEX_2]], 31 ; CHECK-NEXT: [[TMP66:%.*]] = insertelement <2 x i64> poison, i64 [[TMP64]], i64 0 ; CHECK-NEXT: [[TMP67:%.*]] = insertelement <2 x i64> [[TMP66]], i64 [[TMP65]], i64 1 -; CHECK-NEXT: [[TMP68:%.*]] = add i64 [[INDEX_2]], 32 -; CHECK-NEXT: [[TMP69:%.*]] = add i64 [[INDEX_2]], 33 +; CHECK-NEXT: [[TMP68:%.*]] = add nsw i64 [[INDEX_2]], 32 +; CHECK-NEXT: [[TMP69:%.*]] = add nsw i64 [[INDEX_2]], 33 ; CHECK-NEXT: [[TMP70:%.*]] = insertelement <2 x i64> poison, i64 [[TMP68]], i64 0 ; CHECK-NEXT: [[TMP71:%.*]] = insertelement <2 x i64> [[TMP70]], i64 [[TMP69]], i64 1 ; CHECK-NEXT: [[TMP72:%.*]] = icmp ult <2 x i64> [[TMP67]], @@ -336,11 +336,11 @@ define 
void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK: vector.body.3: ; CHECK-NEXT: [[INDEX_3:%.*]] = phi i64 [ 0, [[VECTOR_PH_3]] ], [ [[INDEX_NEXT_3:%.*]], [[VECTOR_BODY_3]] ] ; CHECK-NEXT: [[TMP95:%.*]] = add nuw nsw i64 [[INDEX_3]], 45 -; CHECK-NEXT: [[TMP96:%.*]] = add i64 [[INDEX_3]], 46 +; CHECK-NEXT: [[TMP96:%.*]] = add nsw i64 [[INDEX_3]], 46 ; CHECK-NEXT: [[TMP97:%.*]] = insertelement <2 x i64> poison, i64 [[TMP95]], i64 0 ; CHECK-NEXT: [[TMP98:%.*]] = insertelement <2 x i64> [[TMP97]], i64 [[TMP96]], i64 1 -; CHECK-NEXT: [[TMP99:%.*]] = add i64 [[INDEX_3]], 47 -; CHECK-NEXT: [[TMP100:%.*]] = add i64 [[INDEX_3]], 48 +; CHECK-NEXT: [[TMP99:%.*]] = add nsw i64 [[INDEX_3]], 47 +; CHECK-NEXT: [[TMP100:%.*]] = add nsw i64 [[INDEX_3]], 48 ; CHECK-NEXT: [[TMP101:%.*]] = insertelement <2 x i64> poison, i64 [[TMP99]], i64 0 ; CHECK-NEXT: [[TMP102:%.*]] = insertelement <2 x i64> [[TMP101]], i64 [[TMP100]], i64 1 ; CHECK-NEXT: [[TMP103:%.*]] = icmp ult <2 x i64> [[TMP98]], From 8b6eca178655285d688fcaae3f496b0bf700beac Mon Sep 17 00:00:00 2001 From: csstormq Date: Thu, 6 Jun 2024 17:03:08 +0800 Subject: [PATCH 2/8] update test case using the update_test_checks.py script --- llvm/test/Transforms/InstCombine/sdiv-simplify.ll | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/sdiv-simplify.ll b/llvm/test/Transforms/InstCombine/sdiv-simplify.ll index 91d648e9093a7..cef999a398caf 100644 --- a/llvm/test/Transforms/InstCombine/sdiv-simplify.ll +++ b/llvm/test/Transforms/InstCombine/sdiv-simplify.ll @@ -1,9 +1,11 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt < %s -passes=instcombine -S | FileCheck %s define i32 @sdiv_to_udiv(i32 %arg0, i32 %arg1) { -; CHECK-LABEL: @sdiv_to_udiv( -; CHECK-NEXT: [[T0:%.*]] = shl nuw nsw i32 [[ARG0:%.*]], 8 -; CHECK-NEXT: [[T2:%.*]] = add nuw nsw i32 [[T0:%.*]], 6242049 +; CHECK-LABEL: define i32 
@sdiv_to_udiv( +; CHECK-SAME: i32 [[ARG0:%.*]], i32 [[ARG1:%.*]]) { +; CHECK-NEXT: [[T0:%.*]] = shl nuw nsw i32 [[ARG0]], 8 +; CHECK-NEXT: [[T2:%.*]] = add nuw nsw i32 [[T0]], 6242049 ; CHECK-NEXT: [[T3:%.*]] = udiv i32 [[T2]], 192 ; CHECK-NEXT: ret i32 [[T3]] ; From 3cf7a70ee6a0cf9cb40e5a762be272575196529c Mon Sep 17 00:00:00 2001 From: csstormq Date: Fri, 7 Jun 2024 11:47:06 +0800 Subject: [PATCH 3/8] fixup! Preserve the nsw flag iff the sum of Op01C and Op1C will not overflow --- llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index b2c1cfcd1148c..9b86c4cefac06 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -906,14 +906,14 @@ Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) { // (X | Op01C) + Op1C --> X + (Op01C + Op1C) iff the `or` is actually an `add` Constant *Op01C; if (match(Op0, m_DisjointOr(m_Value(X), m_ImmConstant(Op01C)))) { - bool HasNSW = Add.hasNoSignedWrap(); BinaryOperator *NewAdd = BinaryOperator::CreateAdd(X, ConstantExpr::getAdd(Op01C, Op1C)); - // Preserve the nsw flag so that there is a chance to make some other - // transformations. + // Preserve the nsw flag iff the sum of Op01C and Op1C will not overflow + // so that there is a chance to make some other transformations. // For some cases, sdiv can be converted to udiv when the newly created add // carrying the nsw flag is one of its operands. 
- NewAdd->setHasNoSignedWrap(HasNSW); + if (willNotOverflowSignedAdd(Op01C, Op1C, Add)) + NewAdd->setHasNoSignedWrap(Add.hasNoSignedWrap()); return NewAdd; } From 40037ff74201085468daf3be7f5566533e3b0c89 Mon Sep 17 00:00:00 2001 From: csstormq Date: Fri, 7 Jun 2024 15:31:52 +0800 Subject: [PATCH 4/8] Add more test cases --- llvm/test/Transforms/InstCombine/add.ll | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/add.ll b/llvm/test/Transforms/InstCombine/add.ll index 25087fef68a11..e3a2730784824 100644 --- a/llvm/test/Transforms/InstCombine/add.ll +++ b/llvm/test/Transforms/InstCombine/add.ll @@ -4137,5 +4137,25 @@ define i32 @fold_zext_nneg_add_const_fail2(i8 %x) { ret i32 %r } +define i8 @fold_add_constant_preserve_nsw(i8 %x) { +; CHECK-LABEL: @fold_add_constant_preserve_nsw( +; CHECK-NEXT: [[ADD:%.*]] = add nsw i8 [[X:%.*]], -120 +; CHECK-NEXT: ret i8 [[ADD]] +; + %or = or disjoint i8 %x, -128 + %add = add nsw i8 %or, 8 + ret i8 %add +} + +define i8 @fold_add_constant_no_nsw(i8 %x) { +; CHECK-LABEL: @fold_add_constant_no_nsw( +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X:%.*]], 120 +; CHECK-NEXT: ret i8 [[ADD]] +; + %or = or disjoint i8 %x, -128 + %add = add nsw i8 %or, -8 + ret i8 %add +} + declare void @llvm.assume(i1) declare void @fake_func(i32) From d2f4beea4371aab37f93f7d7b73f6ef35b893740 Mon Sep 17 00:00:00 2001 From: csstormq Date: Fri, 7 Jun 2024 17:33:23 +0800 Subject: [PATCH 5/8] Resolve conversations 1. Preserve the nuw flag also 2. Remove unnecessary comment 3. 
Move test cases to a better place --- .../InstCombine/InstCombineAddSub.cpp | 5 +- llvm/test/Transforms/InstCombine/add.ll | 52 ++++++++++++------- .../Transforms/InstCombine/sdiv-simplify.ll | 17 ------ .../AArch64/matrix-extract-insert.ll | 19 +++---- 4 files changed, 43 insertions(+), 50 deletions(-) delete mode 100644 llvm/test/Transforms/InstCombine/sdiv-simplify.ll diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 9b86c4cefac06..868b1c4d1d206 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -908,12 +908,9 @@ Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) { if (match(Op0, m_DisjointOr(m_Value(X), m_ImmConstant(Op01C)))) { BinaryOperator *NewAdd = BinaryOperator::CreateAdd(X, ConstantExpr::getAdd(Op01C, Op1C)); - // Preserve the nsw flag iff the sum of Op01C and Op1C will not overflow - // so that there is a chance to make some other transformations. - // For some cases, sdiv can be converted to udiv when the newly created add - // carrying the nsw flag is one of its operands. 
if (willNotOverflowSignedAdd(Op01C, Op1C, Add)) NewAdd->setHasNoSignedWrap(Add.hasNoSignedWrap()); + NewAdd->setHasNoUnsignedWrap(Add.hasNoUnsignedWrap()); return NewAdd; } diff --git a/llvm/test/Transforms/InstCombine/add.ll b/llvm/test/Transforms/InstCombine/add.ll index e3a2730784824..8779c45147b0d 100644 --- a/llvm/test/Transforms/InstCombine/add.ll +++ b/llvm/test/Transforms/InstCombine/add.ll @@ -1510,6 +1510,37 @@ define i8 @add_like_or_t2_extrause(i8 %x) { %r = add i8 %i1, 42 ret i8 %r } +define i8 @fold_add_constant_preserve_nsw(i8 %x) { +; CHECK-LABEL: @fold_add_constant_preserve_nsw( +; CHECK-NEXT: [[ADD:%.*]] = add nsw i8 [[X:%.*]], -120 +; CHECK-NEXT: ret i8 [[ADD]] +; + %or = or disjoint i8 %x, -128 + %add = add nsw i8 %or, 8 + ret i8 %add +} +define i8 @fold_add_constant_no_nsw(i8 %x) { +; CHECK-LABEL: @fold_add_constant_no_nsw( +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X:%.*]], 120 +; CHECK-NEXT: ret i8 [[ADD]] +; + %or = or disjoint i8 %x, -128 + %add = add nsw i8 %or, -8 + ret i8 %add +} +define i32 @sdiv_to_udiv(i32 %arg0, i32 %arg1) { +; CHECK-LABEL: @sdiv_to_udiv( +; CHECK-NEXT: [[T0:%.*]] = shl nuw nsw i32 [[ARG0:%.*]], 8 +; CHECK-NEXT: [[T2:%.*]] = add nuw nsw i32 [[T0]], 6242049 +; CHECK-NEXT: [[T3:%.*]] = udiv i32 [[T2]], 192 +; CHECK-NEXT: ret i32 [[T3]] +; + %t0 = shl nuw nsw i32 %arg0, 8 + %t1 = or disjoint i32 %t0, 1 + %t2 = add nuw nsw i32 %t1, 6242048 + %t3 = sdiv i32 %t2, 192 + ret i32 %t3 +} define i8 @add_like_or_disjoint(i8 %x) { ; CHECK-LABEL: @add_like_or_disjoint( @@ -4137,25 +4168,6 @@ define i32 @fold_zext_nneg_add_const_fail2(i8 %x) { ret i32 %r } -define i8 @fold_add_constant_preserve_nsw(i8 %x) { -; CHECK-LABEL: @fold_add_constant_preserve_nsw( -; CHECK-NEXT: [[ADD:%.*]] = add nsw i8 [[X:%.*]], -120 -; CHECK-NEXT: ret i8 [[ADD]] -; - %or = or disjoint i8 %x, -128 - %add = add nsw i8 %or, 8 - ret i8 %add -} - -define i8 @fold_add_constant_no_nsw(i8 %x) { -; CHECK-LABEL: @fold_add_constant_no_nsw( -; CHECK-NEXT: [[ADD:%.*]] = 
add i8 [[X:%.*]], 120 -; CHECK-NEXT: ret i8 [[ADD]] -; - %or = or disjoint i8 %x, -128 - %add = add nsw i8 %or, -8 - ret i8 %add -} - declare void @llvm.assume(i1) declare void @fake_func(i32) + diff --git a/llvm/test/Transforms/InstCombine/sdiv-simplify.ll b/llvm/test/Transforms/InstCombine/sdiv-simplify.ll deleted file mode 100644 index cef999a398caf..0000000000000 --- a/llvm/test/Transforms/InstCombine/sdiv-simplify.ll +++ /dev/null @@ -1,17 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt < %s -passes=instcombine -S | FileCheck %s - -define i32 @sdiv_to_udiv(i32 %arg0, i32 %arg1) { -; CHECK-LABEL: define i32 @sdiv_to_udiv( -; CHECK-SAME: i32 [[ARG0:%.*]], i32 [[ARG1:%.*]]) { -; CHECK-NEXT: [[T0:%.*]] = shl nuw nsw i32 [[ARG0]], 8 -; CHECK-NEXT: [[T2:%.*]] = add nuw nsw i32 [[T0]], 6242049 -; CHECK-NEXT: [[T3:%.*]] = udiv i32 [[T2]], 192 -; CHECK-NEXT: ret i32 [[T3]] -; - %t0 = shl nuw nsw i32 %arg0, 8 - %t1 = or disjoint i32 %t0, 1 - %t2 = add nuw nsw i32 %t1, 6242048 - %t3 = sdiv i32 %t2, 192 - ret i32 %t3 -} diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll index c4cd2379a7be2..956d7ebf9aff0 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll @@ -182,11 +182,11 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK: vector.body.1: ; CHECK-NEXT: [[INDEX_1:%.*]] = phi i64 [ 0, [[VECTOR_PH_1]] ], [ [[INDEX_NEXT_1:%.*]], [[VECTOR_BODY_1]] ] ; CHECK-NEXT: [[TMP33:%.*]] = add nuw nsw i64 [[INDEX_1]], 15 -; CHECK-NEXT: [[TMP34:%.*]] = add nsw i64 [[INDEX_1]], 16 +; CHECK-NEXT: [[TMP34:%.*]] = add nuw nsw i64 [[INDEX_1]], 16 ; CHECK-NEXT: [[TMP35:%.*]] = insertelement <2 x i64> poison, i64 [[TMP33]], i64 0 ; CHECK-NEXT: [[TMP36:%.*]] = insertelement <2 x i64> 
[[TMP35]], i64 [[TMP34]], i64 1 -; CHECK-NEXT: [[TMP37:%.*]] = add nsw i64 [[INDEX_1]], 17 -; CHECK-NEXT: [[TMP38:%.*]] = add nsw i64 [[INDEX_1]], 18 +; CHECK-NEXT: [[TMP37:%.*]] = add nuw nsw i64 [[INDEX_1]], 17 +; CHECK-NEXT: [[TMP38:%.*]] = add nuw nsw i64 [[INDEX_1]], 18 ; CHECK-NEXT: [[TMP39:%.*]] = insertelement <2 x i64> poison, i64 [[TMP37]], i64 0 ; CHECK-NEXT: [[TMP40:%.*]] = insertelement <2 x i64> [[TMP39]], i64 [[TMP38]], i64 1 ; CHECK-NEXT: [[TMP41:%.*]] = icmp ult <2 x i64> [[TMP36]], @@ -259,11 +259,11 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK: vector.body.2: ; CHECK-NEXT: [[INDEX_2:%.*]] = phi i64 [ 0, [[VECTOR_PH_2]] ], [ [[INDEX_NEXT_2:%.*]], [[VECTOR_BODY_2]] ] ; CHECK-NEXT: [[TMP64:%.*]] = add nuw nsw i64 [[INDEX_2]], 30 -; CHECK-NEXT: [[TMP65:%.*]] = add nsw i64 [[INDEX_2]], 31 +; CHECK-NEXT: [[TMP65:%.*]] = add nuw nsw i64 [[INDEX_2]], 31 ; CHECK-NEXT: [[TMP66:%.*]] = insertelement <2 x i64> poison, i64 [[TMP64]], i64 0 ; CHECK-NEXT: [[TMP67:%.*]] = insertelement <2 x i64> [[TMP66]], i64 [[TMP65]], i64 1 -; CHECK-NEXT: [[TMP68:%.*]] = add nsw i64 [[INDEX_2]], 32 -; CHECK-NEXT: [[TMP69:%.*]] = add nsw i64 [[INDEX_2]], 33 +; CHECK-NEXT: [[TMP68:%.*]] = add nuw nsw i64 [[INDEX_2]], 32 +; CHECK-NEXT: [[TMP69:%.*]] = add nuw nsw i64 [[INDEX_2]], 33 ; CHECK-NEXT: [[TMP70:%.*]] = insertelement <2 x i64> poison, i64 [[TMP68]], i64 0 ; CHECK-NEXT: [[TMP71:%.*]] = insertelement <2 x i64> [[TMP70]], i64 [[TMP69]], i64 1 ; CHECK-NEXT: [[TMP72:%.*]] = icmp ult <2 x i64> [[TMP67]], @@ -336,11 +336,11 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK: vector.body.3: ; CHECK-NEXT: [[INDEX_3:%.*]] = phi i64 [ 0, [[VECTOR_PH_3]] ], [ [[INDEX_NEXT_3:%.*]], [[VECTOR_BODY_3]] ] ; CHECK-NEXT: [[TMP95:%.*]] = add nuw nsw i64 [[INDEX_3]], 45 -; CHECK-NEXT: [[TMP96:%.*]] = add nsw i64 [[INDEX_3]], 46 +; CHECK-NEXT: [[TMP96:%.*]] = add nuw nsw i64 [[INDEX_3]], 46 ; CHECK-NEXT: 
[[TMP97:%.*]] = insertelement <2 x i64> poison, i64 [[TMP95]], i64 0 ; CHECK-NEXT: [[TMP98:%.*]] = insertelement <2 x i64> [[TMP97]], i64 [[TMP96]], i64 1 -; CHECK-NEXT: [[TMP99:%.*]] = add nsw i64 [[INDEX_3]], 47 -; CHECK-NEXT: [[TMP100:%.*]] = add nsw i64 [[INDEX_3]], 48 +; CHECK-NEXT: [[TMP99:%.*]] = add nuw nsw i64 [[INDEX_3]], 47 +; CHECK-NEXT: [[TMP100:%.*]] = add nuw nsw i64 [[INDEX_3]], 48 ; CHECK-NEXT: [[TMP101:%.*]] = insertelement <2 x i64> poison, i64 [[TMP99]], i64 0 ; CHECK-NEXT: [[TMP102:%.*]] = insertelement <2 x i64> [[TMP101]], i64 [[TMP100]], i64 1 ; CHECK-NEXT: [[TMP103:%.*]] = icmp ult <2 x i64> [[TMP98]], @@ -537,3 +537,4 @@ define <4 x float> @reverse_hadd_v4f32(<4 x float> %a, <4 x float> %b) { %shuffle = shufflevector <4 x float> %vecinit13, <4 x float> %a, <4 x i32> ret <4 x float> %shuffle } + From 8997ab058ddb7694623693d7164a3eae67550d5e Mon Sep 17 00:00:00 2001 From: csstormq Date: Fri, 7 Jun 2024 17:41:58 +0800 Subject: [PATCH 6/8] Remove added new lines --- llvm/test/Transforms/InstCombine/add.ll | 1 - .../Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll | 1 - 2 files changed, 2 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/add.ll b/llvm/test/Transforms/InstCombine/add.ll index 8779c45147b0d..c85b516a53407 100644 --- a/llvm/test/Transforms/InstCombine/add.ll +++ b/llvm/test/Transforms/InstCombine/add.ll @@ -4170,4 +4170,3 @@ define i32 @fold_zext_nneg_add_const_fail2(i8 %x) { declare void @llvm.assume(i1) declare void @fake_func(i32) - diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll index 956d7ebf9aff0..db16413cdc94a 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll @@ -537,4 +537,3 @@ define <4 x float> @reverse_hadd_v4f32(<4 x float> %a, <4 x float> %b) { %shuffle = shufflevector <4 x float> %vecinit13, <4 
x float> %a, <4 x i32> ret <4 x float> %shuffle } - From 045f3bad871ba6888011f8f1b93818a4542c461f Mon Sep 17 00:00:00 2001 From: csstormq Date: Fri, 7 Jun 2024 18:34:03 +0800 Subject: [PATCH 7/8] Resolve conversations 1. Save the overflow check if not needed 2. Add preserve nuw test case --- llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp | 4 ++-- llvm/test/Transforms/InstCombine/add.ll | 9 +++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 868b1c4d1d206..0a73c58c07409 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -908,8 +908,8 @@ Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) { if (match(Op0, m_DisjointOr(m_Value(X), m_ImmConstant(Op01C)))) { BinaryOperator *NewAdd = BinaryOperator::CreateAdd(X, ConstantExpr::getAdd(Op01C, Op1C)); - if (willNotOverflowSignedAdd(Op01C, Op1C, Add)) - NewAdd->setHasNoSignedWrap(Add.hasNoSignedWrap()); + NewAdd->setHasNoSignedWrap(Add.hasNoSignedWrap() && + willNotOverflowSignedAdd(Op01C, Op1C, Add)); NewAdd->setHasNoUnsignedWrap(Add.hasNoUnsignedWrap()); return NewAdd; } diff --git a/llvm/test/Transforms/InstCombine/add.ll b/llvm/test/Transforms/InstCombine/add.ll index c85b516a53407..dd0a341dfe70c 100644 --- a/llvm/test/Transforms/InstCombine/add.ll +++ b/llvm/test/Transforms/InstCombine/add.ll @@ -1528,6 +1528,15 @@ define i8 @fold_add_constant_no_nsw(i8 %x) { %add = add nsw i8 %or, -8 ret i8 %add } +define i8 @fold_add_constant_preserve_nuw(i8 %x) { +; CHECK-LABEL: @fold_add_constant_preserve_nuw( +; CHECK-NEXT: [[ADD:%.*]] = add nuw i8 [[X:%.*]], 1 +; CHECK-NEXT: ret i8 [[ADD]] +; + %or = or disjoint i8 %x, 128 + %add = add nuw i8 %or, 129 + ret i8 %add +} define i32 @sdiv_to_udiv(i32 %arg0, i32 %arg1) { ; CHECK-LABEL: @sdiv_to_udiv( ; CHECK-NEXT: [[T0:%.*]] = shl nuw nsw i32 
[[ARG0:%.*]], 8 From 1050bde9735f7ab70863cae785e9d5368b490534 Mon Sep 17 00:00:00 2001 From: csstormq Date: Fri, 7 Jun 2024 18:51:32 +0800 Subject: [PATCH 8/8] Fix test to not always return poison --- llvm/test/Transforms/InstCombine/add.ll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/add.ll b/llvm/test/Transforms/InstCombine/add.ll index dd0a341dfe70c..239e14682c306 100644 --- a/llvm/test/Transforms/InstCombine/add.ll +++ b/llvm/test/Transforms/InstCombine/add.ll @@ -1530,11 +1530,11 @@ define i8 @fold_add_constant_no_nsw(i8 %x) { } define i8 @fold_add_constant_preserve_nuw(i8 %x) { ; CHECK-LABEL: @fold_add_constant_preserve_nuw( -; CHECK-NEXT: [[ADD:%.*]] = add nuw i8 [[X:%.*]], 1 +; CHECK-NEXT: [[ADD:%.*]] = add nuw i8 [[X:%.*]], -116 ; CHECK-NEXT: ret i8 [[ADD]] ; %or = or disjoint i8 %x, 128 - %add = add nuw i8 %or, 129 + %add = add nuw i8 %or, 12 ret i8 %add } define i32 @sdiv_to_udiv(i32 %arg0, i32 %arg1) {