diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 17f4b396f753b..42d8a7854039d 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -3506,6 +3506,110 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
   return true;
 }
 
+/// Speculate a conditional basic block flattening the CFG.
+/// Compared to speculativelyExecuteBB, it allows \p ThenBB to have multiple
+/// predecessors other than the current BB. An illustration of this transform is
+/// turning this IR:
+/// \code
+/// BB:
+///   %cmp = icmp ult %x, %y
+///   br i1 %cmp, label %EndBB, label %ThenBB
+/// ThenBB:
+///   br label %EndBB
+/// EndBB:
+///   %phi = phi i1 [ true, %ThenBB ], [ false, %BB ], [ false, %OtherBB ]
+///   ...
+/// \endcode
+///
+/// Into this IR:
+/// \code
+/// BB:
+///   %cmp = icmp ult %x, %y
+///   %sel = select i1 %cmp, i1 false, i1 true
+///   br label %EndBB
+/// ThenBB:
+///   br label %EndBB
+/// EndBB:
+///   %phi = phi i1 [ true, %ThenBB ], [ %sel, %BB ], [ false, %OtherBB ]
+///   ...
+/// \endcode
+///
+/// \param BI Conditional branch terminating BB.
+/// \param Invert Selects which successor of \p BI plays the role of ThenBB:
+/// successor 0 when false, successor 1 when true; the other one is EndBB.
+/// \param DTU Optional dominator tree updater; when non-null it is notified
+/// that the BB -> ThenBB edge was deleted.
+/// \param TTI Target information passed to the profitability and select-cost
+/// checks.
+/// \returns true if the branch edge is removed.
+static bool speculativelyExecuteEmptyBB(BranchInst *BI, bool Invert,
+                                        DomTreeUpdater *DTU,
+                                        const TargetTransformInfo &TTI) {
+  BasicBlock *BB = BI->getParent();
+  BasicBlock *ThenBB = BI->getSuccessor(Invert);
+  BasicBlock *EndBB = BI->getSuccessor(!Invert);
+
+  // ThenBB must consist solely of an unconditional branch to EndBB.
+  BranchInst *SuccBI = dyn_cast<BranchInst>(ThenBB->getTerminator());
+  if (!SuccBI || !SuccBI->isUnconditional() || SuccBI->getSuccessor(0) != EndBB)
+    return false;
+  if (&ThenBB->front() != SuccBI)
+    return false;
+  if (!isProfitableToSpeculate(BI, Invert, TTI))
+    return false;
+
+  // Give up if the required selects fail validation or exceed the budget
+  // derived from PHINodeFoldingThreshold.
+  InstructionCost Budget =
+      PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
+  InstructionCost Cost = 0;
+  unsigned SpeculatedInstructions = 0;
+  if (!validateAndCostRequiredSelects(BB, ThenBB, EndBB, SpeculatedInstructions,
+                                      Cost, TTI) ||
+      Cost > Budget)
+    return false;
+
+  LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
+
+  // Insert selects and rewrite the PHI operands.
+  Value *BrCond = BI->getCondition();
+  // NOTE(review): default builder template arguments; confirm folder policy.
+  IRBuilder<> Builder(BI);
+  for (PHINode &PN : EndBB->phis()) {
+    unsigned OrigI = PN.getBasicBlockIndex(BB);
+    unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
+    Value *OrigV = PN.getIncomingValue(OrigI);
+    Value *ThenV = PN.getIncomingValue(ThenI);
+
+    // Skip PHIs which are trivial.
+    if (OrigV == ThenV)
+      continue;
+
+    // Create a select whose true value is the speculatively executed value and
+    // false value is the pre-existing value. Swap them if the branch
+    // destinations were inverted.
+    Value *TrueV = ThenV, *FalseV = OrigV;
+    if (Invert)
+      std::swap(TrueV, FalseV);
+    // Passing BI as MDFrom lets the select inherit the branch's metadata.
+    Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
+    PN.setIncomingValue(OrigI, V);
+  }
+
+  // Modify CFG: drop the BB -> ThenBB edge and jump straight to EndBB.
+  ThenBB->removePredecessor(BB);
+  BranchInst *NewBI = Builder.CreateBr(EndBB);
+  // Transfer the metadata to the new branch instruction.
+  NewBI->copyMetadata(*BI, {LLVMContext::MD_loop, LLVMContext::MD_dbg,
+                            LLVMContext::MD_annotation});
+  BI->eraseFromParent();
+  if (DTU)
+    DTU->applyUpdates({{DominatorTree::Delete, BB, ThenBB}});
+
+  ++NumSpeculations;
+  return true;
+}
+
 /// Return true if we can thread a branch across this block.
 static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
   int Size = 0;
@@ -8125,6 +8229,13 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
     return requestResimplify();
   }
 
+  if (Options.SpeculateBlocks) {
+    if (speculativelyExecuteEmptyBB(BI, /*Invert=*/false, DTU, TTI))
+      return true;
+    if (speculativelyExecuteEmptyBB(BI, /*Invert=*/true, DTU, TTI))
+      return true;
+  }
+
   // If this is a branch on something for which we know the constant value in
   // predecessors (e.g. a phi node in the current block), thread control
   // through this block.
diff --git a/llvm/test/CodeGen/AArch64/and-sink.ll b/llvm/test/CodeGen/AArch64/and-sink.ll
index c84310629e5fd..277091a65e862 100644
--- a/llvm/test/CodeGen/AArch64/and-sink.ll
+++ b/llvm/test/CodeGen/AArch64/and-sink.ll
@@ -11,15 +11,14 @@ define dso_local i32 @and_sink1(i32 %a, i1 %c) {
 ; CHECK-LABEL: and_sink1:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: tbz w1, #0, .LBB0_3
+; CHECK-NEXT: tbz w1, #0, .LBB0_2
 ; CHECK-NEXT: // %bb.1: // %bb0
+; CHECK-NEXT: tst w0, #0x4
 ; CHECK-NEXT: adrp x8, A
+; CHECK-NEXT: cset w0, eq
 ; CHECK-NEXT: str wzr, [x8, :lo12:A]
-; CHECK-NEXT: tbnz w0, #2, .LBB0_3
-; CHECK-NEXT: // %bb.2:
-; CHECK-NEXT: mov w0, #1 // =0x1
 ; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB0_3: // %bb2
+; CHECK-NEXT: .LBB0_2:
 ; CHECK-NEXT: mov w0, wzr
 ; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll b/llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll
index 3645718968f9e..c1932f055a4f7 100644
--- a/llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll
+++ b/llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll
@@ -1,5 +1,5 @@
 ; NOTE: 
Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -mtriple=aarch64 | FileCheck %s +; RUN: llc < %s -mtriple=aarch64 -phi-node-folding-threshold=0 | FileCheck %s ; When consuming profile data we sometimes flip a branch to improve runtime ; performance. If we are optimizing for size, we avoid changing the branch to diff --git a/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll b/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll index 6449c3e11d667..dc23ae0af3157 100644 --- a/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll +++ b/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll @@ -13,10 +13,10 @@ define i32 @combine_gt_ge_10() #0 { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, :got:a ; CHECK-NEXT: ldr x8, [x8, :got_lo12:a] -; CHECK-NEXT: ldr w8, [x8] -; CHECK-NEXT: cmp w8, #10 +; CHECK-NEXT: ldr w9, [x8] ; CHECK-NEXT: adrp x8, :got:b ; CHECK-NEXT: ldr x8, [x8, :got_lo12:b] +; CHECK-NEXT: cmp w9, #10 ; CHECK-NEXT: b.le .LBB0_3 ; CHECK-NEXT: // %bb.1: // %land.lhs.true ; CHECK-NEXT: adrp x9, :got:c @@ -29,18 +29,17 @@ define i32 @combine_gt_ge_10() #0 { ; CHECK-NEXT: mov w0, #1 // =0x1 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB0_3: // %lor.lhs.false -; CHECK-NEXT: b.lt .LBB0_6 +; CHECK-NEXT: cmp w9, #10 +; CHECK-NEXT: b.lt .LBB0_5 ; CHECK-NEXT: .LBB0_4: // %land.lhs.true3 ; CHECK-NEXT: adrp x9, :got:d ; CHECK-NEXT: ldr x9, [x9, :got_lo12:d] ; CHECK-NEXT: ldr w8, [x8] ; CHECK-NEXT: ldr w9, [x9] ; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: b.ne .LBB0_6 -; CHECK-NEXT: // %bb.5: -; CHECK-NEXT: mov w0, #1 // =0x1 +; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB0_6: // %if.end +; CHECK-NEXT: .LBB0_5: ; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret entry: @@ -80,34 +79,27 @@ define i32 @combine_gt_lt_5() #0 { ; CHECK-NEXT: ldr x8, [x8, :got_lo12:a] ; CHECK-NEXT: ldr w8, [x8] ; CHECK-NEXT: cmp w8, #5 -; CHECK-NEXT: b.le .LBB1_3 +; CHECK-NEXT: b.le .LBB1_2 ; CHECK-NEXT: // %bb.1: // 
%land.lhs.true ; CHECK-NEXT: adrp x8, :got:b ; CHECK-NEXT: adrp x9, :got:c ; CHECK-NEXT: ldr x8, [x8, :got_lo12:b] ; CHECK-NEXT: ldr x9, [x9, :got_lo12:c] -; CHECK-NEXT: ldr w8, [x8] -; CHECK-NEXT: ldr w9, [x9] -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: b.ne .LBB1_6 -; CHECK-NEXT: // %bb.2: -; CHECK-NEXT: mov w0, #1 // =0x1 -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB1_3: // %lor.lhs.false -; CHECK-NEXT: b.ge .LBB1_6 -; CHECK-NEXT: // %bb.4: // %land.lhs.true3 +; CHECK-NEXT: b .LBB1_4 +; CHECK-NEXT: .LBB1_2: // %lor.lhs.false +; CHECK-NEXT: b.ge .LBB1_5 +; CHECK-NEXT: // %bb.3: // %land.lhs.true3 ; CHECK-NEXT: adrp x8, :got:b ; CHECK-NEXT: adrp x9, :got:d ; CHECK-NEXT: ldr x8, [x8, :got_lo12:b] ; CHECK-NEXT: ldr x9, [x9, :got_lo12:d] +; CHECK-NEXT: .LBB1_4: // %return ; CHECK-NEXT: ldr w8, [x8] ; CHECK-NEXT: ldr w9, [x9] ; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: b.ne .LBB1_6 -; CHECK-NEXT: // %bb.5: -; CHECK-NEXT: mov w0, #1 // =0x1 +; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB1_6: // %if.end +; CHECK-NEXT: .LBB1_5: ; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret entry: @@ -145,10 +137,10 @@ define i32 @combine_lt_ge_5() #0 { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, :got:a ; CHECK-NEXT: ldr x8, [x8, :got_lo12:a] -; CHECK-NEXT: ldr w8, [x8] -; CHECK-NEXT: cmp w8, #5 +; CHECK-NEXT: ldr w9, [x8] ; CHECK-NEXT: adrp x8, :got:b ; CHECK-NEXT: ldr x8, [x8, :got_lo12:b] +; CHECK-NEXT: cmp w9, #5 ; CHECK-NEXT: b.ge .LBB2_3 ; CHECK-NEXT: // %bb.1: // %land.lhs.true ; CHECK-NEXT: adrp x9, :got:c @@ -161,18 +153,17 @@ define i32 @combine_lt_ge_5() #0 { ; CHECK-NEXT: mov w0, #1 // =0x1 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB2_3: // %lor.lhs.false -; CHECK-NEXT: b.gt .LBB2_6 +; CHECK-NEXT: cmp w9, #5 +; CHECK-NEXT: b.gt .LBB2_5 ; CHECK-NEXT: .LBB2_4: // %land.lhs.true3 ; CHECK-NEXT: adrp x9, :got:d ; CHECK-NEXT: ldr x9, [x9, :got_lo12:d] ; CHECK-NEXT: ldr w8, [x8] ; CHECK-NEXT: ldr w9, [x9] ; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: b.ne .LBB2_6 -; CHECK-NEXT: // %bb.5: 
-; CHECK-NEXT: mov w0, #1 // =0x1 +; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB2_6: // %if.end +; CHECK-NEXT: .LBB2_5: ; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret entry: @@ -212,34 +203,27 @@ define i32 @combine_lt_gt_5() #0 { ; CHECK-NEXT: ldr x8, [x8, :got_lo12:a] ; CHECK-NEXT: ldr w8, [x8] ; CHECK-NEXT: cmp w8, #5 -; CHECK-NEXT: b.ge .LBB3_3 +; CHECK-NEXT: b.ge .LBB3_2 ; CHECK-NEXT: // %bb.1: // %land.lhs.true ; CHECK-NEXT: adrp x8, :got:b ; CHECK-NEXT: adrp x9, :got:c ; CHECK-NEXT: ldr x8, [x8, :got_lo12:b] ; CHECK-NEXT: ldr x9, [x9, :got_lo12:c] -; CHECK-NEXT: ldr w8, [x8] -; CHECK-NEXT: ldr w9, [x9] -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: b.ne .LBB3_6 -; CHECK-NEXT: // %bb.2: -; CHECK-NEXT: mov w0, #1 // =0x1 -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB3_3: // %lor.lhs.false -; CHECK-NEXT: b.le .LBB3_6 -; CHECK-NEXT: // %bb.4: // %land.lhs.true3 +; CHECK-NEXT: b .LBB3_4 +; CHECK-NEXT: .LBB3_2: // %lor.lhs.false +; CHECK-NEXT: b.le .LBB3_5 +; CHECK-NEXT: // %bb.3: // %land.lhs.true3 ; CHECK-NEXT: adrp x8, :got:b ; CHECK-NEXT: adrp x9, :got:d ; CHECK-NEXT: ldr x8, [x8, :got_lo12:b] ; CHECK-NEXT: ldr x9, [x9, :got_lo12:d] +; CHECK-NEXT: .LBB3_4: // %return ; CHECK-NEXT: ldr w8, [x8] ; CHECK-NEXT: ldr w9, [x9] ; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: b.ne .LBB3_6 -; CHECK-NEXT: // %bb.5: -; CHECK-NEXT: mov w0, #1 // =0x1 +; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB3_6: // %if.end +; CHECK-NEXT: .LBB3_5: ; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret entry: @@ -279,34 +263,27 @@ define i32 @combine_gt_lt_n5() #0 { ; CHECK-NEXT: ldr x8, [x8, :got_lo12:a] ; CHECK-NEXT: ldr w8, [x8] ; CHECK-NEXT: cmn w8, #5 -; CHECK-NEXT: b.le .LBB4_3 +; CHECK-NEXT: b.le .LBB4_2 ; CHECK-NEXT: // %bb.1: // %land.lhs.true ; CHECK-NEXT: adrp x8, :got:b ; CHECK-NEXT: adrp x9, :got:c ; CHECK-NEXT: ldr x8, [x8, :got_lo12:b] ; CHECK-NEXT: ldr x9, [x9, :got_lo12:c] -; CHECK-NEXT: ldr w8, [x8] -; CHECK-NEXT: ldr w9, [x9] -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: 
b.ne .LBB4_6 -; CHECK-NEXT: // %bb.2: -; CHECK-NEXT: mov w0, #1 // =0x1 -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB4_3: // %lor.lhs.false -; CHECK-NEXT: b.ge .LBB4_6 -; CHECK-NEXT: // %bb.4: // %land.lhs.true3 +; CHECK-NEXT: b .LBB4_4 +; CHECK-NEXT: .LBB4_2: // %lor.lhs.false +; CHECK-NEXT: b.ge .LBB4_5 +; CHECK-NEXT: // %bb.3: // %land.lhs.true3 ; CHECK-NEXT: adrp x8, :got:b ; CHECK-NEXT: adrp x9, :got:d ; CHECK-NEXT: ldr x8, [x8, :got_lo12:b] ; CHECK-NEXT: ldr x9, [x9, :got_lo12:d] +; CHECK-NEXT: .LBB4_4: // %return ; CHECK-NEXT: ldr w8, [x8] ; CHECK-NEXT: ldr w9, [x9] ; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: b.ne .LBB4_6 -; CHECK-NEXT: // %bb.5: -; CHECK-NEXT: mov w0, #1 // =0x1 +; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB4_6: // %if.end +; CHECK-NEXT: .LBB4_5: ; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret entry: @@ -346,34 +323,27 @@ define i32 @combine_lt_gt_n5() #0 { ; CHECK-NEXT: ldr x8, [x8, :got_lo12:a] ; CHECK-NEXT: ldr w8, [x8] ; CHECK-NEXT: cmn w8, #5 -; CHECK-NEXT: b.ge .LBB5_3 +; CHECK-NEXT: b.ge .LBB5_2 ; CHECK-NEXT: // %bb.1: // %land.lhs.true ; CHECK-NEXT: adrp x8, :got:b ; CHECK-NEXT: adrp x9, :got:c ; CHECK-NEXT: ldr x8, [x8, :got_lo12:b] ; CHECK-NEXT: ldr x9, [x9, :got_lo12:c] -; CHECK-NEXT: ldr w8, [x8] -; CHECK-NEXT: ldr w9, [x9] -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: b.ne .LBB5_6 -; CHECK-NEXT: // %bb.2: -; CHECK-NEXT: mov w0, #1 // =0x1 -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB5_3: // %lor.lhs.false -; CHECK-NEXT: b.le .LBB5_6 -; CHECK-NEXT: // %bb.4: // %land.lhs.true3 +; CHECK-NEXT: b .LBB5_4 +; CHECK-NEXT: .LBB5_2: // %lor.lhs.false +; CHECK-NEXT: b.le .LBB5_5 +; CHECK-NEXT: // %bb.3: // %land.lhs.true3 ; CHECK-NEXT: adrp x8, :got:b ; CHECK-NEXT: adrp x9, :got:d ; CHECK-NEXT: ldr x8, [x8, :got_lo12:b] ; CHECK-NEXT: ldr x9, [x9, :got_lo12:d] +; CHECK-NEXT: .LBB5_4: // %return ; CHECK-NEXT: ldr w8, [x8] ; CHECK-NEXT: ldr w9, [x9] ; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: b.ne .LBB5_6 -; CHECK-NEXT: // %bb.5: -; CHECK-NEXT: mov w0, 
#1 // =0x1 +; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB5_6: // %if.end +; CHECK-NEXT: .LBB5_5: ; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret entry: @@ -499,24 +469,17 @@ define i32 @do_nothing_if_resultant_opcodes_would_differ() #0 { ; CHECK-NEXT: // %bb.3: // %while.cond.while.end_crit_edge ; CHECK-NEXT: ldr w8, [x19] ; CHECK-NEXT: .LBB7_4: // %while.end -; CHECK-NEXT: cmp w8, #1 -; CHECK-NEXT: b.gt .LBB7_7 -; CHECK-NEXT: // %bb.5: // %land.lhs.true -; CHECK-NEXT: adrp x8, :got:b -; CHECK-NEXT: adrp x9, :got:d -; CHECK-NEXT: ldr x8, [x8, :got_lo12:b] -; CHECK-NEXT: ldr x9, [x9, :got_lo12:d] -; CHECK-NEXT: ldr w8, [x8] -; CHECK-NEXT: ldr w9, [x9] -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: b.ne .LBB7_7 -; CHECK-NEXT: // %bb.6: -; CHECK-NEXT: mov w0, #123 // =0x7b -; CHECK-NEXT: b .LBB7_8 -; CHECK-NEXT: .LBB7_7: // %if.end -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: .LBB7_8: // %return +; CHECK-NEXT: adrp x9, :got:b +; CHECK-NEXT: adrp x10, :got:d +; CHECK-NEXT: ldr x9, [x9, :got_lo12:b] +; CHECK-NEXT: ldr x10, [x10, :got_lo12:d] ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr w9, [x9] +; CHECK-NEXT: ldr w10, [x10] +; CHECK-NEXT: cmp w9, w10 +; CHECK-NEXT: ccmp w8, #2, #0, eq +; CHECK-NEXT: mov w8, #123 // =0x7b +; CHECK-NEXT: csel w0, w8, wzr, lt ; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload ; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: .cfi_restore w19 @@ -564,52 +527,41 @@ return: ; preds = %if.end, %land.lhs.t define i32 @do_nothing_if_compares_can_not_be_adjusted_to_each_other() #0 { ; CHECK-LABEL: do_nothing_if_compares_can_not_be_adjusted_to_each_other: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: stp x30, x19, [sp, #-16]! 
// 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w19, -8 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: .cfi_remember_state ; CHECK-NEXT: adrp x8, :got:a ; CHECK-NEXT: ldr x8, [x8, :got_lo12:a] ; CHECK-NEXT: ldr w8, [x8] ; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: b.gt .LBB8_3 +; CHECK-NEXT: b.gt .LBB8_4 ; CHECK-NEXT: // %bb.1: // %while.body.preheader +; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: sub w19, w8, #1 ; CHECK-NEXT: .LBB8_2: // %while.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: bl do_something ; CHECK-NEXT: adds w19, w19, #1 ; CHECK-NEXT: b.mi .LBB8_2 -; CHECK-NEXT: .LBB8_3: // %while.end -; CHECK-NEXT: adrp x8, :got:c -; CHECK-NEXT: ldr x8, [x8, :got_lo12:c] -; CHECK-NEXT: ldr w8, [x8] -; CHECK-NEXT: cmn w8, #2 -; CHECK-NEXT: b.lt .LBB8_6 -; CHECK-NEXT: // %bb.4: // %land.lhs.true +; CHECK-NEXT: // %bb.3: +; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .LBB8_4: // %while.end ; CHECK-NEXT: adrp x8, :got:b ; CHECK-NEXT: adrp x9, :got:d +; CHECK-NEXT: adrp x10, :got:c ; CHECK-NEXT: ldr x8, [x8, :got_lo12:b] ; CHECK-NEXT: ldr x9, [x9, :got_lo12:d] +; CHECK-NEXT: ldr x10, [x10, :got_lo12:c] ; CHECK-NEXT: ldr w8, [x8] ; CHECK-NEXT: ldr w9, [x9] +; CHECK-NEXT: ldr w10, [x10] ; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: b.ne .LBB8_6 -; CHECK-NEXT: // %bb.5: -; CHECK-NEXT: mov w0, #123 // =0x7b -; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w19 -; CHECK-NEXT: .cfi_restore w30 -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB8_6: // %if.end -; CHECK-NEXT: .cfi_restore_state -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload 
-; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w19 -; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: mov w8, #123 // =0x7b +; CHECK-NEXT: ccmn w10, #3, #4, eq +; CHECK-NEXT: csel w0, w8, wzr, gt ; CHECK-NEXT: ret entry: %0 = load i32, ptr @a, align 4 @@ -782,12 +734,14 @@ define i32 @combine_gt_ge_sel(i64 %v, ptr %p) #0 { ; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: csel x9, x0, xzr, gt ; CHECK-NEXT: str x9, [x1] -; CHECK-NEXT: b.le .LBB11_2 +; CHECK-NEXT: b.le .LBB11_3 ; CHECK-NEXT: // %bb.1: // %lor.lhs.false ; CHECK-NEXT: cmp w8, #2 -; CHECK-NEXT: b.ge .LBB11_4 -; CHECK-NEXT: b .LBB11_6 -; CHECK-NEXT: .LBB11_2: // %land.lhs.true +; CHECK-NEXT: b.ge .LBB11_5 +; CHECK-NEXT: // %bb.2: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB11_3: // %land.lhs.true ; CHECK-NEXT: adrp x8, :got:b ; CHECK-NEXT: adrp x9, :got:c ; CHECK-NEXT: ldr x8, [x8, :got_lo12:b] @@ -795,11 +749,11 @@ define i32 @combine_gt_ge_sel(i64 %v, ptr %p) #0 { ; CHECK-NEXT: ldr w8, [x8] ; CHECK-NEXT: ldr w9, [x9] ; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: b.ne .LBB11_4 -; CHECK-NEXT: // %bb.3: +; CHECK-NEXT: b.ne .LBB11_5 +; CHECK-NEXT: // %bb.4: ; CHECK-NEXT: mov w0, #1 // =0x1 ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB11_4: // %land.lhs.true3 +; CHECK-NEXT: .LBB11_5: // %land.lhs.true3 ; CHECK-NEXT: adrp x8, :got:b ; CHECK-NEXT: adrp x9, :got:d ; CHECK-NEXT: ldr x8, [x8, :got_lo12:b] @@ -807,12 +761,7 @@ define i32 @combine_gt_ge_sel(i64 %v, ptr %p) #0 { ; CHECK-NEXT: ldr w8, [x8] ; CHECK-NEXT: ldr w9, [x9] ; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: b.ne .LBB11_6 -; CHECK-NEXT: // %bb.5: -; CHECK-NEXT: mov w0, #1 // =0x1 -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB11_6: // %if.end -; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret entry: %0 = load i32, ptr @a, align 4 diff --git a/llvm/test/CodeGen/AArch64/machine_cse.ll b/llvm/test/CodeGen/AArch64/machine_cse.ll index 6478f5a37f782..df82e11441cb6 100644 --- a/llvm/test/CodeGen/AArch64/machine_cse.ll +++ 
b/llvm/test/CodeGen/AArch64/machine_cse.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=aarch64-linux-gnuabi -O2 -tail-dup-placement=0 | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-linux-gnuabi -O2 -phi-node-folding-threshold=0 -tail-dup-placement=0 | FileCheck %s ; -tail-dup-placement causes tail duplication during layout. This breaks the ; assumptions of the test case as written (specifically, it creates an ; additional cmp instruction, creating a false positive), so we pass diff --git a/llvm/test/CodeGen/AArch64/wineh-catchret-label-generation.ll b/llvm/test/CodeGen/AArch64/wineh-catchret-label-generation.ll index 1f30865c98e19..b235dd9f56815 100644 --- a/llvm/test/CodeGen/AArch64/wineh-catchret-label-generation.ll +++ b/llvm/test/CodeGen/AArch64/wineh-catchret-label-generation.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple aarch64-unknown-windows-msvc %s -o - | FileCheck %s +; RUN: llc -mtriple aarch64-unknown-windows-msvc -phi-node-folding-threshold=0 %s -o - | FileCheck %s declare i32 @__CxxFrameHandler3(...) 
diff --git a/llvm/test/CodeGen/ARM/and-cmp0-sink.ll b/llvm/test/CodeGen/ARM/and-cmp0-sink.ll index fb9139c0d1285..8718d80220c30 100644 --- a/llvm/test/CodeGen/ARM/and-cmp0-sink.ll +++ b/llvm/test/CodeGen/ARM/and-cmp0-sink.ll @@ -192,42 +192,35 @@ define i32 @f0(i1 %c0, i32 %v, ptr %p) { ; V7M-NEXT: lsls r0, r0, #31 ; V7M-NEXT: beq .LBB1_2 ; V7M-NEXT: @ %bb.1: @ %A -; V7M-NEXT: tst.w r1, #16843009 -; V7M-NEXT: itt eq -; V7M-NEXT: moveq r0, #0 -; V7M-NEXT: bxeq lr -; V7M-NEXT: b .LBB1_3 +; V7M-NEXT: bics r0, r1, #-16843010 +; V7M-NEXT: it ne +; V7M-NEXT: movne r0, #1 +; V7M-NEXT: bx lr ; V7M-NEXT: .LBB1_2: @ %B ; V7M-NEXT: movs r0, #1 -; V7M-NEXT: tst.w r1, #16843009 ; V7M-NEXT: str r0, [r2] -; V7M-NEXT: itt ne -; V7M-NEXT: movne r0, #0 -; V7M-NEXT: bxne lr -; V7M-NEXT: .LBB1_3: @ %D -; V7M-NEXT: movs r0, #1 +; V7M-NEXT: bic r0, r1, #-16843010 +; V7M-NEXT: clz r0, r0 +; V7M-NEXT: lsrs r0, r0, #5 ; V7M-NEXT: bx lr ; ; V7A-LABEL: f0: ; V7A: @ %bb.0: @ %E -; V7A-NEXT: movw r3, #257 -; V7A-NEXT: tst r0, #1 -; V7A-NEXT: movt r3, #257 -; V7A-NEXT: and r1, r1, r3 -; V7A-NEXT: beq .LBB1_3 +; V7A-NEXT: mov r3, r0 +; V7A-NEXT: movw r0, #257 +; V7A-NEXT: movt r0, #257 +; V7A-NEXT: tst r3, #1 +; V7A-NEXT: and r0, r1, r0 +; V7A-NEXT: beq .LBB1_2 ; V7A-NEXT: @ %bb.1: @ %A -; V7A-NEXT: cmp r1, #0 -; V7A-NEXT: moveq r0, #0 -; V7A-NEXT: bxeq lr -; V7A-NEXT: .LBB1_2: @ %D -; V7A-NEXT: mov r0, #1 +; V7A-NEXT: cmp r0, #0 +; V7A-NEXT: movwne r0, #1 ; V7A-NEXT: bx lr -; V7A-NEXT: .LBB1_3: @ %B -; V7A-NEXT: mov r0, #1 -; V7A-NEXT: cmp r1, #0 -; V7A-NEXT: str r0, [r2] -; V7A-NEXT: mov r0, #0 -; V7A-NEXT: moveq r0, #1 +; V7A-NEXT: .LBB1_2: @ %B +; V7A-NEXT: clz r0, r0 +; V7A-NEXT: mov r1, #1 +; V7A-NEXT: str r1, [r2] +; V7A-NEXT: lsr r0, r0, #5 ; V7A-NEXT: bx lr ; ; V7A-T-LABEL: f0: @@ -235,20 +228,16 @@ define i32 @f0(i1 %c0, i32 %v, ptr %p) { ; V7A-T-NEXT: lsls r0, r0, #31 ; V7A-T-NEXT: beq .LBB1_2 ; V7A-T-NEXT: @ %bb.1: @ %A -; V7A-T-NEXT: tst.w r1, #16843009 -; V7A-T-NEXT: itt eq -; 
V7A-T-NEXT: moveq r0, #0 -; V7A-T-NEXT: bxeq lr -; V7A-T-NEXT: b .LBB1_3 +; V7A-T-NEXT: bics r0, r1, #-16843010 +; V7A-T-NEXT: it ne +; V7A-T-NEXT: movne r0, #1 +; V7A-T-NEXT: bx lr ; V7A-T-NEXT: .LBB1_2: @ %B ; V7A-T-NEXT: movs r0, #1 -; V7A-T-NEXT: tst.w r1, #16843009 ; V7A-T-NEXT: str r0, [r2] -; V7A-T-NEXT: itt ne -; V7A-T-NEXT: movne r0, #0 -; V7A-T-NEXT: bxne lr -; V7A-T-NEXT: .LBB1_3: @ %D -; V7A-T-NEXT: movs r0, #1 +; V7A-T-NEXT: bic r0, r1, #-16843010 +; V7A-T-NEXT: clz r0, r0 +; V7A-T-NEXT: lsrs r0, r0, #5 ; V7A-T-NEXT: bx lr ; ; V6M-LABEL: f0: @@ -309,39 +298,32 @@ define i32 @f1(i1 %c0, i32 %v, ptr %p) { ; V7M-NEXT: lsls r0, r0, #31 ; V7M-NEXT: beq .LBB2_2 ; V7M-NEXT: @ %bb.1: @ %A -; V7M-NEXT: tst.w r1, #100663296 -; V7M-NEXT: itt eq -; V7M-NEXT: moveq r0, #0 -; V7M-NEXT: bxeq lr -; V7M-NEXT: b .LBB2_3 +; V7M-NEXT: ands r0, r1, #100663296 +; V7M-NEXT: it ne +; V7M-NEXT: movne r0, #1 +; V7M-NEXT: bx lr ; V7M-NEXT: .LBB2_2: @ %B ; V7M-NEXT: movs r0, #1 -; V7M-NEXT: tst.w r1, #100663296 ; V7M-NEXT: str r0, [r2] -; V7M-NEXT: itt ne -; V7M-NEXT: movne r0, #0 -; V7M-NEXT: bxne lr -; V7M-NEXT: .LBB2_3: @ %D -; V7M-NEXT: movs r0, #1 +; V7M-NEXT: and r0, r1, #100663296 +; V7M-NEXT: clz r0, r0 +; V7M-NEXT: lsrs r0, r0, #5 ; V7M-NEXT: bx lr ; ; V7A-LABEL: f1: ; V7A: @ %bb.0: @ %E ; V7A-NEXT: tst r0, #1 -; V7A-NEXT: beq .LBB2_3 +; V7A-NEXT: beq .LBB2_2 ; V7A-NEXT: @ %bb.1: @ %A -; V7A-NEXT: tst r1, #100663296 -; V7A-NEXT: moveq r0, #0 -; V7A-NEXT: bxeq lr -; V7A-NEXT: .LBB2_2: @ %D -; V7A-NEXT: mov r0, #1 +; V7A-NEXT: ands r0, r1, #100663296 +; V7A-NEXT: movwne r0, #1 ; V7A-NEXT: bx lr -; V7A-NEXT: .LBB2_3: @ %B +; V7A-NEXT: .LBB2_2: @ %B ; V7A-NEXT: mov r0, #1 -; V7A-NEXT: tst r1, #100663296 ; V7A-NEXT: str r0, [r2] -; V7A-NEXT: mov r0, #0 -; V7A-NEXT: moveq r0, #1 +; V7A-NEXT: and r0, r1, #100663296 +; V7A-NEXT: clz r0, r0 +; V7A-NEXT: lsr r0, r0, #5 ; V7A-NEXT: bx lr ; ; V7A-T-LABEL: f1: @@ -349,20 +331,16 @@ define i32 @f1(i1 %c0, i32 %v, ptr %p) { ; 
V7A-T-NEXT: lsls r0, r0, #31 ; V7A-T-NEXT: beq .LBB2_2 ; V7A-T-NEXT: @ %bb.1: @ %A -; V7A-T-NEXT: tst.w r1, #100663296 -; V7A-T-NEXT: itt eq -; V7A-T-NEXT: moveq r0, #0 -; V7A-T-NEXT: bxeq lr -; V7A-T-NEXT: b .LBB2_3 +; V7A-T-NEXT: ands r0, r1, #100663296 +; V7A-T-NEXT: it ne +; V7A-T-NEXT: movne r0, #1 +; V7A-T-NEXT: bx lr ; V7A-T-NEXT: .LBB2_2: @ %B ; V7A-T-NEXT: movs r0, #1 -; V7A-T-NEXT: tst.w r1, #100663296 ; V7A-T-NEXT: str r0, [r2] -; V7A-T-NEXT: itt ne -; V7A-T-NEXT: movne r0, #0 -; V7A-T-NEXT: bxne lr -; V7A-T-NEXT: .LBB2_3: @ %D -; V7A-T-NEXT: movs r0, #1 +; V7A-T-NEXT: and r0, r1, #100663296 +; V7A-T-NEXT: clz r0, r0 +; V7A-T-NEXT: lsrs r0, r0, #5 ; V7A-T-NEXT: bx lr ; ; V6M-LABEL: f1: diff --git a/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll b/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll index d076cb00ad7e0..858a5b6d0039d 100644 --- a/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll +++ b/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -verify-machineinstrs -mattr=+mve %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -phi-node-folding-threshold=0 -verify-machineinstrs -mattr=+mve %s -o - | FileCheck %s @var_36 = hidden local_unnamed_addr global i8 0, align 1 @arr_61 = hidden local_unnamed_addr global [1 x i32] zeroinitializer, align 4 diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-expanded.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-expanded.ll index dd5ff12fda613..333480e0eb95f 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-expanded.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-expanded.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -O2 -mattr=avx < %s | opt -expand-reductions -mattr=avx -S | FileCheck %s +; RUN: opt -O2 
-phi-node-folding-threshold=0 -mattr=avx < %s | opt -expand-reductions -mattr=avx -S | FileCheck %s ; Test if SLP vector reduction patterns are recognized ; and optionally converted to reduction intrinsics and diff --git a/llvm/test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll b/llvm/test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll index aa4fca8da1470..648a52259429b 100644 --- a/llvm/test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll +++ b/llvm/test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll @@ -50,16 +50,15 @@ Exit: ; preds = %Succ define void @b() { ; CHECK-LABEL: @b( ; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[BB_NOMERGE:%.*]] -; CHECK: BB.nomerge: ; CHECK-NEXT: br label [[SUCC:%.*]] ; CHECK: Succ: -; CHECK-NEXT: [[B:%.*]] = phi i32 [ 1, [[BB_NOMERGE]] ], [ 2, [[COMMON:%.*]] ] +; CHECK-NEXT: [[B:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[SPEC_SELECT:%.*]], [[COMMON:%.*]] ] ; CHECK-NEXT: [[CONDE:%.*]] = call i1 @foo() ; CHECK-NEXT: br i1 [[CONDE]], label [[COMMON]], label [[EXIT:%.*]] ; CHECK: Common: ; CHECK-NEXT: [[COND:%.*]] = call i1 @foo() -; CHECK-NEXT: br i1 [[COND]], label [[BB_NOMERGE]], label [[SUCC]] +; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[COND]], i32 1, i32 2 +; CHECK-NEXT: br label [[SUCC]] ; CHECK: Exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SimplifyCFG/EqualPHIEdgeBlockMerge.ll b/llvm/test/Transforms/SimplifyCFG/EqualPHIEdgeBlockMerge.ll index 6831102955a72..1c97ee7c5a330 100644 --- a/llvm/test/Transforms/SimplifyCFG/EqualPHIEdgeBlockMerge.ll +++ b/llvm/test/Transforms/SimplifyCFG/EqualPHIEdgeBlockMerge.ll @@ -224,16 +224,15 @@ Exit: ; preds = %Succ define void @b() { ; CHECK-LABEL: @b( ; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[BB_NOMERGE:%.*]] -; CHECK: BB.nomerge: ; CHECK-NEXT: br label [[SUCC:%.*]] ; CHECK: Succ: -; CHECK-NEXT: [[B:%.*]] = phi i32 [ 1, [[BB_NOMERGE]] ], [ 2, [[COMMON:%.*]] ] +; CHECK-NEXT: [[B:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[SPEC_SELECT:%.*]], [[COMMON:%.*]] ] 
; CHECK-NEXT: [[CONDE:%.*]] = call i1 @foo() ; CHECK-NEXT: br i1 [[CONDE]], label [[COMMON]], label [[EXIT:%.*]] ; CHECK: Common: ; CHECK-NEXT: [[COND:%.*]] = call i1 @foo() -; CHECK-NEXT: br i1 [[COND]], label [[BB_NOMERGE]], label [[SUCC]] +; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[COND]], i32 1, i32 2 +; CHECK-NEXT: br label [[SUCC]] ; CHECK: Exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SimplifyCFG/speculate-blocks.ll b/llvm/test/Transforms/SimplifyCFG/speculate-blocks.ll index 07eac90186ef6..b835c115a6c90 100644 --- a/llvm/test/Transforms/SimplifyCFG/speculate-blocks.ll +++ b/llvm/test/Transforms/SimplifyCFG/speculate-blocks.ll @@ -78,3 +78,244 @@ final_right: declare void @sideeffect0() declare void @sideeffect1() + +define i1 @speculate_empty_bb(i32 %x, i32 %y) { +; YES-LABEL: define i1 @speculate_empty_bb +; YES-SAME: (i32 [[X:%.*]], i32 [[Y:%.*]]) { +; YES-NEXT: start: +; YES-NEXT: [[CMP1:%.*]] = icmp eq i32 [[X]], 0 +; YES-NEXT: br i1 [[CMP1]], label [[BB6:%.*]], label [[BB5:%.*]] +; YES: bb6: +; YES-NEXT: [[CMP2:%.*]] = icmp eq i32 [[Y]], 0 +; YES-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP2]], i1 true, i1 false +; YES-NEXT: br label [[BB3:%.*]] +; YES: bb5: +; YES-NEXT: [[CMP3:%.*]] = icmp ult i32 [[X]], [[Y]] +; YES-NEXT: [[SPEC_SELECT1:%.*]] = select i1 [[CMP3]], i1 false, i1 true +; YES-NEXT: br label [[BB3]] +; YES: bb3: +; YES-NEXT: [[RET:%.*]] = phi i1 [ [[SPEC_SELECT]], [[BB6]] ], [ [[SPEC_SELECT1]], [[BB5]] ] +; YES-NEXT: ret i1 [[RET]] +; +; NO-LABEL: define i1 @speculate_empty_bb +; NO-SAME: (i32 [[X:%.*]], i32 [[Y:%.*]]) { +; NO-NEXT: start: +; NO-NEXT: [[CMP1:%.*]] = icmp eq i32 [[X]], 0 +; NO-NEXT: br i1 [[CMP1]], label [[BB6:%.*]], label [[BB5:%.*]] +; NO: bb6: +; NO-NEXT: [[CMP2:%.*]] = icmp eq i32 [[Y]], 0 +; NO-NEXT: br i1 [[CMP2]], label [[BB2:%.*]], label [[BB3:%.*]] +; NO: bb5: +; NO-NEXT: [[CMP3:%.*]] = icmp ult i32 [[X]], [[Y]] +; NO-NEXT: br i1 [[CMP3]], label [[BB3]], label [[BB2]] +; NO: bb2: +; NO-NEXT: br 
label [[BB3]] +; NO: bb3: +; NO-NEXT: [[RET:%.*]] = phi i1 [ true, [[BB2]] ], [ false, [[BB6]] ], [ false, [[BB5]] ] +; NO-NEXT: ret i1 [[RET]] +; +start: + %cmp1 = icmp eq i32 %x, 0 + br i1 %cmp1, label %bb6, label %bb5 + +bb6: + %cmp2 = icmp eq i32 %y, 0 + br i1 %cmp2, label %bb2, label %bb3 + +bb5: + %cmp3 = icmp ult i32 %x, %y + br i1 %cmp3, label %bb3, label %bb2 + +bb2: + br label %bb3 + +bb3: + %ret = phi i1 [ true, %bb2 ], [ false, %bb6 ], [ false, %bb5 ] + ret i1 %ret +} + +define i32 @speculate_empty_bb_not_simplifiable(i32 %x, i32 %y) { +; YES-LABEL: define i32 @speculate_empty_bb_not_simplifiable +; YES-SAME: (i32 [[X:%.*]], i32 [[Y:%.*]]) { +; YES-NEXT: start: +; YES-NEXT: [[CMP1:%.*]] = icmp eq i32 [[X]], 0 +; YES-NEXT: [[CMP2:%.*]] = icmp eq i32 [[Y]], 0 +; YES-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP2]], i32 10, i32 20 +; YES-NEXT: [[CMP3:%.*]] = icmp ult i32 [[X]], [[Y]] +; YES-NEXT: [[SPEC_SELECT1:%.*]] = select i1 [[CMP3]], i32 30, i32 10 +; YES-NEXT: [[RET:%.*]] = select i1 [[CMP1]], i32 [[SPEC_SELECT]], i32 [[SPEC_SELECT1]] +; YES-NEXT: ret i32 [[RET]] +; +; NO-LABEL: define i32 @speculate_empty_bb_not_simplifiable +; NO-SAME: (i32 [[X:%.*]], i32 [[Y:%.*]]) { +; NO-NEXT: start: +; NO-NEXT: [[CMP1:%.*]] = icmp eq i32 [[X]], 0 +; NO-NEXT: br i1 [[CMP1]], label [[BB6:%.*]], label [[BB5:%.*]] +; NO: bb6: +; NO-NEXT: [[CMP2:%.*]] = icmp eq i32 [[Y]], 0 +; NO-NEXT: br i1 [[CMP2]], label [[BB2:%.*]], label [[BB3:%.*]] +; NO: bb5: +; NO-NEXT: [[CMP3:%.*]] = icmp ult i32 [[X]], [[Y]] +; NO-NEXT: br i1 [[CMP3]], label [[BB3]], label [[BB2]] +; NO: bb2: +; NO-NEXT: br label [[BB3]] +; NO: bb3: +; NO-NEXT: [[RET:%.*]] = phi i32 [ 10, [[BB2]] ], [ 20, [[BB6]] ], [ 30, [[BB5]] ] +; NO-NEXT: ret i32 [[RET]] +; +start: + %cmp1 = icmp eq i32 %x, 0 + br i1 %cmp1, label %bb6, label %bb5 + +bb6: + %cmp2 = icmp eq i32 %y, 0 + br i1 %cmp2, label %bb2, label %bb3 + +bb5: + %cmp3 = icmp ult i32 %x, %y + br i1 %cmp3, label %bb3, label %bb2 + +bb2: + br label %bb3 + 
+bb3: + %ret = phi i32 [ 10, %bb2 ], [ 20, %bb6 ], [ 30, %bb5 ] + ret i32 %ret +} + +; Negative test: bb2 holds a phi and an icmp ahead of its branch, so it is not +; an empty block. The YES and NO check lines both match the input CFG +; unchanged, i.e. no select is formed and no edge is removed. +define i1 @speculate_nonempty_bb(i32 %x, i32 %y) { +; YES-LABEL: define i1 @speculate_nonempty_bb +; YES-SAME: (i32 [[X:%.*]], i32 [[Y:%.*]]) { +; YES-NEXT: start: +; YES-NEXT: [[CMP1:%.*]] = icmp eq i32 [[X]], 0 +; YES-NEXT: br i1 [[CMP1]], label [[BB6:%.*]], label [[BB5:%.*]] +; YES: bb6: +; YES-NEXT: [[CMP2:%.*]] = icmp eq i32 [[Y]], 0 +; YES-NEXT: br i1 [[CMP2]], label [[BB2:%.*]], label [[BB3:%.*]] +; YES: bb5: +; YES-NEXT: [[CMP3:%.*]] = icmp ult i32 [[X]], [[Y]] +; YES-NEXT: br i1 [[CMP3]], label [[BB3]], label [[BB2]] +; YES: bb2: +; YES-NEXT: [[PHI:%.*]] = phi i32 [ [[X]], [[BB6]] ], [ [[Y]], [[BB5]] ] +; YES-NEXT: [[CMP4:%.*]] = icmp eq i32 [[PHI]], 0 +; YES-NEXT: br label [[BB3]] +; YES: bb3: +; YES-NEXT: [[RET:%.*]] = phi i1 [ [[CMP4]], [[BB2]] ], [ false, [[BB6]] ], [ false, [[BB5]] ] +; YES-NEXT: ret i1 [[RET]] +; +; NO-LABEL: define i1 @speculate_nonempty_bb +; NO-SAME: (i32 [[X:%.*]], i32 [[Y:%.*]]) { +; NO-NEXT: start: +; NO-NEXT: [[CMP1:%.*]] = icmp eq i32 [[X]], 0 +; NO-NEXT: br i1 [[CMP1]], label [[BB6:%.*]], label [[BB5:%.*]] +; NO: bb6: +; NO-NEXT: [[CMP2:%.*]] = icmp eq i32 [[Y]], 0 +; NO-NEXT: br i1 [[CMP2]], label [[BB2:%.*]], label [[BB3:%.*]] +; NO: bb5: +; NO-NEXT: [[CMP3:%.*]] = icmp ult i32 [[X]], [[Y]] +; NO-NEXT: br i1 [[CMP3]], label [[BB3]], label [[BB2]] +; NO: bb2: +; NO-NEXT: [[PHI:%.*]] = phi i32 [ [[X]], [[BB6]] ], [ [[Y]], [[BB5]] ] +; NO-NEXT: [[CMP4:%.*]] = icmp eq i32 [[PHI]], 0 +; NO-NEXT: br label [[BB3]] +; NO: bb3: +; NO-NEXT: [[RET:%.*]] = phi i1 [ [[CMP4]], [[BB2]] ], [ false, [[BB6]] ], [ false, [[BB5]] ] +; NO-NEXT: ret i1 [[RET]] +; +start: + %cmp1 = icmp eq i32 %x, 0 + br i1 %cmp1, label %bb6, label %bb5 + +bb6: + %cmp2 = icmp eq i32 %y, 0 + br i1 %cmp2, label %bb2, label %bb3 + +bb5: + %cmp3 = icmp ult i32 %x, %y + br i1 %cmp3, label %bb3, label %bb2 + +bb2: + %phi = phi i32 [ %x, %bb6 ], [ %y, %bb5 ] + %cmp4 = icmp eq i32 %phi, 
0 + br label %bb3 + +bb3: + %ret = phi i1 [ %cmp4, %bb2 ], [ false, %bb6 ], [ false, %bb5 ] + ret i1 %ret +} + +; Negative test: bb2 is empty, but bb3 has four phis, and folding either +; incoming edge would need one select per phi whose bb2 value differs from that +; predecessor's value (four for bb6, three for bb5). The YES and NO check lines +; both match the input CFG unchanged. +; NOTE(review): presumably the select cost exceeds the speculation budget - +; confirm against the RUN lines, which are outside this chunk. +define i1 @speculate_empty_bb_too_many_select(i32 %x, i32 %y) { +; YES-LABEL: define i1 @speculate_empty_bb_too_many_select +; YES-SAME: (i32 [[X:%.*]], i32 [[Y:%.*]]) { +; YES-NEXT: start: +; YES-NEXT: [[CMP1:%.*]] = icmp eq i32 [[X]], 0 +; YES-NEXT: br i1 [[CMP1]], label [[BB6:%.*]], label [[BB5:%.*]] +; YES: bb6: +; YES-NEXT: [[CMP2:%.*]] = icmp eq i32 [[Y]], 0 +; YES-NEXT: br i1 [[CMP2]], label [[BB2:%.*]], label [[BB3:%.*]] +; YES: bb5: +; YES-NEXT: [[CMP3:%.*]] = icmp ult i32 [[X]], [[Y]] +; YES-NEXT: br i1 [[CMP3]], label [[BB3]], label [[BB2]] +; YES: bb2: +; YES-NEXT: br label [[BB3]] +; YES: bb3: +; YES-NEXT: [[RET:%.*]] = phi i1 [ true, [[BB2]] ], [ false, [[BB6]] ], [ false, [[BB5]] ] +; YES-NEXT: [[RET2:%.*]] = phi i32 [ [[X]], [[BB2]] ], [ [[Y]], [[BB6]] ], [ [[X]], [[BB5]] ] +; YES-NEXT: [[RET3:%.*]] = phi i32 [ [[Y]], [[BB2]] ], [ [[X]], [[BB6]] ], [ [[X]], [[BB5]] ] +; YES-NEXT: [[RET4:%.*]] = phi i32 [ 0, [[BB2]] ], [ 3, [[BB6]] ], [ 5, [[BB5]] ] +; YES-NEXT: [[ADD:%.*]] = add i32 [[RET2]], [[RET3]] +; YES-NEXT: [[ADD2:%.*]] = add i32 [[ADD]], [[RET4]] +; YES-NEXT: [[CMP4:%.*]] = icmp eq i32 [[ADD2]], 0 +; YES-NEXT: [[AND:%.*]] = and i1 [[RET]], [[CMP4]] +; YES-NEXT: ret i1 [[AND]] +; +; NO-LABEL: define i1 @speculate_empty_bb_too_many_select +; NO-SAME: (i32 [[X:%.*]], i32 [[Y:%.*]]) { +; NO-NEXT: start: +; NO-NEXT: [[CMP1:%.*]] = icmp eq i32 [[X]], 0 +; NO-NEXT: br i1 [[CMP1]], label [[BB6:%.*]], label [[BB5:%.*]] +; NO: bb6: +; NO-NEXT: [[CMP2:%.*]] = icmp eq i32 [[Y]], 0 +; NO-NEXT: br i1 [[CMP2]], label [[BB2:%.*]], label [[BB3:%.*]] +; NO: bb5: +; NO-NEXT: [[CMP3:%.*]] = icmp ult i32 [[X]], [[Y]] +; NO-NEXT: br i1 [[CMP3]], label [[BB3]], label [[BB2]] +; NO: bb2: +; NO-NEXT: br label [[BB3]] +; NO: bb3: +; NO-NEXT: [[RET:%.*]] = phi i1 [ true, [[BB2]] ], [ false, [[BB6]] ], [ false, [[BB5]] ] +; NO-NEXT: [[RET2:%.*]] = phi i32 [ 
[[X]], [[BB2]] ], [ [[Y]], [[BB6]] ], [ [[X]], [[BB5]] ] +; NO-NEXT: [[RET3:%.*]] = phi i32 [ [[Y]], [[BB2]] ], [ [[X]], [[BB6]] ], [ [[X]], [[BB5]] ] +; NO-NEXT: [[RET4:%.*]] = phi i32 [ 0, [[BB2]] ], [ 3, [[BB6]] ], [ 5, [[BB5]] ] +; NO-NEXT: [[ADD:%.*]] = add i32 [[RET2]], [[RET3]] +; NO-NEXT: [[ADD2:%.*]] = add i32 [[ADD]], [[RET4]] +; NO-NEXT: [[CMP4:%.*]] = icmp eq i32 [[ADD2]], 0 +; NO-NEXT: [[AND:%.*]] = and i1 [[RET]], [[CMP4]] +; NO-NEXT: ret i1 [[AND]] +; +start: + %cmp1 = icmp eq i32 %x, 0 + br i1 %cmp1, label %bb6, label %bb5 + +bb6: + %cmp2 = icmp eq i32 %y, 0 + br i1 %cmp2, label %bb2, label %bb3 + +bb5: + %cmp3 = icmp ult i32 %x, %y + br i1 %cmp3, label %bb3, label %bb2 + +bb2: + br label %bb3 + +bb3: + %ret = phi i1 [ true, %bb2 ], [ false, %bb6 ], [ false, %bb5 ] + %ret2 = phi i32 [ %x, %bb2 ], [ %y, %bb6 ], [ %x, %bb5 ] + %ret3 = phi i32 [ %y, %bb2 ], [ %x, %bb6 ], [ %x, %bb5 ] + %ret4 = phi i32 [ 0, %bb2 ], [ 3, %bb6 ], [ 5, %bb5 ] + %add = add i32 %ret2, %ret3 + %add2 = add i32 %add, %ret4 + %cmp4 = icmp eq i32 %add2, 0 + %and = and i1 %ret, %cmp4 + ret i1 %and +}