diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index f6a0f5880cd5c..5740285675eba 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1826,11 +1826,8 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) { // If the only use of phi is comparing it with a constant then we can // put this comparison in the incoming BB directly after a ucmp/scmp call // because we know that it will simplify to a single icmp. - // NOTE: the single-use check here is not only to ensure that the - // optimization is profitable, but also to avoid creating a potentially - // invalid phi node when we have a multi-edge in the CFG. const APInt *Ignored; - if (isa(InVal) && InVal->hasOneUse() && + if (isa(InVal) && InVal->hasOneUser() && match(&I, m_ICmp(m_Specific(PN), m_APInt(Ignored)))) { OpsToMoveUseToIncomingBB.push_back(i); NewPhiValues.push_back(nullptr); @@ -1868,18 +1865,24 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) { // Clone the instruction that uses the phi node and move it into the incoming // BB because we know that the next iteration of InstCombine will simplify it. + SmallDenseMap Clones; for (auto OpIndex : OpsToMoveUseToIncomingBB) { Value *Op = PN->getIncomingValue(OpIndex); BasicBlock *OpBB = PN->getIncomingBlock(OpIndex); - Instruction *Clone = I.clone(); - for (Use &U : Clone->operands()) { - if (U == PN) - U = Op; - else - U = U->DoPHITranslation(PN->getParent(), OpBB); + Instruction *Clone = Clones.lookup(OpBB); + if (!Clone) { + Clone = I.clone(); + for (Use &U : Clone->operands()) { + if (U == PN) + U = Op; + else + U = U->DoPHITranslation(PN->getParent(), OpBB); + } + Clone = InsertNewInstBefore(Clone, OpBB->getTerminator()->getIterator()); + Clones.insert({OpBB, Clone}); } - Clone = InsertNewInstBefore(Clone, OpBB->getTerminator()->getIterator()); + NewPhiValues[OpIndex] = Clone; } diff --git a/llvm/test/Transforms/InstCombine/phi-with-multiple-unsimplifiable-values.ll b/llvm/test/Transforms/InstCombine/phi-with-multiple-unsimplifiable-values.ll index 2b75d5c547511..cd40aa92ed4fd 100644 --- a/llvm/test/Transforms/InstCombine/phi-with-multiple-unsimplifiable-values.ll +++ b/llvm/test/Transforms/InstCombine/phi-with-multiple-unsimplifiable-values.ll @@ -133,3 +133,35 @@ exit: %r = icmp slt i8 %phi, 0 ret i1 %r } + +; Same as the first transformation, but the phi node uses the result of scmp twice. This verifies that we don't clone values more than once per block +define i1 @icmp_of_phi_of_scmp_with_constant_one_user_two_uses(i8 %c, i16 %x, i16 %y, i8 %false_val) { +; CHECK-LABEL: define i1 @icmp_of_phi_of_scmp_with_constant_one_user_two_uses( +; CHECK-SAME: i8 [[C:%.*]], i16 [[X:%.*]], i16 [[Y:%.*]], i8 [[FALSE_VAL:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i16 [[X]], [[Y]] +; CHECK-NEXT: switch i8 [[C]], label %[[BB_2:.*]] [ +; CHECK-NEXT: i8 0, label %[[BB:.*]] +; CHECK-NEXT: i8 1, label %[[BB]] +; CHECK-NEXT: ] +; CHECK: [[BB_2]]: +; CHECK-NEXT: br label %[[BB]] +; CHECK: [[BB]]: +; CHECK-NEXT: [[R:%.*]] = phi i1 [ [[TMP0]], %[[ENTRY]] ], [ [[TMP0]], %[[ENTRY]] ], [ false, %[[BB_2]] ] +; CHECK-NEXT: ret i1 [[R]] +; +entry: + %cmp = call i8 @llvm.scmp(i16 %x, i16 %y) + switch i8 %c, label %bb_2 [ + i8 0, label %bb + i8 1, label %bb + ] + +bb_2: + br label %bb + +bb: + %phi = phi i8 [ %cmp, %entry ], [ %cmp, %entry ], [ 0, %bb_2 ] + %r = icmp slt i8 %phi, 0 + ret i1 %r +}